You've got a host of challenges here, but I think they're all doable.
I think you've already solved the first which is to know which regions a given data point is inside of. Either STContains()
or STIntersects()
.
The second is that you're essentially looking for clusters of time-based contiguity. Assuming that you have reliable data collection, this is also solvable. Once you've got a set of (Person, Region, Timestamp) tuples (from above), it's a Gaps and Islands problem. A toy solution is presented below:
IF OBJECT_ID('tempdb.dbo.#observations') IS NOT NULL
DROP TABLE #observations;
IF OBJECT_ID('tempdb.dbo.#regions') IS NOT NULL
DROP TABLE #regions;
CREATE TABLE #observations (
ObservationID INT NOT NULL IDENTITY,
CONSTRAINT PK_Observations PRIMARY KEY CLUSTERED (ObservationID),
PersonID INT NOT null,
Point GEOMETRY NOT null,
TS DATETIME2(0) NOT NULL CONSTRAINT DF_Observations_TS DEFAULT SYSUTCDATETIME()
);
CREATE TABLE #regions (
RegionID INT NOT NULL IDENTITY,
CONSTRAINT PK_Regions PRIMARY KEY CLUSTERED (RegionID),
Area GEOMETRY NOT NULL
);
INSERT INTO #regions
(
Area
)
VALUES
( geometry::STGeomFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))', 0) ),
( geometry::STGeomFromText('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))', 0) ),
( geometry::STGeomFromText('POLYGON((0 0, 4 0, 4 4, 0 4, 0 0))', 0) );
INSERT INTO #observations
(
PersonID ,
Point ,
TS
)
VALUES
( 1 ,
geometry::Point(0.5, 0.5, 0) ,
'2018-01-01 00:00:00'
),
( 1 ,
geometry::Point(1.5, 1.5, 0) ,
'2018-01-01 00:00:05'
),
( 1 ,
geometry::Point(2.5, 2.5, 0) ,
'2018-01-01 00:00:10'
),
( 1 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:15'
),
( 1 ,
geometry::Point(4.5, 4.5, 0) ,
'2018-01-01 00:00:20'
),
( 1 ,
geometry::Point(0.5, 0.5, 0) ,
'2018-01-01 01:00:00'
),
( 1 ,
geometry::Point(1.5, 1.5, 0) ,
'2018-01-01 01:00:05'
),
( 1 ,
geometry::Point(2.5, 2.5, 0) ,
'2018-01-01 01:00:10'
),
( 1 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 01:00:15'
),
( 1 ,
geometry::Point(4.5, 4.5, 0) ,
'2018-01-01 01:00:20'
),
( 2 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:00'
),
( 2 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:05'
),
( 2 ,
geometry::Point(3.5, 3.5, 0) ,
'2018-01-01 00:00:10'
),
( 2 ,
geometry::Point(3.6, 3.6, 0) ,
'2018-01-01 00:00:15'
),
( 2 ,
geometry::Point(4.5, 4.5, 0) ,
'2018-01-01 00:00:20'
);
WITH cte AS (
SELECT o.ObservationID,
o.PersonID ,
o.TS ,
r.RegionID,
(DATEDIFF(SECOND, '2017-01-01', o.ts)/5) - ROW_NUMBER() OVER (PARTITION BY o.PersonID, r.RegionID ORDER BY o.ts) AS gid,
DATEDIFF(SECOND, '2017-01-01', o.ts)/5 AS diff,
ROW_NUMBER() OVER (PARTITION BY o.PersonID, r.RegionID ORDER BY o.ts) AS rn
FROM #observations AS o
JOIN #regions AS r
ON o.Point.STIntersects(r.Area) = 1
--JOIN #timestamps AS ts
-- ON ts.TS = o.TS
)
SELECT cte.PersonID, cte.RegionID, MIN(ts), MAX(ts)
FROM cte
GROUP BY cte.PersonID ,
cte.RegionID,
cte.gid;
The trick (if there is one) is realizing that row_number()
increments by 1 for each member in an Island and that (number of seconds)/5 should also increment by 1 for the same criteria. So the difference of them should be constant for rows that qualify as equivalent in the same Island. That gives us a convenient value to group by.