so the four sequences, they can be minimal matched with:
WITH data AS (
SELECT * FROM VALUES
(1,'e4',1),(1,'e2',2),(1,'e2',3),(1,'e3',4),(1,'e10',5),(1,'e4',6),
(2,'e4',1),(2,'e2',2),(2,'e4',3),
(3,'e4',1),(3,'e4',2),(3,'e2',3),(3,'e3',4),
(4,'e2',1),(4,'10',2),(4,'e2',3),(4,'e5',4),(4,'e4',5)
)
SELECT * FROM data
match_recognize(
partition by column1
order by column3
measures
match_number() as "MATCH_NUMBER",
match_sequence_number() as msq,
classifier() as cl
all rows per match with unmatched rows
PATTERN (d1 d2* d3)
DEFINE d1 as column2 = 'e2',
d2 as column2 NOT IN ('e2','e4'),
d3 as column2 = 'e4'
)
ORDER BY 1,3;
giving:
COLUMN1 |
COLUMN2 |
COLUMN3 |
MATCH_NUMBER |
MSQ |
CL |
1 |
e4 |
1 |
|
|
|
1 |
e2 |
2 |
|
|
|
1 |
e2 |
3 |
1 |
1 |
D1 |
1 |
e3 |
4 |
1 |
2 |
D2 |
1 |
e10 |
5 |
1 |
3 |
D2 |
1 |
e4 |
6 |
1 |
4 |
D3 |
2 |
e4 |
1 |
|
|
|
2 |
e2 |
2 |
1 |
1 |
D1 |
2 |
e4 |
3 |
1 |
2 |
D3 |
3 |
e4 |
1 |
|
|
|
3 |
e4 |
2 |
|
|
|
3 |
e2 |
3 |
|
|
|
3 |
e3 |
4 |
|
|
|
4 |
e2 |
1 |
|
|
|
4 |
10 |
2 |
|
|
|
4 |
e2 |
3 |
1 |
1 |
D1 |
4 |
e5 |
4 |
1 |
2 |
D2 |
4 |
e4 |
5 |
1 |
3 |
D3 |
But given you said you want "matches", then maybe you just want the range details, thus:
SELECT * FROM data
match_recognize(
partition by column1
order by column3
measures
first_value(column1) as batch,
first_value(column3) as seq_start,
last_value(column3) as seq_end,
match_number() as "MATCH_NUMBER",
match_sequence_number() as msq,
classifier() as cl
one row per match
PATTERN (d1 d2* d3)
DEFINE d1 as column2 = 'e2',
d2 as column2 NOT IN ('e2','e4'),
d3 as column2 = 'e4'
)
ORDER BY 1,3;
might be what you are after:
COLUMN1 |
BATCH |
SEQ_START |
SEQ_END |
MATCH_NUMBER |
MSQ |
CL |
1 |
1 |
3 |
6 |
1 |
4 |
D3 |
2 |
2 |
2 |
3 |
1 |
2 |
D3 |
4 |
4 |
3 |
5 |
1 |
3 |
D3 |