I want to execute a SQL script on a schedule in Snowflake. There are several CTEs and a JavaScript UDF in my query.
My idea is to
- Table the query of the result.
- Create one stored procedure
- Create a task to execute the procedure
How can I create one stored procedure for the entire query and update the result set?
Example data:
WITH dataset AS (
select $1 id, $2 status, $3 created_at, $4 completed_at
from values
('A','created','2021-07-15 10:30:00'::timestamp, NULL), ('A','missing_info','2021-07-15 11:10:00'::timestamp,NULL),
('A','pending','2021-07-15 12:05:00'::timestamp, NULL), ('A','successful','2021-07-15 16:05:00'::timestamp,'2021-07-15 17:05:00'::timestamp),
('B','created','2021-07-16 11:30:00'::timestamp, NULL), ('B',NULL,'2021-07-16 11:30:00'::timestamp, NULL),
('B','successful','2021-07-16 12:30:00'::timestamp, '2021-07-16 16:30:00'::timestamp) )
UDF to calculate timediff:
create or replace function tsrange_intersection(s string, e string)
RETURNS double
LANGUAGE JAVASCRIPT
AS
$$
let minutes = 0
start = new Date(S)
end = new Date(E)
let t = start
while(t < end) {
if ([1, 2, 3, 4, 5].includes(t.getDay())
&& [9, 10, 11, 12, 13, 14, 15, 16].includes(t.getHours())) {
minutes += 1
}
t = new Date(t.getTime() + 60*1000);
}
return minutes
$$;
The query:
-- simple filtering
cte1 AS (
SELECT *
FROM dataset
WHERE id IS NOT NULL AND status IS NOT NULL
ORDER BY created_at ),
-- retrieve only the id's which started from 'created'
cte2 AS (
SELECT *
FROM cte1
QUALIFY FIRST_VALUE(status) OVER (PARTITION BY id ORDER BY created_at) = 'created' )
-- pattern match
SELECT *
FROM cte2
MATCH_RECOGNIZE (
PARTITION BY id
ORDER BY created_at
MEASURES MATCH_NUMBER() AS mn,
MATCH_SEQUENCE_NUMBER AS msn
ALL ROWS PER MATCH
PATTERN (c+i+|p+i+|ps+)
DEFINE
c AS status='created',
i AS status='missing_info',
p AS status='pending',
s AS status='successful'
) mr
QUALIFY (ROW_NUMBER() OVER (PARTITION BY mn, id ORDER BY msn) = 1)
OR(ROW_NUMBER() OVER (PARTITION BY mn, id ORDER BY msn DESC) =1)
ORDER BY id, created_at;
-- retrieve the result set above
WITH cte3 AS (
SELECT *
FROM TABLE(RESULT_SCAN(LAST_QUERY_ID()))
),
-- start time of each status
cte4 AS (
SELECT
*,
CASE WHEN status = 'successful' THEN IFNULL(completed_at, created_at) ELSE created_at END AS start_timestamp
FROM cte3 ),
-- end time of each status
cte5 AS (
SELECT
*,
CASE WHEN status = 'successful' THEN COMPLETED_AT
ELSE IFNULL(LAG(start_timestamp, -1) OVER (PARTITION BY id ORDER BY start_timestamp), completed_at) END AS end_timestamp
FROM cte4 )
-- final query
SELECT
id, status, start_timestamp, end_timestamp,
tsrange_intersection(start_timestamp, end_timestamp) AS time_diff
FROM cte5
ORDER BY start_timestamp