This is following on from this question which gets me almost there, but I'd like to only return the rows that have the fullest amount of data.
Extending the solution example from the question linked above:
#standardSQL
WITH `project.dataset.your_table` AS (
SELECT 1 id, 'first_data' col_1, NULL col_2, '4/22' updated UNION ALL
SELECT 1, NULL, 'old', '4/23' UNION ALL
SELECT 1, NULL, 'correct', '4/24' UNION ALL
SELECT 1, 'next_data', NULL, '4/25' UNION ALL
SELECT 1, NULL, NULL, '4/26'
SELECT 2, NULL, 'old', '4/23' UNION ALL
SELECT 2, 'this_one', NULL, NULL, UNION ALL
)
SELECT id,
IFNULL(col_1, FIRST_VALUE(col_1 IGNORE NULLS) OVER(win)) col_1,
IFNULL(col_2, FIRST_VALUE(col_2 IGNORE NULLS) OVER(win)) col_2,
updated
FROM `project.dataset.your_table`
WINDOW win AS (PARTITION BY id ORDER BY updated DESC
ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
ORDER BY id, updated
I would like the result to look like:
id col_1 col_2 updated
1 next_data correct 4/26
2 this_one old 4/23
I tried this by using ROW_NUMBER()
and it works, but it was suggested it is not very memory efficient:
SELECT * EXCEPT(n) FROM (
WITH `project.dataset.your_table` AS (
SELECT 1 id, 'first_data' col_1, NULL col_2, '4/22' updated UNION ALL
SELECT 1, NULL, 'old', '4/23' UNION ALL
SELECT 1, NULL, 'correct', '4/24' UNION ALL
SELECT 1, 'next_data', NULL, '4/25' UNION ALL
SELECT 1, NULL, NULL, '4/26' UNION ALL
SELECT 2, NULL, 'old', '4/23' UNION ALL
SELECT 2, 'this_one', NULL, NULL
)
SELECT id,
IFNULL(col_1, FIRST_VALUE(col_1 IGNORE NULLS) OVER(win)) col_1,
IFNULL(col_2, FIRST_VALUE(col_2 IGNORE NULLS) OVER(win)) col_2,
updated,
ROW_NUMBER() OVER (PARTITION BY id) as n
FROM `project.dataset.your_table`
WINDOW win AS (PARTITION BY id ORDER BY updated DESC
ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
ORDER BY id, updated)
WHERE n = 1
which gives:
id col_1 col_2 updated
1 next_data correct 4/26
2 this_one old 4/23