I have 2 ideas to implement the report.
The first way is casting all values to VARCHAR
and then using UNPIVOT
:
-- Generate dummy data
create or replace table t1 (c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int) as
select
iff(random()%2=0, random(), null), iff(random()%2=0, random(), null),
iff(random()%2=0, random(), null), iff(random()%2=0, random(), null),
iff(random()%2=0, random(), null), iff(random()%2=0, random(), null),
iff(random()%2=0, random(), null), iff(random()%2=0, random(), null),
iff(random()%2=0, random(), null), iff(random()%2=0, random(), null)
from table(generator(rowcount => 1000000000))
;
-- Query
with
cols as (
select column_name, ordinal_position
from information_schema.columns
where table_catalog = current_database()
and table_schema = current_schema()
and table_name = 'T1'
),
stringified as (
select
c1::varchar c1, c2::varchar c2, c3::varchar c3, c4::varchar c4, c5::varchar c5,
c6::varchar c6, c7::varchar c7, c8::varchar c8, c9::varchar c9, c10::varchar c10
from t1
),
data as (
select column_name, column_value
from stringified
unpivot(column_value for column_name in (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10))
)
select
c.column_name,
count(d.column_value)/(select count(*) from t1) fill_rate,
min(d.column_value) min,
max(d.column_value) max
from cols c
left join data d using (column_name)
group by c.column_name, c.ordinal_position
order by c.ordinal_position
;
/*
COLUMN_NAME FILL_RATE MIN MAX
C1 0.500000 -1000000069270747870 999999972962694409
C2 0.499980 -1000000027928146782 999999946877079818
C3 0.499996 -1000000012155323098 999999942281548701
C4 0.500017 -1000000056353213091 999999946421698482
C5 0.500015 -1000000015608859996 999999993977648967
C6 0.500003 -1000000007081089270 999999998851014730
C7 0.499987 -100000008605944993 999999968272328033
C8 0.499992 -1000000042470913027 999999977402822725
C9 0.500011 -1000000058928465662 999999969060696774
C10 0.500029 -1000000011306371004 99999996061390938
*/
It's a straightforward way, but it still needs to list up all column names twice and it's a bit tough in the case the number of columns is very massive (but I believe it's much better than a huge UNION ALL query).
Another solution is a bit tricky, but you can unpivot a table by using OBJECT_CONSTRUCT(*)
aggregation if the row length doesn't exceed a VARIANT value limit (16 MiB):
with
cols as (
select column_name, ordinal_position
from information_schema.columns
where table_catalog = current_database()
and table_schema = current_schema()
and table_name = 'T1'
),
data as (
select f.key column_name, f.value::varchar column_value
from (select object_construct(*) rec from t1) up,
lateral flatten(up.rec) f
)
select
c.column_name,
count(d.column_value)/(select count(*) from t1) fill_rate,
min(d.column_value) min,
max(d.column_value) max
from cols c
left join data d using (column_name)
group by c.column_name, c.ordinal_position
order by c.ordinal_position
;
/*
COLUMN_NAME FILL_RATE MIN MAX
C1 0.500000 -1000000069270747870 999999972962694409
C2 0.499980 -1000000027928146782 999999946877079818
C3 0.499996 -1000000012155323098 999999942281548701
C4 0.500017 -1000000056353213091 999999946421698482
C5 0.500015 -1000000015608859996 999999993977648967
C6 0.500003 -1000000007081089270 999999998851014730
C7 0.499987 -100000008605944993 999999968272328033
C8 0.499992 -1000000042470913027 999999977402822725
C9 0.500011 -1000000058928465662 999999969060696774
C10 0.500029 -1000000011306371004 99999996061390938
*/
OBJECT_CONSTRUCT(*)
aggregation is a special usage of the OBJECT_CONSTRUCT
function that extracts column names as a key of each JSON object. As far as I know, this is the only way to extract column names from a table along with values in a programmatic way.
Since OBJECT_CONSTRUCT
is relatively a heavy operation, it usually takes a longer time than the first solution, but you don't need to write all column names with this trick.