I dont understand what you mean with Min Length and Max Length, but considering the provided data, you could do something like below.
This query have basically two steps:
- Create a temporary table with flat data using the
WITH
clause
- Calculate the metrics by running one query for each column and use
UNION ALL
to compose everything in a single table.
Query:
WITH
t AS(
SELECT
first_name,
dob,
last_name,
a.zip addresses_zip,
a.state addresses_state,
a.city addresses_city,
a.numberOfYears addresses_numberOfYears,
a.status addresses_status,
a.phone.primarynumber addresses_phone_primarynumber,
a.phone.secondary addresses_phone_secondary
FROM
<your-table> t,
t.addresses a
)
SELECT
"first_name" AS column,
COUNT(first_name) total_count,
COUNT(DISTINCT first_name) total_distinct,
SUM(
IF
(first_name IS NULL,
1,
0)) total_null,
CAST(MIN(first_name) AS string) min_value,
CAST(MAX(first_name) AS string) max_value
FROM
t
UNION ALL
SELECT
"dob" AS column,
COUNT(dob) total_count,
COUNT(DISTINCT dob) total_distinct,
SUM(
IF
(dob IS NULL,
1,
0)) total_null,
CAST(MIN(dob) AS string) min_value,
CAST(MAX(dob) AS string) max_value
FROM
t
UNION ALL
SELECT
"last_name" AS column,
COUNT(last_name) total_count,
COUNT(DISTINCT last_name) total_distinct,
SUM(
IF
(last_name IS NULL,
1,
0)) total_null,
CAST(MIN(last_name) AS string) min_value,
CAST(MAX(last_name) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.zip" AS column,
COUNT(addresses_zip) total_count,
COUNT(DISTINCT addresses_zip) total_distinct,
SUM(
IF
(addresses_zip IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_zip) AS string) min_value,
CAST(MAX(addresses_zip) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.state" AS column,
COUNT(addresses_state) total_count,
COUNT(DISTINCT addresses_state) total_distinct,
SUM(
IF
(addresses_state IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_state) AS string) min_value,
CAST(MAX(addresses_state) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.city" AS column,
COUNT(addresses_city) total_count,
COUNT(DISTINCT addresses_city) total_distinct,
SUM(
IF
(addresses_city IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_city) AS string) min_value,
CAST(MAX(addresses_city) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.numberOfYears" AS column,
COUNT(addresses_numberOfYears) total_count,
COUNT(DISTINCT addresses_numberOfYears) total_distinct,
SUM(
IF
(addresses_numberOfYears IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_numberOfYears) AS string) min_value,
CAST(MAX(addresses_numberOfYears) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.status" AS column,
COUNT(addresses_status) total_count,
COUNT(DISTINCT addresses_status) total_distinct,
SUM(
IF
(addresses_status IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_status) AS string) min_value,
CAST(MAX(addresses_status) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.phone.primarynumber" AS column,
COUNT(addresses_phone_primarynumber) total_count,
COUNT(DISTINCT addresses_phone_primarynumber) total_distinct,
SUM(
IF
(addresses_phone_primarynumber IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_phone_primarynumber) AS string) min_value,
CAST(MAX(addresses_phone_primarynumber) AS string) max_value
FROM
t
UNION ALL
SELECT
"addresses.phone.secondary" AS column,
COUNT(addresses_phone_secondary) total_count,
COUNT(DISTINCT addresses_phone_secondary) total_distinct,
SUM(
IF
(addresses_phone_secondary IS NULL,
1,
0)) total_null,
CAST(MIN(addresses_phone_secondary) AS string) min_value,
CAST(MAX(addresses_phone_secondary) AS string) max_value
FROM
t