I see that data is getting duplicated or missing the sequence ad creating invalid data for the JSON Structure I have.
Sample InPut Data: have two columns job_id and "conv_column" Which has below JSON structure.
[ { "Source": "", "Number": "WP1", "Name": "Well #1", "SapNumber": "", "State": "", "Country": "", "Field": "", "Uwi": "", "Environment": "", "WellId": "WP1", "Latitude": "", "Longitude": "", "Type": "", "WaterDepth": "", "WellGeometry": "", "DrillFor": "", "County": "", "City": "", "WellPlaceholderId": "dd726299-26c3-447d-8367-ef1ec79d1c28", "IsNonMasteredWell": true }, { "Source": "", "Number": "WP2", "Name": "Well #2", "SapNumber": "", "State": "", "Country": "", "Field": "", "Uwi": "", "Environment": "", "WellId": "WP2", "Latitude": "", "Longitude": "", "Type": "", "WaterDepth": "", "WellGeometry": "", "DrillFor": "", "County": "", "City": "", "WellPlaceholderId": "129808ad-6f07-46f3-ab70-4140e4cc92ff", "IsNonMasteredWell": true }, { "Source": "", "Number": "1-6HQPMA", "Name": "BLANCA-004", "SapNumber": "", "State": "", "Country": "Ecuador", "Field": "BLANCA", "Uwi": "BLC-004", "Environment": "Land", "WellId": "0064004151", "Latitude": "0.311318889", "Longitude": "-76.17652111", "Type": "Development", "WaterDepth": "", "WellGeometry": "Vertical (0 - <15)", "DrillFor": "Oil", "County": "", "City": "", "WellPlaceholderId": "239cbe08-b691-4fe2-922a-9b55df027bff", "IsNonMasteredWell": false }, { "Source": "", "Number": "1-2CQM5S", "Name": "BLANCA-002", "SapNumber": "", "State": "", "Country": "Ecuador", "Field": "BLANCA", "Uwi": "BLC-002", "Environment": "Land", "WellId": "0064004099", "Latitude": "0.332268889", "Longitude": "-76.17651833", "Type": "Exploration", "WaterDepth": "", "WellGeometry": "", "DrillFor": "Oil", "County": "", "City": "", "WellPlaceholderId": "8f7c5d6c-3c07-4c02-a2f0-37b0230df306", "IsNonMasteredWell": false }, { "Source": "", "Number": "1-69H44K", "Name": "BLANCA-003", "SapNumber": "", "State": "", "Country": "Ecuador", "Field": "BLANCA", "Uwi": "BLC-003", "Environment": "Land", "WellId": "0064004139", "Latitude": "0.281760556", "Longitude": "-76.14325722", "Type": "Development", "WaterDepth": "0", "WellGeometry": "Deviated (15 - <60)", "DrillFor": "Oil", "County": "", "City": "", "WellPlaceholderId": "b8da860e-2fb6-4a46-8fd7-def7f0515d30", "IsNonMasteredWell": false }, { "Source": "", "Number": "1-290RHS", "Name": "BLANCA-001", "SapNumber": "", "State": "", "Country": "Ecuador", "Field": "BLANCA", "Uwi": "BLC-001", "Environment": "Land", "WellId": "0064001507", "Latitude": "0.332357778", "Longitude": "-76.17656278", "Type": "Development", "WaterDepth": "", "WellGeometry": "", "DrillFor": "Oil", "County": "", "City": "", "WellPlaceholderId": "e8b59fcf-739d-48f4-bf7f-14619c1c9304", "IsNonMasteredWell": false }, { "Source": "", "Number": "1-6IBUKB", "Name": "BLANCA-005", "SapNumber": "", "State": "", "Country": "Ecuador", "Field": "BLANCA", "Uwi": "BLC-005", "Environment": "Land", "WellId": "0064004159", "Latitude": "0.311257778", "Longitude": "-76.17659889", "Type": "Development", "WaterDepth": "", "WellGeometry": "Deviated (15 - <60)", "DrillFor": "Oil", "County": "", "City": "", "WellPlaceholderId": "719ae538-3d3e-4667-bdf0-cc2e9d2720b6", "IsNonMasteredWell": false } ]
Code I am using:
CREATE TEMPORARY FUNCTION CUSTOM_JSON_EXTRACT(json STRING, json_path STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
var result = jsonPath(JSON.parse(json), json_path);
if(result){return result;}
else {return [];}
"""
OPTIONS (
library="gs://dev-workspace/json_temp/jsonpath-0.8.0.js"
);
SELECT distinct job_id,well_id,
well_name,well_number,well_sap_number,well_field,well_uwi
from lz_fdp_op.dbm_temp_data
left join UNNEST(CUSTOM_JSON_EXTRACT(conv_column,"$.Operation.Wells[*].WellId")) Well_ID
left join UNNEST(CUSTOM_JSON_EXTRACT(conv_column,"$.Operation.Wells[*].Name")) well_name
left join UNNEST(CUSTOM_JSON_EXTRACT(conv_column,"$.Operation.Wells[*].Number")) well_number
left join UNNEST(CUSTOM_JSON_EXTRACT(conv_column,"$.Operation.Wells[*].SapNumber")) well_sap_number
left join UNNEST(CUSTOM_JSON_EXTRACT(conv_column,"$.Operation.Wells[*].Field")) well_field
left join UNNEST(CUSTOM_JSON_EXTRACT(conv_column,"$.Operation.Wells[*].Uwi")) well_uwi