I would like to get the seperate df
per level from JSON
file. Below code allows me to go 2 level deep but later I cant use explode
function due to data type mismatch: input to function explode should be array or map type, not struct
error on df_4
.
My code and JSON
structure:
start_df = spark.read.json(f'/mnt/bronze/products/**/*.json')
# Select eveything under _embedded
df = start_df.select('_embedded.*')
# explode items data
df1 = df.select(explode(df.items).alias('required'))
# get all data from
df2 = df1.select('required.*')
#create seperate df only including "VALUES" % "identifier"
df3= df2.select("identifier","values")
df3.display()
# expldoe "values" to present its key-value pari as col(key) and col(value)
df_4 = df3.select("identifier", explode("values"))
Structure of JSON
root
|-- _embedded: struct (nullable = true)
| |-- items: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- _links: struct (nullable = true)
| | | | |-- self: struct (nullable = true)
| | | | | |-- href: string (nullable = true)
| | | |-- values: struct (nullable = true)
| | | | |-- Contrex_table: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- UFI_Table: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: array (nullable = true)
| | | | | | | |-- element: struct (containsNull = true)
| | | | | | | | |-- UFI: string (nullable = true)
| | | | | | | | |-- company: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- add_reg_info: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chem_can_product_id: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chemical_name: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chemical_name_marketing: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chemical_stability: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chemical_weapon_precursor: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chempax_302_dewolf_id: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chempax_311_case_id: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- chempax_312_ross_id: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- ci_number: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- coating_type: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: array (nullable = true)
| | | | | | | |-- element: string (containsNull = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- colour: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- commodity_code: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- compendial_grade: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: array (nullable = true)
| | | | | | | |-- element: string (containsNull = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- conditions_to_avoid: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- conflict_minerals: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- contains_nano_particles: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- country_of_origin: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: string (nullable = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
| | | | |-- dmf_status: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- data: array (nullable = true)
| | | | | | | |-- element: string (containsNull = true)
| | | | | | |-- locale: string (nullable = true)
| | | | | | |-- scope: string (nullable = true)
|-- _links: struct (nullable = true)
| |-- first: struct (nullable = true)
| | |-- href: string (nullable = true)
| |-- next: struct (nullable = true)
| | |-- href: string (nullable = true)
| |-- self: struct (nullable = true)
| | |-- href: string (nullable = true)