0

I would like to get the seperate df per level from JSON file. Below code allows me to go 2 level deep but later I cant use explode function due to data type mismatch: input to function explode should be array or map type, not struct error on df_4.

My code and JSON structure:

start_df = spark.read.json(f'/mnt/bronze/products/**/*.json')
# Select eveything under _embedded 
df = start_df.select('_embedded.*')
# explode items data
df1 = df.select(explode(df.items).alias('required'))
# get all data from 
df2 = df1.select('required.*')

#create seperate df only including "VALUES" % "identifier"
df3= df2.select("identifier","values")
df3.display()

# expldoe "values" to present its key-value pari as col(key) and col(value)
df_4 = df3.select("identifier", explode("values"))

Structure of JSON

  root
 |-- _embedded: struct (nullable = true)
 |    |-- items: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- _links: struct (nullable = true)
 |    |    |    |    |-- self: struct (nullable = true)
 |    |    |    |    |    |-- href: string (nullable = true)
 |    |    |    |-- values: struct (nullable = true)
 |    |    |    |    |-- Contrex_table: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- UFI_Table: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |-- UFI: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- company: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- add_reg_info: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chem_can_product_id: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chemical_name: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chemical_name_marketing: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chemical_stability: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chemical_weapon_precursor: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chempax_302_dewolf_id: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chempax_311_case_id: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- chempax_312_ross_id: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- ci_number: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- coating_type: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: string (containsNull = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- colour: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- commodity_code: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- compendial_grade: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: string (containsNull = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- conditions_to_avoid: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- conflict_minerals: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- contains_nano_particles: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- country_of_origin: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: string (nullable = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |    |    |    |    |-- dmf_status: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- data: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: string (containsNull = true)
 |    |    |    |    |    |    |-- locale: string (nullable = true)
 |    |    |    |    |    |    |-- scope: string (nullable = true)
 |-- _links: struct (nullable = true)
 |    |-- first: struct (nullable = true)
 |    |    |-- href: string (nullable = true)
 |    |-- next: struct (nullable = true)
 |    |    |-- href: string (nullable = true)
 |    |-- self: struct (nullable = true)
 |    |    |-- href: string (nullable = true)
Greencolor
  • 501
  • 1
  • 5
  • 16
  • Have you checked out this post, https://stackoverflow.com/questions/56229314/pyspark-access-struct-field-name-and-value-when-exploding – smurphy Jan 20 '23 at 01:01
  • 1
    I tried to apply but it did not work, i might doing something wrong – Greencolor Jan 20 '23 at 07:32

0 Answers0