I have set up Auto Loader to regularly read json files and store them in a "bronze" table called fixture_raw using Delta Live Tables in Databricks. This works fine and the json data is stored in the specified table, but when I add a "silver" table called fixture_prepared and try to extract some of the json elements from the bronze table, I get an error:
org.apache.spark.sql.AnalysisException: Ambiguous reference to fields StructField(id,LongType,true), StructField(id,LongType,true)
How can I get around this?
Delta Live Tables code:
-- Bronze layer: incrementally ingest raw fixture JSON files with Auto Loader.
-- The schema is inferred on first run and persisted at cloudFiles.schemaLocation;
-- newly appearing JSON keys are added automatically (addNewColumns mode).
-- SELECT * is deliberate here: the bronze table keeps the full inferred payload.
CREATE OR REFRESH STREAMING LIVE TABLE fixture_raw AS
SELECT
    *,
    input_file_name() AS InputFile,        -- source file path, kept for lineage/debugging
    current_timestamp() AS LoadTime        -- ANSI-standard; behaves the same as now() in Databricks SQL
FROM cloud_files(
    "/mnt/input/fixtures/",
    "json",
    map(
        "cloudFiles.inferColumnTypes", "true",
        "cloudFiles.schemaLocation", "/mnt/dlt/schema/fixture",
        "cloudFiles.schemaEvolutionMode", "addNewColumns"
    )
);
-- Silver layer: flatten the top-level "response" array into one row per fixture.
-- FIX: the CTE definition was missing the mandatory AS keyword
-- ("WITH FixtureData (" -> "WITH FixtureData AS ("); Spark SQL rejects the former.
-- NOTE(review): the "Ambiguous reference to fields StructField(id,...)" error
-- usually means the evolved/inferred schema contains two sibling fields named
-- "id" (often differing only in case from an earlier schema version). If the
-- error persists after the AS fix, inspect the persisted schema at the
-- schemaLocation and consider resetting it or enabling spark.sql.caseSensitive.
CREATE OR REFRESH LIVE TABLE fixture_prepared AS
WITH FixtureData AS (
    -- One row per element of the "response" array.
    SELECT
        explode(response) AS FixtureJson
    FROM live.fixture_raw
)
SELECT
    FixtureJson.fixture.id AS FixtureID,
    FixtureJson.fixture.date AS StartTime,
    FixtureJson.fixture.venue.name AS Venue,
    FixtureJson.teams.home.id AS HomeTeamID,
    FixtureJson.teams.home.name AS HomeTeamName,
    FixtureJson.teams.away.id AS AwayTeamID,
    FixtureJson.teams.away.name AS AwayTeamName
FROM FixtureData;
Json data:
{
"get": "fixtures",
"parameters": {
"league": "39",
"season": "2022"
},
"response": [
{
"fixture": {
"id": 867946,
"date": "2022-08-05T19:00:00+00:00",
"venue": {
"id": 525,
"name": "Selhurst Park"
}
},
"teams": {
"home": {
"id": 52,
"name": "Crystal Palace"
},
"away": {
"id": 42,
"name": "Arsenal"
}
}
},
{
"fixture": {
"id": 867947,
"date": "2022-08-06T11:30:00+00:00",
"venue": {
"id": 535,
"name": "Craven Cottage"
}
},
"teams": {
"home": {
"id": 36,
"name": "Fulham"
},
"away": {
"id": 40,
"name": "Liverpool"
}
}
}
]
}