I have an Avro schema file and I need to create a table in Databricks through PySpark. I don't need to load any data; I just want to create the table. The easy way is to load the schema as a JSON string, take the "name" and "type" of each entry in the "fields" array, and then generate the CREATE TABLE SQL query from them. I want to know whether there is a programmatic way to do this with an existing API. Sample schema:
{
"type" : "record",
"name" : "kylosample",
"doc" : "Schema generated by Kite",
"fields" : [ {
"name" : "registration_dttm",
"type" : "string",
"doc" : "Type inferred from '2016-02-03T07:55:29Z'"
}, {
"name" : "id",
"type" : "long",
"doc" : "Type inferred from '1'"
}, {
"name" : "first_name",
"type" : "string",
"doc" : "Type inferred from 'Amanda'"
}, {
"name" : "last_name",
"type" : "string",
"doc" : "Type inferred from 'Jordan'"
}, {
"name" : "email",
"type" : "string",
"doc" : "Type inferred from 'ajordan0@com.com'"
}, {
"name" : "gender",
"type" : "string",
"doc" : "Type inferred from 'Female'"
}, {
"name" : "ip_address",
"type" : "string",
"doc" : "Type inferred from '1.197.201.2'"
}, {
"name" : "cc",
"type" : [ "null", "long" ],
"doc" : "Type inferred from '6759521864920116'",
"default" : null
}, {
"name" : "country",
"type" : "string",
"doc" : "Type inferred from 'Indonesia'"
}, {
"name" : "birthdate",
"type" : "string",
"doc" : "Type inferred from '3/8/1971'"
}, {
"name" : "salary",
"type" : [ "null", "double" ],
"doc" : "Type inferred from '49756.53'",
"default" : null
}, {
"name" : "title",
"type" : "string",
"doc" : "Type inferred from 'Internal Auditor'"
}, {
"name" : "comments",
"type" : "string",
"doc" : "Type inferred from '1E+02'"
} ]
}