I have a hive table tableA
with the following format:
> desc tableA;
+--------------------------+-----------------------+-----------------------+--+
| col_name | data_type | comment |
+--------------------------+-----------------------+-----------------------+--+
| statementid | string | |
| batchid | string | |
| requestparam | map<string,string> | |
+--------------------------+-----------------------+-----------------------+--+
I tried to load database with the following code:
val tempdf= spark.read.format("jdbc")
.option("driver", "org.apache.hive.jdbc.HiveDriver")
.option("url", "jdbc:hive2://localhost:10000/tempdb")
.option("user","user1")
.option("password","password1")
.option("query","select statementid, batchid, requestparam from tempdb.tableA")
.load()
And my second attempt:
val tempdf = spark.read.format("jdbc")
.option("driver", "org.apache.hive.jdbc.HiveDriver")
.option("url", "jdbc:hive2://localhost:10000/tempdb")
.option("user","user1")
.option("password","password1")
.option("dbtable","tempdb.tableA")
.load()
But map<string,string>
column is causing an issue while loading source hive table into spark dataset.
Exception in thread "main" java.sql.SQLException: Unsupported type JAVA_OBJECT at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.getCatalystType(JdbcUtils.scala:247) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$getSchema$1(JdbcUtils.scala:312) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.getSchema(JdbcUtils.scala:312) at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:63) at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:226) at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:35) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:354) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:326) at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:308) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:308) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:226)