I have the following code, and it fails with the error "Option 'basePath' must be a directory". I just want to run a simple streaming Kafka sink.
// Checkpoint location handed to the streaming writer.
// Alternative HDFS location: "hdfs://hdfscluster/user/yarn/tmp/"
val checkPointDir: String = "/tmp/offsets/"
/**
 * Entry point: builds the Spark session, starts the Kafka streaming query for
 * `jsonDF` on topic "test", and waits on the returned query.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  // Built eagerly on purpose: a local `lazy val` that is never referenced in this
  // method is never evaluated, so the appName/master settings would never be applied.
  val spark = SparkSession
    .builder
    .appName("KafkaProducer")
    .master("local[*]")
    .getOrCreate()

  // NOTE(review): jsonDF is defined outside this method. If it comes from a
  // file-based readStream, its input path presumably has to be a directory rather
  // than a single file -- verify, since the reported "Option 'basePath' must be a
  // directory" error looks like it comes from the read side, not the checkpoint.
  val query = writeStream(jsonDF, "test")
  query.awaitTermination()
}
/**
 * Starts a streaming query that writes `df` to Kafka.
 *
 * The writer uses the "kafka" format, the brokers in `kafkaServers`, the
 * checkpoint location in `checkPointDir`, and the Update output mode.
 *
 * @param df    streaming DataFrame to publish
 * @param topic name of the target Kafka topic
 * @return the started [[StreamingQuery]]
 */
def writeStream(df: DataFrame, topic: String): StreamingQuery = {
  // Disabled in the original: log.warn("Writing to kafka")
  // Disabled in the original: df.selectExpr("CAST(value AS STRING)") before writing
  val kafkaWriter = df.writeStream.format("kafka")

  val configuredWriter = kafkaWriter
    .option("kafka.bootstrap.servers", kafkaServers)
    .option("topic", topic)
    .option("checkpointLocation", checkPointDir)

  configuredWriter
    .outputMode(OutputMode.Update)
    .start()
}
My user is the owner of the folder /tmp/offsets, but I am still getting this exception:
java.lang.IllegalArgumentException: Option 'basePath' must be a directory