I am trying to use Zeppelin to plot a real-time graph. For that, I'm building a Structured Streaming DataFrame with spark-highcharts (Spark 2.1.0, Zeppelin 0.7), following this example: https://github.com/knockdata/spark-highcharts/blob/master/docs/StructureStreaming.md. However, when I run the streaming paragraph (Define Chart), the (Chart Paragraph) remains "PENDING".
My (Define Chart) paragraph code is:
import com.knockdata.spark.highcharts._
import com.knockdata.spark.highcharts.model._
import org.apache.spark.sql.SparkSession
// Obtain the SparkSession used by the rest of this paragraph.
// NOTE(review): inside Zeppelin a session normally exists already; getOrCreate()
// would then return that one, so appName/master may have no effect here — confirm.
val spark = SparkSession.builder()
  .appName("Spark structured streaming Kafka example")
  .master("yarn")
  .getOrCreate()
// Streaming source: subscribes to the Kafka topic "st" on the listed brokers.
// NOTE(review): the broker list has stray spaces around two of its commas; it is
// reproduced unchanged here — confirm the Kafka client tolerates them.
val inputstream = {
  val kafkaReader = spark.readStream.format("kafka")
  kafkaReader
    .option("kafka.bootstrap.servers", "n11.hdp.com:6667,n12.hdp.com:6667,n13.hdp.com:6667 ,n10.hdp.com:6667, n9.hdp.com:6667")
    .option("subscribe", "st")
    .load()
}

// Session-wide default checkpoint location for streaming queries.
// NOTE(review): "checkpoint" is a relative path; where it resolves depends on the
// cluster's filesystem configuration — confirm it is writable.
spark.conf.set("spark.sql.streaming.checkpointLocation", "checkpoint")
// Per-GSM totals (upload, download, duration) computed from the comma-separated
// text carried in each Kafka record's value.
val ValueString = {
  // Record payload as text.
  val rawLines = inputstream.selectExpr("CAST( value AS STRING)").as[String]

  // Pick the fields of interest by their position in the comma-separated record.
  val fields = rawLines.select(
    expr("(split(value, ','))[1]").cast("string").as("GSM"),
    expr("(split(value, ','))[7]").cast("double").as("Duration"),
    expr("(split(value, ','))[10]").cast("double").as("DataUpLink1"),
    expr("(split(value, ','))[11]").cast("double").as("DataDownLink1")
  )

  // Keep only rows in which all four selected columns are non-null.
  val complete = fields.filter("GSM is not null and DataUpLink1 is not null and DataDownLink1 is not null and Duration is not null")

  // One output row per GSM with the three summed measures.
  complete
    .groupBy("GSM")
    .agg(
      sum("DataUpLink1") as "upload",
      sum("DataDownLink1") as "download",
      sum("Duration") as "duration"
    )
}
// Start the streaming chart: one series per GSM, plotting "download" (y) against
// "duration" (x), ordered by GSM, using output mode "complete" (the input is an
// aggregation). `z` is passed through to spark-highcharts unchanged.
val query = highcharts(
  ValueString.seriesCol("GSM")
    .series("y" -> "download", "x" -> "duration")
    .orderBy(col("GSM")),
  z,
  "complete")

// Deliberately NOT calling query.processAllAvailable() or query.awaitTermination()
// here. Both block the calling thread; awaitTermination() does not return while the
// stream is running, and processAllAvailable() may never return when data keeps
// arriving. A paragraph that never finishes keeps the interpreter busy, so the
// Chart Paragraph stays "PENDING" (presumably because Zeppelin runs this
// interpreter's paragraphs one at a time — confirm for your interpreter settings).
// The stream keeps running in the background after this paragraph returns.
// NOTE(review): `query` appears to be a StreamingQuery handle; if so, stop the
// stream later from another paragraph with query.stop() — confirm.
My (Chart Paragraph) code is:
// Chart paragraph: prints the "%angular" display directive, then evaluates
// StreamingChart(z) as the paragraph's final expression.
// NOTE(review): presumably `z` is the same ZeppelinContext that the Define Chart
// paragraph passes to highcharts(...), which is how the two are linked — confirm.
println("%angular")
StreamingChart(z)