I am using spark streaming to read from AMQ. I want the streaming to stop when no data is left in the message queue. I created a custom receiver that connects to the AMQ topic and starts reading the data, but how can the worker tell the driver that there is no data left so that it can stop the streaming.
class CustomReceiver(brokerURL, topic, ...){
def onStart() {
new Thread("AMQ Receiver") {
override def run() { receive() }
}.start()
}
def onStop() {}
private def receive() {
activeMQStream = new ActiveMQStream(broker, topic, ...)
val topicSubscriber = activeMQStream.getTopicSubscriber()
while(!isStopped && !ActiveMQReceiver.stop){
val message = topicSubscriber.receive(timeOutInMilliseconds)
if (message != null && message.isInstanceOf[TextMessage]) {
val textMessage = message.asInstanceOf[TextMessage];
val text = textMessage.getText();
store(text)
println("ActiveMQReceiver: there is data from AMQ ....")
} else {
ActiveMQReceiver.stop = true
println("ActiveMQReceiver: No more data from AMQ .....")
}
}
def checkStatus(): Boolean ={
ActiveMQReceiver.stop
}
}
object ActiveMQReceiver{
@volatile var stop: Boolean = false
}
As you can see above I am trying to set stop flag to true when no data is left to read but when I run the following, the flag is always False, which after searching I found that workers don't share variables. I tried to replace it with Accumulator but that didn't work either.
var ssc = new StreamingContext(spark.sparkContext, Seconds(1))
val customReceiver = new CustomReceiver(brokerURL, topic, ...)
val stream: DStream[String] = ssc.receiverStream(customReceiver)
var driverList = List[String]()
stream.foreachRDD { rdd =>
if(rdd.count() > 0){
val fromWorker = rdd.collect().toList
driverList = driverList:::fromWorker
}
}
var stopFlag = false
var isStopped = false
val checkIntervalMillis = 10000
while (!isStopped) {
isStopped = ssc.awaitTerminationOrTimeout(checkIntervalMillis)
println("Check if stop flag was raised")
stopFlag = customReceiver.checkStatus()
if (!isStopped && stopFlag) {
var seq = driverList.toSeq
import spark.implicits._
val df = seq.toDS()
println("Request to stop")
ssc.stop(false, true)
}
}