3

I am using akka-streams-kafka to create a stream consumer from a Kafka topic, and a broadcast to serve the events from that topic to WebSocket clients.

I have found the following three approaches to create the stream Source. Question: my goal is to serve hundreds or thousands of WebSocket clients (some of which might be slow consumers). Which approach scales better?

I would appreciate any thoughts. Note that a broadcast lowers the rate down to that of the slowest consumer.
BUFFER_SIZE = 100000

  1. Source.actorRef (the source actor does not support the backpressure overflow strategy)

    // Approach 1: an actor-backed source feeding a BroadcastHub.
    // Kafka records are converted with Util.toJson and sent to the source actor with `!`;
    // the source is configured with BUFFER_SIZE and OverflowStrategy.fail.
    // Evaluates to the hub's consumer-side Source, which each subscriber attaches to.
    val kafkaSourceActorWithBroadcast = {
      val hubSink = BroadcastHub.sink[String](bufferSize = 256)

      // Materialize the actor source into the hub, keeping both the ActorRef and the hub Source.
      val (ingressRef, broadcastSource) =
        Source
          .actorRef[String](BUFFER_SIZE, OverflowStrategy.fail)
          .toMat(hubSink)(Keep.both)
          .run()

      // Pump every Kafka record into the actor; the message send is fire-and-forget.
      Consumer
        .plainSource(consumerSettings, Subscriptions.topics(KAFKA_TOPIC))
        .runForeach { record =>
          ingressRef ! Util.toJson(record.value())
        }

      broadcastSource
    }
    
  2. Source.queue

    // Approach 2: a Source.queue feeding a BroadcastHub.
    // Kafka records are converted with Util.toJson and offered to the queue; the queue is
    // configured with BUFFER_SIZE and OverflowStrategy.backpressure.
    // Evaluates to the hub's consumer-side Source, which each subscriber attaches to.
    val kafkaSourceQueueWithBroadcast = {

      // Materialize the queue source into the hub, keeping both the queue handle and the hub Source.
      val (eventQueue, kafkaQueueSource) =
        Source
          .queue[String](BUFFER_SIZE, OverflowStrategy.backpressure)
          .toMat(BroadcastHub.sink[String](bufferSize = 256))(Keep.both)
          .run()

      // `offer` returns a Future; it must be awaited before the next element is offered,
      // otherwise the Kafka consumer is never slowed down and concurrent offers are made
      // while a previous one is still pending. mapAsync(1) waits for each offer in order,
      // so backpressure from the queue propagates to the Kafka source.
      Consumer
        .plainSource(consumerSettings, Subscriptions.topics(KAFKA_TOPIC))
        .mapAsync(parallelism = 1)(record => eventQueue.offer(Util.toJson(record.value())))
        .runForeach(_ => ())

      kafkaQueueSource
    }
    
  3. buffer

    // Approach 3: the Kafka source itself, buffered, running straight into a BroadcastHub.
    // Evaluates to the hub's consumer-side Source, which each subscriber attaches to.
    val kafkaSourceWithBuffer = {
      // Kafka records converted to JSON strings via Util.toJson.
      val jsonEvents =
        Consumer
          .plainSource(consumerSettings, Subscriptions.topics(KAFKA_TOPIC))
          .map(record => Util.toJson(record.value()))

      // Buffer up to BUFFER_SIZE elements (OverflowStrategy.backpressure), then
      // materialize into the hub and keep only the hub's Source.
      jsonEvents
        .buffer(BUFFER_SIZE, OverflowStrategy.backpressure)
        .runWith(BroadcastHub.sink[String](bufferSize = 256))
    }
    

Websocket route code for completeness:

// Route serving the WebSocket event stream at path "stream".
val streamRoute =
  path("stream") {
    // websocketFlow is declared with a `where: String` parameter, so it has to be applied
    // to obtain the Flow that handleWebSocketMessages expects.
    // NOTE(review): the argument value is a placeholder label; confirm the intended meaning of `where`.
    handleWebSocketMessages(websocketFlow(where = "stream"))
  }


/** Builds the WebSocket handler flow: incoming text messages are reduced to plain strings,
  * passed through `logicStreamFlow`, and the resulting strings are wrapped as strict text messages.
  *
  * @param where not referenced in the body of this method
  * @return a flow from incoming WebSocket messages to outgoing WebSocket messages
  */
def websocketFlow(where: String): Flow[Message, Message, NotUsed] =
  Flow[Message]
    .collect {
      // Strict text frames already hold their full payload.
      case TextMessage.Strict(text) => Future.successful(text)
      // Streamed text is concatenated chunk by chunk; runFold already yields a
      // Future[String], so no additional flatMap/Future.successful wrapping is needed.
      case TextMessage.Streamed(chunks) => chunks.runFold("")(_ + _)
      // Messages matching neither case are dropped by collect.
    }
    // Resolve the per-message futures, at most PARALLELISM at a time.
    .mapAsync(parallelism = PARALLELISM)(identity)
    .via(logicStreamFlow)
    .map(text => TextMessage.Strict(text))


/** Flow whose input side is discarded and whose output side is fed from
  * `kafkaSourceActorWithBroadcast`.
  */
private def logicStreamFlow: Flow[String, String, NotUsed] = {
  // Inbound strings are ignored; only the Kafka-backed source produces output.
  val discardInbound = Sink.ignore
  val kafkaEvents = kafkaSourceActorWithBroadcast
  Flow.fromSinkAndSource(discardInbound, kafkaEvents)
}
Vms
  • 199
  • 2
  • 11

0 Answers0