0

I am using nodefluent/kafka-streams to join a KStream and a KTable. This is with Strimzi on Kubernetes. I have set-up in such a way that, every message in KStream will definitely have a matching key in KTable. However, I see only a few (< 10) messages in the final topic. Can you please advise why the overlap is so rare?

const { KafkaStreams } = require("kafka-streams");
const kafkaBroker = process.env.KAFKA_BROKER
const kafkaConfig = {
  "noptions": {
    "metadata.broker.list": kafkaBroker,
    "group.id": "kafka-streams-test-native",
    "client.id": "kafka-streams-test-name-native",
    "event_cb": true,
    "compression.codec": "snappy",
    "api.version.request": true,
    "socket.keepalive.enable": true,
    "socket.blocking.max.ms": 100,
    "enable.auto.commit": false,
    "auto.commit.interval.ms": 100,
    "heartbeat.interval.ms": 250,
    "retry.backoff.ms": 250,
    "fetch.min.bytes": 100,
    "fetch.message.max.bytes": 2 * 1024 * 1024,
    "queued.min.messages": 100,
    "fetch.error.backoff.ms": 100,
    "queued.max.messages.kbytes": 50,
    "fetch.wait.max.ms": 1000,
    "queue.buffering.max.ms": 1000,
    "batch.num.messages": 10000
  },
  "tconf": {
    "auto.offset.reset": "earliest",
    "request.required.acks": 1
  },
  "batchOptions": {
    "batchSize": 5,
    "commitEveryNBatch": 1,
    "concurrency": 1,
    "commitSync": false,
    "noBatchCommits": false
  }
}

const kafkaStreams = new KafkaStreams(kafkaConfig);
const rawData = process.env.KAFKA_TOPIC;
const lookUp = process.env.KAFKA_TOPIC_LKUP;
const alerts = process.env.KAFKA_TOPIC_ALERTS;

const dataStream = kafkaStreams.getKStream(rawData)
const lookUpTable = kafkaStreams.getKTable(lookUp, (message) => {
  return {
    key: message.key,
    value: message.value
  };
})
// I don't know, if this should be done.
lookUpTable.consumeUntilCount(2)

const alertsStream = dataStream
  .innerJoin(lookUp)
  .mapBufferKeyToString()
  .mapStringValueToJSONObject()
  .map((msg) => {
    let key = msg.left.key
    let data = msg.left.value   
    // Some logic
    return `${key.toString()}|${JSON.stringify({ data:data })}`
  })
  .mapStringToKV("|", 0, 1)

Promise.all([
  dataStream.start(),
  lookUpTable.start(),
  alertsStream.to(alerts)
])
  .then(() => {
    console.log("both consumers and the producer have connected.");
  });

I am using KafkaJS to publish messages to rawData and lookUp topics as shown below.

const producer = kafka.producer()

const run = async () => {
  await producer.connect()

  await producer.send({
    topic: process.env.KAFKA_TOPIC_LKUP,
    messages: [
      { key: "k1", value: "Data 1" },
      { key: "k2", value: "Data 2" }
    ],
  })

  setInterval(async function () {
    await producer.send({
      topic: process.env.KAFKA_TOPIC,
      messages: [
        { key: "k1", value: "Random 1" },
        { key: "k2", value: "Random 2" }
      ],
    })
  }, 1000)
}

run().catch(e => console.error(` ${e.message}`, e))
OneCricketeer
  • 179,855
  • 19
  • 132
  • 245
Chain Head
  • 13
  • 3

0 Answers0