I am using nodefluent/kafka-streams to join a KStream and a KTable. This is with Strimzi on Kubernetes. I have set it up in such a way that every message in the KStream will definitely have a matching key in the KTable. However, I see only a few (< 10) messages in the final topic. Can you please advise why so few messages make it through the join?
const { KafkaStreams } = require("kafka-streams");

const kafkaBroker = process.env.KAFKA_BROKER;

const kafkaConfig = {
    "noptions": {
        "metadata.broker.list": kafkaBroker,
        "group.id": "kafka-streams-test-native",
        "client.id": "kafka-streams-test-name-native",
        "event_cb": true,
        "compression.codec": "snappy",
        "api.version.request": true,
        "socket.keepalive.enable": true,
        "socket.blocking.max.ms": 100,
        "enable.auto.commit": false,
        "auto.commit.interval.ms": 100,
        "heartbeat.interval.ms": 250,
        "retry.backoff.ms": 250,
        "fetch.min.bytes": 100,
        "fetch.message.max.bytes": 2 * 1024 * 1024,
        "queued.min.messages": 100,
        "fetch.error.backoff.ms": 100,
        "queued.max.messages.kbytes": 50,
        "fetch.wait.max.ms": 1000,
        "queue.buffering.max.ms": 1000,
        "batch.num.messages": 10000
    },
    "tconf": {
        "auto.offset.reset": "earliest",
        "request.required.acks": 1
    },
    "batchOptions": {
        "batchSize": 5,
        "commitEveryNBatch": 1,
        "concurrency": 1,
        "commitSync": false,
        "noBatchCommits": false
    }
};
const kafkaStreams = new KafkaStreams(kafkaConfig);
const rawData = process.env.KAFKA_TOPIC;
const lookUp = process.env.KAFKA_TOPIC_LKUP;
const alerts = process.env.KAFKA_TOPIC_ALERTS;
const dataStream = kafkaStreams.getKStream(rawData);

const lookUpTable = kafkaStreams.getKTable(lookUp, (message) => {
    return {
        key: message.key,
        value: message.value
    };
});

// I don't know if this should be done.
lookUpTable.consumeUntilCount(2);
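// (My assumption: consumeUntilCount(2) stops the table's consumer after the two
// lookup records have been read; I have not verified whether the join depends on it.)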
const alertsStream = dataStream
    // join the stream against the KTable instance
    .innerJoin(lookUpTable)
    .mapBufferKeyToString()
    .mapStringValueToJSONObject()
    .map((msg) => {
        let key = msg.left.key;
        let data = msg.left.value;
        // Some logic
        return `${key.toString()}|${JSON.stringify({ data: data })}`;
    })
    .mapStringToKV("|", 0, 1);
Promise.all([
    dataStream.start(),
    lookUpTable.start(),
    alertsStream.to(alerts)
])
.then(() => {
    console.log("both consumers and the producer have connected.");
});
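One suspicion I have is start-up order: if the KStream begins consuming before the KTable has read both lookup records, the early messages would find no match. Should I instead sequence start-up like the rough sketch below? It assumes that lookUpTable.start() resolves once its consumer is connected and that getTable() resolves with the materialised key/value map; I have not verified either of these semantics.

lookUpTable.consumeUntilCount(2);

lookUpTable.start()
    .then(() => lookUpTable.getTable())   // assumed to resolve with the table contents
    .then((table) => {
        console.log("lookup table ready:", Object.keys(table));
        // Only now start the stream side, so every message should find its match.
        return Promise.all([
            dataStream.start(),
            alertsStream.to(alerts)
        ]);
    });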
I am using KafkaJS to publish messages to the rawData and lookUp topics, as shown below.
const { Kafka } = require("kafkajs");

const kafka = new Kafka({ brokers: [process.env.KAFKA_BROKER] });
const producer = kafka.producer();

const run = async () => {
    await producer.connect();

    // Seed the lookup topic first with both keys.
    await producer.send({
        topic: process.env.KAFKA_TOPIC_LKUP,
        messages: [
            { key: "k1", value: "Data 1" },
            { key: "k2", value: "Data 2" }
        ],
    });

    // Then publish one stream message per key every second.
    setInterval(async function () {
        await producer.send({
            topic: process.env.KAFKA_TOPIC,
            messages: [
                { key: "k1", value: "Random 1" },
                { key: "k2", value: "Random 2" }
            ],
        });
    }, 1000);
};

run().catch(e => console.error(`${e.message}`, e));
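To check what actually lands on the alerts topic, I read it back with a throwaway KafkaJS consumer like the sketch below (the verify helper and the alerts-check group id are just placeholder names):

const consumer = kafka.consumer({ groupId: "alerts-check" });

const verify = async () => {
    await consumer.connect();
    await consumer.subscribe({ topic: process.env.KAFKA_TOPIC_ALERTS, fromBeginning: true });
    await consumer.run({
        eachMessage: async ({ message }) => {
            // KafkaJS delivers keys and values as Buffers.
            console.log(message.key.toString(), "=>", message.value.toString());
        },
    });
};

verify().catch(e => console.error(e));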