
I have been developing a Kafka Streams app that aggregates the ids of incoming events into 10-second windows.

Here is the behavior I want from the stream app.

input

receiveLog : { id : "a" }, receiveLog : { id : "b" }

output

receiveLogSet : { idList: ["a", "b"] }

The app logs each input event as it arrives, but then it seems to get stuck: it never produces an output event, and the aggregation log line never appears either.

So please help me figure out why it is blocked at groupByKey or aggregate, even though there are no exceptions anywhere in the topology.

build.gradle

...    
dependencies {
    ...
    // Kafka
    implementation 'org.springframework.cloud:spring-cloud-stream-binder-kafka'
    implementation 'org.springframework.cloud:spring-cloud-stream-binder-kafka-streams'
}

application.yaml

spring:      
  cloud:
    stream:
      kafka:
        streams:
          binder:
            deserializationExceptionHandler: logAndContinue
            configuration:
              commit.interval.ms: 100
              key.serde: org.apache.kafka.common.serialization.Serdes$LongSerde
              value.serde: org.apache.kafka.common.serialization.Serdes$StringSerde
          bindings:
            process-receiveLog-in-0:
              consumer:
                applicationId: receiveLog-stream
      bindings:
        process-receiveLog-in-0:
          destination: receiveLog
        process-receiveLog-out-0:
          destination: receiveLogSet
    function:
      definition: process-receiveLog

StreamTopology.java

@RequiredArgsConstructor
@Configuration
public class StreamTopology {

    private final ReceiveLogAggregator receiveLogAggregator;

    @Bean(name = "process-receiveLog")
    public Function<KStream<Long, String>, KStream<Long, String>> processReceiveLog() {
        return receiveLogAggregator.aggregate();
    }
}

ReceiveLogAggregator.java

@Slf4j
@RequiredArgsConstructor
@Component
public class ReceiveLogAggregator {

    private final ObjectMapper objectMapper;

    public Function<KStream<Long, String>, KStream<Long, String>> aggregate() {

        var materialized = Materialized
            .<Long, ReceiveLogSet, WindowStore<Bytes, byte[]>>as("rla")
            .withKeySerde(Serdes.Long())
            .withValueSerde(CustomSerdes.receiveLogSetSerde());
        var windowDuration = Duration.ofSeconds(10);

        return input -> input
            .map((key, value) -> {
                // Jackson's readValue throws a checked JsonProcessingException,
                // so it has to be handled for this lambda to compile
                try {
                    var receiveLog = objectMapper.readValue(value, ReceiveLog.class);
                    log.info("receiveLog : {}", receiveLog);  // this log line does appear
                    return KeyValue.pair(key, receiveLog.getId());
                } catch (JsonProcessingException e) {
                    throw new RuntimeException(e);
                }
            })
            .groupByKey(Grouped.with(Serdes.Long(), Serdes.String()))
            .windowedBy(TimeWindows.of(windowDuration).grace(Duration.ZERO))
            .aggregate(ReceiveLogSet::new,
                (key, value, aggregate) -> {
                    log.info("value aggregate : {}", value); // never logged (blocked before this point)
                    return aggregate.add(value);
                },
                materialized
            )
            .suppress(Suppressed.untilWindowCloses(Suppressed.BufferConfig.unbounded()))
            .toStream()
            .map((key, value) -> {
                log.info("new record : {}", value); // never logged (blocked before this point)
                try {
                    return KeyValue.pair(key.key(), objectMapper.writeValueAsString(value));
                } catch (JsonProcessingException e) {
                    throw new RuntimeException(e);
                }
            });
    }
}
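
For reference, here is a minimal TopologyTestDriver harness that exercises the function in isolation (a sketch: it assumes org.apache.kafka:kafka-streams-test-utils on the test classpath, and the topic names, timestamps, and test class are illustrative). Note that suppress(untilWindowCloses) only emits once stream time, which advances with incoming record timestamps, passes the window end, so the third record below is what releases the first window:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.LongSerializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.junit.jupiter.api.Test;

import java.time.Instant;
import java.util.Properties;

class ReceiveLogAggregatorTest {

    @Test
    void windowedAggregation() {
        var aggregator = new ReceiveLogAggregator(new ObjectMapper());

        // wire the function into a standalone topology, mirroring what the binder does
        var builder = new StreamsBuilder();
        KStream<Long, String> input = builder.stream("receiveLog", Consumed.with(Serdes.Long(), Serdes.String()));
        aggregator.aggregate().apply(input).to("receiveLogSet", Produced.with(Serdes.Long(), Serdes.String()));

        var props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "receiveLog-stream-test");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");

        try (var driver = new TopologyTestDriver(builder.build(), props)) {
            var in = driver.createInputTopic("receiveLog", new LongSerializer(), new StringSerializer());
            var out = driver.createOutputTopic("receiveLogSet", new LongDeserializer(), new StringDeserializer());

            var start = Instant.parse("2022-05-18T00:00:00Z");
            in.pipeInput(1L, "{\"id\":\"a\"}", start);
            in.pipeInput(1L, "{\"id\":\"b\"}", start.plusSeconds(1));
            // suppress(untilWindowCloses) holds the first window until stream time
            // passes its end, so a record after the window boundary flushes it
            in.pipeInput(1L, "{\"id\":\"c\"}", start.plusSeconds(11));

            out.readKeyValuesToList().forEach(kv -> System.out.println("emitted : " + kv));
        }
    }
}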

ReceiveLogSet.java

@Getter
@ToString
public class ReceiveLogSet {
    private final Set<String> idList;

    public ReceiveLogSet() {
        this.idList = new HashSet<>();
    }

    public ReceiveLogSet add(String id) {
        idList.add(id);
        return this;
    }
}
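
CustomSerdes is referenced above but not shown. For completeness, a minimal version consistent with how it is used would look roughly like this (a hypothetical reconstruction using spring-kafka's JsonSerde; the actual class may differ):

import org.apache.kafka.common.serialization.Serde;
import org.springframework.kafka.support.serializer.JsonSerde;

// Hypothetical reconstruction: the actual CustomSerdes is not shown in the post
public final class CustomSerdes {

    private CustomSerdes() {
    }

    // spring-kafka's JsonSerde handles the Jackson round trip for the aggregate type
    public static Serde<ReceiveLogSet> receiveLogSetSerde() {
        return new JsonSerde<>(ReceiveLogSet.class);
    }
}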

Thanks for your help :)

  • Did you try setting `commit.interval.ms` to a lower value? By default, it is 30 seconds. See [here](https://github.com/spring-cloud/spring-cloud-stream-samples/blob/main/kafka-streams-samples/kafka-streams-word-count/src/main/resources/application.yml#L18) for an example. – sobychacko May 18 '22 at 14:19
  • Thank you for your concern, sobychacko :) Yes, I have been setting commit.interval.ms = 100, same as example you shared. – Teasun Kim May 18 '22 at 14:53
  • Could you put together your project as a simple app somewhere on GitHub? That helps us further identify the issue. – sobychacko May 18 '22 at 15:06
  • https://github.com/thesun4sky/stream Here is my sample app on GitHub. I hope it helps to identify the issue. – Teasun Kim May 18 '22 at 16:02
  • I cannot build the project (`mvn clean package` doesn't work). Could you add the necessary dependencies? Did you try to debug the Kafka Streams processor and see how far along you get in your `aggregate` method? That should give some indications. – sobychacko May 19 '22 at 21:29

0 Answers