I'm building a Flink CEP application that reads data from Kafka. When a pattern is matched, the sink operation is not triggered unless more data arrives after the match. For example, I expect the pattern A -> B -> C, and Kafka delivers A, B, C. However, the sink operation I added to the pattern process function only runs if the data coming from Kafka looks like A, B, C, X. How do I fix this problem? Please help.
READ KAFKA
// Watermark strategy with a zero out-of-orderness bound, named so the source line stays short.
WatermarkStrategy<String> zeroBoundWatermarks =
        WatermarkStrategy.forBoundedOutOfOrderness(Duration.ZERO);
// Read from the Kafka source and attach the watermark strategy to the resulting stream.
DataStream<String> dataStream =
        env.addSource(KAFKA).assignTimestampsAndWatermarks(zeroBoundWatermarks);
// Debug output: echo every incoming record with the "DS:" prefix.
dataStream.print("DS:");
PATTERN
// Both stages test for the same action id, so a single stateless condition is shared.
SimpleCondition<Event> hasTargetAction = new SimpleCondition<Event>() {
    @Override
    public boolean filter(Event candidate) {
        return candidate.actionId.equals("2.24");
    }
};
// "start" followed (via next) by "middle"; the whole match is bounded to 5 seconds.
Pattern<Event, ?> pattern = Pattern.<Event>begin("start")
        .where(hasTargetAction)
        .next("middle")
        .where(hasTargetAction)
        .within(Time.seconds(5));
CEP And Sink
// Apply the pattern to eventStringKeyedStream.
PatternStream<Event> patternStream = CEP.pattern(eventStringKeyedStream, pattern);
// For every complete match, forward the first event captured under the "start" stage.
PatternProcessFunction<Event, Event> emitStartEvent = new PatternProcessFunction<Event, Event>() {
    @Override
    public void processMatch(Map<String, List<Event>> match, Context ctx, Collector<Event> out) throws Exception {
        List<Event> startEvents = match.get("start");
        out.collect(startEvents.get(0));
    }
};
patternStream.process(emitStartEvent).print(); // or a sink function
My Program RESULT
DS::2> {"ActionID":"2.24"}
DS::2> {"ActionID":"2.24"}
DS::2> {"ActionID":"2.25"}
4> {ActionID='2.24'}
I was expecting
DS::2> {"ActionID":"2.24"}
DS::2> {"ActionID":"2.24"}
4> {ActionID='2.24'}
So why does it produce the result only after one more record arrives, rather than as soon as the pattern's conditions are met? Please help me.
EDIT
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import java.time.Duration;
import java.util.List;
import java.util.Map;
/**
 * Small Flink CEP job: reads {@code key,timestamp} lines from a socket, uses the second field
 * as the event timestamp, and prints {@code start,end} whenever two consecutive {@code "A"}
 * keys are seen within five seconds.
 *
 * <p>NOTE(review): the pattern is evaluated on a stream with event-time watermarks. With a
 * bounded-out-of-orderness strategy the watermark presumably trails the highest timestamp seen
 * so far, so a completed match would only be emitted once a later-timestamped record advances
 * the watermark. Confirm against the Flink watermark/CEP documentation; if matches must be
 * emitted immediately, processing time may be the better fit.
 */
public class EventTimePattern {

    /** Key that both pattern stages look for. */
    private static final String TARGET_KEY = "A";

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job construction or execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // For a quick bounded test without a socket, the parsed source can be replaced with:
        //   env.fromElements(Tuple2.of("A", 5L), Tuple2.of("A", 10L))
        DataStream<String> input = env.socketTextStream("localhost", 9999)
                // flatMap instead of map: malformed lines are dropped rather than turned into
                // a null record, which would fail in the timestamp assigner below.
                .flatMap(new LineParser())
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                .<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofMillis(0))
                                .withTimestampAssigner((event, timestamp) -> event.f1))
                // Only the key is needed downstream; the timestamp now lives on the record.
                .map(event -> event.f0);

        // "start" followed (via next) by "end", both matching the target key, within 5 seconds.
        Pattern<String, ?> pattern =
                Pattern.<String>begin("start")
                        .where(new IsTargetKey())
                        .next("end")
                        .where(new IsTargetKey())
                        .within(Time.seconds(5));

        input.print("I");

        DataStream<String> result =
                CEP.pattern(input, pattern)
                        .process(new PatternProcessFunction<String, String>() {
                            @Override
                            public void processMatch(
                                    Map<String, List<String>> match,
                                    Context context,
                                    Collector<String> out) throws Exception {
                                // Emit "<start>,<end>" for each complete match.
                                out.collect(match.get("start").get(0) + "," + match.get("end").get(0));
                            }
                        });

        result.print();
        env.execute();
    }

    /**
     * Parses a {@code key,timestamp} line into a {@code (key, timestamp)} tuple.
     * Lines that do not have exactly two comma-separated fields, or whose second field is not
     * a valid {@code long}, produce no output.
     */
    private static final class LineParser implements FlatMapFunction<String, Tuple2<String, Long>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Long>> out) {
            String[] fields = value.split(",");
            if (fields.length != 2) {
                return;
            }
            try {
                out.collect(Tuple2.of(fields[0], Long.parseLong(fields[1])));
            } catch (NumberFormatException ignored) {
                // Non-numeric timestamp: skip the line instead of failing the whole job.
            }
        }
    }

    /** Accepts exactly the records equal to {@link #TARGET_KEY}. */
    private static final class IsTargetKey extends SimpleCondition<String> {
        @Override
        public boolean filter(String value) {
            return value.equals(TARGET_KEY);
        }
    }
}