0

I'm building a Flink CEP application that reads data from Kafka. When a pattern is matched, the sink operation does not run unless another event arrives after the match. For example, I expect A -> B -> C as a pattern, and the events A, B, C arrive from Kafka. However, for the sink operation I added to the PatternProcessFunction to run, the data coming from Kafka must be A, B, C, X. How do I fix this problem? Please help.

READ KAFKA

// Bounded-out-of-orderness watermarks with a zero bound, kept in a named local for readability.
WatermarkStrategy<String> watermarkStrategy =
        WatermarkStrategy.forBoundedOutOfOrderness(Duration.ZERO);
DataStream<String> dataStream =
        env.addSource(KAFKA).assignTimestampsAndWatermarks(watermarkStrategy);
dataStream.print("DS:"); // echo every incoming record

PATTERN

// Both steps of the pattern accept the same action id, so a single condition
// instance is shared between "start" and "middle".
SimpleCondition<Event> isAction224 = new SimpleCondition<Event>() {
    @Override
    public boolean filter(Event event) {
        // Constant-first equals: an event without an action id is treated as a
        // non-match instead of failing the job with a NullPointerException.
        return "2.24".equals(event.actionId);
    }
};

// Two directly consecutive "2.24" events (strict contiguity via next()),
// both falling inside a 5-second window.
Pattern<Event, ?> pattern = Pattern.<Event>begin("start")
        .where(isAction224)
        .next("middle")
        .where(isAction224)
        .within(Time.seconds(5));

CEP And Sink

// In event time the CEP operator buffers each element until the watermark reaches
// its timestamp. With a zero out-of-orderness bound the watermark always trails the
// newest timestamp, so the element that completes a match is only evaluated after a
// LATER element arrives -- which is why the result used to show up one record late.
// Evaluating the pattern in processing time emits the match as soon as the last
// matching element is received.
// NOTE(review): this also makes within(...) use wall-clock time; keep event time
// instead if out-of-order input must be re-sorted by timestamp. Requires Flink 1.12+.
PatternStream<Event> patternStream = CEP.pattern(eventStringKeyedStream, pattern)
        .inProcessingTime();

patternStream.process(new PatternProcessFunction<Event, Event>() {
    /**
     * Emits the event bound to the "start" step of every completed match.
     *
     * @param map       matched events, keyed by pattern step name
     * @param context   match context (unused)
     * @param collector receives the first "start" event of the match
     */
    @Override
    public void processMatch(Map<String, List<Event>> map, Context context, Collector<Event> collector) throws Exception {
        collector.collect(map.get("start").get(0));
    }
}).print(); // or a sink function

My Program RESULT

DS::2> {"ActionID":"2.24"}

DS::2> {"ActionID":"2.24"}

DS::2> {"ActionID":"2.25"}

4> {ActionID='2.24'}

I was expecting

DS::2> {"ActionID":"2.24"}

DS::2> {"ActionID":"2.24"}

4> {ActionID='2.24'}

So why does it produce a result only after one more event arrives, rather than as soon as the conditions of the pattern are met? Please help me.

EDIT

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

import java.time.Duration;
import java.util.List;
import java.util.Map;

/**
 * Event-time CEP example fed from a text socket.
 *
 * <p>Each input line has the form {@code <value>,<timestampMillis>} (for example
 * {@code A,5}). The job prints every parsed value and every pair of directly
 * consecutive {@code "A"} values that occur within 5 seconds of event time.
 *
 * <p>Because the pattern is evaluated in event time, an element is only handed to
 * the pattern once the watermark has reached its timestamp. With a zero
 * out-of-orderness bound the watermark trails the highest timestamp seen so far,
 * so the match for {@code A,5} / {@code A,10} is printed only after an element
 * with a timestamp greater than 10 has been received.
 */
public class EventTimePattern {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job fails, e.g. when a timestamp field is not a valid long
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // The socket source is not parallel; with a higher parallelism its records are
        // spread over several subtasks, and a subtask that has not seen any record keeps
        // its watermark at the minimum, which holds back the CEP operator indefinitely.
        env.setParallelism(1);

        DataStream<String> input = env.socketTextStream("localhost", 9999)
                // Drop malformed lines up front: a MapFunction must not return null.
                .filter(line -> line.split(",").length == 2)
                .map(new MapFunction<String, Tuple2<String, Long>>() {
                    @Override
                    public Tuple2<String, Long> map(String value) throws Exception {
                        String[] fields = value.split(",");
                        // Trim so that input typed as "A, 5" parses as well as "A,5".
                        return new Tuple2<String, Long>(
                                fields[0].trim(),
                                Long.parseLong(fields[1].trim()));
                    }
                })
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                .<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofMillis(0))
                                .withTimestampAssigner((event, timestamp) -> event.f1))
                .map(event -> event.f0);

        // "A" immediately followed by another "A", both within a 5-second window.
        Pattern<String, ?> pattern =
                Pattern.<String>begin("start")
                        .where(
                                new SimpleCondition<String>() {

                                    @Override
                                    public boolean filter(String value) throws Exception {
                                        return value.equals("A");
                                    }
                                })
                        .next("end")
                        .where(
                                new SimpleCondition<String>() {

                                    @Override
                                    public boolean filter(String value) throws Exception {
                                        return value.equals("A");
                                    }
                                })
                        .within(Time.seconds(5));

        // Echo every parsed value so the input can be compared with the matches.
        input.print("I");

        DataStream<String> result =
                CEP.pattern(input, pattern)
                        .process(new PatternProcessFunction<String, String>() {
                            @Override
                            public void processMatch(
                                    Map<String, List<String>> map,
                                    Context context,
                                    Collector<String> out) throws Exception {

                                // Report the match as "<start>,<end>".
                                StringBuilder builder = new StringBuilder();

                                builder.append(map.get("start").get(0))
                                        .append(",")
                                        .append(map.get("end").get(0));

                                out.collect(builder.toString());
                            }
                        });

        result.print();

        env.execute();
    }
}
Furkan
  • 1
  • 1

1 Answer

0

I failed to reproduce your problem. Here's a similar example that works fine (I used Flink 1.12.2):

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

import java.time.Duration;
import java.util.List;
import java.util.Map;

/**
 * Minimal event-time CEP job: feeds two timestamped "A" elements from a fixed
 * collection and prints each pair of directly consecutive "A" values that lie
 * within 5 seconds of each other.
 */
public class EventTimePattern {

    /** Condition that accepts exactly the value "A". */
    private static final class IsA extends SimpleCondition<String> {
        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("A");
        }
    }

    /** Renders a match as "start,end" using the first element bound to each step. */
    private static final class StartEndJoiner extends PatternProcessFunction<String, String> {
        @Override
        public void processMatch(
                Map<String, List<String>> match,
                Context ctx,
                Collector<String> out) throws Exception {
            String first = match.get("start").get(0);
            String last = match.get("end").get(0);
            out.collect(first + "," + last);
        }
    }

    /**
     * Assembles the pipeline (source, timestamps, pattern, print sink) and runs it.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // The second tuple field carries the event timestamp.
        WatermarkStrategy<Tuple2<String, Long>> watermarks =
                WatermarkStrategy
                        .<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofMillis(0))
                        .withTimestampAssigner((event, timestamp) -> event.f1);

        DataStream<Tuple2<String, Long>> timestamped =
                env.fromElements(Tuple2.of("A", 5L), Tuple2.of("A", 10L))
                        .assignTimestampsAndWatermarks(watermarks);

        // Only the value is needed downstream; the timestamp already lives on the record.
        DataStream<String> input = timestamped.map(event -> event.f0);

        Pattern<String, ?> pattern =
                Pattern.<String>begin("start")
                        .where(new IsA())
                        .next("end")
                        .where(new IsA())
                        .within(Time.seconds(5));

        DataStream<String> result = CEP.pattern(input, pattern).process(new StartEndJoiner());
        result.print();

        env.execute();
    }
}

Please share a simple, complete, reproducible example that illustrates the problem you're having.

David Anderson
  • 39,434
  • 4
  • 33
  • 60
  • I tried the code you wrote and it works. However, I want the data to come from an external source (Kafka). I changed your code to receive data over a socket. I enter A, 5 and A, 10 as values. However, the code does not produce any results. – Furkan May 24 '21 at 06:50
  • I added the changed code to the EDIT section of the question. – Furkan May 24 '21 at 06:56