0

Trying to run a Flink application locally with a local Kinesis stream.

The following code works perfectly (as in, records can be seen in the sink table path), but when I change the watermark from event_ts to event_ts - INTERVAL '10' SECOND, the Flink job keeps running without producing any records at the sink table.

Any thoughts?

Thanks.

# Schema for the Kinesis source: payload columns plus an event-time
# attribute with a watermark declared directly on event_ts (no delay).
source_schema = (
    Schema.new_builder()
    .column("x", DataTypes.INT())
    # some more columns...
    .column("event_ts", DataTypes.TIMESTAMP(3))
    .watermark("event_ts", "event_ts")
    .build()
)

# Kinesis source descriptor reading JSON records from a local
# (e.g. localstack, http://localhost:4566) stream from TRIM_HORIZON.
# NOTE(review): scan.shard.idle.interval / scan.watermark.sync.* are set
# so idle shards do not hold back the global watermark — presumably the
# values are milliseconds; confirm against the connector docs, since a
# stalled watermark is the usual cause of "no output" once a watermark
# delay (event_ts - INTERVAL 'n' SECOND) is introduced.
source = (
    TableDescriptor.for_connector("kinesis")
    .schema(source_schema)
    .option("stream.name", "some_stream")
    .option("aws.endpoint", "http://localhost:4566")
    .option("aws.trust.all.certificates", "true")
    .option("aws.credentials.basic.accesskeyid", "nokey")
    .option("aws.credentials.basic.secretkey", "nokey")
    .option("scan.shard.idle.interval", "200")
    .option("scan.watermark.sync.interval", "200")
    .option("scan.watermark.sync.queue.capacity", "1")
    .option("scan.stream.initpos", "TRIM_HORIZON")
    .option("format", "json")
)

# Filesystem sink descriptor writing JSON files.
# Fix: "file://some_path/" is a malformed file URI — with only two
# slashes, "some_path" is parsed as the URI authority (host), not as a
# path. An absolute local path needs the "file:///" form.
# NOTE(review): in streaming mode the filesystem sink only finalizes
# (commits) in-progress files on checkpoint — confirm checkpointing is
# enabled, otherwise no visible files are produced even when the query
# runs; this interacts with the watermark-delay symptom described above.
sink = (
    TableDescriptor.for_connector("filesystem")
    .schema(
        Schema.new_builder()
        .column("x", DataTypes.INT())
        # some more columns...
        .column("event_ts", DataTypes.TIMESTAMP(3))
        .build()
    )
    .option("sink.partition-commit.policy.kind", "success-file")
    .option("sink.rolling-policy.file-size", "1KB")
    .option("sink.rolling-policy.rollover-interval", "1s")
    .option("path", "file:///some_path/")
    .option("format", "json")
)

# Over-window aggregation (5-second event-time range per partition key)
# inserted into the filesystem sink table.
# Fixes to the SQL string:
#  - AVG takes exactly one argument in Flink SQL; AVG(X, temp) does not
#    parse. Average a single expression (here: x).
#  - "PARTITION BY some column" contained a space in the identifier and
#    cannot parse; replaced with a single placeholder identifier.
# "some_other_columns..." is the asker's placeholder and is kept as-is.
result = table_env.execute_sql("""
    INSERT INTO sink_table 
    SELECT 
        some_other_columns...,
        AVG(x) OVER ( 
            PARTITION BY some_column
            ORDER BY event_ts
            RANGE BETWEEN INTERVAL '5' SECOND PRECEDING AND CURRENT ROW
        ) AS x2,
        event_ts
    FROM source_table
""") 

SinkingTitanic
  • 181
  • 1
  • 12

0 Answers