1

I have a use case that requires refreshing a side input periodically. I've tried several different approaches, but none of them has worked.

    // Builds a Map-valued side input (user id -> display name) that is meant to be re-read
    // from the database on a fixed 120-second cadence.
    final PCollectionView<Map<String, String>> userMap = pipeline
            // Tick source: an unbounded sequence of longs starting at 0, rate-limited to
            // one element per 120 seconds. Each tick is the signal to reload the user table.
            .apply("update-very-n-seconds",
                    GenerateSequence.from(0).withRate(1, Duration.standardSeconds(120L)))
            // Put the ticks into 120-second fixed windows (same length as the tick period).
            .apply("assign-to-fixed-window", Window
                    .<Long>into(FixedWindows.of(Duration.standardSeconds(120)))
            )
            // Sum the ticks of each window into a single Long. withoutDefaults() is used because
            // the input is no longer in the global window, so no default value is emitted.
            .apply(Sum.longsGlobally().withoutDefaults())
            // Hand each per-window Long to ReadUserData, which outputs the user rows as KV pairs.
            .apply("read-user-data", new ReadUserData(userDsFn))
            // Move the rows back into the global window so the view can be consumed from any
            // main-input window; fire repeatedly on processing time and discard fired panes.
            // NOTE(review): with discardingFiredPanes() each firing carries only the rows that
            // arrived since the previous firing, so the map may hold just part of the table if
            // one reload is split across panes - confirm this is acceptable.
            .apply("global-window",
                    Window.<KV<String, String>>into(new GlobalWindows())
                            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
                            .discardingFiredPanes())
            // Expose the KV rows as a Map<String, String> side input view.
            .apply("cache-results-as-map", View.<String, String>asMap());

ReadUserData.java

import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.jdbc.JdbcIO;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

import javax.sql.DataSource;

/**
 * Composite transform that reads the {@code users} table through {@code JdbcIO.readAll()} and
 * emits one {@code KV<String, String>} per row.
 *
 * <p>The key is result column 1 (selected as {@code id}) and the value is result column 2 (the
 * first and last name joined with a single space). The query declares no bind parameters, so the
 * value of each incoming {@code Long} does not influence the SQL; per the JdbcIO documentation,
 * {@code readAll()} runs the query once for every input element, which makes the input act
 * purely as a "reload now" signal.
 */
public class ReadUserData extends PTransform<PCollection<Long>, PCollection<KV<String, String>>> {

    /** SQL executed for every input element; column 1 becomes the key, column 2 the value. */
    private static final String USER_QUERY =
            "select id, concat(first_name, ' ', last_name) from users";

    /** Supplies the JDBC {@link DataSource}; assigned once at construction and never replaced. */
    private final SerializableFunction<Void, DataSource> dataSourceProviderFn;

    /**
     * Creates the transform.
     *
     * @param dataSourceProviderFn function passed to {@code JdbcIO} to obtain the
     *     {@link DataSource} used for the query
     */
    public ReadUserData(SerializableFunction<Void, DataSource> dataSourceProviderFn) {
        this.dataSourceProviderFn = dataSourceProviderFn;
    }

    /**
     * Applies the JDBC read to the input collection.
     *
     * @param input the elements handed to {@code JdbcIO.readAll()}
     * @return the rows of the user query as {@code (id, full name)} pairs
     */
    @Override
    public PCollection<KV<String, String>> expand(PCollection<Long> input) {
        // NOTE(review): both columns are encoded with StringUtf8Coder. If the database can return
        // NULL for either column (e.g. a concat over a NULL name part), the mapper produces a null
        // key/value that this coder may reject - confirm against the schema.
        return input.apply(
                "Read User table.",
                JdbcIO.<Long, KV<String, String>>readAll()
                        .withDataSourceProviderFn(dataSourceProviderFn)
                        .withQuery(USER_QUERY)
                        .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
                        // The explicit type witness on readAll() already fixes the mapper's
                        // target type, so no cast is needed on the lambda.
                        .withRowMapper(
                                resultSet -> KV.of(resultSet.getString(1), resultSet.getString(2))));
    }

}

Here is the error.

org.apache.flink.runtime.executiongraph.ExecutionGraph       [] - Combine.globally(SumLong)/Combine.perKey(SumLong) -> Combine.globally(SumLong)/Values/Values/Map/ParMultiDo(Anonymous) -> read-user-data/Read User table./ParDo(Read)/ParMultiDo(Read) -> read-user-data/Read User table./JdbcIO.Reparallelize/Consume/ParDo(Anonymous)/ParMultiDo(Anonymous) -> (read-user-data/Read User table./JdbcIO.Reparallelize/View.AsIterable/MapElements/Map/ParMultiDo(Anonymous), read-user-data/Read User table./JdbcIO.Reparallelize/View.AsIterable/View.CreatePCollectionView/Combine.globally(Concatenate)/WithKeys/AddKeys/Map/ParMultiDo(Anonymous) -> ToKeyedWorkItem) (2/2) (984107a1613dc7e05ddf37750140a5fb) switched from RUNNING to FAILED on contact-pipeline-taskmanager-1-1 @ ip-192-168-183-51.us-west-2.compute.internal (dataPort=46571).
org.apache.beam.sdk.util.UserCodeException: org.apache.flink.streaming.runtime.tasks.ExceptionInChainedOperatorException: Could not forward element to next operator
    at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:39) ~[?:?]

The Beam side input patterns documentation recommends using a ParDo to read external data, but I have not found any working example that refreshes a side input from a database.

Please let me know if there is a way to refresh a side input from a database.

Thanks, Suresh

Suresh
  • 31
  • 1
  • 5

0 Answers0