0

I tried to use rocksdb to cache information required by a ProcessFunction, and following seems to be the only way to get it to work by far:

(1) load data from datastore (eg. mysql) and put the data into rocksdb then close the rocksdb handle in open().

(2) open & close rocksdb handle whenever the processElement() is invoked.

like this:

public static class MatchFunction extends ProcessFunction<TaxiRide, TaxiRide> {
    // keyed, managed state
    // holds an END event if the ride has ended, otherwise a START event
    private ValueState<TaxiRide> rideState;

    private RocksDB rocksdb = null;
    private String dbPath = null;

    @Override
    public void close() throws Exception {
        super.close();
        if(rocksdb != null) {
            rocksdb.close();
        }
    }

    @Override
    public void open(Configuration config) {
        ValueStateDescriptor<TaxiRide> startDescriptor =
                new ValueStateDescriptor<>("saved ride", TaxiRide.class);
        rideState = getRuntimeContext().getState(startDescriptor);

        if(rocksdb == null) {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
            Connection connect = null;
            PreparedStatement preparedStatement = null;
            ResultSet resultSet = null;

            try {
                connect = DriverManager
                        .getConnection("jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&"
                                + "user=user&password=password");
                preparedStatement = connect.prepareStatement("select * from test.feature");
                resultSet = preparedStatement.executeQuery();

                RocksDB.loadLibrary();

                try (final Options options = new Options().setCreateIfMissing(true)) {
                    // a factory method that returns a RocksDB instance
                    dbPath = "/tmp/checkpoints/rocksdb/test01_" + UUID.randomUUID();
                    try (final RocksDB db = RocksDB.open(options, dbPath)) {
                        rocksdb = db;
                        System.out.println("db opened: " + dbPath);
                        String key01 = "key01";
                        String val01 = "val01";

                        while (resultSet.next()) {
                            key01 = resultSet.getString(1);
                            val01 = resultSet.getString(2);

                            System.out.println("before put " + key01 + ":" + val01);
                            rocksdb.put(key01.getBytes(), val01.getBytes());
                            System.out.println("after put " + key01 + ":" + val01);
                        }

                    }
                } catch (RocksDBException e) {
                    // do some error handling
                    e.printStackTrace();
                } finally {
                    if(rocksdb != null) {
                        rocksdb.close();
                        System.out.println("db closed: " + dbPath);
                    }
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                if(resultSet != null) {
                    try {
                        resultSet.close();
                    } catch (SQLException e) {
                        e.printStackTrace();
                    }
                }
                if(preparedStatement != null) {
                    try {
                        preparedStatement.close();
                    } catch (SQLException e) {
                        e.printStackTrace();
                    }
                }
                if(connect != null) {
                    try {
                        connect.close();
                    } catch (SQLException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

    }

    @Override
    public void processElement(TaxiRide ride, Context context, Collector<TaxiRide> out) throws Exception {
        TimerService timerService = context.timerService();

        try (final Options options = new Options().setCreateIfMissing(true)) {
            // a factory method that returns a RocksDB instance
            try (final RocksDB db = RocksDB.open(options, dbPath)) {
                rocksdb = db;
                // System.out.println("db opened: " + dbPath);

                String val01 = new String(rocksdb.get("f8416af7-b895-4f28-bcea-be1eef6bbdb2".getBytes()));
                // System.out.println(">>> val01 = " + val01);

                rocksdb.close();
                // System.out.println("db closed: " + dbPath);
            }
        } catch (RocksDBException e) {
            // do some error handling
            e.printStackTrace();
        }

        if (ride.isStart) {
            // the matching END might have arrived first (out of order); don't overwrite it
            if (rideState.value() == null) {
                rideState.update(ride);
            }
        } else {
            rideState.update(ride);
        }

        timerService.registerEventTimeTimer(ride.getEventTime() + 120 * 60 * 1000);
    }

    @Override
    public void onTimer(long timestamp, OnTimerContext context, Collector<TaxiRide> out) throws Exception {
        TaxiRide savedRide = rideState.value();
        if (savedRide != null && savedRide.isStart) {
            out.collect(savedRide);
        }
        rideState.clear();
    }
}

This is very inefficient since lots of IO happens in processElement(). This ProcessFunction was able to process all data in 10 minutes, it takes more then 40 minutes to process partial data after adding the rocksdb related lines. So I tried to resuse the rocksdb handled created in open() with the following implementation.

public static class MatchFunction extends ProcessFunction<TaxiRide, TaxiRide> {
        // keyed, managed state
        // holds an END event if the ride has ended, otherwise a START event
        private ValueState<TaxiRide> rideState;

        private RocksDB rocksdb = null;
        private String dbPath = null;

        @Override
        public void close() throws Exception {
            super.close();
            if(rocksdb != null) {
                rocksdb.close();
            }
        }

        @Override
        public void open(Configuration config) {
            ValueStateDescriptor<TaxiRide> startDescriptor =
                    new ValueStateDescriptor<>("saved ride", TaxiRide.class);
            rideState = getRuntimeContext().getState(startDescriptor);

            if(rocksdb == null) {
                try {
                    Class.forName("com.mysql.jdbc.Driver");
                } catch (ClassNotFoundException e) {
                    e.printStackTrace();
                }
                Connection connect = null;
                PreparedStatement preparedStatement = null;
                ResultSet resultSet = null;

                try {
                    connect = DriverManager
                            .getConnection("jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&"
                                    + "user=user&password=password");
                    preparedStatement = connect.prepareStatement("select * from test.feature");
                    resultSet = preparedStatement.executeQuery();

                    RocksDB.loadLibrary();

                    try (final Options options = new Options().setCreateIfMissing(true)) {
                        // a factory method that returns a RocksDB instance
                        dbPath = "/tmp/checkpoints/rocksdb/test01_" + UUID.randomUUID();
                        try (final RocksDB db = RocksDB.open(options, dbPath)) {
                            rocksdb = db;
                            System.out.println("db opened: " + dbPath);
                            String key01 = "key01";
                            String val01 = "val01";

                            while (resultSet.next()) {
                                key01 = resultSet.getString(1);
                                val01 = resultSet.getString(2);

                                System.out.println("before put " + key01 + ":" + val01);
                                rocksdb.put(key01.getBytes(), val01.getBytes());
                                System.out.println("after put " + key01 + ":" + val01);
                            }

                        }
                    } catch (RocksDBException e) {
                        // do some error handling
                        e.printStackTrace();
                    } finally {
                    //  if(rocksdb != null) {
                    //      rocksdb.close();
                    //      System.out.println("db closed: " + dbPath);
                    //  }
                    }
                } catch (SQLException e) {
                    e.printStackTrace();
                } finally {
                    if(resultSet != null) {
                        try {
                            resultSet.close();
                        } catch (SQLException e) {
                            e.printStackTrace();
                        }
                    }
                    if(preparedStatement != null) {
                        try {
                            preparedStatement.close();
                        } catch (SQLException e) {
                            e.printStackTrace();
                        }
                    }
                    if(connect != null) {
                        try {
                            connect.close();
                        } catch (SQLException e) {
                            e.printStackTrace();
                        }
                    }
                }
            }

        }

        @Override
        public void processElement(TaxiRide ride, Context context, Collector<TaxiRide> out) throws Exception {
            TimerService timerService = context.timerService();

            //try (final Options options = new Options().setCreateIfMissing(true)) {
            //  // a factory method that returns a RocksDB instance
            //  try (final RocksDB db = RocksDB.open(options, dbPath)) {
            //      rocksdb = db;
            //      System.out.println("db opened: " + dbPath);

                    String val01 = new String(rocksdb.get("f8416af7-b895-4f28-bcea-be1eef6bbdb2".getBytes()));
            //      System.out.println(">>> val01 = " + val01);

            //      rocksdb.close();
            //      System.out.println("db closed: " + dbPath);
            //  }
            //} catch (RocksDBException e) {
            //  // do some error handling
            //  e.printStackTrace();
            //}

            if (ride.isStart) {
                // the matching END might have arrived first (out of order); don't overwrite it
                if (rideState.value() == null) {
                    rideState.update(ride);
                }
            } else {
                rideState.update(ride);
            }

            timerService.registerEventTimeTimer(ride.getEventTime() + 120 * 60 * 1000);
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext context, Collector<TaxiRide> out) throws Exception {
            TaxiRide savedRide = rideState.value();
            if (savedRide != null && savedRide.isStart) {
                out.collect(savedRide);
            }
            rideState.clear();
        }
    }

The problem with this implementation is that it just doesn't work and here is the error message I got:

#
# A fatal error has been detected by the Java Runtime Environment:
#
#  SIGSEGV (0xb) at pc=0x000000012c94cf55, pid=64626, tid=39683
#
# JRE version: Java(TM) SE Runtime Environment (8.0_60-b27) (build 1.8.0_60-b27)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.60-b23 mixed mode bsd-amd64 compressed oops)
# Problematic frame:
# [thread 39171 also had an error]
06:52:56.163 [pool-11-thread-1] INFO  o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend  - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-11-thread-1,5,Flink Task Threads] took 10 ms.
06:52:56.163 [pool-16-thread-1] INFO  o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend  - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-16-thread-1,5,Flink Task Threads] took 12 ms.
C06:52:56.163 [pool-13-thread-1] INFO  o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend  - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-13-thread-1,5,Flink Task Threads] took 9 ms.
  [librocksdbjni-osx.jnilib+0x3ff55]  _Z18rocksdb_get_helperP7JNIEnv_PN7rocksdb2DBERKNS1_11ReadOptionsEPNS1_18ColumnFamilyHandleEP11_jbyteArrayii+0xe5
#
# Failed to write core dump. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again
#
06:52:56.163 [pool-12-thread-1] INFO  o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend  - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-12-thread-1,5,Flink Task Threads] took 13 ms.
[thread 46339 also had an error]
# An error report file with more information is saved as:
# /Users/abc/MyFiles/workspace/flink-java-project/hs_err_pid64626.log
[thread 22279 also had an error]
[thread 33027 also had an error]
#
# If you would like to submit a bug report, please visit:
#   http://bugreport.java.com/bugreport/crash.jsp
# The crash happened outside the Java Virtual Machine in native code.
# See problematic frame for where to report the bug.
#

Process finished with exit code 134 (interrupted by signal 6: SIGABRT)

Detail trace from "/Users/abc/MyFiles/workspace/flink-java-project/hs_err_pid64626.log" can be found in this link (http://memyselfandtaco.blogspot.tw/2018/04/how-to-correctly-access-rocksdb-in.html)

James Yu
  • 399
  • 6
  • 22
  • 1
    What underlying problem are you trying to solve? A more straightforward solution might be to load the data from MySQL into flink state, and then configure Flink to use RocksDB as the state backend if that's warranted. – David Anderson Apr 04 '18 at 01:56
  • @DavidAnderson thanks for your advise, it works smoothly. – James Yu Apr 04 '18 at 08:41

0 Answers0