My Code:
from pyflink.datastream.connectors.jdbc import JdbcSink, JdbcExecutionOptions, JdbcConnectionOptions
from pyflink.common.typeinfo import Types
from pyflink.datastream import StreamExecutionEnvironment
# Path to the PostgreSQL JDBC driver jar that Flink loads at runtime.
JDBC_JAR_PATH = "file:///Users/jar_files/postgresql-42.5.0.jar"

env = StreamExecutionEnvironment.get_execution_environment()
env.add_jars(JDBC_JAR_PATH)
env.set_parallelism(1)

# Row schema for each record: (id INT, title STRING, authors STRING, year INT).
type_info = Types.ROW([Types.INT(), Types.STRING(), Types.STRING(), Types.INT()])

# In-memory test data: (id, title, authors, year) per book.
books = [
    (101, "Stream Processing with Apache Flink", "Fabian Hueske, Vasiliki Kalavri", 2019),
    (102, "Streaming Systems", "Tyler Akidau, Slava Chernyak, Reuven Lax", 2018),
    (103, "Designing Data-Intensive Applications", "Martin Kleppmann", 2017),
    (104, "Kafka: The Definitive Guide", "Gwen Shapira, Neha Narkhede, Todd Palino", 2017),
]
ds = env.from_collection(books, type_info=type_info).name('Source')
def change_id(data):
    """Shift the record's id (field 0) up by 100 and return the mutated record."""
    shifted = data[0] + 100
    data[0] = shifted
    return data
# Declare the output_type of the map. Without it, PyFlink falls back to pickle
# serialization, so the mapped records reach the JVM as byte arrays ([B) and the
# JDBC sink fails with "class [B cannot be cast to class org.apache.flink.types.Row".
ds1 = ds.map(change_id, output_type=type_info)

# Write each row to the `books` table; the four '?' placeholders are filled
# from the row fields in order (id, title, authors, year).
ds2 = ds1.add_sink(
    JdbcSink.sink(
        "insert into books(id, title, authors, year) values (?, ?, ?, ?)",
        type_info,
        JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
        .with_url('jdbc:postgresql://localhost:5432/nhan_su')
        .with_driver_name('org.postgresql.Driver')
        .with_user_name('psql')
        .with_password('psql')
        .build(),
        JdbcExecutionOptions.builder()
        .with_batch_interval_ms(1000)   # flush at least once per second
        .with_batch_size(200)           # or when 200 rows are buffered
        .with_max_retries(5)
        .build()
    ))
env.execute()
Error:
Caused by: java.lang.ClassCastException: class [B cannot be cast to class org.apache.flink.types.Row ([B is in module java.base of loader 'bootstrap'; org.apache.flink.types.Row is in unnamed module of loader 'app') at org.apache.flink.connector.jdbc.internal.executor.SimpleBatchStatementExecutor.executeBatch(SimpleBatchStatementExecutor.java:70)
How do I sink a DataStream to a JDBC sink after applying a map function in PyFlink?
I tried changing the map function's return type to a tuple, but the issue persists.