
I have recently started using Debezium to capture change data in real time and sink it to a target database.

Instead of Kafka, I use Azure Event Hubs with Kafka Connect to capture changes from SQL Server, and the Confluent JDBC sink connector to write the changed data to the target database, which is also SQL Server.

I understand Debezium works asynchronously to minimize the impact on the source database's performance, but is there any way I can increase the streaming throughput?

I recently spun up an Event Hubs namespace with a minimum of 10 throughput units and auto-inflate up to 20, so I expect Debezium + Kafka Connect + Event Hubs to ingest 10-20 MB/second, with egress of 20-40 MB/second.

However, the actual performance is much worse. I manually imported 10k records (less than 6 MB) into the source database, so I expected Debezium and the sink connector to capture the changes and write them to the target database within a few seconds.

Instead of arriving all at once, the data trickles into the target database record by record through the sink connector.

The following is my configuration. Please let me know if I need to change anything to improve performance. Any help would be very much appreciated.

Kafka Connect:

bootstrap.servers=sqldbcdc.servicebus.windows.net:9093

group.id=connect-cluster-group

# connect internal topic names, auto-created if not exists
config.storage.topic=connect-cluster-configs
offset.storage.topic=connect-cluster-offsets
status.storage.topic=connect-cluster-status

# internal topic replication factors - auto 3x replication in Azure Storage
config.storage.replication.factor=1
offset.storage.replication.factor=1
status.storage.replication.factor=1

rest.advertised.host.name=connect
offset.flush.interval.ms=10000
connections.max.idle.ms=180000
metadata.max.age.ms=180000
auto.register.schemas=false


key.converter=org.apache.kafka.connect.json.JsonConverter
value.converter=org.apache.kafka.connect.json.JsonConverter
internal.key.converter=org.apache.kafka.connect.json.JsonConverter
internal.value.converter=org.apache.kafka.connect.json.JsonConverter

internal.key.converter.schemas.enable=false
internal.value.converter.schemas.enable=false

# required EH Kafka security settings
security.protocol=SASL_SSL
sasl.mechanism=PLAIN
sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required username="$ConnectionString" password="Endpoint=sb://sqldbcdc.servicebus.windows.net/;SharedAccessKeyName=RootManageSharedAccessKey;SharedAccessKey=**************************=";

producer.security.protocol=SASL_SSL
producer.sasl.mechanism=PLAIN
producer.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required username="$ConnectionString" password="Endpoint=sb://sqldbcdc.servicebus.windows.net/;SharedAccessKeyName=RootManageSharedAccessKey;SharedAccessKey=**************************=";

consumer.security.protocol=SASL_SSL
consumer.sasl.mechanism=PLAIN
consumer.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required username="$ConnectionString" password="Endpoint=sb://sqldbcdc.servicebus.windows.net/;SharedAccessKeyName=RootManageSharedAccessKey;SharedAccessKey=**************************=";

plugin.path=C:\kafka\libs
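
One thing I am considering is producer batching: as far as I understand, Kafka Connect forwards producer.* settings from the worker config to the producers that write to Event Hubs, so something like the following might let records be sent in larger batches (the values below are only guesses I would still need to test):

# illustrative producer batching settings (values are assumptions, not verified)
producer.batch.size=65536
producer.linger.ms=100
producer.max.request.size=1048576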

SQL Connector:

{
 "name": "sql-server-connection", 
 "config": {
 "connector.class": "io.debezium.connector.sqlserver.SqlServerConnector",
 "tasks.max": "1",
 "database.hostname": "localhost",
 "database.port": "1433",
 "database.user": "sa",
 "database.password": "******",
 "database.dbname": "demodb",
 "database.server.name": "dbservername",
 "table.whitelist": "dbo.portfolios",
 "database.history":"io.debezium.relational.history.MemoryDatabaseHistory",
 "transforms": "route",
 "transforms.route.type": "org.apache.kafka.connect.transforms.RegexRouter",
 "transforms.route.regex": "([^.]+)\\.([^.]+)\\.([^.]+)",
 "transforms.route.replacement": "$3"
 }
}
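
From the Debezium documentation I understand the source connector also has queue/batch settings that control how many change events are handed over per poll; this is a sketch of extra entries I might add inside the "config" block above (the numbers are placeholders, not values I have validated):

 "max.batch.size": "4096",
 "max.queue.size": "16384",
 "poll.interval.ms": "500"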

Sink Connector:

{
  "name": "jdbc-sink",
  "config":{
      "connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
      "tasks.max": "1",
      "topics": "portfolios",
      "connection.url": "jdbc:sqlserver://localhost:1433;instance=NEWMSSQLSERVER;databaseName=demodb",
      "connection.user":"sa",
      "connection.password":"*****",
      "batch.size":2000,
      "transforms": "unwrap",
      "transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
      "transforms.unwrap.drop.tombstones": false,
      "transforms.unwrap.delete.handling.mode": "none",
      "auto.create": "true",
      "insert.mode": "upsert",
      "delete.enabled":true,
      "pk.fields": "portfolio_id",
      "pk.mode": "record_key",
      "table.name.format": "replicated_portfolios"
  }
}
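
On the sink side, I believe per-connector consumer overrides (consumer.override.*) could let the JDBC sink fetch more records per poll to match its batch.size of 2000; as far as I know this requires connector.client.config.override.policy=All on the worker, and the values below are just assumptions to experiment with:

 "consumer.override.max.poll.records": "2000",
 "consumer.override.fetch.min.bytes": "65536"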