I'm using Logstash to migrate data from MySQL to Elasticsearch. My MySQL database has a primary table called product with many relations; the query that selects it contains around 46 left outer joins, and the result set is very large (around 50k rows for a single product). So I plan to split that query into multiple selects: one query for the parent product ids, plus one query per relation using Logstash's jdbc_streaming filter plugin. However, I wonder whether my solution is logical and correct?
This is a simplified config file describing my implementation (it does not cover all the relations):
input {
  jdbc {
    jdbc_driver_library => "mysql-connector-java-5.1.47-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/my-conn?useSSL=false&allowPublicKeyRetrieval=true"
    jdbc_user => "root"
    jdbc_password => "root"
    schedule => "* * * * *"
    #jdbc_paging_enabled => true
    #jdbc_page_size => 10000
    #jdbc_fetch_size => 5
    # Parent query: fetch only the product ids; related rows are fetched per id in the filter section
    statement => "select product_id from product"
    clean_run => false
  }
}

filter {
  # One lookup per relation, executed for each product_id event
  jdbc_streaming {
    jdbc_driver_library => "mysql-connector-java-5.1.47-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/my-conn?useSSL=false&allowPublicKeyRetrieval=true"
    jdbc_user => "root"
    jdbc_password => "root"
    statement => "select * from product_translation where product_id = :id"
    parameters => { "id" => "product_id" }
    target => "productTranslations"
  }

  # Collect everything belonging to the same product into one event
  aggregate {
    task_id => "%{product_id}"
    code => "
      require 'C:\elasticsearch\replication\product-replication\demo.rb' ;
      ProductMapping.startAggregate(event, map)
    "
    push_previous_map_as_event => true
    timeout => 5
    timeout_tags => ["aggregate"]
  }

  # Keep only the aggregated events, drop the intermediate per-row events
  if "aggregate" not in [tags] {
    drop {}
  }
}

output {
  elasticsearch {
    hosts => ["localhost:9200"]
    document_id => "%{productId}"
    document_type => "product"
    index => "test-products"
  }
  stdout { codec => rubydebug }
}
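For completeness, demo.rb defines the ProductMapping class called from the aggregate filter. Roughly, it copies the product id and the rows returned by jdbc_streaming into the aggregate map, something like the simplified sketch below (the field names productId and productTranslations come from the config above; the actual accumulation logic here is only illustrative, and my real class handles more relations):

# demo.rb -- simplified sketch of the aggregation logic
class ProductMapping
  def self.startAggregate(event, map)
    # Remember the id once; it becomes the documentId of the pushed event
    map['productId'] ||= event.get('product_id')

    # Accumulate the relation rows fetched by jdbc_streaming for this product
    map['productTranslations'] ||= []
    translations = event.get('productTranslations')
    map['productTranslations'] += translations if translations
  end
end

With push_previous_map_as_event => true, this map is emitted as a new event once the next product_id arrives (or after the 5-second timeout), and that aggregated event is what ends up in Elasticsearch.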