I'm using Logstash to migrate data from MySQL to Elasticsearch. My MySQL database has a primary table called product with many relations; the query that selects it contains around 46 left outer joins, and the result set is very large (around 50k rows for a single product). So I plan to split that query into multiple selects: one query for the parent product ids, plus one query per relation using Logstash's jdbc_streaming filter plugin. However, I wonder whether my solution is logical and correct?
This is a simplified config file describing my implementation (it does not cover all the relations):
input {
  jdbc {
    jdbc_driver_library => "mysql-connector-java-5.1.47-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/my-conn?useSSL=false&allowPublicKeyRetrieval=true"
    jdbc_user => "root"
    jdbc_password => "root"
    schedule => "* * * * *"
    #jdbc_paging_enabled => true
    #jdbc_page_size => 10000
    #jdbc_fetch_size => 5
    # Parent query: fetch only the product ids; related rows are fetched per id in the filter section
    statement => "select product_id from product"
    clean_run => false
  }
}

filter {
  # One lookup per relation, executed for each product_id event
  jdbc_streaming {
    jdbc_driver_library => "mysql-connector-java-5.1.47-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/my-conn?useSSL=false&allowPublicKeyRetrieval=true"
    jdbc_user => "root"
    jdbc_password => "root"
    statement => "select * from product_translation where product_id = :id"
    parameters => { "id" => "product_id" }
    target => "productTranslations"
  }

  # Collect everything belonging to the same product into one event
  aggregate {
    task_id => "%{product_id}"
    code => "
      require 'C:\elasticsearch\replication\product-replication\demo.rb' ;
      ProductMapping.startAggregate(event, map)
    "
    push_previous_map_as_event => true
    timeout => 5
    timeout_tags => ["aggregate"]
  }

  # Keep only the aggregated events, drop the intermediate per-row events
  if "aggregate" not in [tags] {
    drop {}
  }
}

output {
  elasticsearch {
    hosts => ["localhost:9200"]
    document_id => "%{productId}"
    document_type => "product"
    index => "test-products"
  }
  stdout { codec => rubydebug }
}
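For completeness, demo.rb defines the ProductMapping class called from the aggregate filter. Roughly, it copies the product id and the rows returned by jdbc_streaming into the aggregate map, something like the simplified sketch below (the field names productId and productTranslations come from the config above; the actual accumulation logic here is only illustrative, and my real class handles more relations):

# demo.rb -- simplified sketch of the aggregation logic
class ProductMapping
  def self.startAggregate(event, map)
    # Remember the id once; it becomes the documentId of the pushed event
    map['productId'] ||= event.get('product_id')

    # Accumulate the relation rows fetched by jdbc_streaming for this product
    map['productTranslations'] ||= []
    translations = event.get('productTranslations')
    map['productTranslations'] += translations if translations
  end
end

With push_previous_map_as_event => true, this map is emitted as a new event once the next product_id arrives (or after the 5-second timeout), and that aggregated event is what ends up in Elasticsearch.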