I have a requirement to query the multiple data sources in single SQL query. I saw Calcite provides this with a JDBC adapter. While running this, I encountered the following challenges:
Execution was happening in memory which causes OOM on big data.
I saw there is an option for Spark. If this option is enabled, will the query be executed using Spark?
If the answer to the 2nd question is yes, when I am trying to execute the query using the Spark option, I am getting the following exception:
Caused by: java.lang.NullPointerException
at CalciteProgram84650.bind(Unknown Source)
at org.apache.calcite.jdbc.CalcitePrepare$CalciteSignature.enumerable(CalcitePrepare.java:355)
at org.apache.calcite.jdbc.CalciteConnectionImpl.enumerable(CalciteConnectionImpl.java:309)
at org.apache.calcite.jdbc.CalciteMetaImpl._createIterable(CalciteMetaImpl.java:506)
at org.apache.calcite.jdbc.CalciteMetaImpl.createIterable(CalciteMetaImpl.java:497)
at org.apache.calcite.avatica.AvaticaResultSet.execute(AvaticaResultSet.java:182)
at org.apache.calcite.jdbc.CalciteResultSet.execute(CalciteResultSet.java:64)
at org.apache.calcite.jdbc.CalciteResultSet.execute(CalciteResultSet.java:43)
at org.apache.calcite.avatica.AvaticaConnection$1.execute(AvaticaConnection.java:667)
at org.apache.calcite.jdbc.CalciteMetaImpl.prepareAndExecute(CalciteMetaImpl.java:566)
at org.apache.calcite.avatica.AvaticaConnection.prepareAndExecuteInternal(AvaticaConnection.java:675)
at org.apache.calcite.avatica.AvaticaStatement.executeInternal(AvaticaStatement.java:156)
... 2 more
- And please let me know how to overcome OOM when big data execution.
Java class
package com.sixdee.calcite;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;
import org.apache.calcite.util.Sources;
public class MultiJDBCSchemaJoinTest {
public static void main(String[] args) {
Connection connection = null;
Statement statement = null;
ResultSet resultSet = null;
try {
Properties info = new Properties();
info.put("model", jsonPath("model"));
info.put("spark", "true");
connection = DriverManager.getConnection("jdbc:calcite:", info);
String sql = "SELECT SUB_DETAILS.MSISDN FROM DB1.SUB_DETAILS";
statement = connection.createStatement();
resultSet = statement.executeQuery(sql);
while (resultSet.next()) {
System.out.println(resultSet.getString(1));
}
} catch (Exception exception) {
exception.printStackTrace();
} finally {
if (resultSet != null) {
try {
resultSet.close();
} catch (Exception exception) {
exception.printStackTrace();
} finally {
resultSet = null;
}
}
if (statement != null) {
try {
statement.close();
} catch (Exception exception) {
exception.printStackTrace();
} finally {
statement = null;
}
}
if (connection != null) {
try {
connection.close();
} catch (Exception exception) {
exception.printStackTrace();
} finally {
connection = null;
}
}
}
}
public static String jsonPath(String model) {
return resourcePath(model + ".json");
}
public static String resourcePath(String path) {
return Sources.of(MultiJDBCSchemaJoinTest.class.getResource("/" + path)).file().getAbsolutePath();
}
}
Model
{
"version": "1.0",
"defaultSchema": "DB",
"schemas": [ {
"type": "jdbc",
"name": "DB1",
"jdbcUser": "root",
"jdbcPassword": "admin",
"jdbcUrl": "jdbc:mysql://localhost:3306/ignite",
"jdbcSchema": "ignite"
}, {
"type": "jdbc",
"name": "DB2",
"jdbcUser": "SYSTEM",
"jdbcPassword": "SYSTEM",
"jdbcUrl": "jdbc:oracle:thin:@localhost:1521:xe",
"jdbcSchema": "SYSTEM"
}]
}