I am writing a ResultSet to a Parquet file using AvroParquetWriter. One column in the ResultSet is a java.sql.Timestamp. When writing, I get the exception:
java.sql.Timestamp cannot be cast to java.lang.Number
Adding addLogicalTypeConversion does not help. The code is below. Please suggest how to make the LogicalTypeConversion work with this code:
// Register logical-type conversions on the data model.
// NOTE(review): these conversions only apply when the Avro schema actually
// carries the matching logical types (e.g. a LONG field annotated with
// timestamp-millis). ResultSetSchemaGenerator below emits plain primitive
// types, so none of these conversions ever fire; the values passed to the
// record builder must already match the primitive schema types
// (see extractResult).
GenericData timeSupport = GenericData.get();
timeSupport.addLogicalTypeConversion(new TimeConversions.DateConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimeMillisConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion());

OutputFile opHadoop = HadoopOutputFile.fromPath(outputPath, new Configuration());
SchemaResults schemaResults = new ResultSetSchemaGenerator().generateSchema(resultSet, schemaName, namespace);

// try-with-resources guarantees the writer is closed (and the Parquet footer
// written) even if an exception occurs mid-write.
try (ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter.<GenericRecord>builder(opHadoop)
        .withSchema(schemaResults.getParsedSchema())
        .withDataModel(timeSupport)
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .build()) {
    // Write each record as soon as it is built instead of buffering the
    // whole ResultSet in an intermediate List (avoids OOM on large results).
    while (resultSet.next()) {
        // fixed: statement was missing its terminating ';'
        GenericRecordBuilder builder = new GenericRecordBuilder(schemaResults.getParsedSchema());
        for (SchemaSqlMapping mapping : schemaResults.getMappings()) {
            builder.set(
                schemaResults.getParsedSchema().getField(mapping.getSchemaName()),
                extractResult(mapping, resultSet));
        }
        parquetWriter.write(builder.build());
    }
}
/**
* Extracts the appropriate value from the ResultSet using the given
* SchemaSqlMapping.
*
* @param mapping
* @param resultSet
* @return
* @throws SQLException
*/
public Object extractResult(SchemaSqlMapping mapping, ResultSet resultSet) throws SQLException {
switch (mapping.getSqlType()) {
case Types.BOOLEAN:
return resultSet.getBoolean(mapping.getColumnIndex());
case Types.TINYINT:
case Types.SMALLINT:
case Types.INTEGER:
case Types.BIGINT:
case Types.ROWID:
return resultSet.getInt(mapping.getColumnIndex());
case Types.CHAR:
case Types.VARCHAR:
case Types.LONGVARCHAR:
case Types.NCHAR:
case Types.NVARCHAR:
case Types.LONGNVARCHAR:
case Types.SQLXML:
return resultSet.getString(mapping.getColumnIndex());
case Types.REAL:
case Types.FLOAT:
return resultSet.getFloat(mapping.getColumnIndex());
case Types.DOUBLE:
return resultSet.getDouble(mapping.getColumnIndex());
case Types.NUMERIC:
return resultSet.getBigDecimal(mapping.getColumnIndex());
case Types.DECIMAL:
return resultSet.getBigDecimal(mapping.getColumnIndex());
case Types.DATE:
return resultSet.getDate(mapping.getColumnIndex());
case Types.TIME:
case Types.TIME_WITH_TIMEZONE:
return resultSet.getTime(mapping.getColumnIndex());
case Types.TIMESTAMP:
case Types.TIMESTAMP_WITH_TIMEZONE:
return resultSet.getTimestamp(mapping.getColumnIndex());
case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:
case Types.NULL:
case Types.OTHER:
case Types.JAVA_OBJECT:
case Types.DISTINCT:
case Types.STRUCT:
case Types.ARRAY:
case Types.BLOB:
case Types.CLOB:
case Types.REF:
case Types.DATALINK:
case Types.NCLOB:
case Types.REF_CURSOR:
return resultSet.getByte(mapping.getColumnIndex());
default:
return resultSet.getString(mapping.getColumnIndex());
}
ResultSetSchemaGenerator class:
public class ResultSetSchemaGenerator {
/**
 * Builds an Avro record schema from the metadata of the supplied ResultSet.
 * Every column becomes a nullable field (a union with NULL). Note: fields
 * carry plain primitive types only — no Avro logical types are attached, so
 * logical-type conversions registered on the data model will not apply.
 *
 * @param resultSet source of the column metadata (may be null, yielding an empty record)
 * @param name record name
 * @param nameSpace record namespace
 * @return the generated schema together with the column-to-field mappings
 * @throws SQLException if the metadata cannot be read
 */
public SchemaResults generateSchema(ResultSet resultSet, String name, String nameSpace) throws SQLException {
    List<SchemaSqlMapping> mappings = new ArrayList<>();
    List<Schema.Field> fields = new ArrayList<>();
    if (resultSet != null) {
        ResultSetMetaData meta = resultSet.getMetaData();
        for (int col = 1, n = meta.getColumnCount(); col <= n; col++) {
            // Strip characters that are not legal in Avro field names.
            String columnName = meta.getColumnName(col).replaceAll("[^a-zA-Z0-9_]", "");
            String fieldName = columnName.toLowerCase();
            int sqlType = meta.getColumnType(col);
            Schema.Type avroType = parseSchemaType(sqlType);
            mappings.add(new SchemaSqlMapping(fieldName, columnName, sqlType, col, avroType));
            fields.add(createNullableField(fieldName, avroType));
        }
    }
    Schema recordSchema = Schema.createRecord(name, null, nameSpace, false);
    recordSchema.setFields(fields);
    SchemaResults schemaResults = new SchemaResults();
    schemaResults.setMappings(mappings);
    schemaResults.setParsedSchema(recordSchema);
    return schemaResults;
}
/**
 * Maps a {@link java.sql.Types} constant to the Avro primitive type used for
 * that column. Temporal types are mapped to plain LONG (epoch milliseconds)
 * with no logical type attached.
 *
 * @param sqlColumnType a constant from {@link java.sql.Types}
 * @return the Avro primitive type; STRING for unrecognized SQL types
 */
public Schema.Type parseSchemaType(int sqlColumnType) {
    switch (sqlColumnType) {
    case Types.BOOLEAN:
        return Schema.Type.BOOLEAN;
    case Types.TINYINT:   // 1 byte
    case Types.SMALLINT:  // 2 bytes
    case Types.INTEGER:   // 4 bytes
        return Schema.Type.INT;     // 32 bit (4 bytes) (signed)
    case Types.ROWID:
    case Types.CHAR:
    case Types.VARCHAR:
    case Types.LONGVARCHAR:
    case Types.NCHAR:
    case Types.NVARCHAR:
    case Types.LONGNVARCHAR:
    case Types.SQLXML:
        return Schema.Type.STRING;  // unicode string
    case Types.REAL:                // Approximate numerical (mantissa single precision 7)
        return Schema.Type.FLOAT;   // A 32-bit IEEE single-float
    case Types.DOUBLE:              // Approximate numerical (mantissa precision 16)
    case Types.DECIMAL:             // Exact numerical (5 - 17 bytes)
    case Types.NUMERIC:             // Exact numerical (5 - 17 bytes)
    case Types.FLOAT:               // Approximate numerical (mantissa precision 16)
        return Schema.Type.DOUBLE;  // A 64-bit IEEE double-float
    case Types.DATE:
    case Types.TIME:
    case Types.TIMESTAMP:
    case Types.TIME_WITH_TIMEZONE:
    case Types.TIMESTAMP_WITH_TIMEZONE:
    case Types.BIGINT:              // 8 bytes
        return Schema.Type.LONG;    // 64 bit (signed)
    case Types.BINARY:
    case Types.VARBINARY:
    case Types.LONGVARBINARY:
    case Types.NULL:
    case Types.OTHER:
    case Types.JAVA_OBJECT:
    case Types.DISTINCT:
    case Types.STRUCT:
    case Types.ARRAY:
    case Types.BLOB:
    case Types.CLOB:
    case Types.REF:
    case Types.DATALINK:
    case Types.NCLOB:
    case Types.REF_CURSOR:
        return Schema.Type.BYTES;   // sequence of bytes
    default:
        // Previously fell through to `return null`, which made
        // Schema.create(null) in createNullableField throw. Fall back to
        // STRING to match the getString() default in extractResult.
        return Schema.Type.STRING;
    }
}
/**
 * Creates a field whose schema is a union of NULL and the given type, so the
 * column may hold SQL NULL values ({@link Schema#createUnion(List)}).
 *
 * <p>NULL is listed as the first branch of the union, following the Avro
 * convention for nullable fields (a field's default value must match the
 * first union branch).
 *
 * @param columnName the schema column name
 * @param type Schema type for this field
 * @return a nullable field definition with no default value
 */
public Schema.Field createNullableField(String columnName, Schema.Type type) {
    Schema typeSchema = Schema.create(type);
    Schema nullSchema = Schema.create(Schema.Type.NULL);
    List<Schema> branches = new ArrayList<>();
    branches.add(nullSchema);   // null first — Avro nullable-field convention
    branches.add(typeSchema);
    Schema fieldSchema = Schema.createUnion(branches);
    return new Schema.Field(columnName, fieldSchema, null, null);
}