1

I am writing a ResultSet to a Parquet file using AvroParquetWriter. One column in the ResultSet is a java.sql.Timestamp. When writing, I get the following exception:

java.sql.Timestamp cannot be cast to java.lang.Number

Calling addLogicalTypeConversion does not help. The code is below. Please suggest how to make the logical-type conversion work with this code:

// Register Avro's built-in date/time conversions on the generic data model.
// NOTE(review): addLogicalTypeConversion only takes effect for schema fields
// that actually carry a logicalType (e.g. "timestamp-millis" on a LONG). The
// schema produced by ResultSetSchemaGenerator below creates plain LONG fields
// with no logical type, so none of these conversions are ever consulted while
// writing — verify the generated schema before relying on them.
GenericData timeSupport = GenericData.get();
timeSupport.addLogicalTypeConversion(new TimeConversions.DateConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimeMillisConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion());
timeSupport.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion());

 OutputFile opHadoop = HadoopOutputFile.fromPath(outputPath, new Configuration());
 SchemaResults schemaResults = new ResultSetSchemaGenerator().generateSchema(resultSet, schemaName, namespace);

 ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter.<GenericRecord> builder(opHadoop)
                .withSchema(schemaResults.getParsedSchema())
                .withDataModel(timeSupport)
                .withCompressionCodec(CompressionCodecName.SNAPPY)
                .build();
List<GenericRecord> records = new ArrayList<>();
while (resultSet.next()) {
    GenericRecordBuilder builder = new GenericRecordBuilder(schemaResults.getParsedSchema())
    
    for (SchemaSqlMapping mapping : schemaResults.getMappings()) {
         builder.set(
                    schemaResults.getParsedSchema().getField(mapping.getSchemaName()),
                    extractResult(mapping, resultSet));
        
    }
    
    GenericRecord record = builder.build();
    records.add(record);
}

for (GenericRecord record : records) {
    parquetWriter.write(record);
}

parquetWriter.close();

/**
     * Extracts the appropriate value from the ResultSet using the given
     * SchemaSqlMapping.
     *
     * @param mapping
     * @param resultSet
     * @return
     * @throws SQLException
     */
    public Object extractResult(SchemaSqlMapping mapping, ResultSet resultSet) throws SQLException {
        switch (mapping.getSqlType()) {
        case Types.BOOLEAN:
            return resultSet.getBoolean(mapping.getColumnIndex());
        case Types.TINYINT:
        case Types.SMALLINT:
        case Types.INTEGER:
        case Types.BIGINT:
        case Types.ROWID:
            return resultSet.getInt(mapping.getColumnIndex());
        case Types.CHAR:
        case Types.VARCHAR:
        case Types.LONGVARCHAR:
        case Types.NCHAR:
        case Types.NVARCHAR:
        case Types.LONGNVARCHAR:
        case Types.SQLXML:
            return resultSet.getString(mapping.getColumnIndex());
        case Types.REAL:
        case Types.FLOAT:
            return resultSet.getFloat(mapping.getColumnIndex());
        case Types.DOUBLE:
            return resultSet.getDouble(mapping.getColumnIndex());
        case Types.NUMERIC:
            return resultSet.getBigDecimal(mapping.getColumnIndex());
        case Types.DECIMAL:
            return resultSet.getBigDecimal(mapping.getColumnIndex());
        case Types.DATE:
            return resultSet.getDate(mapping.getColumnIndex());
        case Types.TIME:
        case Types.TIME_WITH_TIMEZONE:
            return resultSet.getTime(mapping.getColumnIndex());
        case Types.TIMESTAMP:
        case Types.TIMESTAMP_WITH_TIMEZONE:
            return resultSet.getTimestamp(mapping.getColumnIndex());
        case Types.BINARY:
        case Types.VARBINARY:
        case Types.LONGVARBINARY:
        case Types.NULL:
        case Types.OTHER:
        case Types.JAVA_OBJECT:
        case Types.DISTINCT:
        case Types.STRUCT:
        case Types.ARRAY:
        case Types.BLOB:
        case Types.CLOB:
        case Types.REF:
        case Types.DATALINK:
        case Types.NCLOB:
        case Types.REF_CURSOR:
            return resultSet.getByte(mapping.getColumnIndex());
        default:
            return resultSet.getString(mapping.getColumnIndex());
    }

ResultSetSchemaGenerator class:

public class ResultSetSchemaGenerator {
        /**
         * Builds an Avro record schema — plus the per-column SQL-to-schema
         * mappings — from the metadata of the supplied {@link ResultSet}.
         *
         * @param resultSet source of column metadata; may be null, yielding a
         *                  record with no fields
         * @param name      record name
         * @param nameSpace record namespace
         * @return the parsed schema together with its column mappings
         * @throws SQLException if the metadata cannot be read
         */
        public SchemaResults generateSchema(ResultSet resultSet, String name, String nameSpace) throws SQLException {

            Schema recordSchema = Schema.createRecord(name, null, nameSpace, false);
            List<SchemaSqlMapping> columnMappings = new ArrayList<>();
            List<Schema.Field> recordFields = new ArrayList<>();

            if (resultSet != null) {
                ResultSetMetaData metaData = resultSet.getMetaData();
                for (int column = 1, count = metaData.getColumnCount(); column <= count; column++) {
                    // Strip characters that are not legal in Avro field names.
                    String columnName = metaData.getColumnName(column).replaceAll("[^a-zA-Z0-9_]", "");
                    int sqlType = metaData.getColumnType(column);
                    String fieldName = columnName.toLowerCase();
                    Schema.Type fieldType = parseSchemaType(sqlType);

                    columnMappings.add(new SchemaSqlMapping(fieldName, columnName, sqlType, column, fieldType));
                    recordFields.add(createNullableField(fieldName, fieldType));
                }
            }

            recordSchema.setFields(recordFields);

            SchemaResults results = new SchemaResults();
            results.setMappings(columnMappings);
            results.setParsedSchema(recordSchema);
            return results;
        }
    
    
    
        /**
         * Maps a {@link java.sql.Types} constant to the Avro schema type used
         * for that column.
         *
         * @param sqlColumnType a java.sql.Types constant
         * @return the Avro schema type; never null — unmapped SQL types fall
         *         back to STRING (matching extractResult's default of
         *         resultSet.getString(...))
         */
        public Schema.Type parseSchemaType(int sqlColumnType) {
             switch (sqlColumnType) {

             case Types.BOOLEAN:
                 return Schema.Type.BOOLEAN;

             case Types.TINYINT:             // 1 byte
             case Types.SMALLINT:            // 2 bytes
             case Types.INTEGER:             // 4 bytes
                 return Schema.Type.INT;     // 32 bit (4 bytes) (signed)

             case Types.ROWID:
             case Types.CHAR:
             case Types.VARCHAR:
             case Types.LONGVARCHAR:
             case Types.NCHAR:
             case Types.NVARCHAR:
             case Types.LONGNVARCHAR:
             case Types.SQLXML:
                 return Schema.Type.STRING;  // unicode string

             case Types.REAL:                // Approximate numerical (mantissa single precision 7)
                 return Schema.Type.FLOAT;   // A 32-bit IEEE single-float

             case Types.DOUBLE:              // Approximate numerical (mantissa precision 16)
             case Types.DECIMAL:             // Exact numerical (5 - 17 bytes)
             case Types.NUMERIC:             // Exact numerical (5 - 17 bytes)
             case Types.FLOAT:               // Approximate numerical (mantissa precision 16)
                 return Schema.Type.DOUBLE;  // A 64-bit IEEE double-float

             case Types.DATE:
             case Types.TIME:
             case Types.TIMESTAMP:
             case Types.TIME_WITH_TIMEZONE:
             case Types.TIMESTAMP_WITH_TIMEZONE:
             case Types.BIGINT:              // 8 bytes
                 return Schema.Type.LONG;    // 64 bit (signed)

             case Types.BINARY:
             case Types.VARBINARY:
             case Types.LONGVARBINARY:
             case Types.NULL:
             case Types.OTHER:
             case Types.JAVA_OBJECT:
             case Types.DISTINCT:
             case Types.STRUCT:
             case Types.ARRAY:
             case Types.BLOB:
             case Types.CLOB:
             case Types.REF:
             case Types.DATALINK:
             case Types.NCLOB:
             case Types.REF_CURSOR:
                 return Schema.Type.BYTES;   // sequence of bytes
         }

            // Previously returned null here, which made createNullableField
            // throw a NullPointerException via Schema.create(null) for any
            // SQL type not listed above. Fall back to STRING instead.
            return Schema.Type.STRING;
        }
    
        /**
         * Creates a nullable field: a union of the requested type and NULL,
         * built via {@link Schema#createUnion(List)}.
         *
         * @param columnName the schema field name
         * @param type       the non-null branch of the union
         * @return a field whose schema is the union [type, "null"]
         */
        public Schema.Field createNullableField(String columnName, Schema.Type type) {
            Schema fieldSchema = Schema.createUnion(
                    java.util.Arrays.asList(Schema.create(type), Schema.create(Schema.Type.NULL)));
            return new Schema.Field(columnName, fieldSchema, null, null);
        }
javaseeker
  • 73
  • 1
  • 9

0 Answers