18

I have a Java application that converts JSON messages to Parquet format. Is there any Parquet writer that writes to a buffer or byte stream in Java? Most of the examples I have seen write to files.

naimdjon
  • 3,162
  • 1
  • 20
  • 41
vijju
  • 415
  • 1
  • 5
  • 9

3 Answers3

4

TL;DR: you will need to implement OutputFile, e.g. something along the lines of:

import org.apache.parquet.io.OutputFile;
import org.apache.parquet.io.PositionOutputStream;

import java.io.BufferedOutputStream;
import java.io.IOException;

/**
 * {@link OutputFile} implementation that directs Parquet output into a
 * caller-supplied {@link BufferedOutputStream} instead of a file.
 *
 * <p>The stream handed to Parquet counts every byte it forwards so that
 * {@code getPos()} reports the real write offset; reporting a constant 0
 * produces a broken file.
 */
public class ParquetBufferedWriter implements OutputFile {

    private final BufferedOutputStream out;

    /**
     * @param out destination stream that receives the Parquet bytes
     */
    public ParquetBufferedWriter(BufferedOutputStream out) {
        this.out = out;
    }

    /**
     * Creates a position-tracking stream over the wrapped output.
     *
     * @param blockSizeHint ignored; block sizes are not supported
     */
    @Override
    public PositionOutputStream create(long blockSizeHint) throws IOException {
        return createPositionOutputstream();
    }

    /** Builds a stream that forwards to {@code out} and counts the bytes written through it. */
    private PositionOutputStream createPositionOutputstream() {
        return new PositionOutputStream() {

            // long, because getPos() returns long and output may exceed 2 GiB
            private long pos = 0;

            @Override
            public long getPos() throws IOException {
                return pos;
            }

            @Override
            public void flush() throws IOException {
                // propagate, otherwise bytes stay in the BufferedOutputStream's buffer
                out.flush();
            }

            @Override
            public void close() throws IOException {
                out.close();
            }

            @Override
            public void write(int b) throws IOException {
                out.write(b);
                pos++;
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                // bulk path: avoids falling back to one write(int) call per byte
                out.write(b, off, len);
                pos += len;
            }
        };
    }

    /**
     * Same as {@link #create(long)}; there is no existing content to overwrite.
     *
     * @param blockSizeHint ignored; block sizes are not supported
     */
    @Override
    public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
        return createPositionOutputstream();
    }

    @Override
    public boolean supportsBlockSize() {
        return false;
    }

    @Override
    public long defaultBlockSize() {
        return 0;
    }

}

And your writer would be something like:

    // ParquetBufferedWriter has no no-arg constructor: it must wrap a destination stream.
    // An in-memory buffer is used here; any other OutputStream can be wrapped the same way.
    java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
    ParquetBufferedWriter out = new ParquetBufferedWriter(new java.io.BufferedOutputStream(bytes));
    try (ParquetWriter<Record> writer = AvroParquetWriter.
            <Record>builder(out)
            .withRowGroupSize(DEFAULT_BLOCK_SIZE)
            .withPageSize(DEFAULT_PAGE_SIZE)
            .withSchema(SCHEMA)
            .build()) {

        for (Record record : records) {
            writer.write(record);
        }
    } catch (IOException e) {
        // keep the cause so the original I/O failure is not lost
        throw new IllegalStateException(e);
    }
naimdjon
  • 3,162
  • 1
  • 20
  • 41
  • position (getPos()) should not be 0, otherwise the output is broken. The correct answer is the one by @breadcrumb42, which counts bytes to track the position. Another solution would be to use a CountingOutputStream and derive the position from the number of bytes it has written. – rbrisuda Nov 22 '21 at 12:47
4

I just also needed to write to a stream, so I completed the example given by naimdjon. The following works perfectly fine for me.

/**
 * {@link OutputFile} implementation that directs Parquet output into a
 * caller-supplied {@link BufferedOutputStream} instead of a file.
 *
 * <p>The stream handed to Parquet counts every byte it forwards, so
 * {@code getPos()} reports the real write offset.
 */
class ParquetBufferedWriter implements OutputFile {

    private final BufferedOutputStream out;

    /**
     * @param out destination stream that receives the Parquet bytes
     */
    public ParquetBufferedWriter(BufferedOutputStream out) {
        this.out = out;
    }

    /**
     * Creates a position-tracking stream over the wrapped output.
     *
     * @param blockSizeHint ignored; block sizes are not supported
     */
    @Override
    public PositionOutputStream create(long blockSizeHint) throws IOException {
        return createPositionOutputstream();
    }

    /** Builds a stream that forwards to {@code out} and counts the bytes written through it. */
    private PositionOutputStream createPositionOutputstream() {
        return new PositionOutputStream() {

            // long rather than int: getPos() returns long, and an int counter
            // would wrap to a negative offset once more than 2 GiB is written
            private long pos = 0;

            @Override
            public long getPos() throws IOException {
                return pos;
            }

            @Override
            public void flush() throws IOException {
                out.flush();
            }

            @Override
            public void close() throws IOException {
                out.close();
            }

            @Override
            public void write(int b) throws IOException {
                out.write(b);
                pos++;
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                out.write(b, off, len);
                pos += len;
            }
        };
    }

    /**
     * Same as {@link #create(long)}; there is no existing content to overwrite.
     *
     * @param blockSizeHint ignored; block sizes are not supported
     */
    @Override
    public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
        return createPositionOutputstream();
    }

    @Override
    public boolean supportsBlockSize() {
        return false;
    }

    @Override
    public long defaultBlockSize() {
        return 0;
    }
}
flaxel
  • 4,173
  • 4
  • 17
  • 30
breadcrumb42
  • 165
  • 1
  • 8
-2

You need to write the data into a temp file and then convert the data from the file to an input stream or buffer. Something like this: first, read the temp file data:

StringWriter writer = new StringWriter();
String encoding = StandardCharsets.UTF_8.name();

// try-with-resources: the file handle is released even if the copy throws
try (InputStream targetStream = new DataInputStream(new FileInputStream(tmp1.getAbsoluteFile()))) {
    // NOTE(review): this decodes the file as UTF-8 text; if tmp1 holds binary
    // data (e.g. Parquet), the decoding is lossy — copy raw bytes instead. Confirm intent.
    IOUtils.copy(targetStream, writer, encoding);
}
System.out.println(writer);
user207421
  • 305,947
  • 44
  • 307
  • 483
raj03
  • 445
  • 1
  • 6
  • 19