2

I am trying to compress a file stream with LZO and not getting very far. Specifically, I get a segmentation fault when extracting the archive file created by my compressFileWithLzo1x function.

My main function and prototype declarations are:

#include <stdio.h>
#include <stdlib.h>
#include "lzo/include/lzo/lzo1x.h"

/* Number of bytes requested from the input file per fread() call: 128 KiB. */
#define LZO_IN_CHUNK (128*1024L)
/* Size of the `out` buffers allocated by both functions below: one input
 * chunk plus LZO_IN_CHUNK/16 + 64 + 3 bytes of headroom.
 * NOTE(review): this looks like the worst-case expansion bound given in the
 * LZO documentation for LZO1X -- confirm against the library docs. */
#define LZO_OUT_CHUNK (LZO_IN_CHUNK + LZO_IN_CHUNK/16 + 64 + 3)

/* Both functions return 0 on success and -1 on failure; main() treats any
 * non-zero result as fatal. */
int compressFileWithLzo1x(const char *inFn, const char *outFn);
int extractFileWithLzo1x(const char *inFn);

/*
 * Program entry point.
 *
 * Compresses "test.txt" into "test.txt.lzo1x" and then runs the extractor on
 * the resulting archive. The extractor is only attempted when compression
 * succeeded; if either step reports a non-zero status the process exits with
 * EXIT_FAILURE. The command-line arguments are not used.
 */
int main(int argc, char **argv) {

    const char *archivePath = "test.txt.lzo1x";
    const char *sourcePath = "test.txt";
    int status;

    status = compressFileWithLzo1x(sourcePath, archivePath);
    if (status == 0)
        status = extractFileWithLzo1x(archivePath);

    if (status != 0)
        exit(EXIT_FAILURE);

    return 0;
}

Here is the implementation of my compression function:

int compressFileWithLzo1x(const char *inFn, const char *outFn) {

    FILE *inFnPtr = fopen(outFn, "r");
    FILE *outFnPtr = fopen(outFn, "wb");
    int compressionResult;
    lzo_bytep in;
    lzo_bytep out;
    lzo_voidp wrkmem;
    lzo_uint out_len;
    size_t inResult;

    if (lzo_init() != LZO_E_OK)
        return -1;

    in = (lzo_bytep)malloc(LZO_IN_CHUNK);
    out = (lzo_bytep)malloc(LZO_OUT_CHUNK);
    wrkmem = (lzo_voidp)malloc(LZO1X_1_MEM_COMPRESS);

    do { 
        inResult = fread(in, sizeof(lzo_byte), LZO_IN_CHUNK, inFnPtr);
        if (inResult == 0)
            break;
        compressionResult = lzo1x_1_compress(in, LZO_IN_CHUNK, out, &out_len, wrkmem);
        if ((out_len >= LZO_IN_CHUNK) || (compressionResult != LZO_E_OK))
            return -1;
        if (fwrite(out, sizeof(lzo_byte), (size_t)out_len, outFnPtr) != (size_t)out_len || ferror(outFnPtr))
            return -1;
        fflush(outFnPtr);
    } while (!feof(inFnPtr) && !ferror(inFnPtr));

    free(wrkmem);
    free(out);
    free(in);
    fclose(inFnPtr);
    fclose(outFnPtr);

    return 0;
}

Here is the implementation of my decompression function:

int extractFileWithLzo1x(const char *inFn) {

    FILE *inFnPtr = fopen(inFn, "rb");
    lzo_bytep in = (lzo_bytep)malloc(LZO_IN_CHUNK);
    lzo_bytep out = (lzo_bytep)malloc(LZO_OUT_CHUNK);
    int extractionResult; 
    size_t inResult;
    lzo_uint new_length;

    if (lzo_init() != LZO_E_OK)
        return -1;

    do {
        new_length = LZO_IN_CHUNK;
        inResult = fread(in, sizeof(lzo_byte), LZO_IN_CHUNK, inFnPtr);
        extractionResult = lzo1x_decompress(out, LZO_OUT_CHUNK, in, &new_length, NULL);
        if ((extractionResult != LZO_E_OK) || (new_length != LZO_IN_CHUNK))
            return -1;
        fprintf(stderr, "out: [%s]\n", (unsigned char *)out);
    } while (!feof(inFnPtr) && (!ferror(inFnPtr));

    free(in);
    free(out);
    fclose(inFnPtr);

    return 0;
}

The segmentation fault occurs here:

extractionResult = lzo1x_decompress(out, LZO_OUT_CHUNK, in, &new_length, NULL);

What is wrong with this approach that is causing the segmentation fault?

I hope I haven't left any code out this time. Feel free to let me know if I need to add more information. Thanks in advance for your advice.

Alex Reynolds
  • 95,983
  • 54
  • 240
  • 345

3 Answers

2

You're compressing independent blocks. The LZO decompressor needs the exact byte length of each compressed block: when it decodes the end-of-stream marker it checks whether it has consumed all the input bytes, and returns an error if it hasn't. So you need to store the length of each compressed chunk as well, which means you need a more complex file format. For example:

# compressing, in python-like pseudocode
#
# Output format: a sequence of records, each a 2-byte length field (low byte
# first) followed by the block payload. The field holds the compressed length
# minus one; the value 0xFFFF (two 255 bytes) marks a block stored as-is.
#
# NOTE(review): the else branch does not check that input_len == 65536, while
# the decompressor reads a fixed 65536 raw bytes for the 0xFFFF marker. A
# short final block whose compressed form is 65536 bytes or more would not
# round-trip -- confirm whether that case needs handling.
ifile = open("data", "rb")
ofile = open("data.mylzo", "wb")
input, input_len = ifile.read(65536)
while input_len > 0:
  compressed, compressed_len = lzo1x(input, input_len)
  compressed_len -= 1 # store len-1 of next block
  if compressed_len < 65536 - 1:
    ofile.write(compressed_len & 255) # be sure of endianness in file formats!
    ofile.write(compressed_len >> 8)
    ofile.write(compressed)
  else:
    ofile.write(255) # incompressible block stored it as-is (saves space & time).
    ofile.write(255)
    ofile.write(input)
  input, input_len = ifile.read(65536)
ofile.close()
ifile.close()

# decompressing, in python-like pseudocode
#
# Reads records until a full 2-byte length field can no longer be read.
# The field is decoded low byte first and incremented by one to recover the
# payload length; a decoded length of 65536 (field value 0xFFFF) means the
# next 65536 bytes are raw data and are copied through unchanged.
ifile = open("data.mylzo", "rb")
ofile = open("data", "wb")
compressed_len_s = ifile.read(2)
while len(compressed_len_s) == 2:
  compressed_len = (compressed_len_s[0] | (compressed_len_s[1] << 8)) + 1
  if compressed_len == 65536:
    ofile.write(ifile.read(65536)) # this can be done without copying
  else:
    compressed = ifile.read(compressed_len)
    decompressed = lzo1x_decompress(compressed, compressed_len)
    ofile.write(decompressed)
  compressed_len_s = ifile.read(2)
ofile.close()
ifile.close()

If you want to be able to decompress the chunks without skipping (either for decompression in parallel or random access) you should place the lengths of compressed chunks at the beginning, before the first chunk. Precede them with the number of chunks.

The last chunk can be shorter than 64k, and it may be incompressible, but we still store its compressed form even if that is longer than the uncompressed data, because only full 64k blocks are stored as-is. As a consequence, if the entire file is shorter than 64k and incompressible, it will grow.

Z.T.
  • 939
  • 8
  • 20
1

I think you are opening the wrong file in int compressFileWithLzo1x:

FILE *inFnPtr = fopen(outFn, "r");

it should be

FILE *inFnPtr = fopen(inFn, "r");
gambler
  • 11
  • 1
1

The code you've given won't compile (spurious = in the #defines; inFilePtr instead of inFnPtr in various places, etc.). But:

  1. When compressing, you are not taking account of the actual amount of data returned by the fread(), which might well be less than LZO_IN_CHUNK.

    compressionResult = lzo1x_1_compress(in, LZO_IN_CHUNK, out, &out_len, wrkmem);
    

    should probably be

    compressionResult = lzo1x_1_compress(in, inResult, out, &out_len, wrkmem);
    

    (This is unlikely to be the problem, but will add bogus junk at the end of the file.)

  2. When decompressing, you have a similar problem, and the in / out arguments are the wrong way round, which is likely to be the cause of your segfault.

    extractionResult = lzo1x_decompress(out, LZO_OUT_CHUNK, in, &new_length, NULL);
    

    should probably be

    extractionResult = lzo1x_decompress(in, inResult, out, &new_length, NULL);
    
Matthew Slattery
  • 45,290
  • 8
  • 103
  • 119
  • Getting closer, I think, but doing `fprintf()` statements before and after the `lzo1x_1_compress()` call (which look at the `in` and `out` buffers) suggests the textual data are not being compressed. – Alex Reynolds Nov 21 '10 at 01:01