0

I'm struggling with the following task: write a C program which takes two parameters: input.bin and output.bin.

  • input.bin and output.bin are binary files
  • input.bin can contain max 65535 uint16_t numbers
  • file output.bin must be created by the program and must contain the numbers from input.bin sorted in ascending order
  • your program can work with only 256 KB RAM and 2MB disk space

So, I figured out that I can try to do it with counting sort. And here is what I do:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int main(int argc, char* argv[]) {
    if(argc != 3)
        errx(1, "Usage: %s <input.bin> <output.bin>", argv[0]);

    const char * in = argv[1];
    char * out = argv[2];

    struct stat st;
    if(stat(in, &st) == -1)
        err(2, "fail to stat file %s", in);

    if(st.st_size % sizeof(uint16_t) != 0)
        errx(3, "file %s is corrupted", in);

    if(st.st_size / sizeof(uint16_t) > 0xffff)
        warnx("overflow in file %s may occur", in);

    int fd_i = open(in, O_RDONLY);
    if(fd_i == -1)
        err(4, "error while opening file %s", in);

    uint16_t *counting = malloc(0xffff + 1);

    if(counting == NULL) 
        errx(5, "not enough memory");

    uint16_t buf[sizeof(uint16_t) * 1024];

    ssize_t rd_sz;
    while((rd_sz = read(fd_i, &buf, sizeof(buf))) > 0){
        for(uint32_t i = 0; i < rd_sz; ++i){
            ++counting[buf[i]];
        }       
    }

    close(fd_i);

    ssize_t fd_o = open(out, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP);
    if(fd_o == -1){
        const int _errno = errno;
        close(fd_i);
        free(counting);
        errno = _errno;
        err(6, "error while opening file %s", out);
    }


    size_t MAX = 0xffff + 1;
    size_t pos = 0; // position
    uint32_t i = 0;
    while(i <= MAX){ // iterate over each number
        pos = 0;
        // fill the buffer
        size_t buf_sz = sizeof(uint16_t) * 1024 - 1;
        while(pos < buf_sz && i <= MAX) {   
            if (counting[i] == 0) {
                ++i; // move to next number
            } else {
                buf[pos] = i;
                ++pos;
                --counting[i];
            }
        }

        // write the buffer to the file
        ssize_t wr_sz = write(fd_o, buf, pos);
        if (wr_sz != (ssize_t)pos) {
            err(7, "cannot write %ld bytes to output file", pos);
        }
    }

    close(fd_o);    
    free(counting);
    exit(0);
}

But unfortunately it gives me segmentation fault, and it doesn't work and i cannot figure out why :(

fastAlgo
  • 83
  • 6
  • At what level / line do you get the segfault? – Déjà vu May 16 '20 at 10:21
  • You allocate 0x10000 bytes for your counting array, but you need space for 0x10000 `uint16_t`'s, which is twice that. – M Oehm May 16 '20 at 10:23
  • You must initialize elements of `counting` before starting to count. – MikeCAT May 16 '20 at 10:28
  • @MOehm I change it and the segmentation fault is no longer there, but it doesnt sort it right – fastAlgo May 16 '20 at 10:31
  • Then look at MikeCAT's comment. There's also an error in how you treat the return value of `read` -- it returns te number of bytes, so you must divide that by two. – M Oehm May 16 '20 at 10:34
  • muitiplying `sizeof(uint16_t)` in `uint16_t buf[sizeof(uint16_t) * 1024];` is not harmful but may not be what you want. – MikeCAT May 16 '20 at 10:38

2 Answers2

1

In this part

    ssize_t rd_sz;
    while((rd_sz = read(fd_i, &buf, sizeof(buf))) > 0){
        for(uint32_t i = 0; i < rd_sz; ++i){
            ++counting[buf[i]];
        }       
    }

You used the return value of read() as number of elements.
Unfortunately, what read() returns if number of bytes read and in uint16_t case it is twice the number of elements.
This will lead to out-of-bounds read.

It should be like this:

    ssize_t rd_sz;
    while((rd_sz = read(fd_i, &buf, sizeof(buf))) > 0){
        if (rd_sz % sizeof(uint16_t) != 0){
            puts("sorry, partial read not supported!");
            return 1;
        }
        for(uint32_t i = 0; i < rd_sz / sizeof(uint16_t); ++i){
            ++counting[buf[i]];
        }       
    }

Also the buffer allocation for counting is wrong because:

  • Only 0x10000 bytes are allocated while 0x10000 elements (2 bytes per element) is required (as @MOehm says).
  • The buffer is not initialized before counting.
    uint16_t *counting = malloc(0xffff + 1);

should be

    uint16_t *counting = calloc(0xffff + 1, sizeof(uint16_t));

This calloc() does:

  • allocate 0xffff + 1 elements, whose size is sizeof(uint16_t) bytes
  • Zero-initialize the allocated buffer

One more point:

size_t MAX = 0xffff + 1;

should be

size_t MAX = 0xffff;

because you choose to use <= in i <= MAX and then adding one here will lead to out-of-bounds read in counting[i].

MikeCAT
  • 73,922
  • 11
  • 45
  • 70
0

Where is the segmentation fault now, after I added all the corrections?

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int main(int argc, char* argv[]) {
    if(argc != 3)
        errx(1, "Usage: %s <input.bin> <output.bin>", argv[0]);

    const char * in = argv[1];
    char * out = argv[2];

    struct stat st;
    if(stat(in, &st) == -1)
        err(2, "fail to stat file %s", in);

    if(st.st_size % sizeof(uint16_t) != 0)
        errx(3, "file %s is corrupted", in);

    if(st.st_size / sizeof(uint16_t) > 0xffff)
        warnx("overflow in file %s may occur", in);

    int fd_i = open(in, O_RDONLY);
    if(fd_i == -1)
        err(4, "error while opening file %s", in);

     uint32_t CMAX = sizeof(uint16_t) * (0xffff + 1); 
    uint16_t *counting = malloc(CMAX);

    if(counting == NULL) 
        errx(5, "not enough memory");

     for(uint32_t i = 0; i < CMAX; ++i){
     counting[i] = 0;
     }

    uint16_t buf[1<<10];

    ssize_t rd_sz;
    while((rd_sz = read(fd_i, &buf, sizeof(buf))) > 0){
        for(uint32_t i = 0; i < rd_sz/sizeof(uint16_t); ++i){
            ++counting[buf[i]];
        }       
    }

    close(fd_i);

    ssize_t fd_o = open(out, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP);
    if(fd_o == -1){
        const int _errno = errno;
        close(fd_i);
        free(counting);
        errno = _errno;
        err(6, "error while opening file %s", out);
    }


    size_t MAX = 0xffff + 1;
    size_t pos = 0; // position
    uint32_t i = 0;
    while(i <= MAX){ // iterate over each number
        pos = 0;
        // fill the buffer
        while(pos < sizeof(buf) && i < MAX) {   
            if (counting[i] == 0) {
                ++i; // move to next number
            } else {
                buf[pos] = (uint16_t)i;
                ++pos;
                --counting[i];
            }
        }

        // write the buffer to the file
        ssize_t wr_sz = write(fd_o, &buf, pos);
        if (wr_sz != (ssize_t)pos) {
            err(7, "cannot write %ld bytes to output file", pos);
        }
    }

    close(fd_o);    
    free(counting);
    exit(0);
}
fastAlgo
  • 83
  • 6