2

I wrote this code to recover 50 images from cs50 pset4 recover. The code can only retrieve 4 images all of which are not the right ones. It is compiling fine. When i use printf to debug it seems like the if(found) piece of code runs many times when name_count == 0 more than it is supposed to.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint8_t BYTE;
const int BLOCK_SIZE = 152;
bool is_a_jpeg(BYTE *buffer);
int main(int argc, char *argv[])
{
//Check if there are exactly two commandline arguments
    if (argc != 2)
    {
        printf("usage: ./IMAGE\n");
        return 1;
    }

//Open a storage device and chack if it has data
    FILE *input = fopen(argv[1], "r");
    if (input == NULL)
    {
        fclose(input);
        printf("Could not open file.\n");
        return 2;
    }
//Create a file to write into and clear it
    FILE *img = NULL;

//Declar an interger of counting images
    int name_count = 0;

// create buffer
    BYTE buffer[BLOCK_SIZE];

//Declare space for saving the filename
    char filename[8];

//Declare a bolean variable used to check for already found images
    bool found = false;

//A function for reading through the device looking for images
    while (fread(buffer, BLOCK_SIZE, 1, input))
    {
//If a jpeg image is found notify the program(set found = true)
//and start writing the data to a new file
        if (is_a_jpeg(buffer))
        {
            found = true;
//If we are not writing the first image, close the previous one
            if (name_count > 0)
            {
                fclose(img);
            }
//Create incrementing filenames for each new picture i.e 000.jpg, 001.jpg etc.
            sprintf(filename, "%03d.jpg", name_count);

//Open an initially created empty file and start writing to it
            img = fopen (filename, "w");
            name_count++;
            fwrite(buffer, BLOCK_SIZE, 1, img);
        }
//Continue writing to a file as soon as it is found until another JPEG image is found
        if(found)
        {
            fwrite(buffer, BLOCK_SIZE, 1, img);
        }
    }

//Close all the files
    fclose(input);
    fclose(img);
    return 0;
}
//Function to check for a JPEG Image
bool is_a_jpeg(BYTE *buffer)
{
        return buffer[0] == 0xff &&
        buffer[1] == 0xd8 &&
        buffer[2] == 0xff &&
        (buffer[3] & 0xf0) == 0xe0;
}

cs50 results for check50

printing dot everytime if(found) code runs. The code unnecessarily spends a lot of time on the first image before closing it

  • 1
    Are you assuming that each JPEG is exactly 152 bytes long? Doesn't sound reasonable to me. – Eugene Sh. Jun 20 '22 at 13:39
  • 1
    Your program only detects a JPEG if it's at the start of a block. – user253751 Jun 20 '22 at 14:24
  • the major flaw of the code is the number 152. Changing it to 512 fixed the bug. Data is read in blocks of 512 bytes as per the problem specification even though now the images are partial. – Emthehacker Jun 20 '22 at 17:53

1 Answers1

1

There are some problems in the posted code:

  • the block size should be 512 bytes, not 152.

  • you should not fclose(file) if fopen failed. This has undefined behavior.

  • you unconditionally close img at the end, which may have undefined behavior if no JPG file was found or if the last JPG file could not be open.

  • both the disk image file and the jpg destination files must be open in binary mode. This may explain unexpected behavior if you are working on Windows.

Here is a modified version:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef uint8_t BYTE;
const int BLOCK_SIZE = 512;
bool is_a_jpeg(const BYTE *buffer);

int main(int argc, char *argv[]) {
    //Check if there are exactly two commandline arguments
    if (argc != 2) {
        printf("usage: ./IMAGE\n");
        return 1;
    }

    //Open a storage device and check if it has data
    FILE *input = fopen(argv[1], "rb");
    if (input == NULL) {
        fprintf(stderr, "Could not open file %s: %s.\n", argv[1], strerror(errno));
        return 2;
    }
    //Create a file to write into and clear it
    FILE *img = NULL;

    //Declare an integer of counting images
    int name_count = 0;

    // create buffer
    BYTE buffer[BLOCK_SIZE];

    //Declare space for saving the filename
    char filename[16];

    //A function for reading through the device looking for images
    while (fread(buffer, BLOCK_SIZE, 1, input)) {
        //If a jpeg image is found notify the program(set found = true)
        //and start writing the data to a new file
        if (is_a_jpeg(buffer)) {
            //close the current image file if any
            if (img) {
                fclose(img);
                img = NULL;
            }
            //Create incrementing filenames for each new picture i.e 000.jpg, 001.jpg etc.
            sprintf(filename, "%03d.jpg", name_count);
            name_count++;

            //Create an empty file and start writing to it
            img = fopen(filename, "wb");
            if (img == NULL) {
                fprintf(stderr, "Could not open output file %s: %s.\n", filename, strerror(errno));
            }
        }
        //Continue writing to a file as soon as it is found until another JPEG image is found
        if (img) {
            if (!fwrite(buffer, BLOCK_SIZE, 1, img)) {
                fprintf(stderr, "Error writing to %s: %s.\n", filename, strerror(errno));
                fclose(img);
                img = NULL;
            }
        }
    }

    //Close all the files
    fclose(input);
    if (img)
        fclose(img);
    return 0;
}

//Function to check for a JPEG Image
bool is_a_jpeg(const BYTE *buffer) {
    return buffer[0] == 0xff &&
        buffer[1] == 0xd8 &&
        buffer[2] == 0xff &&
        (buffer[3] & 0xf0) == 0xe0;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • 1
    The answer is very useful. Pinpointing problems and giving a solution really helps me. – Emthehacker Jun 21 '22 at 19:23
  • The code works fine but i need to understand something how does the code continue to write into the file after it opens it and starts writing to it. – Emthehacker Jun 21 '22 at 19:45
  • The code writes every subsequent block to the current file until it find another JPEG header. You might want to scan for a JPEG trailer `FF D9`, but there is a possibility for false positives, parsing the JFIF stream is possible also but well beyond the scope of a simple filter so it is simpler to just write all blocks. – chqrlie Jun 21 '22 at 19:50
  • To reframe my question...Explain to me how does the last if block of code continues to write to the file also i truely don't understand how the file is written to without using fwrite(). The way i understand it is like we check for a JPEG file, then we open a file to write to, then we check if we opened the file to write to by first if() after fopen(filename, "wb") . And then we keep checking if we are writing to the file by the last if() block of code. But i don't get how we keep writing to the file. I only see us checking if we are writing to the file – Emthehacker Jun 21 '22 at 20:21
  • The `if (is_a_jpeg(buffer))` determines if the current block is the start of a new JPG file. If so, we close the current `img` file if any and open the next output file. After this test, a second test `if (img)` tests if we are writing to an `img` file. If so we write the current block to it. `img` is null only at the start of the scan, until we find the first JPG signature. The rest of the disk image is split into JPG files, one for each signature found. This may produce JPG files that contain a lot of meaningless trailing data, but it seems to suffice for this small cs50 program. – chqrlie Jun 21 '22 at 20:55