0

I am coding up a C program that extracts from a standard UNIX archive ar and creates the files it stores.

Here is an example of what an ar looks like if I open it in vim:

!<arch>
yo              1382105439  501   20    100644  10        `
test1 lol
yo2             1382105444  501   20    100644  10        `
test2 lol

...where "test1 lol" and "test2 lol" are the contents of each file, "yo" and "yo2" are two different file names, and the rest is metadata stored in a format corresponding to the standard ar.h (read more on it here: http://www.lehman.cuny.edu/cgi-bin/man-cgi?ar.h+3)

Anyway, I am still in the process of writing out the function but here is what I have so far:

static void extract_files (int argc, char *argv[])
{

  int fd;
  int new_file_fd;
  int num_read = 0;
  int new_file_size;

  struct ar_hdr current_header;

  char name_buffer[16];
  char date_buffer[12];
  char uid_buffer[6];
  char gid_buffer[6];
  char mode_buffer[8];
  char size_buffer[10];
  char fmag_buffer[2];

  // grab the fd #
  fd = open(argv[2], O_RDWR | O_CREAT, 0666);

  // go to the first header
  lseek(fd, SARMAG, SEEK_CUR);

  // store the number of bits read in a struct current_header
  // until its size equal to the size of the entire
  // header, or in other words, until the entire
  // header is read
  while ((num_read = read(fd, (char*) &current_header, 
    sizeof(struct ar_hdr))) == sizeof(struct ar_hdr))
  {

    // scans the current string in header and stores
    // in nameStr array
    sscanf(current_header.ar_name, "%s", name_buffer);
    sscanf(current_header.ar_date, "%s", date_buffer);
    sscanf(current_header.ar_uid, "%s", uid_buffer);
    sscanf(current_header.ar_gid, "%s", gid_buffer);

    int mode;
    sscanf(current_header.ar_mode, "%o", &mode);
    sscanf(current_header.ar_size, "%s", size_buffer);
    int size = atoi(size_buffer);
    sscanf(current_header.ar_fmag, "%s", fmag_buffer);

    // Create a new file
    new_file_fd = creat(name_buffer, mode);
    // Grab new file size
    new_file_size = atoi(size_buffer);

    int io_size; // buffer size
    char buff[size];
    int read_cntr = 0;

    // from copy.c
    while ((io_size = read (fd, buff, new_file_size)) > 0)
    {
      read_cntr++;
      if (read_cntr > new_file_size)
        break;
      write (new_file_fd, buff, new_file_size);
    }

    close(new_file_fd);
    printf("%s\n", name_buffer);
    printf("%s\n", date_buffer);
    printf("%s\n", uid_buffer);
    printf("%s\n", gid_buffer);
    printf("%s\n", mode_buffer);
    printf("%s\n", size_buffer);
    printf("%s\n", fmag_buffer);

    /* Seek to next header. */
    lseek(fd, atoi(current_header.ar_size) + (atoi(current_header.ar_size)%2), SEEK_CUR);
  }

}

The issue I am having lies in the second while loop in the above code:

    // from copy.c
while ((io_size = read (fd, buff, new_file_size)) > 0)
{
  read_cntr++;
  if (read_cntr > new_file_size)
    break;
  write (new_file_fd, buff, new_file_size);
}

For some reason, the files written in this while loop don't run to the length specified by write. The third argument for the standard read()/write() should be the number of bytes to write. For some reason though, my code results in the entire archive being read in and written into the first file.

If I open up the resulting "yo" file, I find the entire archive file has been written to it

test1 lol
yo2             1382105444  501   20    100644  10        `
test2 lol

instead of terminating after reading 10 bytes and giving the expected outcome "test1 lol".

I can also confirm that the "new_file_size" value is indeed 10. So my question is: what am I reading wrong about this while loop?

Note: Expected input would be a command line argument that looks something like: ./extractor.c -x name_of_archive_file

The only relevant information I think I need to deal with in this function is the name of the archive file which I get the fd for at the beginning of extract_files.

Added: Misc -- the output from when this is run:

yo
1382105439
501
20
X
10
`

As you can see, it never sees the yo2 file or prints out its header because it gets written to "yo" before that can happen...because of this stray while loop :(

Andrew Barber
  • 39,603
  • 20
  • 94
  • 123
ns1
  • 117
  • 2
  • 3
  • 10
  • 1
    Some hints: compile with warnings enabled (`-Wall -Wextra` with gcc) *and fix them*. Also, use code autoformatting or autoindenting (if you don't know how to do it with your editor/IDE, learn, and if it can't do it, switch to something which can). – hyde Oct 18 '13 at 19:28
  • Another thing: when parsing any input with any `scanf` function, check the return value to make sure it got value to *all* arguments. – hyde Oct 18 '13 at 19:32
  • Thanks mate. I went ahead and fixed the relevant warning I had (using the wrong mode variable for creat()) but it didn't seem to do much good . Thanks for the general tip though. – ns1 Oct 18 '13 at 19:33
  • 1
    Also, you relying on buffer overflow not happening seems a bit bold. When you `sscanf` the last item, are you sure it is followed by whitespace or `'\0'` in the source string, so `sscanf` knows to stop? Also, all your small buffers, are you sure they all have enough space? You should specify target buffer size to `sscanf`, including room for `'\0'`! – hyde Oct 18 '13 at 19:44
  • I am pretty sure they have enough space for the respective fields of current_header that I am grabbing based on the structure of ar.h. So if you look at the above output, it appears sscanf() captures each item in the header properly and the problem does lie in the while loop. That being said, I'll definitely go back and add some stronger error handling if I'm able to get the writes working for this case. – ns1 Oct 18 '13 at 19:55
  • 1
    Perhaps you should just step through the code with debugger... :) And see return values of all your `scanf`, `read`, `write` etc calls. Then you can immediately see if there's error at some point. – hyde Oct 18 '13 at 19:58
  • 1
    Don't forget to check what happens when you replace `yo` with `supercalifragilisticexpialidocious`. Different versions of `ar` have different techniques for handling long file names. Also make sure you test files with both even and odd file lengths; again, my recollection is that the odd lengths are null-padded to an even length in the archive file. You'll be OK as long as you read the exact length, but you may also have to skip a (zero) byte to get to the start of the next file header. It's also worth checking your code on some binary data (eg the object file or the program you're testing). – Jonathan Leffler Oct 19 '13 at 00:12

2 Answers2

1

Your while() loop should probably have braces ({ ... }) after it, otherwise you're just incrementing read_cntr without doing anything else.

Christian Ternus
  • 8,406
  • 24
  • 39
1

You read a value, size_buffer, and assign it to size and new_file_size, you also create a buffer[size] of that same size,

int size = atoi(size_buffer);
sscanf(current_header.ar_fmag, "%s", fmag_buffer);
//...
new_file_size = atoi(size_buffer);
//...
char buff[size];

Read returns a ssize_t count of bytes in range [0..new_file_size], which you set into io_size, realize that read(2) may return < new_file_size bytes, which is why you need the while loop. So you need to write everything you have read, until you reach your write limit. I have made some comments to guide you.

// from copy.c
while ((io_size = read (fd, buff, new_file_size)) > 0)
{
    read_cntr++;
    //perhaps you mean read_cntr += io_size;
    //you probably mean to write io_size bytes here, regardless
    //write(new_file_fd, buff, io_size);
    if (read_cntr > new_file_size) //probably you want >= here
        break;
    //you may have broke before you write...
    write (new_file_fd, buff, new_file_size);
}

A more typical idiom for this copy would be something where you pick a read/write buffer size, say 4*1024 (4K), 16*1024 (16K), etc, and read that blocksize, until you have less than that blocksize remaining; for example,

//decide how big to make buffer for read()
#define BUFSIZE (16*1024) //16K
//you need min(
#define min(x,y) ( ((x)<(y)) ? (x) : (y) )
ssize_t fdreader(int fd, int ofd, ssize_t new_file_size )
{
    ssize_t remaining = new_file_size;
    ssize_t readtotal = 0;
    ssize_t readcount;
    unsigned char buffer[BUFSIZE];
    for(  ; readcount=read(fd,buffer,min(sizeof(buffer),remaining));  )
    {
        readtotal += readcount;
        if( readcount > remaining ) //only keep remaining
            readcount = remaining;
        write( ofd, buffer, readcount);
        remaining -= readcount;
        if( remaining <= 0 ) break; //done
    }
    return readtotal;
}

Try this,

#include<stdio.h>
#include<stdlib.h>

void usage(char*progname)
{
    printf("need 2 files\n");
    printf("%s <infile> <outfile>\n",progname);
}

//decide how big to make buffer for read()
#define BUFSIZE (16*1024) //16K
//you need min(
#define min(x,y) ( ((x)<(y)) ? (x) : (y) )
ssize_t fdreader(int fd, int ofd, ssize_t new_file_size )
{
    ssize_t remaining = new_file_size;
    ssize_t readtotal = 0;
    ssize_t readcount;
    unsigned char buffer[BUFSIZE];
    for(  ; readcount=read(fd,buffer,min(sizeof(buffer),remaining));  )
    {
        readtotal += readcount;
        if( readcount > remaining ) //only keep remaining
            readcount = remaining;
        write( ofd, buffer, readcount);
        remaining -= readcount;
        if( remaining <= 0 ) break; //done
    }
    return readtotal;
}

int main(int argc,char**argv)
{
    int i=0; /* the infamous 'i' */
    FILE*infh;
    FILE*outfh;

    if( argc < 3 )
    {
        usage(argv[0]);
        return 0;
    }

    printf("%s %s\n",argv[1],argv[2]); fflush(stdout);
    if( !(infh=fopen(argv[1],"r")) )
    {
        printf("cannot open %s\n",argv[2]); fflush(stdout);
        return(2);
    }
    if( !(outfh=fopen(argv[2],"w+")) )
    {
        printf("cannot open %s\n",argv[3]); fflush(stdout);
        return(3);
    }

    int x = fdreader(fileno(infh), fileno(outfh), 512 );

    return 0;
}
ChuckCottrill
  • 4,360
  • 2
  • 24
  • 42
  • That seemed to fix the biggest portion of my problem. Thanks so much! – ns1 Oct 18 '13 at 21:11
  • When I applied your idiom, it seemed to write past the indicated "new_file_size". Any idea why that might happen? – ns1 Oct 19 '13 at 01:02