0

I have the following code which basically reproduce the functionality of the wc command in linux. My question is how I can rewrite the code using mmap? I know I can use struct stat sb; and then char *file_in_memory = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); but I can't get it work/ I don't know how to implement it correctly in the while loop while ((n = read(file, buffer, LUNG_BUF - 1)) > 0). In my tries after I run the code it will display only values of 0.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>

#define LUNG_BUF 4096

int main(int argc, char** argv)
{
  int bytes = 0;
  int words = 0;
  int newLine = 0;
  int max_value;  // the maximum of the above three
  int dim;        // string width of the max value

  char buffer[LUNG_BUF];
  enum states { WHITESPACE, WORD };
  int state = WHITESPACE;
  if ( argc !=2 )
  {
    printf( "No file name\n%s", argv[0]);
  }
  else
  {
    int file = open(argv[1], O_RDONLY);

    if(file < 0)
    {
      printf("can not open :%s\n",argv[1]);
    }
    else
    {
      char *thefile = argv[1];
      size_t n;

      while ((n = read(file, buffer, LUNG_BUF - 1)) > 0)
      {
        buffer[n] = '\0';
        char *ptr = buffer;

        while (*ptr)
        {
          bytes++;
          if (*ptr == ' ' || *ptr == '\t')
          {
            state = WHITESPACE;
          }
          else if (*ptr == '\n')
          {
            newLine++;
            state = WHITESPACE;
          }
          else
          {
            if (state == WHITESPACE)
            {
                words++;
            }
            state = WORD;
          }
          ptr++;
        }
      }

      // find out the largest value of all and determine the printed width of it
      max_value = newLine;
      if (words > max_value)
        max_value = words;
      if (bytes > max_value)
        max_value = bytes;
      dim = snprintf(NULL, 0, "%d", max_value);

      // print lines, words, bytes and filename aligned to the longest number
      printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, thefile);
    }
  }
}

The script that I was trying:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>

#define LUNG_BUF 4096

int main(int argc, char** argv)
{
  int bytes = 0;
  int words = 0;
  int newLine = 0;
  int max_value;  // the maximum of the above three
  int dim;        // string width of the max value

  char buffer[LUNG_BUF];
  enum states { WHITESPACE, WORD };
  int state = WHITESPACE;
  if ( argc !=2 )
  {
    printf( "No file name\n%s", argv[0]);
  }
  else
  {
    int file = open(argv[1], O_RDONLY);

    if(file < 0)
    {
      printf("can not open :%s\n",argv[1]);
    }
    else
    {
      char *thefile = argv[1];
      size_t n;
      struct stat sb;
      char *file_in_memory = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
      for(int i=0;i<=sb.st_size;i++)
      {
        buffer[i] = '\0';
        char *ptr = buffer;
        while (*ptr)
        {
          bytes++;
          if (*ptr == ' ' || *ptr == '\t')
          {
            state = WHITESPACE;
          }
          else if (*ptr == '\n')
          {
            newLine++;
            state = WHITESPACE;
          }
          else
          {
            if (state == WHITESPACE)
            {
                words++;
            }
            state = WORD;
          }
          ptr++;
        }
      }

      // find out the largest value of all and determine the printed width of it
      max_value = newLine;
      if (words > max_value)
        max_value = words;
      if (bytes > max_value)
        max_value = bytes;
      dim = snprintf(NULL, 0, "%d", max_value);

      // print lines, words, bytes and filename aligned to the longest number
      printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, thefile);
      munmap(file_in_memory, sb.st_size);
      close(file);
    }
  }
}
gameloverr2
  • 85
  • 2
  • 17
  • Please post your attempt with `mmap`, not just your old program with `read`. – Joseph Sible-Reinstate Monica May 03 '20 at 17:09
  • 3
    `struct stat sb; char *file_in_memory = mmap(NULL, sb.st_size` `sb` is uninitialized. – KamilCuk May 03 '20 at 17:41
  • 1
    You have to actually call `stat(2)` to fill in the `struct stat`. Compiler warnings should have told you that `sb` was uninitialized; are they turned on? – Nate Eldredge May 03 '20 at 17:45
  • regarding: `size_t n; while ((n = read(file, buffer, LUNG_BUF - 1)) > 0)` the function: `read()` returns a `ssize_t` not a `size_t` (I.E. a signed value.) This is because the function: `read()` can return a <0 value when an error occurs or even 0 when the sender closes the connection – user3629249 May 04 '20 at 15:29
  • the function: `wc` can work from either a command line parameter or from `stdin`, like `cat myfile.txt | wc` or `wc < myfile.txt`. The posted code does not handle that scenario. – user3629249 May 04 '20 at 15:32
  • regarding: `while ((n = read(file, buffer, LUNG_BUF - 1)) > 0)` Since the variable `n` is 'unsigned', when an error occurs, `n` will be seen as if containing a VERY large value – user3629249 May 04 '20 at 15:34
  • regarding: `if (*ptr == ' ' || *ptr == '\t') { state = WHITESPACE; }` what about punctuation? like `.` `,` `;` `:` etc. – user3629249 May 04 '20 at 15:36
  • regarding: `else if (*ptr == '\n') { newLine++; state = WHITESPACE; }` This does not properly count the number of lines when there is no 'newline' at the end of the file – user3629249 May 04 '20 at 15:40

1 Answers1

2

The code you posted above didn't compile and had quite a few problems. I've tidied it up a bit below, hopefully this will help. I tried not to change it too much so you could see what I did.

You hadn't actually called stat and the fd variable you had passed to mmap was not the variable you used to open the file. I would always compile your code with "-Wall -Werror" if you can.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>

int main(int argc, char** argv)
{

  if ( argc !=2 )
  {
    printf( "No file name\n%s", argv[0]);
    exit(-1);
  }

  char *fileName = argv[1];

  int file = open(fileName, O_RDONLY);

  if(file < 0)
  {
    perror("Error: ");
    exit(-1);
  }

  struct stat sb = {0};
  stat(fileName, &sb);
  char *filePtr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, file, 0);
  if (filePtr == MAP_FAILED)
  {
      perror("Error:");
      exit(-1);
  }

  int bytes = sb.st_size;
  int words = 0;
  int newLine = 0;

  enum states { WHITESPACE, WORD };
  int state = WHITESPACE;

  for(size_t pos=0;pos<=sb.st_size;pos++)
  {
      if (state == WHITESPACE)
      {
          if (filePtr[pos] == '\n')
          {
              newLine++;
          }
          else if ((filePtr[pos] != ' ') && (filePtr[pos] != '\t'))
          {
              state = WORD;
          }
      }
      else // (state == WORD)
      {
        if (filePtr[pos] == ' ' || filePtr[pos] == '\t')
        {
            state = WHITESPACE;
            words++;
        }
        else if (filePtr[pos] == '\n')
        {
            state = WHITESPACE;
            words++;
            newLine++;
        }
      }
  }

  // Max value is always bytes

  int dim = snprintf(NULL, 0, "%d", bytes);

  // print lines, words, bytes and filename aligned to the longest number
  printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, fileName);

  munmap(filePtr, sb.st_size);
  close(file);
}
CodeWash
  • 155
  • 6
  • In your opinion using mmap is more efficient? – gameloverr2 May 03 '20 at 19:15
  • 1
    I used mmap in this answer, purely because you did. It can be faster, depending on what you are doing. I would benchmark both ways if performance really mattered but I would only optimize if your application is too slow. – CodeWash May 03 '20 at 19:44