0

Here are two problems in the program First, is that when I uncomment the pthread_join() in the main function, there will be a seg fault, other wise the program will run... Second, is that the output file will be missing the first letter of each word that has stored in the global variable words from last read file. So, for example, there are two files:

one has words "abc abc abc abc abc abc abc abc".

the second has words "def def"

if i input 5 for the second argument when calling a.out, the output in the output file will be abc abc abc abc abc bc bc bc def def This is also a werid thing I could not figure out why.

/* main.c */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#include <ctype.h>
#include <pthread.h>
#include "hw3.h"
int index_;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

typedef struct files
{
    char *inputfile;
    FILE * outputfile;

} files;

void * readFile( void *arg ){

    files *info = (files *)arg;
    char fileName[80];
    strncat(fileName, (info->inputfile), 79);
    fileName[80] = '\0';
    FILE *outputfd = info->outputfile;
    FILE* fd;
    fd = fopen(fileName, "r");
    if ( fd  == NULL) {
      fprintf(stderr, "ERROR:<open() failed>\n");
    }

    printf("TID %d: Opened \"%s\"\n", (unsigned int)pthread_self(), fileName);
    fflush(stdout);
    int rc;
    char ch[1] = {0};
    char word[80] = {0};
    ch[0] = fgetc(fd);
    pthread_mutex_lock(&mutex);
    while( ch[0] != EOF){
      if( isalnum(ch[0]) ){
      //  char str = ch[0];
      strncat(word, ch, 1);
      }
      else{//it's a word
        if( strlen( word ) >= 2 ){

          words[index_] = word;
          printf("TID %d: Stored \"%s\" in shared buffer at index [%d]\n",(unsigned int)pthread_self(), word, index_ );
          if( index_+ 1 == maxwords ){
            index_ = 0;
            printf("MAIN: Buffer is full; writing %d words to output file\n", maxwords);
            for( unsigned int i = 0; i<maxwords; i++ ){
              rc = fwrite(  words[i], 1, sizeof(words[i]), outputfd  );
              fwrite(  "\n", 1, sizeof("\n"), outputfd );
              if( rc == -1 ){
                fprintf(stderr, "ERRPR:<write() failed>\n");
                //return EXIT_FAILURE;
              }
            }
          }
          else{
            index_ ++;
          }
        }
        for(int i = 0; i< strlen(word); i++){
          word[i] = '\0';
        }
      }
    ch[0] = fgetc(fd);
    }
    pthread_mutex_unlock(&mutex);
    printf("TID %d: Closed \"%s\"; and exiting\n", (unsigned int)pthread_self(), fileName );
    fclose(fd);
    pthread_exit( NULL );
}

int main( int argc, char * argv[] ){

  if(argc != 4){
  fprintf(stderr, "ERROR: Invalid arguments\nUSAGE: ./a.out <input-directory> <buffer-size> <output-file>\n");
    return EXIT_FAILURE;
  }
  //dynamically allocated words buffer with argument 2
  maxwords = atoi(argv[2]);
  words = (char**)calloc(maxwords, sizeof(char*) );

  if ( words == NULL)
  {
    fprintf( stderr, "ERROR:<word calloc() failed\n>" );
    return EXIT_FAILURE;
  }

  printf("MAIN: Dynamically allocated memory to store %d words\n", maxwords);
  fflush(stdout);

  //open/create output file of the third argument
  FILE* outputfd = fopen (argv[3], "w");
  if ( outputfd == NULL )
  {
    perror( "open() failed" );
    return EXIT_FAILURE;
  }
  DIR * dir = opendir( argv[1] );

  if(dir == NULL){
    perror("ERRPR:<opendir() failed>");
    return EXIT_FAILURE;
  }
  chdir(argv[1]);

  printf("MAIN: Opened \"%s\" directory\n", argv[1]);
  fflush(stdout);

  pthread_t tid[10];
  index_ = 0;
  int i = 0;//files index

  struct dirent * file;
  //files allfiles[20];
  char fileName[80];
  int rc;

  //-----------------------------------------------------------------------
  // while loop reads all files in the directory
  while ( ( file = readdir( dir ) ) != NULL )
  {
    struct stat buf;
    rc = lstat( file->d_name, &buf );  /* e.g., "xyz.txt" */
    /* ==> "assignments/xyz.txt" */

    if ( rc == -1 ){
      fprintf(stderr, "ERRPR:<lstat() failed>\n");
      return EXIT_FAILURE;
    }

    if ( S_ISREG( buf.st_mode ) )
    {
    //      printf( " -- regular file\n" );
   //      fflush(stdout);
      strncpy(fileName, file->d_name, 79);

      files info;
      info.inputfile = fileName;
      info.outputfile = outputfd;
      //printf("%d",i);
      printf("MAIN: Created child thread for \"%s\"\n",fileName);
      rc = pthread_create( &tid[i], NULL, readFile,(void *)&info );
      sleep(1);
      i++
    }
    else if ( S_ISDIR( buf.st_mode ) )
    {
    //      printf( " -- directory\n" );
    //      fflush(stdout);
    }
    else
    {
   //      printf( " -- other file\n" );
   //      fflush(stdout);
    }
  }

  closedir(dir);
  printf("MAIN: Closed \"%s\" directory\n", argv[1]);
  fflush(stdout);
  printf("MAIN: Created \"%s\" output file\n",argv[3]);
  fflush(stdout);
  //-----------------------------------------------------------------------

  for( int j = 0; j<i; j++){

      printf( "MAIN: Joined child thread: %u\n", (unsigned int)tid[j] );
      pthread_join(tid[i], NULL);
  }


  for( unsigned int i = 0; i<index_; i++ ){
    int rc = fwrite( words[i], 1, sizeof(words[i]), outputfd );
    if( rc == -1 ){
      fprintf(stderr, "ERRPR:<write() failed>\n");
      return EXIT_FAILURE;
    }
  }
  printf( "MAIN: All threads are done; writing %d words to output file\n", index_);
  fflush(stdout);

  free( words );
  fclose( outputfd );
  return EXIT_SUCCESS;
}

This here is the whole program, and there is a header file which is just two global variab

char ** words = NULL;

/* global/shared integer specifying the size */
/*  of the words array (from argv[2])        */
int maxwords;

Thanks to everyone for the help!

Jimmy Li
  • 53
  • 2
  • 9
  • please indent the code consistently. Note: a 2 space indent will not be wide enough for visibility when using a variable width font. Suggest each indent level be 4 spaces. – user3629249 Nov 12 '17 at 17:25
  • when calling any of the heap allocation functions: (malloc, calloc, realloc) 1) in C, the return type is `void*` which can be assigned to any pointer. Casting just clutters the code, making it more difficult to understand, debug, etc – user3629249 Nov 12 '17 at 17:30
  • when a system function fails, it sets the variable `errno`. Each errno value has a related text string. When reporting an error from a system function, should also report the related text. The function: `perror()` does the job correctly, if using `fprintf()` then need to include the `errno.h` header file and the format string for 'fprintf()` needs to include a second '%s' and a parameter: `strerror(errno)` – user3629249 Nov 12 '17 at 17:34
  • the function: `fgetc()` returns an `int`, not a `char` and can return the value `EOF`, which needs to be checked for. – user3629249 Nov 12 '17 at 17:36
  • the functions: `pthread_mutex_lock()` and `pthread_mutex_unlock()` return an integer result. of 0 when successful and some positive number when they fail. The posted code is failing to check that returned value. – user3629249 Nov 12 '17 at 17:38
  • regarding: `char ch[1] = {0};` and `char word[80] = {0};` the 0 is an integer, not a char, so why is the code trying to initialize character arrays using integers? Suggest using: `'\0' as the initializer value – user3629249 Nov 12 '17 at 17:40
  • regarding: `while( ch[0] != EOF){` this may (or may not) work, depending on if a `char` is signed or unsigned.` – user3629249 Nov 12 '17 at 17:41
  • `for( unsigned int i = 0; i – user3629249 Nov 12 '17 at 17:48
  • regarding: `if( rc == -1 ) { fprintf(stderr, "ERRPR:\n"); //return EXIT_FAILURE; }` the only 'good' returned value from the prior call to `fwrite()` is `sizeof( words[i] )` (which happens to be 1) so should be checking for `!=1` I.E. the returned value could be 0, which means no `word[i]` was written. – user3629249 Nov 12 '17 at 17:51
  • regarding the variable `index_` the trailing underscore just makes the code more difficult to read/understand Suggest removing the trailing underscore. – user3629249 Nov 12 '17 at 17:52
  • this loop: `for(int i = 0; i< strlen(word); i++){ word[i] = '\0'; }` would be better written as: `memset( word, '\0', sizeof( word ) );` – user3629249 Nov 12 '17 at 17:54
  • The call to `pthread_create()` is using the same memory area for each call. So the next call will cause the prior thread instance to be pointing to corrupted data – user3629249 Nov 12 '17 at 18:12
  • what happens if there are more than 10 total files? – user3629249 Nov 12 '17 at 18:14
  • regarding: `printf("MAIN: Closed \"%s\" directory\n", argv[1]); fflush(stdout); printf("MAIN: Created \"%s\" output file\n",argv[3]); fflush(stdout);` the calls to `fflush()` are not needed as the last char in the format string `\n` will cause the stdout buffer to be flushed. – user3629249 Nov 12 '17 at 18:15
  • in the function: `readfiile()`, the variable `words[]` is not visible, as the scope of the actual `words[]` array is limited to the function: `main()` – user3629249 Nov 12 '17 at 18:19
  • the code does not compile, for several reasons. One of those reasons is: ` sleep(1); i++ }` is missing a ';' after the statement: `i++` So the posted code was never executed. – user3629249 Nov 12 '17 at 18:21
  • when compiling, always enable the warnings, then fix those warnings. (for `gcc`, at a minimum use: `-Wall -Wextra -Wconversion -pedantic -std=gnu11` ) – user3629249 Nov 12 '17 at 18:26
  • regarding: char fileName[80]; strncat(fileName, (info->inputfile), 79); fileName[80] = '\0'; 1) the valid index into an array is the range 0...(number of elements in array -1) so accessing element [80] is one past the end of the array, I.E. undefined behavior that can lead to a seg fault event. 2) it should be: char fileName[80]; strncat(fileName, (info->inputfile), 78); The call to strncat() will always append a NUL byte – user3629249 Nov 12 '17 at 18:29

2 Answers2

0

You need separate info objects for each thread. Right now, all of the threads get the same info object, which you change in between creating threads, and therefore, for most of them, by the time they get a chance to look at the name of the file they are supposed to process, it has been changed.

The segmentation fault is being caused by code you have not shown us, so I can't help you with that except to suggest that you apply valgrind.

zwol
  • 135,547
  • 38
  • 252
  • 361
  • thank you very much for your help, I figured that if I put the pthread_create in the while loop, the threads would be accessing different files. However, I still don't know how the pthread_join funtion could corrupt since everything in the readFile function compiled and outputted the results. I updated the question as well. – Jimmy Li Nov 11 '17 at 01:56
  • Moving the `pthread_create` call into the while loop is a good change (now you only have one loop in the "main" thread) but you still need to allocate separate `info` objects for each thread. That is a fundamental requirement of working with threads: whatever data you pass through `pthread_create` to the thread procedure now belongs to the thread procedure and you mustn't touch it from outside. This still isn't a complete program so I still can't help you with the segfault, except to say that your `strncat` call in `readFile` is unsafe, because `fileName` is uninitialized. – zwol Nov 11 '17 at 02:09
  • ... on rereading the main loop, you are now only creating one worker thread at a time, so you technically can get away with one info object, but if you're going to do it that way there's no point having threads at all! The `pthread_join` calls should still be in their own loop, and that means you do still need separate `info` objects for each thread. – zwol Nov 11 '17 at 02:11
  • Yet I updated. I moved moved the "files info;" line into the while loop, so every time there is a new file found, it will create a new "files" object, and store new information to the files object "info", but it didn't really help. And i also moved the join() function out of the while loop and put it into a new for loop. Don't know if that helped. Multithreading is so uncertain. – Jimmy Li Nov 11 '17 at 04:01
  • No, that won't create a separate `info` object for each thread. The simplest option that will actually work is to use `malloc` to allocate a new `info` object. each time around the loop. – zwol Nov 11 '17 at 05:18
  • I tried that and yet still only passing the last file found in the directory to all child threads – Jimmy Li Nov 11 '17 at 07:13
  • 1
    info contains only pointers. Allocating a different info for each thread is not sufficient, the strings pointed at must also become owned by the thread and not mutated anywhere else. Instead of 'char *inputfile;' etc, just allocate actual space! Try 'char inputfile[256];' and strcpy in the names. – Martin James Nov 11 '17 at 20:24
0

Here are two more bugs:

char fileName[80];
strncat(fileName, (info->inputfile), 79);

You can only concatenate onto a string, not an unitialized array of characters that may or may not contain a valid string.

char ch[1] = {0};
char word[80] = {0};
ch[0] = fgetc(fd);
pthread_mutex_lock(&mutex);
while( ch[0] != EOF){

The fgets function returns an integer that will be EOF on end of file, otherwise it returns the character value. You convert it to a char and then compare the char to EOF. But that makes no sense since EOF is the integer value that represents end of file. Once cast to a character, it is a valid character that could have been read from the file since the file can contain any characters and "end of file" is not a character.

David Schwartz
  • 179,497
  • 17
  • 214
  • 278
  • I don't know how both case worked in my program, but them both worked. for the fileName thing, I think it is valid in c, since string in C is just an array of char-s. as for the EOF and char issue, Thanks for the advise! I added a feof(fd) check at the end of the loop to check EOF. – Jimmy Li Nov 11 '17 at 04:17
  • @JimmyLi A string is not "just an array of chars". A string is a sequence of characters stored in consecutive locations with the end of the data marked with a nul character. So your array of chars is *not* a string -- yet you pass it to `strncat` whose first parameter *must* be a string. – David Schwartz Nov 11 '17 at 17:50