5

I'm trying to count the number of words in a file with strtok().

/*
 * code.c
 *
 * WHAT
 *      Use strtok() to count the number of words in a file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define STRMAX 128

/*
 * Opens a hard-coded text file, reads it chunk by chunk with fgets() and
 * attempts to count the words in it by tokenizing each chunk with strtok().
 * Prints the total and returns 0.
 *
 * NOTE(review): as written, the inner while loop never terminates once a
 * chunk contains at least one word; see the comment on that loop.
 */
int main() {
    /* Declarations */
    FILE* fptr;
    int iCntr = 0;          /* running word count */
    char sLine[STRMAX];     /* buffer for one fgets() chunk */
    char* cPToken;          /* token returned by strtok(); only tested against NULL */

    /* Read file */
    /* Error handler */
    if ((fptr = fopen("/home/ubuntu/Dropbox/Unief/C/H18/Opdr01/Debug/test.txt", "r")) == NULL) {
        printf("Couldn't read test.txt.\n");
        /* NOTE(review): exit status 0 signals success to the caller even
         * though the file could not be opened. */
        exit(0);
    } else {
        /* fgets() stores at most (size - 1) characters, so passing STRMAX-1
         * reads at most STRMAX-2 characters per call. */
        while (fgets(sLine, STRMAX-1, fptr) != NULL) {                  /* Read line */
            /* Passing sLine on every iteration makes strtok() start over at
             * the beginning of the buffer each time, so it keeps returning
             * the first token and never returns NULL. Continuing a scan
             * requires passing NULL as the first argument. */
            while ((cPToken = strtok(sLine, ".,; !?\r\n")) != NULL) {   /* Split into words */
                iCntr++;
            }
        }
        printf("Number of words: %d\n", iCntr);
    }

    /* Always clean up your mess */
    /* Only reached when fopen() succeeded, because the failure branch exits. */
    fclose(fptr);
    return 0;
}

This causes an infinite loop. Why?

Pieter
  • 31,619
  • 76
  • 167
  • 242

1 Answer

9

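You need two kinds of call: the first call passes the string to tokenize, and every later call must pass NULL so that strtok continues from where it stopped. Passing sLine on every iteration restarts the scan at the beginning of the buffer, so strtok returns the first token forever and the loop never sees NULL.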

Instead of:

/* Problem: sLine is passed on every iteration, so each call restarts
 * tokenizing from the start of the buffer and returns the first token again. */
while ((cPToken = strtok(sLine, ".,; !?\r\n")) != NULL) {  /* Split into words */
                iCntr++;
}

do

/* First call: pass the buffer itself to start tokenizing it. */
cPToken = strtok(sLine, ".,; !?\r\n");
while (cPToken != NULL) {   /* Split into words */
     iCntr++; /* we have a valid word */
     /* Later calls: NULL tells strtok() to continue in the same buffer. */
     cPToken = strtok(NULL, ".,; !?\r\n");          
}

Edit: Full source:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Count the words in a text file.
 *
 * fname: path of the file to read (opened in text mode).
 * delim: set of characters that separate words, as accepted by strtok().
 *
 * Returns the number of words found, or 0 when the file cannot be opened.
 *
 * The file is read in fixed-size chunks with fgets() and each chunk is split
 * with strtok(). A word longer than the buffer is delivered in several chunks;
 * the pieces are recognised and counted as one word. A newline always ends a
 * word, whether or not it is part of delim, because fgets() stops at it.
 */
size_t wcount(const char *fname, const char *delim) {
    char buf[512];
    size_t nw = 0;
    int in_word = 0;    /* non-zero when the previous chunk stopped mid-word */

    FILE *fp = fopen(fname, "r");
    if (fp == NULL) {
        return 0;
    }

    while (fgets(buf, sizeof buf, fp) != NULL) {
        size_t len = strlen(buf);

        /* Inspect the chunk edges now: strtok() overwrites delimiters. */
        int starts_in_word = len > 0 && strchr(delim, buf[0]) == NULL;
        int ends_in_word = len > 0 && buf[len - 1] != '\n' &&
                           strchr(delim, buf[len - 1]) == NULL;

        for (char *w = strtok(buf, delim); w != NULL; w = strtok(NULL, delim)) {
            nw++;
        }

        /* The first token of this chunk continues the last token of the
         * previous one, so the two pieces are a single word. */
        if (in_word && starts_in_word) {
            nw--;
        }
        in_word = ends_in_word;
    }

    fclose(fp);
    return nw;
}

/*
 * Print the number of words in C:\sample.txt, using '.', ',', ';', ' ', '!',
 * '?', carriage return and newline as word separators.
 */
int main(int argc, char* argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /* wcount() returns size_t, whose matching conversion is %zu; %u expects
     * unsigned int and is a type mismatch where the two types differ. */
    printf("%zu\n", wcount("C:\\sample.txt", ".,; !?\r\n"));
    return 0;
}

With your input file, I get the result as 16.

Edit# 2: Modifying your source:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define STRMAX 128

/*
 * Count the words in c:\test.txt and print the total.
 *
 * The file is read in chunks of up to sizeof sLine - 1 characters with fgets()
 * and each chunk is split into words with strtok(). Returns 0 on success and
 * EXIT_FAILURE when the file cannot be opened.
 *
 * Note: a word longer than the buffer is split across chunks and therefore
 * counted once per chunk.
 */
int main(void) {
    /* One delimiter set shared by every strtok() call so they cannot diverge. */
    static const char delims[] = ".,; !?\r\n";
    char sLine[STRMAX];
    int iCntr = 0;

    FILE *fptr = fopen("c:\\test.txt", "r");
    if (fptr == NULL) {
        /* Report on stderr and signal failure through the exit status. */
        fprintf(stderr, "Couldn't read test.txt.\n");
        return EXIT_FAILURE;
    }

    /* fgets() already reserves room for the terminator, so pass the full size. */
    while (fgets(sLine, sizeof sLine, fptr) != NULL) {
        /* First call passes the buffer; later calls pass NULL to continue. */
        for (char *cPToken = strtok(sLine, delims);
             cPToken != NULL;
             cPToken = strtok(NULL, delims)) {
            iCntr++;
        }
    }
    printf("Number of words: %d\n", iCntr);

    /* Always clean up your mess */
    fclose(fptr);
    return 0;
}

I get the same result -- 16.

dirkgently
  • 108,024
  • 16
  • 131
  • 187
  • That doesn't accurately represent the number of words in my text file, as far as I can tell. – Pieter Feb 15 '10 at 11:15
  • 1
    @Pieter: You need to call `strtok` a second time. That is how it works. And the first parameter must be `NULL` for the second call (which is usually wrapped in a loop). You may need to rearrange the counter increment. The code I posted was intended to show how to call `strtok` only. – dirkgently Feb 15 '10 at 11:20
  • @Pieter "as far as I can tell" is rather vague feedback; try a text file with a known word count and use it to verify whether or not the solution works. – kb. Feb 15 '10 at 11:21
  • Text file: `Here are four words. Here are four more! Now we have a total of sixteen words.` This returns `Number of words: 1`. – Pieter Feb 15 '10 at 11:29
  • `for` loops are the best for `strtok` and `strtok_r`. I accidentally introduced a `continue` inside the loop, and was clueless for a few minutes and ventured here. – zapstar Apr 07 '18 at 09:17