0

I am currently writing a program that is supposed to receive input from a text file and output statistics about the text such as the number of letters, size of words and how often they occur, and how many times each word occurs. However, every time I run the program, I get a segmentation fault error. The program runs until I hit the line Letter Count Analysis. The I receive the segmentation fault error. Here is some sample text:

1 Hello my name is Bob I live in Canada

The number represents how many lines are supposed to be read. What should I do to correct my issue? I am very new to programming so I'm sure it is something basic.

 #include <stdio.h>
 #include <string.h>


#define MAX_LINE_LENGTH 80
#define MAX_WORD_LENGTH 20
#define MAX_LINES 10

void letterAnalysis(char [][MAX_LINE_LENGTH], int lineTotal);
int wordLengthAnalysis(char [][MAX_LINE_LENGTH], int lineTotal, int wordLength);
void wordAnalysis(char [][MAX_LINE_LENGTH], int lineTotal);


int main (void){

    int lineTotal, wordSize;
    char text[lineTotal][MAX_LINE_LENGTH];
    char n[1];

    fgets(n, 10, stdin);
    lineTotal = n[0] - '0';


    for(int i = 0; i < lineTotal; i++){
        fgets(text[i], MAX_WORD_LENGTH, stdin);
    }

    printf("\n***Letter count analysis***\n");
    letterAnalysis(text, lineTotal);

    printf("\n***Word length analysis***\n");
    for (int i = 1; i <= MAX_WORD_LENGTH; i++){
        wordSize = wordLengthAnalysis(text, lineTotal, i);
        if (wordSize == 1){
            printf("\n%-2d\tword of length %d", wordSize, i);
        }
        else{
            printf("\n%-2d\twords of length %d", wordSize, i);
        }
    }

    printf("\n\n***Word analysis***\n");
    wordAnalysis(text, lineTotal);

    return 0;

}

void letterAnalysis(char text[][MAX_LINE_LENGTH], int lineTotal){

    int alphabet[26] = {0};

    for (int i = 0; i < lineTotal; i++){
        for(int j = 0; j < MAX_LINE_LENGTH; j++){
            switch(text[i][j]){
                case 'A': case 'a':
                alphabet[0]++;
                break;
                case 'B': case 'b':
                alphabet[1]++;
                break;
                case 'C': case 'c':
                break;
                alphabet[2]++;
                case 'D': case 'd':
                alphabet[3]++;
                break;
                case 'E': case 'e':
                alphabet[4]++;
                break;
                case 'F': case 'f':
                alphabet[5]++;
                break;
                case 'G': case 'g':
                alphabet[6]++;
                break;
                case 'H': case 'h':
                alphabet[7]++;
                break;
                case 'I': case 'i':
                alphabet[8]++;
                break;
                case 'J': case 'j':
                alphabet[9]++;
                break;
                case 'K': case 'k':
                alphabet[10]++;
                break;
                case 'L': case 'l':
                alphabet[11]++;
                break;
                case 'M': case 'm':
                alphabet[12]++;
                break;
                case 'N': case 'n':
                alphabet[13]++;
                break;
                case 'O': case 'o':
                alphabet[14]++;
                break;
                case 'P': case 'p':
                alphabet[15]++;
                break;
                case 'Q': case 'q':
                alphabet[16]++;
                break;
                case 'R': case 'r':
                alphabet[17]++;
                break;
                case 'S': case 's':
                alphabet[18]++;
                break;
                case 'T': case 't':
                alphabet[19]++;
                break;
                case 'U': case 'u':
                alphabet[20]++;
                break;
                case 'V': case 'v':
                alphabet[21]++;
                break;
                case 'W': case 'w':
                alphabet[22]++;
                break;
                case 'X': case 'x':
                alphabet[23]++;
                break;
                case 'Y': case 'y':
                alphabet[24]++;
                break;
                case 'Z': case 'z':
                alphabet[25]++;
                break;
            }       
        }
    }

    for(int i = 0; i <= 25; i++){
        printf("%c: \t%d\n",'a' + i, alphabet[i]);;
    }
}

int wordLengthAnalysis(char text[][MAX_LINE_LENGTH], int lineTotal, int wordLength){

    int sentenceLength;
    int counter, wordSize = 0;

    for(int i = 0; i < lineTotal; i++){
        sentenceLength = strlen(&text[i][0]);
        for(int j = 0; j < sentenceLength + 2; j++){
            if(text[i][j] == ' '){
                if(counter == wordLength){
                    ++wordSize;
                    counter = 0;
                }
                else{
                counter = 0;
                }
            }
            else{
                counter++;
            }
        }
    }

    return wordSize;
}

void wordAnalysis(char text[][MAX_LINE_LENGTH], int lineTotal){

    char maxWords[800];
    char word[MAX_LINE_LENGTH], word2[MAX_WORD_LENGTH], *ptrText, *ptrTextCounter;
    int counter, textCounter = 0;
    int sentenceLength, wordTracker;
    int lineFlag;

    for(int i = 0; i < lineTotal; i++){
        ptrText = &text[i][0];
        sentenceLength = strlen(ptrText);
        counter = 0;

        for (int j = 0; j < sentenceLength + 1; j++){
            wordTracker = 1;

            if (text[i][j] == ' ' ){
                if (counter != 0){
                    sprintf(word, "%.*s", counter, ptrText);
                    ptrTextCounter = &text[i][j+1];
                    lineFlag = j;

                if(strstr(maxWords, word) == NULL){
                    for (int k = i; k < lineTotal; k++){
                        textCounter = 0;

                        if (lineFlag == j){
                            ptrTextCounter = &text[i][j+1];
                        }
                        else{
                            lineFlag = 0;
                            ptrTextCounter = &text[i][j+1];
                        }

                        for ( ; lineFlag < sentenceLength; lineFlag++){
                            if(text[k][lineFlag] == ' '){
                                if (textCounter != 0){
                                    if(textCounter == counter){
                                        sprintf(word2, "%.*s", textCounter, ptrTextCounter);
                                            if(strcmp(word, word2) == 0){
                                                wordTracker++;
                                            }
                                    }
                                    ptrTextCounter = &text[k][lineFlag];
                                    textCounter = 0;
                                }
                                else{
                                    ptrTextCounter = &text[k][lineFlag+1];
                                }
                            }
                            else{
                                textCounter++;
                            }
                        }
                    }

                    if(wordTracker == 1){
                        printf("\n\"%.*s\"\t\tappeared %d time", counter, ptrText, wordTracker);
                    }
                    else{
                        printf("\n\"%.*s\"\t\tappeared %d time", counter, ptrText, wordTracker);
                    }
                }


                strcat(maxWords, word);
                    ptrText = &text[i][j+1];
                    counter = 0;
                }
                else{
                    ptrText = &text[i][j+1];
                }
            }
            else{
                counter++;
            }
        }

    }


}
Noah210012
  • 89
  • 9
  • this switch: `switch(text[i][j]){` and all its' cases can be reduced to: `if( isalpha(text[i][j]) { alphabet[ tolower(text[i][j]) - 'a' ]++;}` Note: `tolower()` and `isalpha()` can be found in `ctypes.h` – user3629249 Mar 11 '17 at 22:15
  • Generally, a line like: `for(int i = 0; i <= 25; i++){` would be written as: `for(int i = 0; i < 26; i++){` and more generically as: `for(int i = 0; i < sizeof( alphabet); i++){ – user3629249 Mar 11 '17 at 22:20
  • this line: `printf("%c: \t%d\n",'a' + i, alphabet[i]);;` contains an extra trailing semicolon `;` and will not exactly reproduce the incoming characters, especially an incoming capital characters. – user3629249 Mar 11 '17 at 22:22
  • the function: `fgets()` will not set any unused input buffer characters to some known value. so this line: `for(int j = 0; j < MAX_LINE_LENGTH; j++){` should be: `for(int j = 0; text[i][j]; j++){` which will terminate when it sees the NUL byte string terminator (which `fgets()` placed at the end of the input string. HOWEVER, `fgets()` will have input the stdin trailing newline '\n' and placed it into the input buffer, so that char needs to be removed from each input line – user3629249 Mar 11 '17 at 22:27
  • regarding these two lines: `for(int i = 0; i < lineTotal; i++){ fgets(text[i], MAX_WORD_LENGTH, stdin);` what if the user wants to stop before inputting `lineTotal` lines? What if the user inputs an EOF? Suggest: while( i – user3629249 Mar 11 '17 at 22:33
  • there are several other problems in the posted code beyond those listed in the above comments. Suggest OP walk through the code with a debugger so they can see what is actually happening. – user3629249 Mar 11 '17 at 22:42

1 Answers1

5

I get a couple of warnings:

main.c:17:14: warning: variable length array used [-Wvla]
    char text[lineTotal][MAX_LINE_LENGTH];
             ^
main.c:17:15: warning: variable 'lineTotal' is uninitialized when used here [-Wuninitialized]
    char text[lineTotal][MAX_LINE_LENGTH];
              ^~~~~~~~~

You haven't initialized lineTotal but are using it. This causes undefined behavior.

main.c:64:21: warning: code will never be executed [-Wunreachable-code]
                    alphabet[2]++;
                    ^~~~~~~~

Your break; is likely misplaced.

main.c:152:20: warning: variable 'counter' may be uninitialized when used here [-Wconditional-uninitialized]
                if(counter == wordLength){
                   ^~~~~~~

Again, you're using a potentially uninitialized variable.

Also:

char n[1];

fgets(n, 10, stdin);

Your array has one element but you tell fgets it can access up to n[10].

Hint (if it wasn't obvious already): never program C without warnings.

Emil Laine
  • 41,598
  • 9
  • 101
  • 157
  • Thank you tuple_cat! That cleared up a few things. I am still not getting the desired output however but no more segmentation faults at least! – Noah210012 Mar 10 '17 at 20:45