2

This program works as a spell checker, it reads a dictionary file to load into the hash table, then reads another text file that will be read and will check every word if it is in the hash table, if not then it is considered a misspelled word. All of my functions seem to work except for my check function, when I run it the number of misspelled words is always the same as the number of words in text. This was working before but I changed the hash function because this hash function was said to be better to assign the values into unique index, but after changing just the hash function the check function doesn't work anymore.

// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "dictionary.h"

// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

// Number of buckets in hash table
const unsigned int N = 200000;

// Hash table
node *table[N];

// Returns true if word is in dictionary else false
bool check(const char *word)
{
    // TODO
    int len = strlen(word);
    char copy[len + 1];
    // change into lowercase the word
    for (int i = 0; i != '\0'; i++)
    {
        copy[i] = tolower(word[i]);
    }
    // get the index by using the hash function
    int index = hash(copy);
    if (table[index] == NULL)
    {
        return false;
    }

    node *tmp = table[index];
    // check if the word is in the hash table
    while (tmp != NULL)
    {
        if (strcmp(tmp->word, copy) == 0)
        {
            return true;
        }

        tmp = tmp->next;
    }

    return false;
}

// Hashes word to a number
unsigned int hash(const char *word)
{
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned int hash = 0;
    for (int i = 0, n = strlen(word); i < n; i++)
    {
        hash = (hash << 2) ^ word[i];
    }
    return hash % N;
}

// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
    // TODO
    char *words = malloc(sizeof(char) * (LENGTH + 1));
    if (words == NULL)
    {
        return 1;
    }
    // initialize the hash table to NULL
    for (int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }

    // open dictionary file
    FILE *indata = fopen(dictionary, "r");

    // 1 character for '\0' and another for '\n' because fgets takes a trailing new line
    // when it reads 'man' the value of words will be "man\n\0" so meaning 2 extra characters
    while (fgets(words, LENGTH + 2, indata) != NULL)
    {
        // get the index by using the hash function
        int index = hash(words);
        // allocate memory for the newNode
        node *newNode = malloc(sizeof(node));
        if (newNode == NULL)
        {
            return false;
        }

        // get rid of the trailing new line from fgets
        words[strlen(words) - 1] = '\0';
        strcpy(newNode->word, words);
        // make the newNode the head of the list
        newNode->next = table[index];
        table[index] = newNode;
    }

    // free memory and close the opened file
    free(words);
    fclose(indata);
    return true;
}

// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
    // TODO
    // counter of words loaded
    unsigned int counter = 0;
    // loop through the hash table
    for (int i = 0; i < N; i++)
    {
        node *tmp = table[i];

        while (tmp != NULL)
        {
            counter++;
            tmp = tmp->next;
        }
    }
    return counter;
}

// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
    // TODO
    // loop through the whole hash table
    for (int i = 0; i < N; i++)
    {
        while (table[i] != NULL)
        {
            node *tmp = table[i]->next;
            free(table[i]);
            table[i] = tmp;
        }
    }
    return true;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
Ojou Nii
  • 244
  • 4
  • 11
  • 1
    Suggest you `\0` terminate the `copy` in `check()`. – Chris Hall Apr 17 '20 at 09:33
  • did you mean my for loop?? i != '\0' I changed it already i used word[i] != '\0' and even i < len; and it didn't make a difference... – Ojou Nii Apr 17 '20 at 10:29
  • 1
    Your `hash()` requires a `'\0'` terminated string. You don't seem to `'\0'` terminate `copy`. I agree that `i != '\0'` is a mistake... I missed that one. – Chris Hall Apr 17 '20 at 10:42

1 Answers1

0

There are multiple problems in your code:

  • The definition node *table[N]; is invalid in C because N must be a compile time constant expression. const unsigned int N = 200000; fits this constraint in C++, but not in C. N must be a macro or an enum definition.

  • in check(), the loop to copy the string as lowercase is incorrect: for (int i = 0; i != '\0'; i++) should be for (int i = 0; word[i] != '\0'; i++)

  • in check() you do not null terminate the string you build in copy. copy is allocated with malloc(), it is uninitialized, so the null terminator must be set explicitly.

  • the char argument in tolower(word[i]) must be cast as tolower((unsigned char)word[i]) to avoid undefined behavior on negative char values, should the char be signed on your platform.

  • in load(), the words array is allocated with a length of LENGTH+1 bytes, but you pass LENGTH+2 to fgets as the buffer size, causing potential undefined behavior if the dictionary contains a line with LENGTH characters.

  • in load(), hash(words) is called before stripping the newline at the end of the line. Hence the hash code is incorrect and the word will not be found in the dictionary because it is stored in the wrong bucket.

Here is a modified version:

// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "dictionary.h"

// Represents a node in a hash table
typedef struct node {
    struct node *next;
    char word[LENGTH + 1];
} node;

// Number of buckets in hash table
#define HASH_SIZE  200000

// Hash table
node *table[HASH_SIZE];

// Hashes word to a number
unsigned int hash(const char *word) {
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned int hash = 0;
    for (int i = 0; word[i] != '\0'; i++) {
        hash = (hash << 2) ^ word[i];
    }
    return hash % HASH_SIZE;
}

// Returns true if word is in dictionary else false
bool check(const char *word) {
    char copy[LENGTH + 1];
    int i, len = strlen(word);

    if (len > LENGTH)
        return false;

    // change into lowercase the word
    for (i = 0; word[i] != '\0'; i++) {
        copy[i] = (char)tolower((unsigned char)word[i]);
    }
    copy[i] = '\0';

    // get the index by using the hash function
    int index = hash(copy);
    // check if the word is in the hash table
    for (node *tmp = table[index]; tmp != NULL; tmp = tmp->next) {
        if (strcmp(tmp->word, copy) == 0) {
            return true;
        }
    }
    return false;
}

// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary) {
    // 1 character for '\0' and another for '\n' because fgets takes a trailing new line
    // when it reads 'man' the value of words will be "man\n\0" so meaning 2 extra bytes
    char words[LENGTH + 2];

    // open dictionary file
    FILE *indata = fopen(dictionary, "r");
    if (indata == NULL)
        return false;

    while (fgets(words, sizeof words, indata) != NULL) {
        // get rid of the trailing new line from fgets
        words[strcspn(words, "\n")] = '\0'; 
        // allocate memory for the newNode
        node *newNode = malloc(sizeof(node));
        if (newNode == NULL) {
            fclose(indata);
            return false;
        }
        strcpy(newNode->word, words);
        // get the index by using the hash function
        int index = hash(words);
        // make the newNode the head of the list
        newNode->next = table[index];
        table[index] = newNode;
    }

    // close the opened file
    fclose(indata);
    return true;
}

// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void) {
    // counter of words loaded
    unsigned int counter = 0;
    // loop through the hash table
    for (int i = 0; i < HASH_SIZE; i++) {
        for (node *tmp = table[i]; tmp != NULL; tmp = tmp->next) {
            counter++;
        }
    }
    return counter;
}

// Unloads dictionary from memory, returning true if successful else false
bool unload(void) {
    // loop through the whole hash table
    for (int i = 0; i < HASH_SIZE; i++) {
        while (table[i] != NULL) {
            node *next = table[i]->next;
            free(table[i]);
            table[i] = next;
        }
    }
    return true;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189