I'm programming a spellchecker per CS50's Week 4 problem set, but I ran into an issue. I am leaking memory.
:) dictionary.c exists
:) speller compiles
:) handles most basic words properly
:) handles min length (1-char) words
:) handles max length (45-char) words
:) handles words with apostrophes properly
:) spell-checking is case-insensitive
:) handles substrings properly
:( program is free of memory errors
valgrind tests failed; see log for more information.
As shown above, I'm passing all the tests except the last one.
My code (dictionary.c):
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 390;
// Hash table
node *table[N];
// # of words in dict
unsigned int numWords = 0;
// Returns true if word is in dictionary (case insensitive), else false
bool check(const char *word)
{
// TODO
int index = hash(word);
node *cursor = table[index];
while (cursor != NULL)
{
if (strcasecmp(word, cursor->word) == 0)
{
return true;
}
cursor = cursor->next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
// use a 2d array -> x-axis = first letter of word (a-z), y-axis = size of word (1-15)
// 390 buckets (26 x 15)
int first_letter = tolower(word[0]) - '`'; // gets the value of each inital letter (1-26)
int word_length = strlen(word);
int index = first_letter * word_length;
if (index > N)
{
return N - 1;
}
else
{
return index;
}
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
FILE *file = fopen(dictionary, "r");
if (!file) // checks if file can be opened
{
return false;
}
char word_list[LENGTH];
while (fscanf(file, "%s", word_list) != EOF) // reads one word
{
node *node_word = malloc(sizeof(node));
if (node_word == NULL) // checks if there is enough memory for the node
{
return false;
}
strcpy(node_word->word, word_list); // copies the already read word into the node
// prepends each new word in a linked list at table[index]
int index = hash(word_list); // finds which index this word belongs in the hash table
if (table[index] == NULL) // checks if there are any words in linked-list
{
table[index] = node_word; // the root points to newly-"read" word and that word marks beginning of linked list
}
else
{
struct node *next = table[index]; // creates a node that points beginning of linked list at that index
table[index] = node_word; // the root points to our newly-"read" word
node_word->next = next; // newly-"read" word points to "second" (if there is one) word in linked list
}
numWords++; // increments numWords, which is returned in size()
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return numWords;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
for (int i = 1; i <= N; i++) // starts at one since indexing starts at one
{
node *cursor = table[i];
while (cursor)
{
node *tmp = cursor;
cursor = cursor->next;
free(tmp);
}
}
return true;
}
Basically, I load in a "dictionary" and scan a piece of text to see if any of the words in it are "misspelled" (or not in the aforementioned dictionary). But, the problem is that the dictionary is not being freed or unloaded.
Link to assignment instructions: https://cs50.harvard.edu/x/2023/psets/5/speller/
I tried using Valgrind on different texts and I got varying results. For texts without any misspellings (e.g. cat.txt and pneumonoultramicroscopicsilicovolcanoconiosis.txt) I got:
==24212==
==24212== HEAP SUMMARY:
==24212== in use at exit: 0 bytes in 0 blocks
==24212== total heap usage: 143,096 allocs, 143,096 frees, 8,023,256 bytes allocated
==24212==
==24212== All heap blocks were freed -- no leaks are possible
==24212==
==24212== For lists of detected and suppressed errors, rerun with: -s
==24212== ERROR SUMMARY: 193 errors from 1 contexts (suppressed: 0 from 0)
But when I load in all texts that do have misspellings (the rest of them), I get:
==24578== Process terminating with default action of signal 2 (SIGINT)
==24578== at 0x4A5CA35: write (write.c:26)
==24578== by 0x49D2F6C: _IO_file_write@@GLIBC_2.2.5 (fileops.c:1180)
==24578== by 0x49D4A60: new_do_write (fileops.c:448)
==24578== by 0x49D4A60: _IO_new_do_write (fileops.c:425)
==24578== by 0x49D4A60: _IO_do_write@@GLIBC_2.2.5 (fileops.c:422)
==24578== by 0x49D3754: _IO_new_file_xsputn (fileops.c:1243)
==24578== by 0x49D3754: _IO_file_xsputn@@GLIBC_2.2.5 (fileops.c:1196)
==24578== by 0x49BE049: outstring_func (vfprintf-internal.c:239)
==24578== by 0x49BE049: __vfprintf_internal (vfprintf-internal.c:1593)
==24578== by 0x49A881E: printf (printf.c:33)
==24578== by 0x10964F: main (speller.c:122)
==24578==
==24578== HEAP SUMMARY:
==24578== in use at exit: 8,018,688 bytes in 143,094 blocks
**==24578== total heap usage: 143,096 allocs, 2 frees, 8,023,256 bytes allocated // correct amount of allocs, wrong amount of frees**
==24578==
==24578== 472 bytes in 1 blocks are still reachable in loss record 1 of 4
==24578== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==24578== by 0x49C76CD: __fopen_internal (iofopen.c:65)
==24578== by 0x49C76CD: fopen@@GLIBC_2.2.5 (iofopen.c:86)
==24578== by 0x109388: main (speller.c:55)
==24578==
==24578== 1,024 bytes in 1 blocks are still reachable in loss record 2 of 4
==24578== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==24578== by 0x49C6C23: _IO_file_doallocate (filedoalloc.c:101)
==24578== by 0x49D5D5F: _IO_doallocbuf (genops.c:347)
==24578== by 0x49D4FDF: _IO_file_overflow@@GLIBC_2.2.5 (fileops.c:744)
==24578== by 0x49D3754: _IO_new_file_xsputn (fileops.c:1243)
==24578== by 0x49D3754: _IO_file_xsputn@@GLIBC_2.2.5 (fileops.c:1196)
==24578== by 0x49BD1CC: outstring_func (vfprintf-internal.c:239)
==24578== by 0x49BD1CC: __vfprintf_internal (vfprintf-internal.c:1263)
==24578== by 0x49A881E: printf (printf.c:33)
==24578== by 0x1093D1: main (speller.c:64)
==24578==
==24578== 4,096 bytes in 1 blocks are still reachable in loss record 3 of 4
==24578== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==24578== by 0x49C6C23: _IO_file_doallocate (filedoalloc.c:101)
==24578== by 0x49D5D5F: _IO_doallocbuf (genops.c:347)
==24578== by 0x49D3543: _IO_file_xsgetn (fileops.c:1287)
==24578== by 0x49C7C28: fread (iofread.c:38)
==24578== by 0x10940A: main (speller.c:72)
==24578==
==24578== 8,013,096 bytes in 143,091 blocks are still reachable in loss record 4 of 4
==24578== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==24578== by 0x109A51: load (dictionary.c:80)
==24578== by 0x1092DB: main (speller.c:40)
==24578==
==24578== LEAK SUMMARY:
==24578== definitely lost: 0 bytes in 0 blocks
==24578== indirectly lost: 0 bytes in 0 blocks
==24578== possibly lost: 0 bytes in 0 blocks
==24578== still reachable: 8,018,688 bytes in 143,094 blocks
==24578== suppressed: 0 bytes in 0 blocks
==24578==
==24578== For lists of detected and suppressed errors, rerun with: -s
==24578== ERROR SUMMARY: 543 errors from 1 contexts (suppressed: 0 from 0)
On the bolded line, I noticed the expected amount of allocs (the # of words in the dictionary), but an incorrect amount of frees. I don't know why it only frees 2 times and how it's leaking memory in files that I haven't touched. As instructed, I am not able to change the code of these files: speller.c
// Implements a spell-checker
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
// Undefine any definitions
#undef calculate
#undef getrusage
// Default dictionary
#define DICTIONARY "dictionaries/large"
// Prototype
double calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[])
{
// Check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: ./speller [DICTIONARY] text\n");
return 1;
}
// Structures for timing data
struct rusage before, after;
// Benchmarks
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// Determine dictionary to use
char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// Load dictionary
getrusage(RUSAGE_SELF, &before);
bool loaded = load(dictionary);
getrusage(RUSAGE_SELF, &after);
// Exit if dictionary not loaded
if (!loaded)
{
printf("Could not load %s.\n", dictionary);
return 1;
}
// Calculate time to load dictionary
time_load = calculate(&before, &after);
// Try to open text
char *text = (argc == 3) ? argv[2] : argv[1];
FILE *file = fopen(text, "r");
if (file == NULL)
{
printf("Could not open %s.\n", text);
unload();
return 1;
}
// Prepare to report misspellings
printf("\nMISSPELLED WORDS\n\n");
// Prepare to spell-check
int index = 0, misspellings = 0, words = 0;
char word[LENGTH + 1];
// Spell-check each word in text
char c;
while (fread(&c, sizeof(char), 1, file))
{
// Allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// Append character to word
word[index] = c;
index++;
// Ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// Consume remainder of alphabetical string
while (fread(&c, sizeof(char), 1, file) && isalpha(c));
// Prepare for new word
index = 0;
}
}
// Ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// Consume remainder of alphanumeric string
while (fread(&c, sizeof(char), 1, file) && isalnum(c));
// Prepare for new word
index = 0;
}
// We must have found a whole word
else if (index > 0)
{
// Terminate current word
word[index] = '\0';
// Update counter
words++;
// Check word's spelling
getrusage(RUSAGE_SELF, &before);
bool misspelled = !check(word);
getrusage(RUSAGE_SELF, &after);
// Update benchmark
time_check += calculate(&before, &after);
// Print word if misspelled
if (misspelled)
{
printf("%s\n", word);
misspellings++;
}
// Prepare for next word
index = 0;
}
}
// Check whether there was an error
if (ferror(file))
{
fclose(file);
printf("Error reading %s.\n", text);
unload();
return 1;
}
// Close text
fclose(file);
// Determine dictionary's size
getrusage(RUSAGE_SELF, &before);
unsigned int n = size();
getrusage(RUSAGE_SELF, &after);
// Calculate time to determine dictionary's size
time_size = calculate(&before, &after);
// Unload dictionary
getrusage(RUSAGE_SELF, &before);
bool unloaded = unload();
getrusage(RUSAGE_SELF, &after);
// Abort if dictionary not unloaded
if (!unloaded)
{
printf("Could not unload %s.\n", dictionary);
return 1;
}
// Calculate time to unload dictionary
time_unload = calculate(&before, &after);
// Report benchmarks
printf("\nWORDS MISSPELLED: %d\n", misspellings);
printf("WORDS IN DICTIONARY: %d\n", n);
printf("WORDS IN TEXT: %d\n", words);
printf("TIME IN load: %.2f\n", time_load);
printf("TIME IN check: %.2f\n", time_check);
printf("TIME IN size: %.2f\n", time_size);
printf("TIME IN unload: %.2f\n", time_unload);
printf("TIME IN TOTAL: %.2f\n\n",
time_load + time_check + time_size + time_unload);
// Success
return 0;
}
// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
if (b == NULL || a == NULL)
{
return 0.0;
}
else
{
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
/ 1000000.0);
}
}
dictionary.h
// Declares a dictionary's functionality
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Prototypes
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);
#endif // DICTIONARY_H
This file allows me to use the command 'make' in the terminal as opposed to its longer clang equivalent. Makefile
speller:
clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -c -o speller.o speller.c
clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -c -o dictionary.o dictionary.c
clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -o speller speller.o dictionary.o -lm
Thanks!