I'm trying to create a program that opens a .txt file containing a speech and assigns each word to a space in the array/set based on the hash value. Collisions are accounted for using open addressing method. The program should be able to perform the following functions: add(), remove(), find(), count() which keeps count of the elements IN the array/set, and loadfactor(). A template header.h file was provided that required some filling in, but my unfamiliarity with that style of coding was making it difficult for me to understand it. Below I have provided the code I have so far, and everything seems to be working except the mCount. The speech contains about 300 words but when I run the code, the count output shows 17. I'm assuming the error is in my resizing function but I am unsure.
//hashset.h file
#pragma once
#include <cmath>
#include <functional>
#include <vector>
template <typename TValue, typename TFunc>
class HashSet
{
private:
// Unlike Java, the C++ hashtable array won't be full of null references.
// It will be full of "Entry" objects, each of which tracks its current state
// as EMPTY, FULL (with a value), or NIL (value was once here, but was removed).
class Entry
{
public:
enum EntryState { EMPTY, FULL, NIL };
TValue value;
EntryState state;
Entry() : value(), state(EMPTY) {}
Entry(const TValue& v) : value(v), state(EMPTY) {}
};
TFunc mHash;
std::vector<Entry> mTable;
std::size_t mCount;
public:
// Constructs a hashtable with the given size, using the given function for
// computing h(k).
// hash must be a callable object (function, functor, etc.) that takes a parameter
// of type TValue and returns std::size_t (an integer).
HashSet(int size, TFunc hash) : mHash(hash)
{
// initialize count
mCount = 0;
// hashtable array cannot be same data type as that of what is being stored (cannot be string)
// requirement #4 - if user inputs array size that is not a power of 2, constructor must round to nearest power of 2 value
size = pow(2, (int(log(size - 1) / log(2)) | 1));
mTable.resize(size); // resizes the vector to have given size.
// Each element will be default-constructed to have state EMPTY.
}
void resize(int new_size) {
HashSet aux{ new_size, mHash }; //double the size, same hash function
for (const auto& entry : mTable)
if (entry.state == Entry::FULL && entry.state == Entry::EMPTY && entry.state == Entry::NIL) //there is an element
aux.add(entry.value); //insert it on the new set
*this = aux;
}
// Inserts the given value into the set.
void add(const TValue& value)
{
// Use the type std::size_t for working with hash table indices.
// Invoke the mHash function, passing the key to calculate h(k), as in
// size_t hashCode = mHash(value);
// Mod down to size.
// Go to the table at that index and do the insertion routine.
// Note, if table is full when trying to add an element, it should double in size
// to keep table size a power of 2
if (double(mCount) / mTable.size() > 0.8) // load factor comparison
this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition
size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index
if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
mTable[hashCode].value = value; // store value in vector index specified by hashCode
mCount++; // increment counter when word is added
}
else {
for (std::size_t i = 1; i < mTable.size(); i++) {
// use open addressing to find next open space
if (mTable[hashCode].state != Entry::EMPTY) {
hashCode = ((mHash(value) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
}
else if (mTable[hashCode].value == value) { // account for duplicates
break; // exit for-loop
}
else if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
mTable[hashCode].value = value; // store value in vector index specified by new hashCode
mCount++; // increment counter when word is added
break; // exit for-loop
}
else
break; // exit for-loop
}
}
}
// Returns true if the given value is present in the set.
bool find(const TValue& key)
{
size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
if (mTable[hashCode].value == key)
return true;
else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // check that set is not empty or has a NIL state
for (std::size_t i = 1; i < mTable.size(); i++) {
// use open addressing again to find key
if (mTable[hashCode].value != key)
hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
else if (mTable[hashCode].value == key) {
return true; // value found at speecified location
break; // exit for-loop as first instance of value has been found
}
//else if (i == mTable.size()) // end of table reached, element not in set
//return false;
}
}
else // end of table reached, element was not in set
return false;
}
// Removes the given value from the set.
void remove(const TValue& key)
{
size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
if (mTable[hashCode].value == key) {
mTable[hashCode].value = Entry::NIL; // replace value with NIL so find() op does not return a false when searching for element
mCount--; // decrement element counter
}
else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state != Entry::NIL) { // check that there is a value to be removed
for (std::size_t i = 1; i < mTable.size(); i++) {
// use open addressing again to find key
if (mTable[hashCode].value != key) {
hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
}
else {
mTable[hashCode].value = Entry::NIL; // if found after open addressing, replace with NIL
mCount--; // decrement element counter
}
}
}
}
int count() {
return mCount;
}
double loadFactor() {
double a = double(mCount) / mTable.size();
return a;
}
};
// main function
#include "hashset.h"
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
int main()
{
string testline;
vector<string> word;
HashSet<std::string, std::hash<std::string> > obj1{ 50, std::hash<std::string>{} };
ifstream Test("speech.txt");
if (!Test)
{
cout << "There was an error opening the file.\n";
return 0;
}
//store words in vector
while (Test >> testline) {
word.push_back(testline);
//obj1.add(testline);
}
//output whole vector with position numbers for each entry
cout << "Array contents:\n";
for (int i = 0; i < word.size(); i++) {
obj1.add(word[i]);
cout << word[i] << "(" << i << ")" << endl;
}
cout << "current count: " << obj1.count() << endl;
obj1.add("abcd"); // should hash to 4
if (obj1.find("abcd"))
cout << "abcd is in the set " << endl;
else
cout << "abcd is not in set " << endl;
obj1.add("adcb"); // should hash to 4 then 5 after probing
if (obj1.find("adcb"))
cout << "adcb is in the set " << endl;
else
cout << "adcb is not in set " << endl;
obj1.add("acde"); // should hash to 4 then 7 after probing
if (obj1.find("acde"))
cout << "acde is in the set " << endl;
else
cout << "acde is not in set " << endl;
obj1.remove("adcb"); // 5 should have NIL
if (obj1.find("adcb"))
cout << "adcb is in the set " << endl;
else
cout << "adcb is not in set " << endl;
if (obj1.find("acde"))
cout << "acde is still in the set " << endl;
else
cout << "acde is not in set " << endl;
cout << "final count: " << obj1.count() << endl;
system("pause");
exit(0);
}
}