I would suggest that you use a trie data structure. It is designed to store words and associate a value with each of them.
The advantages of a trie over a hashmap are the following:
- Lookup for an element is faster
- No collisions
- Easy to traverse the trie or return all values in alphabetical order
- Straightforward implementation (no hash function, no linked lists ...). It's a simple tree.
The memory use is generally lower in a trie than in a hash table, but in the worst case it will use more memory.
An even more efficient data structure for this purpose is the DAWG (directed acyclic word graph, also known as a deterministic acyclic finite state automaton), but its construction is much more complex, so unless you have millions of nodes in your graph I'd suggest you stick to the trie.
A possible implementation in C would be the following:
Data structure:
#include <stdlib.h>
#include <stdio.h>
/* Number of child slots per node: one for each letter 'a'..'z'. */
#define ALPHABET_SIZE 26

/*
 * Sentinel meaning "no value stored here". Because it is returned in-band,
 * -42 itself cannot be used as a real value. Parenthesized so the negative
 * literal always expands as a single operand inside larger expressions.
 */
#define IMPOSSIBLE_VALUE (-42)

/*
 * One node of the trie.
 *   children[i] - subtree reached by the i-th letter, or NULL if absent.
 *   value       - value of the key ending at this node, or IMPOSSIBLE_VALUE
 *                 when no key ends here.
 */
typedef struct TrieNode_struct {
    struct TrieNode_struct *children[ALPHABET_SIZE];
    int value;
} TrieNode_t;

/* A trie is referred to through a pointer to its root node. */
typedef TrieNode_t *Trie_t;
/**
 * Allocate a trie node that holds no value and has no children.
 *
 * @return the freshly allocated node, or NULL if the allocation fails.
 *         The caller owns the returned memory.
 */
TrieNode_t *new_node(void) {
    TrieNode_t *node = malloc(sizeof *node);
    if (node == NULL) {
        /* Report the failure instead of dereferencing a null pointer. */
        return NULL;
    }

    node->value = IMPOSSIBLE_VALUE;
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        node->children[i] = NULL;
    }
    return node;
}
/**
 * Map a lowercase letter to its child-slot index.
 *
 * @param c character to convert.
 * @return 0 for 'a' through 25 for 'z'; -1 for any other character, so
 *         callers can reject it instead of indexing out of bounds.
 *
 * Relies on 'a'..'z' being contiguous in the execution character set
 * (true for ASCII and its supersets).
 */
int char_to_idx(char c) {
    if (c < 'a' || c > 'z') {
        return -1;
    }
    return c - 'a';
}
Insert a string/value pair into the trie:
/**
 * Store `val` under the key formed by str[depth..] starting from `node`.
 *
 * @param node  node from which the remaining characters are followed.
 * @param str   NUL-terminated key; only characters that char_to_idx maps
 *              into [0, ALPHABET_SIZE) are accepted.
 * @param val   value to associate with the key.
 * @param depth index of the first character of `str` still to be consumed.
 *
 * The function returns nothing, so failures are silent: a NULL argument, a
 * negative depth or an unsupported character leaves the trie untouched.
 * If a node allocation fails part-way, the nodes already created stay in
 * place (they hold no value) and the key is not stored.
 */
void trie_insert_rec(TrieNode_t *node, const char *str, int val, int depth) {
    if (node == NULL || str == NULL || depth < 0) {
        return;
    }

    /* Validate the whole key first so a bad character cannot leave a
     * half-built path behind or index outside children[]. */
    for (int i = depth; str[i] != '\0'; i++) {
        int idx = char_to_idx(str[i]);
        if (idx < 0 || idx >= ALPHABET_SIZE) {
            return;
        }
    }

    /* Walk down iteratively, creating missing nodes on the way. */
    for (; str[depth] != '\0'; depth++) {
        int idx = char_to_idx(str[depth]);
        if (node->children[idx] == NULL) {
            node->children[idx] = new_node();
            if (node->children[idx] == NULL) {
                return; /* allocation failed */
            }
        }
        node = node->children[idx];
    }

    node->value = val;
}
/**
 * Associate `val` with the key `str` in `trie`.
 *
 * Forwards to trie_insert_rec, starting at the first character of the key.
 */
void trie_insert(Trie_t trie, char *str, int val) {
    const int first_char = 0;

    trie_insert_rec(trie, str, val, first_char);
}
Search for a value in the trie:
/**
 * Look up the value stored under the key str[depth..] starting from `node`.
 *
 * @param node  node from which the remaining characters are followed.
 * @param str   NUL-terminated key.
 * @param depth index of the first character of `str` still to be consumed.
 * @return the value stored at the node the key leads to, or
 *         IMPOSSIBLE_VALUE when the path does not exist, no value was
 *         stored there, the key contains a character char_to_idx does not
 *         map into [0, ALPHABET_SIZE), or an argument is NULL/negative.
 */
int trie_fetch_rec(TrieNode_t *node, const char *str, int depth) {
    if (node == NULL || str == NULL || depth < 0) {
        return IMPOSSIBLE_VALUE;
    }

    for (; str[depth] != '\0'; depth++) {
        int idx = char_to_idx(str[depth]);
        if (idx < 0 || idx >= ALPHABET_SIZE) {
            /* Unsupported character: such a key can never be in the trie. */
            return IMPOSSIBLE_VALUE;
        }
        node = node->children[idx];
        if (node == NULL) {
            return IMPOSSIBLE_VALUE;
        }
    }

    return node->value;
}
/**
 * Fetch the value associated with the key `str`, searching from `node`.
 *
 * Forwards to trie_fetch_rec, starting at the first character of the key,
 * and returns whatever that call returns.
 */
int trie_fetch(TrieNode_t *node, char *str){
    const int first_char = 0;
    int found = trie_fetch_rec(node, str, first_char);

    return found;
}
A tiny toy test:
/* Toy test: stores 11 under the key "john", reads it back and prints it. */
int main(void) {
    Trie_t trie = new_node();
    if (trie == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    /* The string literal already supplies the terminating NUL. */
    char str[] = "john";

    trie_insert(trie, str, 11);
    printf("%d\n", trie_fetch(trie, str));

    /* The nodes are not released: no trie-freeing routine is visible in
     * this file, and the process exits right after this point. */
    return EXIT_SUCCESS;
}