I am working on implementing a huffman tree. I am having trouble generating the code dictionary for the tree. Here is what I have so far:
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.code = None
self.left = None
self.right = None
# add any necessary functions you need
#returns true if tree rooted at node a comes before tree rooted at node b
#function used to determine the order of the nodes in the list of huffman nodes
#input: 2 huffman nodes: a, b
#output: boolean value: True if a comes before b, else False
def comes_before (a, b) :
if a.freq == b.freq:
if a.char < b.char:
return True
else:
return False
if a.freq < b.freq:
return True
else:
return False
#COUNT OCCURENCE
#opens a text file with a givern file name and counts the
#frequency of occurences of all the characters within that file.
#store the recoreded frequency in a Python list with size 256
#input: text file (name of the file passed as a string)
#output: a Python list, size of 256, contain the frequency of occurrences
#of all the characters within the file.
#NOTE: THE INDEX OF EACH ELEMENT IS ACTUALLY THE ASCII CODE OF A CHARACTER
#AND THE VALUE OF THE ELEMENT IS THE FREQUENCY
def cnt_freq(filename):
with open(filename, encoding='utf-8-sig') as file:
frequency = [0]*256
string = file.read()
for character in string:
index = ord(character)
frequency[index] += 1
file.close()
return frequency
#FIND THE MINIMUM NODE:
#with the help from function comes_before(), this function will find the
#smallest node
#input: list of unsorted huffman nodes
#output: reference to the minimu node
def findMin(list):
min_node = list[0]
for index in range(0, len(list)):
current_node = list[index]
if comes_before(current_node,min_node):
min_node = current_node
return min_node
#BUILD A HUFFMAN TREE
#This function will scan the frequency list to find the characters that have frequency
#greater than 1. For each characters, the function creates a huffman node to hold the
#ascii value of the character and the freq. These nodes will be put together into a list.
#Then it is going to use the findMin function to group up 2 smallest nodes.
#The end result will be a huffman tree
#input: frequency list
#output: reference to the root node of the huffman tree
def create_huff_tree(char_freq):
list = []
#making a list of huffman nodes
for index in range (0,256):
if char_freq[index] != 0:
new_node = HuffmanNode(index,char_freq[index])
list.append(new_node)
while len(list) > 1:
node1 = findMin(list)
list.remove(node1)
node2 = findMin(list)
list.remove(node2)
combine_freq = node1.freq + node2.freq
combine_char = min(node1.char, node2.char)
combine_node = HuffmanNode(combine_char,combine_freq)
combine_node.left = node1
combine_node.right = node2
list.insert(0,combine_node)
return list[0]
#BUILD A LIST FOR THE CHARACTER CODE
#this function will create a list of strings, size of 256
#it will call the helper function to get the list of codes and return the list
#input: root node of the huffman tree
#output: list of code for each character
def create_code (node):
code_list = [""]*256
code_list = helper_function(node, code_list)
return code_list
#this function will traverse the huffman tree to find the leaves. (only the leaves matter)
#it will call the function generate_code to get the code of the character of the leaf
#then it will store the code into the list at appropriate position. Then it will return the list.
#input: the node to get code, the root node, the list to store code
#output: the list of codes
def helper_function(node, list):
generating_code(node)
if (node.left == None) and (node.right == None) :
list[node.char] = node.code
else:
if (node.left != None):
helper_function(node.left, list)
if (node.right != None):
helper_function(node.right, list)
return list
def generating_code(node, temp = ""):
if (node.left != None):
history_left = temp
temp = temp + "0"
generating_code(node.left, temp)
temp = history_left
if (node.right != None):
history_temp = temp
temp = temp + "1"
generating_code(node.right, temp)
temp = history_temp
if (node.left == None) and (node.right == None):
node.code = temp
print (chr(node.char),temp)
#reads an input text file and writes, using the huffman code, the encoded
#text into an output file
#input: name of input and output file (as strings)
#output: output file holds the encoded text
def huffman_encode(in_file, out_file):
freqlist = cnt_freq(in_file)
hufftree = create_huff_tree(freqlist)
codes = create_code(hufftree)
fout = open(out_file, 'w')
with open(in_file, encoding='utf-8-sig') as fin:
string = fin.read()
for character in string:
converted_code = codes[ord(character)]
fout.write(converted_code)
fin.close()
fout.close()
def huffman_decode(freqs, encoded_file, decode_file):
pass
def tree_preord (node):
pass
So what I am having trouble with is when I try to run the generating_code function, the print (chr(node.char),temp) is generating the following:
a 0000
f 0001
b 001
c 01
d 1
a 000
f 001
b 01
c 1
a 00
f 01
b 1
a 0
f 1
a
f
b
c
d
F.a 0000
f 0001
b 001
c 01
d 1
a 000
f 001
b 01
c 1
a 00
f 01
b 1
a 0
f 1
a
f
b
c
d
F.
So, as you can see, for the first 5 lines, the function actually did get the right code for each character... to my test case. But then, it keeps going and eventually messed up everything. Can anyone please help me with this function???? Thank you in advance!!