-1
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "huffman.h"

// Structures defined here.
struct hmNode {
  struct hmNode *left;
  char ch;
  freq f;
  bool used;
  struct hmNode *right;
};
typedef struct hmNode hmNode;

struct nodeArr {
  int capacity;
  struct hmNode **arr;
};
typedef struct nodeArr nodeArr;

//-----------------------------------------------------------------------------
// Functions defined here.

hmNode *new_node(char ch, freq f){
  hmNode *p = malloc(sizeof(hmNode));
  *p = (hmNode) {NULL, ch, f, false, NULL};
  return p;
}

nodeArr *make_arr(char *chs, freq  *fs){
  nodeArr *nodeArr = malloc(sizeof(nodeArr));
  int n = strlen(chs);
  nodeArr->capacity = n;
  nodeArr->arr = malloc( sizeof(hmNode*) * (n+1) );
  for(int i=0; i<nodeArr->capacity; i++){
    nodeArr->arr[i] = new_node(chs[i], fs[i]);
  }
  nodeArr->arr[nodeArr->capacity] = new_node('\0', -1);
  return nodeArr;
}

int extract_min_idx(nodeArr *nodeArr){
  int smaller;
  hmNode **arr = nodeArr->arr;
  int idx = 0;
  while(arr[idx]->used) idx++;
  smaller = idx;
  int val = arr[idx]->f;
  while(idx < nodeArr->capacity){
    idx++;
    while(arr[idx]->used) idx++;
    if(arr[idx]->ch != '\0' && arr[idx]->f < val) {
      val = arr[idx]->f;
      smaller = idx;
    }
  }
  arr[smaller]->used = true;
  return smaller;
}

void insert_node(nodeArr *nodeArr){
  int s1 = extract_min_idx(nodeArr);
  int s2 = extract_min_idx(nodeArr);
  hmNode *new = malloc(sizeof(hmNode));
  new->left = nodeArr->arr[s1];
  new->right = nodeArr->arr[s2];
  new->f = new->left->f + new->right->f;
  new->used = false;
  nodeArr->arr[s1] = new;
}

bool is_tree_done(nodeArr *nodeArr){
  int cnt = 0;
  for(int i=0;i<nodeArr->capacity;i++){
    if(!(nodeArr->arr[i]->used)) cnt++;
  }
  if(cnt == 1) return true;
  else return false;
}

//void free_nodeArr(nodeArr *nodeArr){
//  for(int i=0;i<nodeArr->capacity; i++){
//    free(nodeArr->arr[i]);
//    }
//  free(nodeArr->arr[nodeArr->capacity]);
//  free(nodeArr->arr);
//  free(nodeArr);
//}

hmNode *build_tree(char chs[], freq fs[]){
  hmNode *root;
  nodeArr *nodeArr = make_arr(chs, fs);
  while(!(is_tree_done(nodeArr))){
    insert_node(nodeArr);
  }
  for(int i=0;i<nodeArr->capacity;i++){
    if(!(nodeArr->arr[i]->used)) {
      root = nodeArr->arr[i];
      root->used = true;
    }
  }
  return root;
}

int is_leaf(hmNode *hmNode){
  return !(hmNode->left) && !(hmNode->right);
}

void encode(hmNode *root, int idx, char code[]){
  if(root->left){
    code[idx]='0';
    encode(root->left, idx + 1, code);
  }
  if(root->right){
    code[idx]='1';
    encode(root->right, idx + 1, code);
  }
  if(is_leaf(root)){
    printf("%c: ", root->ch);
    printf("%s\n", code);
  }
}

// void free_tree(hmNode *root){
//   if(is_leaf(root))
//   free_tree(root->left);
//   free_tree(root->right);
//   free(root);
// }
//-----------------------------------------------------------------------------
// Tests

void test_new_node(){
  char chs[] = "ab";
  freq fs[] = {1,2};
  hmNode *new = new_node(chs[0], fs[0]);
  assert(new->ch  == 'a');
  assert(new->f == 1);
  free(new);
}

void test_new_arr(){
  char chs[] = "ab";
  freq fs[] = {1,2};
  nodeArr *newArr = make_arr(chs, fs);
  assert(newArr->arr[0]->ch == 'a');
  assert(newArr->arr[0]->f  ==  1);
  assert(newArr->arr[1]->ch == 'b');
  assert(newArr->arr[1]->f  ==  2);
  assert(newArr->arr[2]->ch == '\0');
  assert(newArr->arr[2]->f  ==  -1);
  //free_nodeArr(newArr);
}

//-----------------------------------------------------------------------------
int main(){
  test_new_node();
  test_new_arr();
  printf("Completed\n");

  //hmNode *root = build_tree(chs, fs);
  //free_tree(root);
  // printf("%d\n", root->f);
  // printf("%d\n", root->left->f);
  // printf("%d\n", root->right->f);
  // printf("%d\n", root->right->left->f);
  // printf("%d\n", root->right->right->f);
  // char code[10];
  // encode(root, 0, code);

  return 0;
}

I tried to build a Huffman tree using a priority queueing algorithm, and I checked it worked when I compiled it using 'clang -std=c11 -Wall -pedantic -g (NAMEOFTHEFILE).c -o (NAMEOFTHEFILE)'. However when I compiled it with 'clang -std=c11 -Wall -pedantic -g (NAMEOFTHEFILE).c -o (NAMEOFTHEFILE) -fsanitize=undefined -fsanitize=address -fsanitize=leak' it kept saying 'heap-buffer-overflow', I guess there's a problem on 'make_arr' function.

Could anyone can tell me how to handle this error?

  • Not your problem, but `nodeArr->arr[i] = new_node(chs[0], fs[i]);` should probably be `nodeArr->arr[i] = new_node(chs[i], fs[i]);`. And better use Valgrind for more detailed error. – Eugene Sh. Dec 03 '18 at 21:29
  • Thanks for pointing it out, but it still doesn't work:( – onchainben Dec 03 '18 at 21:33
  • I can't see an issue (well, unless the sanitizer is analyzing the functions which are not used as well). Could be false positive? Try Valgrind. – Eugene Sh. Dec 03 '18 at 21:51
  • I don't think the sanitizer checks the unused functions. Which options should I add on Valgrind to check 'heap-buffer-overflow' issue? – onchainben Dec 03 '18 at 21:55
  • Dosen't clang -fsanitize output contain the location of the overflow? [The docs](https://clang.llvm.org/docs/AddressSanitizer.html) indicate that it does. – AShelly Dec 03 '18 at 22:16
  • It didn't show me which line of the code caused overflow, but just showed something like this: ==29113==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000000018 at pc 0x000000512612 bp 0x7ffef40c63f0 sp 0x7ffef40c63e8 WRITE of size 8 at 0x602000000018 thread T0 and some other memory addresses. – onchainben Dec 03 '18 at 22:22

1 Answers1

1

Your problem is here:

 nodeArr *nodeArr = malloc(sizeof(nodeArr));

sizeof(nodeArr) is sizeof the pointer (8), not sizeof the structure with the same name (16). This is one of the many reasons it is not a good idea to name your variables the same as your types.

You should look into configuring the ASAN_SYMBOLIZER_PATH so that you have better symbols when sanitizing. That's how I found this.

AShelly
  • 34,686
  • 15
  • 91
  • 152