0

I need a C program that implements (a) approximate order-preserving Huffman coding, in which each phase merges the two adjacent subtrees whose weights give the smallest sum. The input is 1) a positive integer n and 2) a sequence of n positive integers giving the frequency counts (weights) for the symbols in an ordered character set.

Order preservation is guaranteed only when the leaf order is consistent with the order of the character set.

I have to modify the code below to make this possible:

    // Huffman code using a minHeap with handles (index-heap-based priority queue).
// Heap routines are adapted from "Algorithms in C, Third Edition", and
// "Algorithms in Java, Third Edition", Robert Sedgewick

// This is a prototype for demonstration purposes only.
// Minimally, the heap/priority queue implementation should
// be in a different source file.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

// State of the index-heap priority queue, shared by all PQ routines.
int N;          // Number of entries currently in the heap
int *pq;        // Heap array; slots 1..N hold subscripts into a[]
int *qp;        // Handle table: qp[k] is k's slot in pq[], or -1 if unused
int maxQueued;  // Capacity recorded by PQinit
double *a;      // Caller's table of priorities, indexed by the entries of pq[]

// Exchanges the entries in heap slots i and j of pq[] and then repairs the
// two handles in qp[] so each still records where its entry now lives.
void exch(int i, int j) {
    const int movedToJ = pq[i];
    const int movedToI = pq[j];

    pq[i] = movedToI;
    pq[j] = movedToJ;
    qp[movedToI] = i;
    qp[movedToJ] = j;
}

// Prepares an empty index priority queue over the caller's table.
//   items - table of priorities; heap entries are subscripts into it
//   n     - number of handles, i.e. subscripts 0..n-1 may be queued
//   m     - maximum number of entries queued at any one time
// Prints a diagnostic on stderr and terminates with a failure status when
// a size is negative or memory cannot be obtained.
void PQinit(double *items, int n, int m) {
    int i;

    if (n < 0 || m < 0) {
        fprintf(stderr, "PQinit: negative size %d\n", __LINE__);
        exit(EXIT_FAILURE);
    }

    a = items;    // Save reference to index table
    maxQueued = m;
    N = 0;
    // Slot 0 of pq[] is never used (the heap root is slot 1), hence m + 1.
    // The count is formed in size_t so that m + 1 cannot overflow int, and
    // calloc refuses counts whose total byte size would overflow.
    pq = calloc((size_t) maxQueued + 1, sizeof *pq); // Contains subscripts to a[]
    qp = calloc((size_t) n, sizeof *qp); // Inverse of pq, allows changing priorities
    if (!pq || !qp) {
        fprintf(stderr, "malloc failed %d\n", __LINE__);
        exit(EXIT_FAILURE);
    }
    // Set all handles to unused
    for (i = 0; i < n; i++)
        qp[i] = (-1);
}

// Returns 1 when the queue holds no entries, 0 otherwise.
int PQempty() {
    return N == 0;
}

// Returns 1 when the entry count equals the recorded capacity, 0 otherwise.
int PQfull() {
    return (maxQueued == N) ? 1 : 0;
}

// Compares two heap slots by the priorities they refer to in a[].
// Returns 1 when the entry in slot i has a strictly smaller priority than
// the entry in slot j, 0 otherwise.
int less(int i, int j) {
    const double first = a[pq[i]];
    const double second = a[pq[j]];

    return first < second;
}

// Swim: moves the entry in slot k towards the root (slot 1) for as long as
// it compares less than its parent in slot k / 2.
// The pq parameter is not referenced here; less() and exch() operate on the
// file-scope heap array.
void fixUp(int *pq, int k)
{
    int up;

    for (up = k / 2; k > 1 && less(k, up); up = k / 2) {
        exch(k, up);
        k = up;
    }
}

// Sink: moves the entry in slot k away from the root, each time swapping it
// with the smaller of its children (slots 2k and 2k + 1), until neither
// child compares less than it or it has no child within slots 1..N.
// The N parameter bounds the heap for this call and hides the file-scope
// counter of the same name; the pq parameter is not referenced here.
void fixDown(int *pq, int k, int N)
{
    int child;

    for (child = 2 * k; child <= N; child = 2 * k) {
        // Prefer the right sibling when it exists and is the smaller one
        if (child < N && less(child + 1, child))
            child++;
        if (!less(child, k))
            return;
        exch(k, child);
        k = child;
    }
}

// Adds subscript k to the queue: appends it in the next free heap slot,
// records that slot in its handle, then swims it into position.
void PQinsert(int k) {
    N++;
    pq[N] = k;
    qp[k] = N;
    fixUp(pq, N);
}

// Removes the entry at the heap root and returns its subscript.
// The root is swapped into the last slot, the heap shrinks by one, and the
// entry that took the root's place is sunk back into position.
int PQdelmin() {
    int removed;

    exch(1, N);
    N--;
    fixDown(pq, 1, N);
    removed = pq[N + 1];   // The old root now sits just past the heap
    qp[removed] = (-1);    // Set to unused
    return removed;
}

// Restores heap order for queued subscript k by swimming and then sinking
// its entry.
void PQchange(int k) {
    int slot = qp[k];

    fixUp(pq, slot);
    slot = qp[k];   // Re-read: swimming may have moved the entry
    fixDown(pq, slot, N);
}

// main implements Huffman code.
// Index is just a table of priorities whose
// subscripts are used in the PQ.

main() {
    int n, m, op, i, j, val;
    double *priority, probSum, expected = 0.0;
    int *left, *right;  // Links for Huffman code tree, root is subscript m-1
    int *parent;  // For printing the codes
    int *length;
    char *outString;

    printf("Enter alphabet size\n");
    scanf("%d", &n);
    m = 2 * n - 1;  // Number of nodes in tree
    priority = (double*) malloc(m * sizeof(double));
    left = (int*) malloc(m * sizeof(int));
    right = (int*) malloc(m * sizeof(int));
    parent = (int*) malloc(m * sizeof(int));
    outString = (char*) malloc((n + 1) * sizeof(char));
    length = (int*) malloc(m * sizeof(int));
    if (!priority || !left || !right || !parent || !outString || !length) {
        printf("malloc problem %d\n", __LINE__);
        exit(0);
    }

    PQinit(priority, m, n);

    for (i = 0; i < n; i++)
        priority[i] = (-1);

// Read and load alphabet symbols' probabilities into priority queue.
    probSum = 0.0;
    for (i = 0; i < n; i++) {
        scanf("%lf", priority + i);
        probSum += priority[i];
        PQinsert(i);
        left[i] = right[i] = (-1);
    }
    printf("Probabilities sum to %f\n", probSum);

// Huffman code tree construction
    for (i = n; i < m; i++) {
        left[i] = PQdelmin();
        right[i] = PQdelmin();
        parent[left[i]] = parent[right[i]] = i;
        priority[i] = priority[left[i]] + priority[right[i]];
        PQinsert(i);
    }
    i = PQdelmin();
    if (i != m - 1) {
        printf("The root isn't the root\n");
        exit(0);
    }
    parent[m - 1] = (-1);

// Goes breadth-first from root to compute length of prefix bit codes.
    length[m - 1] = 0;
    for (i = m - 1; i >= n; i--)
        length[left[i]] = length[right[i]] = length[i] + 1;

// Print the leaves, i.e. for the alphabet symbols
    printf("   i  prob  parent  bits product  code\n");
    for (i = 0; i < n; i++) {
        // Crawl up the tree to get prefix code
        outString[length[i]] = '\0';
        for (j = i; j != m - 1; j = parent[j])
            outString[length[j] - 1] = (left[parent[j]] == j) ? '0' : '1';
        printf("%5d %5.3f %5d %5d   %5.3f  %s\n", i, priority[i], parent[i],
                length[i], priority[i] * length[i], outString);
        expected += priority[i] * length[i];
    }
    printf("Expected bits per symbol: %f\n", expected);

// Print the internal nodes
    printf("   i  prob    left right parent\n");
    for (i = n; i < m; i++)
        printf("%5d %5.3f %5d %5d  %5d\n", i, priority[i], left[i], right[i],
                parent[i]);

    free(priority);
    free(left);
    free(right);
    free(parent);
    free(outString);
    free(length);
    free(pq);
    free(qp);
}

Following was suggested as an approach:

(a). You will want each heap entry to correspond to two adjacent subtrees that could be merged. After a PQdelmin() determines the merge to apply, you will need PQdelete() to discard unneeded candidate(s) (due to the merge) and a PQinsert() to include new candidate(s) (also resulting from the merge). Handles facilitate this.

But I am stuck on how to sequence the operations and manage the arrays. Kindly help!

Sangram Mohite
  • 735
  • 4
  • 11
  • 23

1 Answers1

0

Because each symbol is given a different probability, you could have one node containing a and z that is supposed to merge with another node containing b and c. There is no simple way of ordering non-leaf nodes, and this gets increasingly abstract at higher levels.

Anyway, to control the order of the subtrees, you could swap the left and right subtrees when merging the nodes:

// Huffman code tree construction
// Each pass removes two nodes from the priority queue with PQdelmin() and
// joins them under the new internal node i, whose priority is their sum.
for (i = n; i < m; i++) {
    left[i] = PQdelmin();
    right[i] = PQdelmin();
    // Put the two children into a fixed left/right order.  CompareNodes and
    // Swap are not defined in this snippet -- presumably CompareNodes ranks
    // two subtrees by the symbols they contain and Swap exchanges the two
    // ints it is given; TODO confirm against their definitions.
    // NOTE(review): the two nodes are still chosen by PQdelmin(), so nothing
    // here restricts the merge to subtrees that are adjacent in symbol order.
    if (CompareNodes(left[i], right[i]) > 0) {
        Swap(&left[i], &right[i]);
    }
    parent[left[i]] = parent[right[i]] = i;
    priority[i] = priority[left[i]] + priority[right[i]];
    PQinsert(i);
}

More information:

Community
  • 1
  • 1
Markus Jarderot
  • 86,735
  • 21
  • 136
  • 138