I need C code that implements an algorithm for (a) approximate order-preserving Huffman coding, in which each phase merges the two adjacent subtrees whose weights give the smallest sum. The input is 1) a positive integer n and 2) a sequence of n positive integers giving the frequency counts (weights) for the symbols in an ordered character set.
Order preservation is guaranteed only when the leaf order is consistent with the order of the character set.
I have to modify the code below to make this possible:
// Huffman code using a minHeap with handles (index-heap-based priority queue).
// Heap routines are adapted from "Algorithms in C, Third Edition", and
// "Algorithms in Java, Third Edition", Robert Sedgewick
// This is a prototype for demonstration purposes only.
// Minimally, the heap/priority queue implementation should
// be in a different source file.
#include <stdio.h>
#include <stdlib.h>
// Shared state of the index-heap priority queue.
int N;         // How many entries the heap currently holds
int *pq;       // Heap array; entries are subscripts into a[], slots start at 1
int *qp;       // Handle table: qp[k] is the heap slot of item k, or -1 if unused
int maxQueued; // Largest number of entries the heap may hold at once
double *a;     // Caller's table of priorities (not owned by the queue)
// Exchanges the contents of heap slots i and j and updates the handle
// table so that qp[] keeps pointing at each item's new slot.
void exch(int i, int j) {
    int itemAtI = pq[i];
    int itemAtJ = pq[j];

    pq[i] = itemAtJ;
    pq[j] = itemAtI;
    // Record the new positions: the item now in slot i, then the one in slot j.
    qp[itemAtJ] = i;
    qp[itemAtI] = j;
}
// Prepares an empty priority queue over the caller's priority table.
//   items - table of priorities; only the pointer is saved, so the table
//           must stay valid for as long as the queue is used
//   n     - number of handles (subscripts 0..n-1 of items may be queued)
//   m     - capacity: maximum number of entries queued at one time
// Allocates pq with m + 1 slots (heap positions start at 1) and qp with n
// slots, all marked unused (-1).
// On allocation failure, reports to stderr and terminates the program with
// a failure status.
void PQinit(double *items, int n, int m) {
    int i;

    a = items; // Save reference to index table
    maxQueued = m;
    N = 0;
    pq = malloc((maxQueued + 1) * sizeof *pq); // Contains subscripts to a[]
    qp = malloc(n * sizeof *qp); // Inverse of pq, allows changing priorities
    if (!pq || !qp) {
        fprintf(stderr, "malloc failed %d\n", __LINE__);
        exit(EXIT_FAILURE);
    }
    // Set all handles to unused
    for (i = 0; i < n; i++)
        qp[i] = (-1);
}
// Returns 1 when no entries are queued, 0 otherwise.
int PQempty() {
    return N == 0;
}
// Returns 1 when the number of queued entries equals the capacity given
// to PQinit, 0 otherwise.
int PQfull() {
    return maxQueued == N;
}
// Compares two heap slots by the priorities of the items they hold.
// Heap entries are subscripts into a[], so the comparison goes through a[].
// Returns 1 when slot i's priority is strictly smaller than slot j's.
int less(int i, int j) {
    double priorityI = a[pq[i]];
    double priorityJ = a[pq[j]];

    return priorityI < priorityJ;
}
// Swim: moves the entry in heap slot k toward the root (slot 1) while it
// has a smaller priority than its parent in slot k / 2.
// The pq parameter is accepted for the call signature; the comparisons and
// swaps are done by less() and exch().
void fixUp(int *pq, int k)
{
    int up;

    for (;;) {
        up = k / 2;
        if (k <= 1 || !less(k, up))
            break;
        exch(k, up);
        k = up;
    }
}
// Sink: moves the entry in heap slot k away from the root while one of its
// children (slots 2k and 2k + 1) has a smaller priority.
//   N - last heap slot to consider (shadows the global of the same name)
// The pq parameter is accepted for the call signature; the comparisons and
// swaps are done by less() and exch().
void fixDown(int *pq, int k, int N)
{
    int child = 2 * k;

    while (child <= N) {
        // Choose the smaller of the two children when a right child exists.
        if (child < N && less(child + 1, child))
            child += 1;
        // Stop once the smaller child is not smaller than the entry itself.
        if (!less(child, k))
            return;
        exch(k, child);
        k = child;
        child = 2 * k;
    }
}
// Adds item k (a subscript into a[]) to the queue: it is placed in the
// next free heap slot, its handle is recorded, and it is swum into place.
// No capacity check is made here.
void PQinsert(int k) {
    N += 1;
    pq[N] = k;
    qp[k] = N;
    fixUp(pq, N);
}
// Removes the entry at the root of the heap (smallest priority) and returns
// its subscript into a[]. The removed item's handle is reset to -1.
// No emptiness check is made here; callers are expected to ensure the
// queue holds at least one entry.
int PQdelmin() {
    int removed;

    // Move the root to the last slot, shrink the heap, then restore order.
    exch(1, N);
    N -= 1;
    fixDown(pq, 1, N);

    removed = pq[N + 1];
    qp[removed] = (-1); // Set to unused
    return removed;
}
// Re-establishes heap order for item k after its priority in a[] changed.
// The item is first swum up, then sunk down from wherever it ended up.
// NOTE(review): assumes k is currently queued (qp[k] is a valid heap slot).
void PQchange(int k) {
    int slot = qp[k];

    fixUp(pq, slot);
    // fixUp may have moved the item, so look its slot up again.
    slot = qp[k];
    fixDown(pq, slot, N);
}
// main implements Huffman code.
// Index is just a table of priorities whose
// subscripts are used in the PQ.
main() {
int n, m, op, i, j, val;
double *priority, probSum, expected = 0.0;
int *left, *right; // Links for Huffman code tree, root is subscript m-1
int *parent; // For printing the codes
int *length;
char *outString;
printf("Enter alphabet size\n");
scanf("%d", &n);
m = 2 * n - 1; // Number of nodes in tree
priority = (double*) malloc(m * sizeof(double));
left = (int*) malloc(m * sizeof(int));
right = (int*) malloc(m * sizeof(int));
parent = (int*) malloc(m * sizeof(int));
outString = (char*) malloc((n + 1) * sizeof(char));
length = (int*) malloc(m * sizeof(int));
if (!priority || !left || !right || !parent || !outString || !length) {
printf("malloc problem %d\n", __LINE__);
exit(0);
}
PQinit(priority, m, n);
for (i = 0; i < n; i++)
priority[i] = (-1);
// Read and load alphabet symbols' probabilities into priority queue.
probSum = 0.0;
for (i = 0; i < n; i++) {
scanf("%lf", priority + i);
probSum += priority[i];
PQinsert(i);
left[i] = right[i] = (-1);
}
printf("Probabilities sum to %f\n", probSum);
// Huffman code tree construction
for (i = n; i < m; i++) {
left[i] = PQdelmin();
right[i] = PQdelmin();
parent[left[i]] = parent[right[i]] = i;
priority[i] = priority[left[i]] + priority[right[i]];
PQinsert(i);
}
i = PQdelmin();
if (i != m - 1) {
printf("The root isn't the root\n");
exit(0);
}
parent[m - 1] = (-1);
// Goes breadth-first from root to compute length of prefix bit codes.
length[m - 1] = 0;
for (i = m - 1; i >= n; i--)
length[left[i]] = length[right[i]] = length[i] + 1;
// Print the leaves, i.e. for the alphabet symbols
printf(" i prob parent bits product code\n");
for (i = 0; i < n; i++) {
// Crawl up the tree to get prefix code
outString[length[i]] = '\0';
for (j = i; j != m - 1; j = parent[j])
outString[length[j] - 1] = (left[parent[j]] == j) ? '0' : '1';
printf("%5d %5.3f %5d %5d %5.3f %s\n", i, priority[i], parent[i],
length[i], priority[i] * length[i], outString);
expected += priority[i] * length[i];
}
printf("Expected bits per symbol: %f\n", expected);
// Print the internal nodes
printf(" i prob left right parent\n");
for (i = n; i < m; i++)
printf("%5d %5.3f %5d %5d %5d\n", i, priority[i], left[i], right[i],
parent[i]);
free(priority);
free(left);
free(right);
free(parent);
free(outString);
free(length);
free(pq);
free(qp);
}
The following was suggested as an approach:
(a). You will want each heap entry to correspond to two adjacent subtrees that could be merged. After a PQdelmin() determines the merge to apply, you will need PQdelete() to discard unneeded candidate(s) (due to the merge) and a PQinsert() to include new candidate(s) (also resulting from the merge). Handles facilitate this.
But I am stuck on sequencing the operations and managing the arrays. Kindly help!