Huffman coding (based on textbook)
Script started on Tue Sep 02 11:07:42 1997
sh-2.00$ cat huffman.c
/* Huffman algorithm from textbook, with expanded comments
and some names changed to protect the innocent. Also a little
reorganization. */
#define TRUE 1
#define FALSE 0
/* Maximum number of bits required for an encoding. */
#define MAXBITS 50
/* Maximum number of characters in the alphabet. */
#define MAXSYMBS MAXBITS
/* Maximum number of nodes required in the tree. */
#define MAXNODES 2*MAXSYMBS-1
/* When we finally have a character encoded,
the bits will be recorded in this kind of structure.
The code is a string of 0's and 1's, starting from array
index "startpos" and ending at array index MAXBITS-1. */
struct codetype {
int bits[MAXBITS];
int startpos;
};
/* A node in the tree. If node[p] is not a root node, the
parent field points to the node's parent. If it is a root
node, parent points to (that is, indexes) the next root
node in the priority queue. This is a tree in which we
don't care what the children of a node are, but we do
need to know whether each node is a left or a right
child so we can assign the appropriate code. */
struct nodetype {
int frequency; /* So we type a few more characters, big deal. */
int parent;
int isleft;
};
/* Priority queue insertion and deletion prototypes. */
/* See next comment for changes from textbook. */
void pqinsert(int);
int pqmindelete();
/* It seems to me that if the priority queue routines
are going to use the parent fields in the nodes, then they
had better know about the nodes, so make this a global. */
/* Also the low end of the priority queue, called "rootnodes"
in the main in the textbook, should be completely under
the control of the priority queue routines. */
struct nodetype node[MAXNODES];
void main()
{
struct codetype cd, code[MAXSYMBS];
int i;
int numberofsymbols; /* Better than n. */
int bitcounter; /* k in textbook program. */
int nextavailablenode; /* p */
int leftnode, rightnode; /* p1, p2 */
int root;
/* Get rid of rootnodes, see above comments. */
int thisnode; /* Used in code building to know where we are. */
char symbol, alphabet[MAXSYMBS];
for(i = 0; i < MAXSYMBS; i++)
{
alphabet[i] = ' ';
}
/* How big is the alphabet? */
scanf("%d", &numberofsymbols);
/* Get the alphabet and frequencies. */
for(i = 0; i < numberofsymbols; i++)
{
scanf("%s%d", &symbol, &node[i].frequency);
pqinsert( i);
alphabet[i] = symbol;
}
/* We have used up nodes 0 to numberofsymbols-1 so far. */
for(nextavailablenode = numberofsymbols;
nextavailablenode < 2*numberofsymbols - 1;
nextavailablenode ++ /* Textbook has typo here. */ )
{
leftnode = pqmindelete();
rightnode = pqmindelete();
/* Make a new tree of which these are the children. */
node[leftnode].parent = nextavailablenode;
node[rightnode].parent = nextavailablenode;
node[leftnode].isleft = TRUE;
node[rightnode].isleft = FALSE;
/* Frequency in parent is sum of frequencies in its children. */
node[nextavailablenode].frequency = node[leftnode].frequency + node[rightnode].frequency;
/* Stick new root node back in priority queue. */
pqinsert( nextavailablenode);
}
/* When we get here, there is only one node remaining
with a NULL parent field. */
root = pqmindelete();
/* Extract the codes from the tree. */
for(i = 0; i < numberofsymbols; i++)
{
/* Initialize code[i] */
cd.startpos = MAXBITS;
/* Travel up the tree. */
thisnode = i;
while(thisnode != root)
{
--cd.startpos;
cd.bits[cd.startpos] = node[thisnode].isleft ? 0 : 1;
thisnode = node[thisnode].parent;
}
/* Copy this into array. */
for(bitcounter = cd.startpos; bitcounter < MAXBITS; bitcounter++)
{
code[i].bits[bitcounter] = cd.bits[bitcounter];
}
code[i].startpos = cd.startpos;
}/* End of lengthy for loop. */
/* Show results. */
for(i = 0; i < numberofsymbols; i++)
{
printf("\n%c %d ", alphabet[i], node[i].frequency);
for(bitcounter = code[i].startpos; bitcounter < MAXBITS; bitcounter++)
{
printf("%d", code[i].bits[bitcounter]);
}
printf("\n");
}
}/* That's the main. */
/* Initially, priority queue is empty. */
int rootnodes = -1;
void pqinsert(int which)
/* Insert node[which] into priority queue. */
{
int thisnode, previous;
/* Handle case of empty queue. */
if(rootnodes == -1)
{
node[which].parent = -1;
rootnodes = which;
}
else
{
/* Chase through until we find a larger frequency. */
thisnode = rootnodes;
/* Remember where we are and where we came from. */
previous = -1;
while(thisnode != -1 && node[thisnode].frequency < node[which].frequency )
{
previous = thisnode;
thisnode = node[thisnode].parent;
}
/* Link to first larger node. */
node[which].parent = thisnode;
/* Copy link if inside list. */
if(previous != -1)
{
node[previous].parent = which;
}
/* otherwise insert at beginning. */
else
{
rootnodes = which;
}
}
}
int pqmindelete()
/* Remove first node from priority queue. */
{
int thisnode = rootnodes;
rootnodes = node[thisnode].parent;
return thisnode;
}
sh-2.00$ a.out
3
a 8
b 2
c 5
a 8 1
b 2 00
c 5 01
sh-2.00$ a.out
a 32
sh-2.00$ a.out
6
a 12
b 12
c 2
d 2
e 2
f 6
a 12 0
b 12 11
c 2 1000
d 2 10011
e 2 10010
f 6 101
sh-2.00$ exit
exit
script done on Tue Sep 02 11:11:31 1997