Huffman coding (based on textbook)

Script started on Tue Sep 02 11:07:42 1997
sh-2.00$ cat huffman.c
/* Huffman algorithm from textbook, with expanded comments
and some names changed to protect the innocent. Also a little
reorganization. */

#define TRUE 1
#define FALSE 0

/* Maximum number of bits required for an encoding. */
#define MAXBITS 50

/* Maximum number of characters in the alphabet. */
#define MAXSYMBS MAXBITS

/* Maximum number of nodes required in the tree. */
#define MAXNODES 2*MAXSYMBS-1

/* When we finally have a character encoded,
   the bits will be recorded in this kind of structure.
   The code is a string of 0's and 1's, starting from array
   index "startpos" and ending at array index MAXBITS-1. */
struct codetype {
	int bits[MAXBITS];
	int startpos;
};

/* A node in the tree. If node[p] is not a root node, the
   parent field points to the node's parent. If it is a root
   node, parent points to (that is, indexes) the next root
   node in the priority queue. This is a tree in which we
   don't care what the children of a node are, but we do
   need to know whether each node is a left or a right
   child so we can assign the appropriate code. */
struct nodetype {
	int frequency;	/* So we type a few more characters, big deal. */
	int parent;
	int isleft;
};

/* Priority queue insertion and deletion prototypes. */
/* See next comment for changes from textbook. */
void pqinsert(int);
int pqmindelete();

/* It seems to me that if the priority queue routines
are going to use the parent fields in the nodes, then they
had better know about the nodes, so make this a global. */
/* Also the low end of the priority queue, called "rootnodes"
in the main in the textbook, should be completely under
the control of the priority queue routines. */
struct nodetype node[MAXNODES];

void main()
{
	struct codetype cd, code[MAXSYMBS];
	int i;
	int numberofsymbols;	/* Better than n. */
	int bitcounter;	/* k in textbook program. */
	int nextavailablenode;  /* p */
	int leftnode, rightnode; /* p1, p2 */
	int root;
        /* Get rid of rootnodes, see above comments. */
	int thisnode;	/* Used in code building to know where we are. */

	char symbol, alphabet[MAXSYMBS];

	for(i = 0; i < MAXSYMBS; i++)
	{
		alphabet[i] = ' ';
	}

	/* How big is the alphabet? */
	scanf("%d", &numberofsymbols);

	/* Get the alphabet and frequencies. */
	for(i = 0; i < numberofsymbols; i++)
	{
		scanf("%s%d", &symbol, &node[i].frequency);
		pqinsert( i);
		alphabet[i] = symbol;
	}

	/* We have used up nodes 0 to numberofsymbols-1 so far. */

	for(nextavailablenode =  numberofsymbols;
		nextavailablenode < 2*numberofsymbols - 1;
		nextavailablenode ++ /* Textbook has typo here. */ )
	{
		leftnode = pqmindelete();
		rightnode = pqmindelete();
		
		/* Make a new tree of which these are the children. */
		node[leftnode].parent = nextavailablenode;
		node[rightnode].parent = nextavailablenode;
		node[leftnode].isleft = TRUE;
		node[rightnode].isleft = FALSE;
	
		/* Frequency in parent is sum of frequencies in its children. */
		node[nextavailablenode].frequency = node[leftnode].frequency + node[rightnode].frequency;
		
		/* Stick new root node back in priority queue. */
		pqinsert( nextavailablenode);

	}

	/* When we get here, there is only one node remaining
	   with a NULL parent field. */
	root = pqmindelete();

	/* Extract the codes from the tree. */
	for(i = 0; i < numberofsymbols; i++)
	{
		/* Initialize code[i] */
		cd.startpos = MAXBITS;

		/* Travel up the tree. */
		thisnode = i;
		while(thisnode != root)
		{
			--cd.startpos;
			cd.bits[cd.startpos] = node[thisnode].isleft ? 0 : 1;
			thisnode = node[thisnode].parent;
		}

		/* Copy this into array. */
		for(bitcounter = cd.startpos; bitcounter < MAXBITS; bitcounter++)
		{
			code[i].bits[bitcounter] = cd.bits[bitcounter];
		}
		code[i].startpos = cd.startpos;

	}/* End of lengthy for loop. */

	/* Show results. */
	for(i = 0; i < numberofsymbols; i++)
	{
		printf("\n%c %d ", alphabet[i], node[i].frequency);
		for(bitcounter = code[i].startpos; bitcounter < MAXBITS; bitcounter++)
		{
			printf("%d", code[i].bits[bitcounter]);

		}
		printf("\n");
	}
}/* That's the main. */

/* Initially, priority queue is empty. */
int rootnodes = -1;

void pqinsert(int which)
/* Insert node[which] into priority queue. */
{
	int thisnode, previous;
	/* Handle case of empty queue. */
	if(rootnodes == -1)
	{
		node[which].parent = -1;
		rootnodes = which;
	}
	else
	{
		/* Chase through until we find a larger frequency. */
		thisnode = rootnodes; 
		/* Remember where we are and where we came from. */
		previous = -1;
		while(thisnode != -1 && node[thisnode].frequency < node[which].frequency )
		{
			previous = thisnode;
			thisnode = node[thisnode].parent;
 		}

		/* Link to first larger node. */		
		node[which].parent = thisnode;
		/* Copy link if inside list. */
		if(previous != -1)
		{
			node[previous].parent = which;
		}
		/* otherwise insert at beginning. */
		else
		{
			rootnodes = which;
		}

	}
}

int pqmindelete()
/* Remove first node from priority queue. */
{
	int thisnode = rootnodes;
	rootnodes = node[thisnode].parent;
	return thisnode;
}
	
sh-2.00$ a.out
3
a 8
b 2
c 5

a 8 1

b 2 00

c 5 01
sh-2.00$ a.out
a 32
sh-2.00$ a.out
6
a 12
b 12
c 2
d 2
e 2
f 6

a 12 0

b 12 11

c 2 1000

d 2 10011

e 2 10010

f 6 101
sh-2.00$ exit
exit

script done on Tue Sep 02 11:11:31 1997