/* ** Command & Conquer Red Alert(tm) ** Copyright 2025 Electronic Arts Inc. ** ** This program is free software: you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation, either version 3 of the License, or ** (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program. If not, see . */ /**************************************************************************** * * C O N F I D E N T I A L --- W E S T W O O D S T U D I O S * *---------------------------------------------------------------------------- * * FILE * huffcmp.h * * DESCRIPTION * Huffman order 0 compressor. * * PROGRAMMER * Denzil E. Long, Jr. * * DATE * May 19, 1995 * *---------------------------------------------------------------------------- * * PUBLIC * HuffCompress - Compress data using huffman order 0 compression. * HuffCount - Count the frequency of occurence of every byte. * HuffScaleCounts - Scale down the frequency counts. * RLEHuffCounts - Run Length Encode the Huffman counts. * ConvertToCodes - Convert the Huffman tree into a table of codes. * HuffEncode - Huffman encode the data. * ****************************************************************************/ #include #include "huffman.h" /**************************************************************************** * * NAME * HuffCompress - Compress data using huffman order 0 compression. * * SYNOPSIS * Size = HuffCompress(Data, Buffer, Size, Temp) * * long HuffCompress(unsigned char *, unsigned char *, long, char *); * * FUNCTION * This function performs an order 0 Huffman encoding of the input data. * The algorithm used is fairly straightforward. First a count is made of * all the bytes in the input data, then the counts are scaled down to * a single byte representation in the node array. After the counts are * scaled, a Huffman decoding tree is built from the node array. Then * a code array is built by traversing the tree for each symbol. Finally, * the input data is compressed. * * INPUTS * Data - Pointer to data to compress. * Buffer - Pointer to compressed data. * Size - Length of data to compress. * Temp - Pointer to temporary working buffer. (Must be >= 5120 bytes!) * * RESULT * Size - Compressed size. * ****************************************************************************/ long __cdecl HuffCompress(unsigned char *data, unsigned char *buffer, long length, char *temp) { #if(1) TreeNode *nodes; HuffCode *codes; long size; long root; /* Initialize variables */ nodes = (TreeNode *)temp; temp += (514 * sizeof(TreeNode)); codes = (HuffCode *)temp; /* Analyze the frequency of the data. */ HuffCount(data, nodes, length, 1); HuffScaleCounts(nodes); /* Save the counts for the decompression. */ size = RLEHuffCounts(nodes, buffer); buffer += size; /* Build the Huffman decode tree and generate codes for encoding. */ root = BuildHuffTree(nodes); ConvertToCodes(nodes, codes, 0, 0, root); /* Encode the data. */ size += HuffEncode(data, buffer, codes, length); return (size); #else TreeNode *nodes; HuffCode *codes; unsigned long *counts; unsigned long max_count; long i; long size; long first; long last; long next; unsigned long symbol; unsigned long mask; /* Initialize variables. */ nodes = (TreeNode *)temp; temp += (514 * sizeof(TreeNode)); counts = (unsigned long *)temp; codes = (HuffCode *)temp; /* Zero the initial counts. */ memset(temp, 0, 256 * sizeof(unsigned long)); size = 0; /*------------------------------------------------------------------------- * Calculate the distribution of the data then scale down the counts so * they fit in an 8 bit value. This is done in order to limit the size of * the codes to 16 bits. *-----------------------------------------------------------------------*/ i = 0; while (i < length) { counts[((unsigned char *)data)[i]]++; i++; } /* Scale down the counts. */ max_count = 0; /* Find the maximum count. */ for (i = 0; i < 256; i++) { if (counts[i] > max_count) { max_count = counts[i]; } } if (max_count == 0) { counts[0] = 1; max_count = 1; } max_count /= 255; max_count++; /* Scale down the counts. */ for (i = 0; i < 256; i++) { nodes[i].count = (unsigned long)(counts[i] / max_count); /* Make sure that a node with a non-zero count does not get scaled * down to zero. */ if ((nodes[i].count == 0) && (counts[i] != 0)) { nodes[i].count = 1; } } nodes[HUFF_EOS].count = 1; /*------------------------------------------------------------------------- * OUTPUT THE COUNTS * * In order to save space, we store runs of counts in the following format: * * Start, Stop, Counts..., Start, Stop, Counts..., ..., 0 * * The list is terminated by storing a start value of zero (0). * * In order to efficiently use this format, we do not want to stop a run * because of just one or two zero counts. So we include zero counts of * less than three (3) in the run. *-----------------------------------------------------------------------*/ /* Find the first occurance of a non-zero count. */ first = 0; while ((first < 255) && (nodes[first].count == 0)) { first++; } /* Each time I hit the start of the loop, I assume that first is the * number for a run of non-zero values. The rest of the loop is * concerned with finding the value for last, which is the end of the * run, and the value of next, which is the start of the next run. * At the end of the loop, I assign next to first, so it starts in on * the next run. */ for (; first < 256; first = next) { last = first + 1; for (;;) { /* Find the end of the run of non-zeros. */ for (; last < 256; last++) { if (nodes[last].count == 0) { break; } } last--; /* Check the beginning of the next run of non-zero counts. */ for (next = last + 1; next < 256; next++) { if (nodes[next].count != 0) { break; } } /* Quit the run if we have reached the end. */ if (next > 255) { break; } /* Quit the run if there is more than three non-zero counts. */ if ((next - last) > 3) { break; } last = next; }; /* Output Start and Stop. */ *buffer++ = first; *buffer++ = last; /* Output the run of counts. */ for (i = first; i <= last; i++) { *buffer++ = (char)nodes[i].count; } size += (((last - first) + 1) + 2); } /* Output terminator. */ *buffer++ = 0; size++; /*------------------------------------------------------------------------- * Build the Huffman tree. All active nodes are scanned in order to locate * the two nodes with the minimum weights. These two weights are added * together and assigned to a new node. The new node makes the two minimum * nodes into its 0 child and 1 child. The two minimum nodes are then * marked as inactive. This process repeats until their is only one node * left, which is the root node. *-----------------------------------------------------------------------*/ /* Node 513 is used to arbitratilly provide a node with a guaranteed * maximum value. */ nodes[513].count = 0xFFFF; for (next = (HUFF_EOS + 1); ; next++) { first = 513; last = 513; for (i = 0; i < next; i++) { /* We are only concerned with non-zero count nodes. */ if (nodes[i].count != 0) { if (nodes[i].count < nodes[first].count) { last = first; first = i; } else if (nodes[i].count < nodes[last].count) { last = i; } } } if (last == 513) { break; } nodes[next].count = (nodes[first].count + nodes[last].count); nodes[first].count = 0; nodes[last].count = 0; nodes[next].child0 = (first << 3); nodes[next].child1 = (last << 3); } next--; /*------------------------------------------------------------------------- * Convert the Huffman tree into an encoding table then encode the data. *-----------------------------------------------------------------------*/ ConvertToCodes(nodes, codes, 0, 0, (next << 3)); /* Encode the data. */ *buffer = 0; next = 0x80; i = 0; do { if (i < length) { symbol = ((unsigned char *)data)[i]; } else { symbol = HUFF_EOS; } mask = 1L << (codes[symbol].bits - 1); while (mask != 0) { /* Set a bit in the output stream for each bit in the code. */ if (mask & codes[symbol].code) { *buffer |= next; } /* Next bit position. */ next >>= 1; /* Advance to the next byte in the output stream when the current * byte is full. */ if (next == 0) { buffer++; *buffer = 0; next = 0x80; size++; } /* Next bit in the code. */ mask >>= 1; } i++; } while (symbol != HUFF_EOS); if (next != 0x80) { size++; } return (size); #endif } /**************************************************************************** * * NAME * HuffCount - Count the frequency of occurence of every byte. * * SYNOPSIS * HuffCount(Data, Nodes, Length, Zero) * * void HuffCounts(unsigned char *, TreeNode *, long, long); * * FUNCTION * This function counts the frequency of occurence of every byte in the * input data. The nodes must be initialized to zero prior to calling this * function for the first time, otherwise the counts will be flawed. * * INPUTS * Data - Pointer to data to analyze. * TreeNode - Pointer to array of nodes. * Length - Length of data to analyze. * Zero - Zero any previous counts flag. (TRUE = zero counts) * * RESULT * Size - Amount of buffer used to hold counts. * ****************************************************************************/ void __cdecl HuffCount(unsigned char *data, TreeNode *nodes, long length, long zero) { long i; /* Zero any previous counts. */ if (zero) { for (i = 0; i < 256; i++) { nodes[i].count = 0; } } /* Calculate the distribution of the data. */ i = 0; while (i < length) { nodes[((unsigned char *)data)[i]].count++; i++; } } /**************************************************************************** * * NAME * HuffScaleCounts - Scale down the frequency counts. * * SYNOPSIS * HuffScaleCounts(Nodes) * * void HuffScaleCounts(TreeNode *); * * FUNCTION * In order to limit the size of the Huffman codes to 16 bits, we must * scale down the counts so they can be represented by a BYTE size value. * * INPUTS * Nodes - Pointer to nodes to scale counts for. * * RESULT * NONE * ****************************************************************************/ void __cdecl HuffScaleCounts(TreeNode *nodes) { unsigned long max_count; unsigned long unscaled; long i; long first; long last; long next; /* Scale down the counts so they fit in an 8 bit value. This is done in * order to limit the size of the codes to 16 bits. */ max_count = 0; /* Find the maximum count. */ for (i = 0; i < 256; i++) { if (nodes[i].count > max_count) { max_count = nodes[i].count; } } if (max_count == 0) { nodes[0].count = 1; max_count = 1; } max_count /= 255; max_count++; /* Scale down the counts. */ for (i = 0; i < 256; i++) { unscaled = nodes[i].count; nodes[i].count /= max_count; /* Make sure that a node with a non-zero count does not get scaled * down to zero. */ if ((nodes[i].count == 0) && (unscaled != 0)) { nodes[i].count = 1; } } nodes[HUFF_EOS].count = 1; } /**************************************************************************** * * NAME * RLEHuffCounts - Run Length Encode the Huffman counts. * * SYNOPSIS * Size = RLEHuffCounts(Nodes, Buffer) * * long RLEHuffCounts(TreeNode *, unsigned char *); * * FUNCTION * In order for the decoder to build the same model, we have to transmit * the symbol counts to it. To save space we do not save all 256 symbols * unconditionally, instead we run length encode the counts. The format * used to store the counts is as follows: * * Start, Stop, Counts[n], Start, Stop, Counts[n], .... 0 * * Note: The sequence is terminated by a start value of 0. Also at least * 1 run of counts has to be stored, even if the first start value is 0. * * INPUTS * Nodes - Pointer to initialized nodes. * Buffer - Pointer to buffer to store RLE'd counts. * * RESULT * Size - Size of the RLE'd counts. * ****************************************************************************/ long __cdecl RLEHuffCounts(TreeNode *nodes, unsigned char *buffer) { long i; long first; long last; long next; long size = 0; /* Find the first occurance of a non-zero count. */ first = 0; while ((first < 255) && (nodes[first].count == 0)) { first++; } /* Each time I hit the start of the loop, I assume that first is the * number for a run of non-zero values. The rest of the loop is * concerned with finding the value for last, which is the end of the * run, and the value of next, which is the start of the next run. * At the end of the loop, I assign next to first, so it starts in on * the next run. */ for (; first < 256; first = next) { last = first + 1; for (;;) { /* Find the end of the run of non-zeros. */ for (; last < 256; last++) { if (nodes[last].count == 0) { break; } } last--; /* Check the beginning of the next run of non-zero counts. */ for (next = last + 1; next < 256; next++) { if (nodes[next].count != 0) { break; } } /* Quit the run if we have reached the end. */ if (next > 255) { break; } /* Quit the run if there is more than three non-zero counts. */ if ((next - last) > 3) { break; } last = next; }; /* Output Start and Stop. */ *buffer++ = first; *buffer++ = last; /* Output the run of counts. */ for (i = first; i <= last; i++) { *buffer++ = (unsigned char)nodes[i].count; } size += (((last - first) + 1) + 2); } /* Output terminator. */ *buffer = 0; size++; return (size); } /**************************************************************************** * * NAME * ConvertToCodes - Convert the Huffman tree into a table of codes. * * SYNOPSIS * ConvertToCodes(Nodes, Codes, Code, Bits, Node) * * void ConvertToCodes(TreeNode *, HuffCode *, unsigned short, short, * short); * * FUNCTION * Since the Huffman tree is built as a decoding tree, there is no simple * way to get the encoding values for each symbol. This routine * recursively walks through the tree, adding the child bits to each code * until it gets to a leaf. When it gets to a leaf, it stores the code * value. * * INPUTS * Nodes - Pointer to the Huffman tree. * Codes - Pointer to the table of codes to generate. * Code - Code being built (initialize with 0). * Bits - Number of bits the code is comprised of (initialize with 0). * Node - Number of the current node. * * RESULT * NONE * ****************************************************************************/ void __cdecl ConvertToCodes(TreeNode *nodes, HuffCode *codes, unsigned short code, short bits, short node) { node >>= 3; if (node <= HUFF_EOS) { codes[node].code = code; codes[node].bits = bits; return; } code <<= 1; bits++; ConvertToCodes(nodes, codes, code, bits, nodes[node].child0); ConvertToCodes(nodes, codes, code|1, bits, nodes[node].child1); } /**************************************************************************** * * NAME * HuffEncode - Huffman encode the data. * * SYNOPSIS * Size = HuffEncode(Data, Buffer, Codes, Length); * * long HuffEncode(unsigned char *, unsigned char *, HuffCodes *, long); * * FUNCTION * Encoding of the data is simple. Each byte of data is taken as the index * of the codes array, the corresponding code is then put in the output. * * INPUTS * Data - Pointer to data to encode. * Buffer - Pointer to buffer to hold encoded data. * Codes - Pointer to array of Huffman codes. * Length - Length of data buffer to encode. * * RESULT * Size - Size of encoded data. * ****************************************************************************/ long __cdecl HuffEncode(unsigned char *data, unsigned char *buffer, HuffCode *codes, long length) { long i; long size; long next; unsigned long mask; unsigned long symbol; /* Initialize */ *buffer = 0; next = 0x80; i = 0; do { if (i < length) { symbol = ((unsigned char *)data)[i]; } else { symbol = HUFF_EOS; } mask = 1L << (codes[symbol].bits - 1); while (mask != 0) { /* Set a bit in the output stream for each bit in the code. */ if (mask & codes[symbol].code) { *buffer |= next; } /* Next bit position. */ next >>= 1; /* Advance to the next byte in the output stream when the current * byte is full. */ if (next == 0) { buffer++; *buffer = 0; next = 0x80; size++; } /* Next bit in the code. */ mask >>= 1; } i++; } while (symbol != HUFF_EOS); if (next != 0x80) { size++; } return (size); }