CnC_Red_Alert/VQ/VQM32/HUFFCMP.CPP

709 lines
17 KiB
C++
Raw Permalink Normal View History

/*
** Command & Conquer Red Alert(tm)
** Copyright 2025 Electronic Arts Inc.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/****************************************************************************
*
* C O N F I D E N T I A L --- W E S T W O O D S T U D I O S
*
*----------------------------------------------------------------------------
*
* FILE
* huffcmp.h
*
* DESCRIPTION
* Huffman order 0 compressor.
*
* PROGRAMMER
* Denzil E. Long, Jr.
*
* DATE
* May 19, 1995
*
*----------------------------------------------------------------------------
*
* PUBLIC
* HuffCompress - Compress data using huffman order 0 compression.
* HuffCount - Count the frequency of occurence of every byte.
* HuffScaleCounts - Scale down the frequency counts.
* RLEHuffCounts - Run Length Encode the Huffman counts.
* ConvertToCodes - Convert the Huffman tree into a table of codes.
* HuffEncode - Huffman encode the data.
*
****************************************************************************/
#include <mem.h>
#include "huffman.h"
/****************************************************************************
*
* NAME
* HuffCompress - Compress data using huffman order 0 compression.
*
* SYNOPSIS
* Size = HuffCompress(Data, Buffer, Size, Temp)
*
* long HuffCompress(unsigned char *, unsigned char *, long, char *);
*
* FUNCTION
* This function performs an order 0 Huffman encoding of the input data.
* The algorithm used is fairly straightforward. First a count is made of
* all the bytes in the input data, then the counts are scaled down to
* a single byte representation in the node array. After the counts are
* scaled, a Huffman decoding tree is built from the node array. Then
* a code array is built by traversing the tree for each symbol. Finally,
* the input data is compressed.
*
* INPUTS
* Data - Pointer to data to compress.
* Buffer - Pointer to compressed data.
* Size - Length of data to compress.
* Temp - Pointer to temporary working buffer. (Must be >= 5120 bytes!)
*
* RESULT
* Size - Compressed size.
*
****************************************************************************/
long cdecl HuffCompress(unsigned char *data, unsigned char *buffer,
long length, char *temp)
{
#if(1)
TreeNode *nodes;
HuffCode *codes;
long size;
long root;
/* Initialize variables */
nodes = (TreeNode *)temp;
temp += (514 * sizeof(TreeNode));
codes = (HuffCode *)temp;
/* Analyze the frequency of the data. */
HuffCount(data, nodes, length, 1);
HuffScaleCounts(nodes);
/* Save the counts for the decompression. */
size = RLEHuffCounts(nodes, buffer);
buffer += size;
/* Build the Huffman decode tree and generate codes for encoding. */
root = BuildHuffTree(nodes);
ConvertToCodes(nodes, codes, 0, 0, root);
/* Encode the data. */
size += HuffEncode(data, buffer, codes, length);
return (size);
#else
TreeNode *nodes;
HuffCode *codes;
unsigned long *counts;
unsigned long max_count;
long i;
long size;
long first;
long last;
long next;
unsigned long symbol;
unsigned long mask;
/* Initialize variables. */
nodes = (TreeNode *)temp;
temp += (514 * sizeof(TreeNode));
counts = (unsigned long *)temp;
codes = (HuffCode *)temp;
/* Zero the initial counts. */
memset(temp, 0, 256 * sizeof(unsigned long));
size = 0;
/*-------------------------------------------------------------------------
* Calculate the distribution of the data then scale down the counts so
* they fit in an 8 bit value. This is done in order to limit the size of
* the codes to 16 bits.
*-----------------------------------------------------------------------*/
i = 0;
while (i < length) {
counts[((unsigned char *)data)[i]]++;
i++;
}
/* Scale down the counts. */
max_count = 0;
/* Find the maximum count. */
for (i = 0; i < 256; i++) {
if (counts[i] > max_count) {
max_count = counts[i];
}
}
if (max_count == 0) {
counts[0] = 1;
max_count = 1;
}
max_count /= 255;
max_count++;
/* Scale down the counts. */
for (i = 0; i < 256; i++) {
nodes[i].count = (unsigned long)(counts[i] / max_count);
/* Make sure that a node with a non-zero count does not get scaled
* down to zero.
*/
if ((nodes[i].count == 0) && (counts[i] != 0)) {
nodes[i].count = 1;
}
}
nodes[HUFF_EOS].count = 1;
/*-------------------------------------------------------------------------
* OUTPUT THE COUNTS
*
* In order to save space, we store runs of counts in the following format:
*
* Start, Stop, Counts..., Start, Stop, Counts..., ..., 0
*
* The list is terminated by storing a start value of zero (0).
*
* In order to efficiently use this format, we do not want to stop a run
* because of just one or two zero counts. So we include zero counts of
* less than three (3) in the run.
*-----------------------------------------------------------------------*/
/* Find the first occurance of a non-zero count. */
first = 0;
while ((first < 255) && (nodes[first].count == 0)) {
first++;
}
/* Each time I hit the start of the loop, I assume that first is the
* number for a run of non-zero values. The rest of the loop is
* concerned with finding the value for last, which is the end of the
* run, and the value of next, which is the start of the next run.
* At the end of the loop, I assign next to first, so it starts in on
* the next run.
*/
for (; first < 256; first = next) {
last = first + 1;
for (;;) {
/* Find the end of the run of non-zeros. */
for (; last < 256; last++) {
if (nodes[last].count == 0) {
break;
}
}
last--;
/* Check the beginning of the next run of non-zero counts. */
for (next = last + 1; next < 256; next++) {
if (nodes[next].count != 0) {
break;
}
}
/* Quit the run if we have reached the end. */
if (next > 255) {
break;
}
/* Quit the run if there is more than three non-zero counts. */
if ((next - last) > 3) {
break;
}
last = next;
};
/* Output Start and Stop. */
*buffer++ = first;
*buffer++ = last;
/* Output the run of counts. */
for (i = first; i <= last; i++) {
*buffer++ = (char)nodes[i].count;
}
size += (((last - first) + 1) + 2);
}
/* Output terminator. */
*buffer++ = 0;
size++;
/*-------------------------------------------------------------------------
* Build the Huffman tree. All active nodes are scanned in order to locate
* the two nodes with the minimum weights. These two weights are added
* together and assigned to a new node. The new node makes the two minimum
* nodes into its 0 child and 1 child. The two minimum nodes are then
* marked as inactive. This process repeats until their is only one node
* left, which is the root node.
*-----------------------------------------------------------------------*/
/* Node 513 is used to arbitratilly provide a node with a guaranteed
* maximum value.
*/
nodes[513].count = 0xFFFF;
for (next = (HUFF_EOS + 1); ; next++) {
first = 513;
last = 513;
for (i = 0; i < next; i++) {
/* We are only concerned with non-zero count nodes. */
if (nodes[i].count != 0) {
if (nodes[i].count < nodes[first].count) {
last = first;
first = i;
} else if (nodes[i].count < nodes[last].count) {
last = i;
}
}
}
if (last == 513) {
break;
}
nodes[next].count = (nodes[first].count + nodes[last].count);
nodes[first].count = 0;
nodes[last].count = 0;
nodes[next].child0 = (first << 3);
nodes[next].child1 = (last << 3);
}
next--;
/*-------------------------------------------------------------------------
* Convert the Huffman tree into an encoding table then encode the data.
*-----------------------------------------------------------------------*/
ConvertToCodes(nodes, codes, 0, 0, (next << 3));
/* Encode the data. */
*buffer = 0;
next = 0x80;
i = 0;
do {
if (i < length) {
symbol = ((unsigned char *)data)[i];
} else {
symbol = HUFF_EOS;
}
mask = 1L << (codes[symbol].bits - 1);
while (mask != 0) {
/* Set a bit in the output stream for each bit in the code. */
if (mask & codes[symbol].code) {
*buffer |= next;
}
/* Next bit position. */
next >>= 1;
/* Advance to the next byte in the output stream when the current
* byte is full.
*/
if (next == 0) {
buffer++;
*buffer = 0;
next = 0x80;
size++;
}
/* Next bit in the code. */
mask >>= 1;
}
i++;
} while (symbol != HUFF_EOS);
if (next != 0x80) {
size++;
}
return (size);
#endif
}
/****************************************************************************
*
* NAME
* HuffCount - Count the frequency of occurence of every byte.
*
* SYNOPSIS
* HuffCount(Data, Nodes, Length, Zero)
*
* void HuffCounts(unsigned char *, TreeNode *, long, long);
*
* FUNCTION
* This function counts the frequency of occurence of every byte in the
* input data. The nodes must be initialized to zero prior to calling this
* function for the first time, otherwise the counts will be flawed.
*
* INPUTS
* Data - Pointer to data to analyze.
* TreeNode - Pointer to array of nodes.
* Length - Length of data to analyze.
* Zero - Zero any previous counts flag. (TRUE = zero counts)
*
* RESULT
* Size - Amount of buffer used to hold counts.
*
****************************************************************************/
void cdecl HuffCount(unsigned char *data, TreeNode *nodes, long length,
long zero)
{
long i;
/* Zero any previous counts. */
if (zero) {
for (i = 0; i < 256; i++) {
nodes[i].count = 0;
}
}
/* Calculate the distribution of the data. */
i = 0;
while (i < length) {
nodes[((unsigned char *)data)[i]].count++;
i++;
}
}
/****************************************************************************
*
* NAME
* HuffScaleCounts - Scale down the frequency counts.
*
* SYNOPSIS
* HuffScaleCounts(Nodes)
*
* void HuffScaleCounts(TreeNode *);
*
* FUNCTION
* In order to limit the size of the Huffman codes to 16 bits, we must
* scale down the counts so they can be represented by a BYTE size value.
*
* INPUTS
* Nodes - Pointer to nodes to scale counts for.
*
* RESULT
* NONE
*
****************************************************************************/
void cdecl HuffScaleCounts(TreeNode *nodes)
{
unsigned long max_count;
unsigned long unscaled;
long i;
long first;
long last;
long next;
/* Scale down the counts so they fit in an 8 bit value. This is done in
* order to limit the size of the codes to 16 bits.
*/
max_count = 0;
/* Find the maximum count. */
for (i = 0; i < 256; i++) {
if (nodes[i].count > max_count) {
max_count = nodes[i].count;
}
}
if (max_count == 0) {
nodes[0].count = 1;
max_count = 1;
}
max_count /= 255;
max_count++;
/* Scale down the counts. */
for (i = 0; i < 256; i++) {
unscaled = nodes[i].count;
nodes[i].count /= max_count;
/* Make sure that a node with a non-zero count does not get scaled
* down to zero.
*/
if ((nodes[i].count == 0) && (unscaled != 0)) {
nodes[i].count = 1;
}
}
nodes[HUFF_EOS].count = 1;
}
/****************************************************************************
*
* NAME
* RLEHuffCounts - Run Length Encode the Huffman counts.
*
* SYNOPSIS
* Size = RLEHuffCounts(Nodes, Buffer)
*
* long RLEHuffCounts(TreeNode *, unsigned char *);
*
* FUNCTION
* In order for the decoder to build the same model, we have to transmit
* the symbol counts to it. To save space we do not save all 256 symbols
* unconditionally, instead we run length encode the counts. The format
* used to store the counts is as follows:
*
* Start, Stop, Counts[n], Start, Stop, Counts[n], .... 0
*
* Note: The sequence is terminated by a start value of 0. Also at least
* 1 run of counts has to be stored, even if the first start value is 0.
*
* INPUTS
* Nodes - Pointer to initialized nodes.
* Buffer - Pointer to buffer to store RLE'd counts.
*
* RESULT
* Size - Size of the RLE'd counts.
*
****************************************************************************/
long cdecl RLEHuffCounts(TreeNode *nodes, unsigned char *buffer)
{
long i;
long first;
long last;
long next;
long size = 0;
/* Find the first occurance of a non-zero count. */
first = 0;
while ((first < 255) && (nodes[first].count == 0)) {
first++;
}
/* Each time I hit the start of the loop, I assume that first is the
* number for a run of non-zero values. The rest of the loop is
* concerned with finding the value for last, which is the end of the
* run, and the value of next, which is the start of the next run.
* At the end of the loop, I assign next to first, so it starts in on
* the next run.
*/
for (; first < 256; first = next) {
last = first + 1;
for (;;) {
/* Find the end of the run of non-zeros. */
for (; last < 256; last++) {
if (nodes[last].count == 0) {
break;
}
}
last--;
/* Check the beginning of the next run of non-zero counts. */
for (next = last + 1; next < 256; next++) {
if (nodes[next].count != 0) {
break;
}
}
/* Quit the run if we have reached the end. */
if (next > 255) {
break;
}
/* Quit the run if there is more than three non-zero counts. */
if ((next - last) > 3) {
break;
}
last = next;
};
/* Output Start and Stop. */
*buffer++ = first;
*buffer++ = last;
/* Output the run of counts. */
for (i = first; i <= last; i++) {
*buffer++ = (unsigned char)nodes[i].count;
}
size += (((last - first) + 1) + 2);
}
/* Output terminator. */
*buffer = 0;
size++;
return (size);
}
/****************************************************************************
*
* NAME
* ConvertToCodes - Convert the Huffman tree into a table of codes.
*
* SYNOPSIS
* ConvertToCodes(Nodes, Codes, Code, Bits, Node)
*
* void ConvertToCodes(TreeNode *, HuffCode *, unsigned short, short,
* short);
*
* FUNCTION
* Since the Huffman tree is built as a decoding tree, there is no simple
* way to get the encoding values for each symbol. This routine
* recursively walks through the tree, adding the child bits to each code
* until it gets to a leaf. When it gets to a leaf, it stores the code
* value.
*
* INPUTS
* Nodes - Pointer to the Huffman tree.
* Codes - Pointer to the table of codes to generate.
* Code - Code being built (initialize with 0).
* Bits - Number of bits the code is comprised of (initialize with 0).
* Node - Number of the current node.
*
* RESULT
* NONE
*
****************************************************************************/
void cdecl ConvertToCodes(TreeNode *nodes, HuffCode *codes,
unsigned short code, short bits, short node)
{
node >>= 3;
if (node <= HUFF_EOS) {
codes[node].code = code;
codes[node].bits = bits;
return;
}
code <<= 1;
bits++;
ConvertToCodes(nodes, codes, code, bits, nodes[node].child0);
ConvertToCodes(nodes, codes, code|1, bits, nodes[node].child1);
}
/****************************************************************************
*
* NAME
* HuffEncode - Huffman encode the data.
*
* SYNOPSIS
* Size = HuffEncode(Data, Buffer, Codes, Length);
*
* long HuffEncode(unsigned char *, unsigned char *, HuffCodes *, long);
*
* FUNCTION
* Encoding of the data is simple. Each byte of data is taken as the index
* of the codes array, the corresponding code is then put in the output.
*
* INPUTS
* Data - Pointer to data to encode.
* Buffer - Pointer to buffer to hold encoded data.
* Codes - Pointer to array of Huffman codes.
* Length - Length of data buffer to encode.
*
* RESULT
* Size - Size of encoded data.
*
****************************************************************************/
long cdecl HuffEncode(unsigned char *data, unsigned char *buffer,
HuffCode *codes, long length)
{
long i;
long size;
long next;
unsigned long mask;
unsigned long symbol;
/* Initialize */
*buffer = 0;
next = 0x80;
i = 0;
do {
if (i < length) {
symbol = ((unsigned char *)data)[i];
} else {
symbol = HUFF_EOS;
}
mask = 1L << (codes[symbol].bits - 1);
while (mask != 0) {
/* Set a bit in the output stream for each bit in the code. */
if (mask & codes[symbol].code) {
*buffer |= next;
}
/* Next bit position. */
next >>= 1;
/* Advance to the next byte in the output stream when the current
* byte is full.
*/
if (next == 0) {
buffer++;
*buffer = 0;
next = 0x80;
size++;
}
/* Next bit in the code. */
mask >>= 1;
}
i++;
} while (symbol != HUFF_EOS);
if (next != 0x80) {
size++;
}
return (size);
}