/************************** Start of LZW12.C ************************* * * This is 12 bit LZW program, which is discussed in the first part * of the chapter. It uses a fixed size code, and does not attempt * to flush the dictionary after it fills up. */ #include #include #include #include "errhand.h" #include "bitio.h" /* * Constants used throughout the program. BITS defines how many bits * will be in a code. TABLE_SIZE defines the size of the dictionary * table. */ #define BITS 12 #define MAX_CODE ( ( 1 << BITS ) - 1 ) #define TABLE_SIZE 5021 #define END_OF_STREAM 256 #define FIRST_CODE 257 #define UNUSED -1 /* * Local prototypes. */ #ifdef __STDC__ unsigned int find_child_node( int parent_code, int child_character ); unsigned int decode_string( unsigned int offset, unsigned int code ); #else unsigned int find_child_node(); unsigned int decode_string(); #endif char *CompressionName = "LZW 12 Bit Encoder"; char *Usage = "in-file out-file\n\n"; /* * This data structure defines the dictionary. Each entry in the dictionary * has a code value. This is the code emitted by the compressor. Each * code is actually made up of two pieces: a parent_code, and a * character. Code values of less than 256 are actually plain * text codes. */ struct dictionary { int code_value; int parent_code; char character; } dict[ TABLE_SIZE ]; char decode_stack[ TABLE_SIZE ]; /* * The compressor is short and simple. It reads in new symbols one * at a time from the input file. It then checks to see if the * combination of the current symbol and the current code are already * defined in the dictionary. If they are not, they are added to the * dictionary, and we start over with a new one symbol code. If they * are, the code for the combination of the code and character becomes * our new code. */ void CompressFile( input, output, argc, argv ) FILE *input; BIT_FILE *output; int argc; char *argv[]; { int next_code; int character; int string_code; unsigned int index; unsigned int i; next_code = FIRST_CODE; for ( i = 0 ; i < TABLE_SIZE ; i++ ) dict[ i ].code_value = UNUSED; if ( ( string_code = getc( input ) ) == EOF ) string_code = END_OF_STREAM; while ( ( character = getc( input ) ) != EOF ) { index = find_child_node( string_code, character ); if ( dict[ index ].code_value != -1 ) string_code = dict[ index ].code_value; else { if ( next_code <= MAX_CODE ) { dict[ index ].code_value = next_code++; dict[ index ].parent_code = string_code; dict[ index ].character = (char) character; } OutputBits( output, (unsigned long) string_code, BITS ); string_code = character; } } OutputBits( output, (unsigned long) string_code, BITS ); OutputBits( output, (unsigned long) END_OF_STREAM, BITS ); while ( argc-- > 0 ) printf( "Unknown argument: %s\n", *argv++ ); } /* * The file expander operates much like the encoder. It has to * read in codes, the convert the codes to a string of characters. * The only catch in the whole operation occurs when the encoder * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this * occurs, the encoder outputs a code that is not presently defined * in the table. This is handled as an exception. */ void ExpandFile( input, output, argc, argv ) BIT_FILE *input; FILE *output; int argc; char *argv[]; { unsigned int next_code; unsigned int new_code; unsigned int old_code; int character; unsigned int count; next_code = FIRST_CODE; old_code = (unsigned int) InputBits( input, BITS ); if ( old_code == END_OF_STREAM ) return; character = old_code; putc( old_code, output ); while ( ( new_code = (unsigned int) InputBits( input, BITS ) ) != END_OF_STREAM ) { /* ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER ** case which generates an undefined code. It handles it by decoding ** the last code, and adding a single character to the end of the decode string. */ if ( new_code >= next_code ) { decode_stack[ 0 ] = (char) character; count = decode_string( 1, old_code ); } else count = decode_string( 0, new_code ); character = decode_stack[ count - 1 ]; while ( count > 0 ) putc( decode_stack[ --count ], output ); if ( next_code <= MAX_CODE ) { dict[ next_code ].parent_code = old_code; dict[ next_code ].character = (char) character; next_code++; } old_code = new_code; } while ( argc-- > 0 ) printf( "Unknown argument: %s\n", *argv++ ); } /* * This hashing routine is responsible for finding the table location * for a string/character combination. The table index is created * by using an exclusive OR combination of the prefix and character. * This code also has to check for collisions, and handles them by * jumping around in the table. */ unsigned int find_child_node( parent_code, child_character ) int parent_code; int child_character; { int index; int offset; index = ( child_character << ( BITS - 8 ) ) ^ parent_code; if ( index == 0 ) offset = 1; else offset = TABLE_SIZE - index; for ( ; ; ) { if ( dict[ index ].code_value == UNUSED ) return( index ); if ( dict[ index ].parent_code == parent_code && dict[ index ].character == (char) child_character ) return( index ); index -= offset; if ( index < 0 ) index += TABLE_SIZE; } } /* * This routine decodes a string from the dictionary, and stores it * in the decode_stack data structure. It returns a count to the * calling program of how many characters were placed in the stack. */ unsigned int decode_string( count, code ) unsigned int count; unsigned int code; { while ( code > 255 ) { decode_stack[ count++ ] = dict[ code ].character; code = dict[ code ].parent_code; } decode_stack[ count++ ] = (char) code; return( count ); } /************************** End of LZW12.C *************************/