/************************** Start of ARITH1E.C ************************* * * This is the order-1 arithmetic coding module used in the second * part of chapter 6. This module implements an order 1 arithmetic * compression program that uses escape codes to encode previously * unseen symbols. * * Compile with BITIO.C, ERRHAND.C, and either MAIN-C.C or MAIN-E.C */ #include #include #include #include "errhand.h" #include "bitio.h" /* * The SYMBOL structure is what is used to define a symbol in * arithmetic coding terms. A symbol is defined as a range between * 0 and 1. Since we are using integer math, instead of using 0 and 1 * as our end points, we have an integer scale. The low_count and * high_count define where the symbol falls in the range. */ typedef struct { unsigned short int low_count; unsigned short int high_count; unsigned short int scale; } SYMBOL; #define MAXIMUM_SCALE 16383 /* Maximum allowed frequency count */ #define END_OF_STREAM 257 /* The EOF symbol */ #define ESCAPE 256 /* The ESCAPE symbol */ int *totals[ END_OF_STREAM ]; /* * Local function prototypes. */ #ifdef __STDC__ void initialize_arithmetic_decoder( BIT_FILE *stream ); void remove_symbol_from_stream( BIT_FILE *stream, SYMBOL *s ); void initialize_arithmetic_encoder( void ); void encode_symbol( BIT_FILE *stream, SYMBOL *s ); void flush_arithmetic_encoder( BIT_FILE *stream ); short int get_current_count( SYMBOL *s ); void initialize_model( void ); void update_model( int symbol, int context ); int convert_int_to_symbol( int symbol, int context, SYMBOL *s ); void get_symbol_scale( int context, SYMBOL *s ); int convert_symbol_to_int( int count, int context, SYMBOL *s ); #else void initialize_arithmetic_decoder(); void remove_symbol_from_stream(); void initialize_arithmetic_encoder(); void encode_symbol(); void flush_arithmetic_encoder(); short int get_current_count(); void initialize_model(); void update_model(); int convert_int_to_symbol(); void get_symbol_scale(); int convert_symbol_to_int(); #endif char *CompressionName = "Adaptive order 1 model with arithmetic coding"; char *Usage = "in-file out-file\n\n"; /* * The compression loop for this program is a little more complicated than * its counterpart in ARITH1.C. In addition to handling different contexts, * this program has to handle the possiblity of an escape code, which * means recoding the symbol using a different context table. In this * case, that is the ESCAPE table, which has one count per symbol and is * never updated. */ void CompressFile( input, output, argc, argv ) FILE *input; BIT_FILE *output; int argc; char *argv[]; { SYMBOL s; int c; int context; int escaped; context = 0; initialize_model(); initialize_arithmetic_encoder(); for ( ; ; ) { c = getc( input ); if ( c == EOF ) c = END_OF_STREAM; escaped = convert_int_to_symbol( c, context, &s ); encode_symbol( output, &s ); if ( escaped ) { convert_int_to_symbol( c, ESCAPE, &s ); encode_symbol( output, &s ); } if ( c == END_OF_STREAM ) break; update_model( c, context ); context = c; } flush_arithmetic_encoder( output ); putchar( '\n' ); while ( argc-- > 0 ) printf( "Unused argument: %s\n", *argv++ ); } /* * Just like the compression loop, the expansion routine has to handle * an incoming escape character, and switch contexts if one is read. * Here that logic has been implemented as a loop so as to avoid * writing the same code twice. * */ void ExpandFile( input, output, argc, argv ) BIT_FILE *input; FILE *output; int argc; char *argv[]; { SYMBOL s; int count; int c; int context; int last_context; context = 0; initialize_model(); initialize_arithmetic_decoder( input ); for ( ; ; ) { last_context = context; do { get_symbol_scale( context, &s ); count = get_current_count( &s ); c = convert_symbol_to_int( count, context, &s ); remove_symbol_from_stream( input, &s ); context = c; } while ( c == ESCAPE ); if ( c == END_OF_STREAM ) break; putc( (char) c, output ); update_model( c, last_context ); context = c; } putchar( '\n' ); while ( argc-- > 0 ) printf( "Unused argument: %s\n", *argv++ ); } /* * Since we are using ESCAPE codes in this program, we can initialize * all of the contexts to contain nothing more than a single count for * the escape code. However, the default context table, the ESCAPE * table, needs to have a single count for every symbol, since it is * our context table of final resort. The remaining tables are assumed * to be initialized to zero when they are created using calloc(). The * ESCAPE code has its count set to 1 by the call to update_model(). */ void initialize_model() { int context; int i; for ( context = 0 ; context < END_OF_STREAM ; context++ ) { totals[ context ] = (int *) calloc( END_OF_STREAM + 2, sizeof(int) ); if ( totals[ context ] == NULL ) fatal_error( "Failure allocating context %d", context ); update_model( ESCAPE, context ); } for ( i = 0 ; i <= ( END_OF_STREAM + 1 ) ; i++ ) totals[ ESCAPE ][ i ] = i; } /* * This routine is called to increment the counts for the current * contexts. It is called after a character has been encoded or decoded. * Just like in the routines from other programs, this code has to check * to see if the context table has passed the MAXIMUM_SCALE value. If * it has, the table needs to be rescaled. The difference here is that * we don't care if any of the symbols have their counts reduced to 0, * since we can always issue an escape code. The only symbol that is * required to be non-zero is the ESCAPE code, and that is handled * by the two lines of code at the end of the routine. */ void update_model( symbol, context ) int symbol; int context; { int i; for ( i = symbol + 1 ; i <= ( END_OF_STREAM + 1 ) ; i++ ) totals[ context ][ i ]++; if ( totals[ context ][ END_OF_STREAM + 1 ] < MAXIMUM_SCALE ) return; for ( i = 1 ; i <= ( END_OF_STREAM + 1 ) ; i++ ) totals[ context ][ i ] /= 2; totals[ context ][ END_OF_STREAM ] = totals[ context ][ ESCAPE ] + 1; totals[ context ][ END_OF_STREAM + 1 ] = totals[ context ][ END_OF_STREAM ] + 1; } /* * This routine operates much like the convert_int_to_symbol from ARITH1.C * and earlier programs. The difference here is that the symbol is checked * to see if it has a count of 0. If it does, instead of returning * the counts for this symbol, we return the counts for the ESCAPE symbol * in the current context. The return value from the function is either 0 * in case the symbol was valid, or a 1 if the symbol had to be escaped. */ int convert_int_to_symbol( c, context, s ) int c; int context; SYMBOL *s; { s->scale = totals[ context ][ END_OF_STREAM + 1 ]; s->low_count = totals[ context ][ c ]; s->high_count = totals[ context ][ c + 1 ]; if ( s->low_count < s->high_count ) return( 0 ); s->low_count = totals[ context ][ ESCAPE ]; s->high_count = totals[ context ][ ESCAPE + 1 ]; return( 1 ); } /* * This routine is unchanged from earlier versions of this program. It * just has to find the cumulative total count for the entire context * table. */ void get_symbol_scale( context, s ) int context; SYMBOL *s; { s->scale = totals[ context][ END_OF_STREAM + 1 ]; } /* * This routine has not had to change from earlier versions of this program * either. It just has to look through the table for the correct symbol, * and return its value. */ int convert_symbol_to_int( count, context, s ) int count; int context; SYMBOL *s; { int c; for ( c = 0; count >= totals[ context ][ c + 1 ] ; c++ ) ; s->high_count = totals[ context ][ c + 1 ]; s->low_count = totals[ context ][ c ]; return( c ); } /* * Everything from here down defines the arithmetic coder section * of the program. This code is unchanged from the arithmetic coding * sections of ARITH1.C and earlier programs. */ /* * These four variables define the current state of the arithmetic * coder/decoder. They are assumed to be 16 bits long. Note that * by declaring them as short ints, they will actually be 16 bits * on most 80X86 and 680X0 machines, as well as VAXen. */ static unsigned short int code; /* The present input code value */ static unsigned short int low; /* Start of the current code range */ static unsigned short int high; /* End of the current code range */ long underflow_bits; /* Number of underflow bits pending */ /* * This routine must be called to initialize the encoding process. * The high register is initialized to all 1s, and it is assumed that * it has an infinite string of 1s to be shifted into the lower bit * positions when needed. */ void initialize_arithmetic_encoder() { low = 0; high = 0xffff; underflow_bits = 0; } /* * At the end of the encoding process, there are still significant * bits left in the high and low registers. We output two bits, * plus as many underflow bits as are necessary. */ void flush_arithmetic_encoder( stream ) BIT_FILE *stream; { OutputBit( stream, low & 0x4000 ); underflow_bits++; while ( underflow_bits-- > 0 ) OutputBit( stream, ~low & 0x4000 ); OutputBits( stream, 0L, 16 ); } /* * This routine is called to encode a symbol. The symbol is passed * in the SYMBOL structure as a low count, a high count, and a range, * instead of the more conventional probability ranges. The encoding * process takes two steps. First, the values of high and low are * updated to take into account the range restriction created by the * new symbol. Then, as many bits as possible are shifted out to * the output stream. Finally, high and low are stable again and * the routine returns. */ void encode_symbol( stream, s ) BIT_FILE *stream; SYMBOL *s; { long range; /* * These three lines rescale high and low for the new symbol. */ range = (long) ( high-low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * This loop turns out new bits until high and low are far enough * apart to have stabilized. */ for ( ; ; ) { /* * If this test passes, it means that the MSDigits match, and can * be sent to the output stream. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { OutputBit( stream, high & 0x8000 ); while ( underflow_bits > 0 ) { OutputBit( stream, ~high & 0x8000 ); underflow_bits--; } } /* * If this test passes, the numbers are in danger of underflow, because * the MSDigits don't match, and the 2nd digits are just one apart. */ else if ( ( low & 0x4000 ) && !( high & 0x4000 )) { underflow_bits += 1; low &= 0x3fff; high |= 0x4000; } else return ; low <<= 1; high <<= 1; high |= 1; } } /* * When decoding, this routine is called to figure out which symbol * is presently waiting to be decoded. This routine expects to get * the current model scale in the s->scale parameter, and it returns * a count that corresponds to the present floating point code: * * code = count / s->scale */ short int get_current_count( s ) SYMBOL *s; { long range; short int count; range = (long) ( high - low ) + 1; count = (short int) ((((long) ( code - low ) + 1 ) * s->scale-1 ) / range ); return( count ); } /* * This routine is called to initialize the state of the arithmetic * decoder. This involves initializing the high and low registers * to their conventional starting values, plus reading the first * 16 bits from the input stream into the code value. */ void initialize_arithmetic_decoder( stream ) BIT_FILE *stream; { int i; code = 0; for ( i = 0 ; i < 16 ; i++ ) { code <<= 1; code += InputBit( stream ); } low = 0; high = 0xffff; } /* * Just figuring out what the present symbol is doesn't remove * it from the input bit stream. After the character has been * decoded, this routine has to be called to remove it from the * input stream. */ void remove_symbol_from_stream( stream, s ) BIT_FILE *stream; SYMBOL *s; { long range; /* * First, the range is expanded to account for the symbol removal. */ range = (long)( high - low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * Next, any possible bits are shipped out. */ for ( ; ; ) { /* * If the MSDigits match, the bits will be shifted out. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { } /* * Else, if underflow is threatening, shift out the 2nd MSDigit. */ else if ((low & 0x4000) == 0x4000 && (high & 0x4000) == 0 ) { code ^= 0x4000; low &= 0x3fff; high |= 0x4000; } else /* * Otherwise, nothing can be shifted out, so I return. */ return; low <<= 1; high <<= 1; high |= 1; code <<= 1; code += InputBit( stream ); } } /************************** End of ARITH1E.C ****************************/