/************************** Start of ARITH1.C ************************* * * This is the order-1 arithmetic coding module used in the first * part of chapter 6. Compile with BITIO.C, ERRHAND.C, and either * MAIN-C.C or MAIN-E.C. Under MS-DOS, you will need to use the * compact or large models, as this routine allocates a lot of memory. */ #include #include #include #include "errhand.h" #include "bitio.h" /* * The SYMBOL structure is what is used to define a symbol in * arithmetic coding terms. A symbol is defined as a range between * 0 and 1. Since we are using integer math, instead of using 0 and 1 * as our end points, we have an integer scale. The low_count and * high_count define where the symbol falls in the range. */ typedef struct { unsigned short int low_count; unsigned short int high_count; unsigned short int scale; } SYMBOL; #define MAXIMUM_SCALE 16383 /* Maximum allowed frequency count */ #define END_OF_STREAM 256 /* The EOF symbol */ extern long underflow_bits; /* The present underflow count in */ /* the arithmetic coder. */ int *totals[ 257 ]; /* Pointers to the 257 context tables */ /* * Local Function prototypes. */ #ifdef __STDC__ void initialize_arithmetic_decoder( BIT_FILE *stream ); void remove_symbol_from_stream( BIT_FILE *stream, SYMBOL *s ); void initialize_arithmetic_encoder( void ); void encode_symbol( BIT_FILE *stream, SYMBOL *s ); void flush_arithmetic_encoder( BIT_FILE *stream ); short int get_current_count( SYMBOL *s ); void initialize_model( void ); void update_model( int symbol, int context ); void convert_int_to_symbol( int symbol, int context, SYMBOL *s ); void get_symbol_scale( int context, SYMBOL *s ); int convert_symbol_to_int( int count, int context, SYMBOL *s ); #else void initialize_arithmetic_decoder(); void remove_symbol_from_stream(); void initialize_arithmetic_encoder(); void encode_symbol(); void flush_arithmetic_encoder(); short int get_current_count(); void initialize_model(); void update_model(); void convert_int_to_symbol(); void get_symbol_scale(); int convert_symbol_to_int(); #endif char *CompressionName = "Adaptive order 1 model with arithmetic coding"; char *Usage = "in-file out-file\n\n"; /* * This is the compression main loop. It is called to compress the * already opened file. */ void CompressFile( input, output, argc, argv ) FILE *input; BIT_FILE *output; int argc; char *argv[]; { SYMBOL s; int c; int context; context = 0; initialize_model(); initialize_arithmetic_encoder(); for ( ; ; ) { c = getc( input ); if ( c == EOF ) c = END_OF_STREAM; convert_int_to_symbol( c, context, &s ); encode_symbol( output, &s ); if ( c == END_OF_STREAM ) break; update_model( c, context ); context = c; } flush_arithmetic_encoder( output ); putchar( '\n' ); while ( argc-- > 0 ) printf( "Unknown argument: %s\n", *argv++ ); } /* * This is the expansion routine. It reads in bytes from the compressed * file and writes them out to the text file. */ void ExpandFile( input, output, argc, argv ) BIT_FILE *input; FILE *output; int argc; char *argv[]; { SYMBOL s; int count; int c; int context; context = 0; initialize_model(); initialize_arithmetic_decoder( input ); for ( ; ; ) { get_symbol_scale( context, &s ); count = get_current_count( &s ); c = convert_symbol_to_int( count, context, &s ); remove_symbol_from_stream( input, &s ); if ( c == END_OF_STREAM ) break; putc( (char) c, output ); update_model( c, context ); context = c; } putchar( '\n' ); while ( argc-- > 0 ) printf( "Unknown argument: %s\n", *argv++ ); } /* * The next five routines implement the modeling functions for the order-1 * adpative model. The first routine initializes the model, which consists * of allocating 256 context tables. Note that even though there are * 257 symbols in the alphabet, after taking into account the END_OF_STREAM * symbol, we only need 256 tables. END_OF_STREAM doesn't get a table of * its own, since it will never be used as a context. */ void initialize_model() { int context; int i; for ( context = 0 ; context < END_OF_STREAM ; context++ ) { totals[ context ] = (int *) calloc( END_OF_STREAM + 2, sizeof(int) ); if ( totals[ context ] == NULL ) fatal_error( "Failure allocating context %d", context ); for ( i = 0 ; i <= ( END_OF_STREAM + 1 ) ; i++ ) totals[ context ][ i ] = i; } } /* * This routine is called to increment the counts for the current * context. It is called after a character has been encoded or * decoded. It has to increment all the counts, then check to see * if we have hit the maximum allowable count. If we have, the * counts are rescaled, making sure that none of them have fallen to * zero. */ void update_model( symbol, context ) int symbol; int context; { int i; for ( i = symbol + 1 ; i <= ( END_OF_STREAM + 1 ) ; i++ ) totals[ context ][ i ]++; if ( totals[ context ][ END_OF_STREAM + 1 ] < MAXIMUM_SCALE ) return; for ( i = 1 ; i <= ( END_OF_STREAM + 1 ) ; i++ ) { totals[ context ][ i ] /= 2; if ( totals[ context ][ i ] <= totals[ context ][ i - 1 ] ) totals[ context ][ i ] = totals[ context ][ i - 1 ] + 1; } } /* * Convert an integer to a symbol works just like the old arithmetic * coding routine in ARITH.C. The only difference is that we have to * pass a context number so the routine knows which context table to * get the counts and scale from. */ void convert_int_to_symbol( c, context, s ) int c; int context; SYMBOL *s; { s->scale = totals[ context ][ END_OF_STREAM + 1 ]; s->low_count = totals[ context ][ c ]; s->high_count = totals[ context ][ c + 1 ]; } /* * Getting the current scale for the symbol just means pulling the number * out of the chosen context table. */ void get_symbol_scale( context, s ) int context; SYMBOL *s; { s->scale = totals[ context][ END_OF_STREAM + 1 ]; } /* * Converting a symbol to an int means looking through the current * context for a symbol that spans the range that the SYMBOL parameter * has. */ int convert_symbol_to_int( count, context, s) int count; int context; SYMBOL *s; { int c; for ( c = 0; count >= totals[ context ][ c + 1 ] ; c++ ) ; s->high_count = totals[ context ][ c + 1 ]; s->low_count = totals[ context ][ c ]; return( c ); } /* * The remaining routines are the arithmetic coding routines. They are * identical to those seen earlier in ARITH.C. */ /* * These four variables define the current state of the arithmetic * coder/decoder. They are assumed to be 16 bits long. Note that * by declaring them as short ints, they will actually be 16 bits * on most 80X86 and 680X0 machines, as well as VAXen. */ static unsigned short int code; /* The present input code value */ static unsigned short int low; /* Start of the current code range */ static unsigned short int high; /* End of the current code range */ long underflow_bits; /* Number of underflow bits pending */ /* * This routine must be called to initialize the encoding process. * The high register is initialized to all 1s, and it is assumed that * it has an infinite string of 1s to be shifted into the lower bit * positions when needed. */ void initialize_arithmetic_encoder() { low = 0; high = 0xffff; underflow_bits = 0; } /* * At the end of the encoding process, there are still significant * bits left in the high and low registers. We output two bits, * plus as many underflow bits as are necessary. */ void flush_arithmetic_encoder( stream ) BIT_FILE *stream; { OutputBit( stream, low & 0x4000 ); underflow_bits++; while ( underflow_bits-- > 0 ) OutputBit( stream, ~low & 0x4000 ); OutputBits( stream, 0L, 16 ); } /* * This routine is called to encode a symbol. The symbol is passed * in the SYMBOL structure as a low count, a high count, and a range, * instead of the more conventional probability ranges. The encoding * process takes two steps. First, the values of high and low are * updated to take into account the range restriction created by the * new symbol. Then, as many bits as possible are shifted out to * the output stream. Finally, high and low are stable again and * the routine returns. */ void encode_symbol( stream, s ) BIT_FILE *stream; SYMBOL *s; { long range; /* * These three lines rescale high and low for the new symbol. */ range = (long) ( high-low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * This loop turns out new bits until high and low are far enough * apart to have stabilized. */ for ( ; ; ) { /* * If this test passes, it means that the MSDigits match, and can * be sent to the output stream. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { OutputBit( stream, high & 0x8000 ); while ( underflow_bits > 0 ) { OutputBit( stream, ~high & 0x8000 ); underflow_bits--; } } /* * If this test passes, the numbers are in danger of underflow, because * the MSDigits don't match, and the 2nd digits are just one apart. */ else if ( ( low & 0x4000 ) && !( high & 0x4000 )) { underflow_bits += 1; low &= 0x3fff; high |= 0x4000; } else return ; low <<= 1; high <<= 1; high |= 1; } } /* * When decoding, this routine is called to figure out which symbol * is presently waiting to be decoded. This routine expects to get * the current model scale in the s->scale parameter, and it returns * a count that corresponds to the present floating point code: * * code = count / s->scale */ short int get_current_count( s ) SYMBOL *s; { long range; short int count; range = (long) ( high - low ) + 1; count = (short int) ((((long) ( code - low ) + 1 ) * s->scale-1 ) / range ); return( count ); } /* * This routine is called to initialize the state of the arithmetic * decoder. This involves initializing the high and low registers * to their conventional starting values, plus reading the first * 16 bits from the input stream into the code value. */ void initialize_arithmetic_decoder( stream ) BIT_FILE *stream; { int i; code = 0; for ( i = 0 ; i < 16 ; i++ ) { code <<= 1; code += InputBit( stream ); } low = 0; high = 0xffff; } /* * Just figuring out what the present symbol is doesn't remove * it from the input bit stream. After the character has been * decoded, this routine has to be called to remove it from the * input stream. */ void remove_symbol_from_stream( stream, s ) BIT_FILE *stream; SYMBOL *s; { long range; /* * First, the range is expanded to account for the symbol removal. */ range = (long)( high - low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * Next, any possible bits are shipped out. */ for ( ; ; ) { /* * If the MSDigits match, the bits will be shifted out. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { } /* * Else, if underflow is threatening, shift out the 2nd MSDigit. */ else if ((low & 0x4000) == 0x4000 && (high & 0x4000) == 0 ) { code ^= 0x4000; low &= 0x3fff; high |= 0x4000; } else /* * Otherwise, nothing can be shifted out, so I return. */ return; low <<= 1; high <<= 1; high |= 1; code <<= 1; code += InputBit( stream ); } } /************************** End of ARITH1.C ****************************/