/* lzpack.c - compress encode/decode file for Tar program (see file tar.c) * Programmer: T.V.Shaporev * Prepared for release 19 Oct 1990 * Called by store() (see file store.c) and extract() (see file extract.c) */ /* Source: * LZARI.C -- A Data Compression Program * 4/7/1989 Haruhiko Okumura * PC-VAN SCIENCE * NIFTY-Serve PAF01022 * CompuServe 74050,1022 */ /* Modified 30.9.90 Tim Shaporev */ #include "sysup.h" extern char v_flag; #include #ifdef UNIX char *malloc(); void free(); #endif #ifdef MSDOS # include #endif #define pare(x) ((x)+(x)) #define half(x) ((x)>>1) #include "sysup.h" #ifdef MSDOS # define __ARGS__(x) x #endif #ifndef __ARGS__ # define __ARGS__(x) () #endif #include "lzpack.h" static int GetBit __ARGS__(( void )); static void StartModel __ARGS__(( void )); static void UpdateModel __ARGS__(( int )); static int BinarySearchSym __ARGS__(( unsigned )); static int BinarySearchPos __ARGS__(( unsigned )); static void StartDecode __ARGS__(( void )); static int DecodeChar __ARGS__(( void )); static int DecodePosition __ARGS__(( void )); /********** Bit I/O **********/ static int (*getbyte) __ARGS__(( void )); static void (*putbyte) __ARGS__(( int )); static short GetBuf, GetMask; #define InitGetBit (GetMask=0) static int GetBit() /* Get one bit (0 or 1) */ { if ((GetMask >>= 1) == 0) { GetBuf = (*getbyte)(); GetMask = 128; } return (GetBuf & GetMask) != 0; } /********** LZSS with multiple binary trees **********/ #define N 4096 /* size of ring buffer */ #define F 60 /* upper limit for match_length */ #define THRESHOLD 2 /* encode string into position and length */ /* if match_length is greater than this */ #define NIL N /* index for root of binary search trees */ #define INC(x) ((x)=((x)+1)&(N-1)) /* ring buffer of size N, with extra F-1 bytes for string comparison */ /* unsigned char text_buf[N+F-1]; */ static short *core = (short *)0; #define position_cum ((unsigned short *)core) #if 0 #define lson (core + N+1) #define rson (core + N+1 + N+1) #define dad (core + N+1 + N+1 + N+257) #define text_buf ((unsigned char *)(core + N+1 + N+1 + N+257 + N+1)) #else #define text_buf ((unsigned char *)core) #endif int lzgetmem() { if (!core) { core=(short*)malloc(/*(N+1 + N+1 + N+257 + N+1)*sizeof(short)+*/ N+F); if (!core) return -1; } return 0; } void lzrelmem() { if (core) free((char *)core); core = (short *)0; } /********** Arithmetic Compression **********/ /* If you are not familiar with arithmetic compression, you should read I. E. Witten, R. M. Neal, and J. G. Cleary, Communications of the ACM, Vol. 30, pp. 520-540 (1987), from which much have been borrowed. */ #define M 15 /* Q1 (= 2 to the M) must be sufficiently large, but not so large as the unsigned long 4 * Q1 * (Q1 - 1) overflows. */ #define Q1 (1L << M) #define Q2 (2 * Q1) #define Q3 (3 * Q1) #define Q4 (4 * Q1) #define MAX_CUM (Q1 - 1) #define N_CHAR (256 - THRESHOLD + F) /* character code = 0, 1, ..., N_CHAR - 1 */ static unsigned long /*int*/ low = 0, high = Q4, value = 0; static short shifts = 0; /* counts for magnifying low and high around Q2 */ static short char_to_sym[N_CHAR], sym_to_char[N_CHAR + 1]; static unsigned short sym_freq[N_CHAR + 1]; /* frequency for symbols */ static unsigned short sym_cum[N_CHAR + 1]; /* cumulative freq for symbols */ #if 0 unsigned short position_cum[N + 1]; /* cumulative freq for positions */ #endif static void StartModel() /* Initialize model */ { register i; register ch, sym; sym_cum[N_CHAR] = 0; for (sym = N_CHAR; sym >= 1; sym--) { ch = sym - 1; char_to_sym[ch] = sym; sym_to_char[sym] = ch; sym_freq[sym] = 1; sym_cum[sym - 1] = sym_cum[sym] + sym_freq[sym]; } sym_freq[0] = 0; /* sentinel (!= sym_freq[1]) */ position_cum[N] = 0; for (i = N; i >= 1; i--) position_cum[i - 1] = position_cum[i] + 10000 / (i + 200); /* empirical distribution function (quite tentative) */ /* Please devise a better mechanism! */ } static void UpdateModel(sym) register int sym; { register i; register c; register ch_i, ch_sym; if (sym_cum[0] >= MAX_CUM) { c = 0; for (i = N_CHAR; i > 0; i--) { sym_cum[i] = c; c += (sym_freq[i] = (sym_freq[i] + 1) >> 1); } sym_cum[0] = c; } for (i = sym; sym_freq[i] == sym_freq[i - 1]; i--) ; if (i < sym) { ch_i = sym_to_char[i]; ch_sym = sym_to_char[sym]; sym_to_char[i] = ch_sym; sym_to_char[sym] = ch_i; char_to_sym[ch_i] = sym; char_to_sym[ch_sym] = i; } sym_freq[i]++; while (--i >= 0) sym_cum[i]++; } static int BinarySearchSym(x) register unsigned int x; /* 1 if x >= sym_cum[1], N_CHAR if sym_cum[N_CHAR] > x, i such that sym_cum[i - 1] > x >= sym_cum[i] otherwise */ { register i; register j; register k; i = 1; j = N_CHAR; while (i x) i=k+1; else j=k; return i; } static int BinarySearchPos(x) register unsigned int x; /* 0 if x >= position_cum[1], N - 1 if position_cum[N] > x, i such that position_cum[i] > x >= position_cum[i + 1] otherwise */ { register i; register j; register k; i = 1; j = N; while (i x) i=k+1; else j=k; return i-1; } static void StartDecode() { register i; for (i=0; i= Q2) { value -= Q2; low -= Q2; high -= Q2; } else if (low >= Q1 && high <= Q3) { value -= Q1; low -= Q1; high -= Q1; } else if (high > Q2) break; low += low; high += high; value = pare(value) + GetBit(); } ch = sym_to_char[sym]; UpdateModel(sym); return ch; } static int DecodePosition() { register unsigned long range; register position; range = high - low; position = BinarySearchPos((unsigned int) (((value - low + 1) * position_cum[0] - 1) / range)); high = low + (range * position_cum[position ]) / position_cum[0]; low += (range * position_cum[position + 1]) / position_cum[0]; for (;;) { if (low >= Q2) { value -= Q2; low -= Q2; high -= Q2; } else if (low >= Q1 && high <= Q3) { value -= Q1; low -= Q1; high -= Q1; } else if (high > Q2) break; low += low; high += high; value = pare(value) + GetBit(); } return position; } long lzdecode(fget, fput, textsize) /* Attemt to read after end of file IS NOT ERROR !!! Horror */ int (*fget) __ARGS__ ((void)); void (*fput) __ARGS__ ((int)); long textsize; { register i; register r; register c; register unsigned long count; getbyte = fget; putbyte = fput; low = 0; high = Q4; value = 0; shifts = 0; InitGetBit; count = 0; StartDecode(); StartModel(); for (i=0; i