/************************************************************************ * Binary file squeeze for packet transmission Steve Ward W1GOH 1/86 * * Version 2.0 2/20/86 * ************************************************************************ ** Copyright 1986 by W1GOH on bahalf of the Amateur Radio community. ** ** Unrestricted permission to use, copy, modify, extend, improve, ** ** and build upon this program is hereby granted. ** ************************************************************************ * * * Converts a binary file to ASCII, excluding "special" characters * * which might cause trouble over ASCII transmission media (viz. * * packet radio). Does some data compression. * * * * USAGE: * * bsq file.ext [outname] * * reads binary file.ext, writes file.bsq (or outname if specified)* * bsq file.bsq [outname] * * reads file.bsq, writes original file name (or outname) * * * * may include: * * -b Bootstrap mode. Does simple bit-stuffing, no data * * compression. Output file suitable for simple BASIC pgm * * -o Old format. Writes a .BSQ file compatible with earlier * * versions of BSQ. * * * * A trivial BASIC program suffices to decode files converted with * * the -b option. Although originally intended for bootstraping * * the BSQ program over the air, it may be used routinely if * * expedient. However, the BASIC pgm is very primitive indeed. * * * * This program is written in vanilla C, and should be easy to port. * * Currently I have it running on VAX/UNIX and IBM PCs; I * * encourage interested parties to port it to CPM and other * * environments. * * * ************************************************************************ * * * The encoding algorithm involves the following techniques: * * (1) Basic bit-stuffing, breaking input stream of 8-bit binary bytes* * into an output stream of 6-bit printable ASCII chars. Each * * of the latter is relocated to printable range by adding 33 dec.* * (2) Data compression, which uses 28 of the remaining 30 printable * * ASCII codes as "meta" characters abbreviating input byte * * sequences. My compression scheme is adapted from the TERSE * * algorithm; it involves 2 identical FSMs (at sender & receiver, * * respectively) which cause meta chars to be defined in identical* * way at both ends on basis of communications history. In * * particular, each output step -- either of a bit-stuffed byte * * or of a meta symbol -- causes a new meta symbol to be defined * * as the PREVIOUS TWO output symbols. The metasymbol to be * * thus defined is selected via a deterministic LRU approximation,* * avoiding conflicts with active subnodes via a reference count. * * (3) A 29th character serves as a REPEAT code. REPEAT followed by * * a 6-bit count (relocated to ASCII per above) causes the last * * INPUT symbol to be rescanned times... thus if the * * last input symbol was a meta symbol representing a 100-byte * * string, the repeat sequence might expand to 6300 bytes. * * (4) Additional compression hacks are envisioned for the remaining * * character. Watch this space. * * * * The bit-stuffing yields an output file 4/3 the size of the input, * * inflated further by (1) a header line and (2) CR/LFs at each output * * line. * * * * BUGS: * * 1. The program has suffered from a period of evolution, and needs * * to be cleaned up somewhat. Volunteers welcome. * * 2. The algorithm is still the subject of sporadic hacking. It * * might change incompatibly, if I hit upon something significantly* * better. Incompatibities are in theory detectable, however, * * by virtue of a version number in .bsq file headers. * ************************************************************************ * Program history: * * 12/24/85 SAW: First version working. * * 1/1/86 SAW: Added data compression (v. 1.0) * * 2/17/86 SAW: Cleaned up, implemented K1OJH's suggestion of output * * to file named in header. Also part number, nparts in header. * * (v. 1.2) * * 2/20/86 SAW: Improved data compression, by avoiding flushing the * * bit-stuff pipeline on meta chars (uses buffer BinBuf for latter)* * Also "suspicious text" messages. (v. 2.0) * ************************************************************************/ #include #define VERSION 2 /* Version number, for file header */ char *ANode(); /* Here for debugging only! */ /************************************************************************ * POTENTIAL MACHINE/COMPILER DEPENDANCIES * ************************************************************************/ typedef unsigned char byte; typedef unsigned short ushort; /************************************************************************ * Parameters & Globals * ************************************************************************/ #define NTS 28 /* Number of definable nonterminals */ #define NNODES NTS /* Max nodes for data compression */ #define ASCBITS 6 /* Bits per ASCII character. */ #define ASCODES (1<Prev) { if (n == oldnn) Error("Node alloc!");/* "Can't happen!" */ oldnn = oldn; p = &Nodes[n]; if (p->Refs == 0) /* Unused node? */ { KillNode(n); /* Yup, recycle it. */ break; } } p->Left = left; p->Right = right; p->Refs = 0; /* Presumably no superiors yet */ p->Prefix = UNUSED; /* No left superiors, yet */ p->Size = NodeSize(left)+NodeSize(right); /* Splice onto proper Prefix thread: */ if (ATOMIC(left)) { p->Thread = Prefixes[DATA(left)]; Prefixes[DATA(left)] = n; } else { p->Thread = Nodes[left].Prefix; Nodes[left].Prefix = n; } Touch(n); /* Bring it to head of LRU chn */ return n; } /* De-allocate a node: */ KillNode(n) { register struct Node *p; register NodeID *nid; p = &Nodes[n]; if (p->Left == UNUSED) return; /* Unallocated node. */ /* Splice out of prefix list. */ if (ATOMIC(p->Left)) nid = &Prefixes[DATA(p->Left)]; else nid = &(Nodes[p->Left].Prefix); for (; *nid != UNUSED; nid = &(Nodes[*nid].Thread)) { if (*nid == n) { *nid = p->Thread; break; } } /* Deref inferior nodes. */ if (!ATOMIC(p->Left)) Nodes[p->Left].Refs--; if (!ATOMIC(p->Right)) Nodes[p->Right].Refs--; p->Left = p->Right = UNUSED; /* Now nothings used. */ p->Refs = 0; } /* "Touch" a nonterminal... ie, make it recent. */ Touch(n) { int prev, next; register struct Node *p, *l; if (ATOMIC(n)) return; /* Shouldnt happen, but ... */ p = &Nodes[n]; /* To become head of chain. */ Nodes[p->Prev].Next = p->Next; /* Splice out node n. */ Nodes[p->Next].Prev = p->Prev; l = &Nodes[LRUHead]; /* Current head of chain. */ p->Prev = l->Prev; /* Splice in n as new head. */ p->Next = LRUHead; Nodes[p->Prev].Next = n; l->Prev = n; LRUHead = n; } /* Drive the database state: call whenever a node is output. */ NodeOut(n) NodeID n; { if (Previous != UNUSED) /* Define a new node. */ { NodeID new; new = NewNode(Previous, n); } Previous = n; } /* Lookahead Input routines: */ byte ibuf[1024]; /* Lookahead buffer */ short ibufbase = 0, /* Base index of buffer */ ibufn = 0; /* Bytes in buffer. */ #define INEOF ((!ibufn)&&(feof(in))) /* Detect EOF on input. */ Peek(n) /* Returns -1 iff can't. */ { int nr; register ch; while (ibufn <= n) { if (n >= 1024) return -1; if (feof(in)) return -1; /* Can't read any more. */ nr = (ibufbase+ibufn) & 1023; while (!feof(in) && (ibufn < 1024)) { if ((ch = getc(in)) == EOF) break; ibuf[nr++] = ch; nr &= 1023; ++ibufn; } } return 0xFF & ibuf[(n+ibufbase) & 1023]; } /* Remove next n input chars from buffer: */ Remove(n) { ibufbase = (ibufbase+n) & 1023; ibufn -= n; } NodeID FindBest; int FindSize; /* Called with n=node, offset = next input symbol locn on match. * Updates FindBest, FindSize to best (longest) match. */ MatchHit(n, offset) { NodeID thr; register struct Node *p; p = &Nodes[n]; if (ATOMIC(n)) { if (!FindSize) { FindSize = 1; FindBest = n; } } else { if (FindSize < p->Size) { FindSize = p->Size; FindBest = n; } thr = p->Prefix; FindRight(thr, offset); } } /* Follow a thread. Find all matches of right inferior, calling MatchHit. * Presumably, left nodes all match just fine. */ FindRight(thr, offset) { register i; while (thr != UNUSED) { if (i = AbMatch(offset, Nodes[thr].Right)) MatchHit(thr, offset+i); thr = Nodes[thr].Thread; } } /* Find longest applicable abbreviation. * Leaves Best node in FindBest, its size in FindSize; * Returns size of best abbreviation, or 0 iff none. */ NodeID AbBest; /* AbMatch return values: NodeID, */ int AbSize; /* Size of node. */ Abbrev() { register i, j; if (((j = Peek(0)) == -1) || /* EOF. */ ((i = Prefixes[j]) == UNUSED)) /* Nothing there. */ return 0; /* Return failure. */ FindBest = UNUSED; FindSize = 0; FindRight(i, 1); /* Search for match. */ return FindSize; } /* Perform the recursive match. * Returns results in AbBest, AbSize. * Returns size, or 0 iff none. * offset is position in input stream, n is node ID. */ AbMatch(offset, n) NodeID n; { register j; if (n == UNUSED) return 0; if (ATOMIC(n)) { if (DATA(n) == Peek(offset)) { AbBest = n; return AbSize = 1; } return 0; } if (j = AbMatch(offset, Nodes[n].Left)) { register k; if (k = AbMatch(offset+j, Nodes[n].Right)) { AbBest = n; return (AbSize = j+k); } } return 0; } /* Check for repeats. * Returns size of repeat string (in bytes), or 0 iff none. * Leaves repeat count in FindSize. * Guarantees repeat count < 64. */ Repeat() { register size, i; if (Previous == UNUSED) return; if (ATOMIC(Previous)) /* String of atoms? */ { for (i=DATA(Previous), size=0; Peek(size) == i; size++) if (size >= 63) break; return FindSize=size; } FindSize = 0; for (size=0; i = AbMatch(size, Previous);) { size += i; if (++FindSize >= 63) break; } return size; } /* Return pointer to directory-less file name: */ char *BaseName(cc) register char *cc; { register char *dd; dd = cc; for (;*cc;++cc) if ((*cc == '/') || (*cc == '\\')) dd = cc+1; return dd; } char *extension(name) char *name; { while (*name && (*name != '.')) name++; return name; } main(argc, argv) char **argv; { char *arg, argn, tobsq = 0; while ((argc>1) && (*(arg=argv[1]) == '-')) /* OPTIONS */ switch (*++arg) { case 'o': OldFlag = 1; argv++; argc--; Partials = 0; continue; case 'b': BFlag = 1; argv++; argc--; continue; default: Error("Illegal option '%s'", argv[1]); } if ((argc < 2) || (argc > 3)) Usage(); from = argv[1]; if (!strcmp(extension(from), ".bsq")) { if (argc > 2) strcpy(to, argv[2]); tobsq = 0; } else { if (argc > 2) strcpy(to, argv[2]); else { strcpy(to, BaseName(from)); strcpy(extension(to), ".bsq"); } tobsq = 1; } NodeInit(); /* Open input and output files. * * NB: Files must be read/written in BINARY. If your C * * environment (eg Lattice MSDOS) requires some funny code to * * open files for binary I/O, do it here! */ if (!(in = fopen(from, "r"))) Error("Can't read file %s", from); if (!tobsq) RdHeader(); if (!(out = fopen(to, "w"))) Error("Can't write file %s", to); Msg("W1GOH Binary SQueeze 2.0: %s -> %s\r\n", from, to); if (tobsq) Squeeze(); else UnSqueeze(); return 0; } Usage() { Error("Usage: bsq infile [outfile]\r\n"); } /************************************************************************ * * * Actual conversion happens here. * * * * Algorithm: * * bytes 041 - 0xx are 6-bit data, bit-stuffed into output byte stream* * Each 8-bit output byte shifted into LastCh output fifo, whence * * LastCh[0] is most recent byte, LastCh[1] next most recent, etc * * Remaining 30 codes are META bytes, and cause output from defn * * (besides flushing any accumulated partial byte if !Partials) * ************************************************************************/ /* File conversion: BINARY -> ASCII */ Squeeze() { register ch, i; /* First, make space for a header: */ WrHeader(); crlf(); NBytes = Length = 0; AscInit(); for (;;) { if (!BFlag && (BinN < (BBSIZE-3))) { int rsize=0, rcnt, asize=0; if ((ch = Repeat()) && (FindSize > 1) && (ch > 4)) { rsize = ch; rcnt = FindSize; } asize = Abbrev(); if ((rsize-1) > asize) /* Use the repeat?? */ { if (!Partials) FlushStuff(); /* Flush stuff state. */ OutBin(METARPT-ASCBAS); /* Use a meta char. */ OutBin(rcnt); /* Repeat count. */ for (i=0; i VERSION) Error("This is BSQ version %d, file used newer version %d!", VERSION, iversion); for (i=0; fname[i]; i++) if (fname[i] < ' ') fname[i] = 0; if (!*to) /* Use this as output file? */ strcpy(to, BaseName(fname)); /* Version-specific features: */ switch (iversion) { case 1: Partials = 0; CheckColumns = 0; break; } } /* Write a header to output file: */ WrHeader() { if (OldFlag) /* Old version header? */ fprintf(out, "=== BSQ %d FILE %6ld %6u %s", 1, NBytes, 0x7FFF & checksum, BaseName(from)); else fprintf(out, "=== BSQ ver %3d %6ld bytes %6u (%2u %2u) %s", VERSION, NBytes, 0x7FFF & checksum, ipartn, inparts, BaseName(from)); } #define LINSIZ 200 /* Maximum line size. */ byte InBuf[LINSIZ]; /* Input line buffer */ int InPtr=0, InSize=0; /* Number of chars in InBuf */ int Suspect=0, LineNo = 1; /* Number of suspicious line. */ GetC() /* Fetch an input character... */ { while (InPtr == InSize) /* At end of buffer? */ if (!GetLine()) return EOF; /* On end-of-file. */ return InBuf[InPtr++]; } GetLine() /* Fetch an input line. */ { register i, ch; /* Returns zero iff EOF. */ InSize = InPtr = 0; while (InSize == 0) { for (; InSize ' ') { InBuf[InSize++] = ch; continue; } if (!InSize) continue; goto GotLine; } } GotLine: if (Suspect) Msg("Suspicious text in file %s, line %d\r\n", from, Suspect); if (CheckColumns && (InSize != CheckColumns)) Suspect = LineNo; else Suspect = 0; return InSize; } UnSqueeze() { register ch; BinInit(); for (;;) { ch = GetC(); if (ch == EOF) break; if (ch > ' ') BinOut(ch); } BinFin(); fclose(in); fclose(out); if (Length != NBytes) Error("File size: %s is %ld, %s was %ld", to, NBytes, fname, Length); if ((icheck & 0x7FFF) != (checksum & 0x7FFF)) Error("Checksum error... %x %x\r\n", icheck, checksum); } /************************************************************************ * * * ASCII -> BINARY conversion routines. * * AscInit() - prepare for output. * * StuffOut(byte) - Bit-stuff a byte of 8 bits. * * AscFin() - flush buffers * * * ************************************************************************/ unsigned outbuf; /* Partial output bytes. */ unsigned bits; /* number of bits in outbuf */ unsigned outcol; /* output chars on current text line */ unsigned runch, run; /* For run-length encoding. */ AscInit() { checksum = outcol = outbuf = bits = 0; runch = run = 0; } /* Bit-stuff an 8-bit byte: */ StuffOut(b) unsigned b; { register ch; outbuf <<= 8; /* Stuff in the new byte. */ outbuf |= (b & ((1 << 8)-1)); bits += 8; /* 8 more bits. */ /* Now add to output, ASCBITS bits/output char: */ while (bits >= ASCBITS) { ch = (outbuf >> (bits-ASCBITS)) & 0x3F; /* 6 Bits. */ bits -= ASCBITS; OutAscii(ch); if (BinN) { register i; for (i=0; i 0) /* Output any remaining bits. */ OutAscii(0x3F & (outbuf<<(ASCBITS-bits))); outbuf = bits = 0; if (BinN) { register i; for (i=0; i= COLUMNS) crlf(); } AscFin() { FlushStuff(); if (outcol > 0) crlf(); } /* Output a CR/LF: */ crlf() { putc('\r', out); putc('\n', out); outcol = 0; } /************************************************************************ * * * BINARY -> ASCII conversion routines. * * BinInit() - prepare for output. * * BinOut(byte) - output a byte of s bits. * * BinFin() - flush buffers & close. * * * ************************************************************************/ BinInit() { checksum = outcol = outbuf = bits = 0; } /* Convert another ASCII input character to binary... */ BinOut(b) unsigned b; { register i, ch; b &= 0177; if (b <= ' ') return; /* Ignore control chars. */ ch = (b - 041) & ((1 << ASCBITS) -1); /* Extract ASCBITS */ if (b >= METABAS) { if (!Partials) bits = 0; /* Flush bit-stuff pipe. */ if (b == METARPT) /* Repeat character? */ { if ((ch=GetC()) == EOF) return; ch = 0x3F & (ch - 041);/* Repeat count. */ while (ch--) BinExp(Previous); return; } BinExp(b-METABAS); /* Expand the output. */ NodeOut(b-METABAS); /* Run the state machine. */ } else { BinOut1(ch); /* Bit-stuff input. */ } } BinOut1(b) unsigned b; { register ch; outbuf <<= ASCBITS; /* Stuff in the new byte. */ outbuf |= b; bits += ASCBITS; /* Now add to output, 8 bits/output char: */ while (bits >= 8) { ch = ((outbuf >> (bits-8)) & 0xFF); NodeOut(MKATOM(ch)); /* Recognise it as output */ bits -= 8; putc(ch, out); ++NBytes; checksum += ch; } } /* Expand & output binary bytes for a node. */ BinExp(n) NodeID n; { register j; if (n == UNUSED) return 0; if (ATOMIC(n)) { j = DATA(n); putc(j, out); ++NBytes; checksum += j; } else { BinExp(Nodes[n].Left); BinExp(Nodes[n].Right); } } BinFin() { if ((bits > 0) && (NBytes != Length)) /* Output any remaining bits.*/ { outbuf &= ((1 << bits)-1); putc(outbuf, out); checksum += outbuf; NBytes++; } } /************************************************************************ * Misc. utilitiy functions * ************************************************************************/ Error(a0,a1,a2,a3,a4,a5) /* Fatal error handler -- */ { fprintf(stderr, /* Called like printf. */ a0,a1,a2,a3,a4,a5); fprintf(stderr, "\r\n"); exit(-1); } Msg(a0,a1,a2,a3,a4,a5) /* Message, for debugging. */ { fprintf(stderr, /* Called like printf. */ a0,a1,a2,a3,a4,a5); fprintf(stderr, "\r\n"); }