static char rcsid[] = "$Id: regex.c,v 1.1 1992/09/06 19:31:32 mike Exp $";

/* $Log: regex.c,v $
 * Revision 1.1  1992/09/06  19:31:32  mike
 * Initial revision
 *
 */

/*
 * regex - Regular expression pattern matching
 *         and replacement
 *
 * By:  Ozan S. Yigit (oz), Dept. of Computer Science, York University
 * Mods: C Durland
 *
 * These routines are the PUBLIC DOMAIN equivalents of regex routines as
 * found in 4.nBSD UN*X, with minor extensions.
 *
 * These routines are derived from various implementations found in software
 * tools books, and Conroy's grep.  They are NOT derived from
 * licensed/restricted software.  For more interesting/academic/complicated
 * implementations, see Henry Spencer's regexp routines, or GNU Emacs
 * pattern matching module.
 *
 * dfa = deterministic finite automata
 * Routines:
 *  re_comp: compile a regular expression into a DFA.
 *	char *re_comp(s)
 *	char *s;
 *	returns: NULL if OK, else error string
 *	If s is NULL or 0 length, last compiled pattern is used.
 *  re_exec: execute the DFA to match a pattern.
 *	int re_exec(s)
 *	char *s;
 *  re_subs: substitute the matched portions in a new string.
 *	int re_subs(src, dst)
 *	char *src;
 *	char *dst;
 *  re_fail:	failure routine for re_exec.
 *	void re_fail(msg, op)
 *	char *msg;
 *	char op;
 *  
 * Regular Expressions:
 *
 *      [1]     char    matches itself, unless it is a special
 *                      character (metachar): . \ [ ] * + ^ $
 *
 *      [2]     .       matches any character.
 *
 *      [3]     \       matches the character following it, except
 *			when followed by one of: ()123456789<> adnwW
 *			(see [7], [8] and [9])
 *			It is used as an escape character for all other
 *			meta-characters, and itself.  When used in a set
 *			([4]), it is treated as an ordinary character.
 *
 *      [4]     [set]   matches one of the characters in the set.
 *                      If the first character in the set is "^",
 *                      it matches a character NOT in the set. A
 *                      shorthand S-E is used to specify a set of
 *                      characters S upto E, inclusive. The special
 *                      characters "]" and "-" have no special
 *                      meaning if they appear as the first chars
 *                      in the set.
 *                      Example:   Matches:
 *			--------   -------
 *			[a-z]	   Any lowercase alpha.
 *
 *			[^]-]      Any char except ] and -.
 *
 *			[^A-Z]     Any char except uppercase alpha.
 *
 *			[a-zA-Z]   Any alpha.
 *
 *                      [a-b-c] == [a-bb-c] == [a-c]
 *                      [a-a] == [a]
 *			[-abc] == [abc-]  Match -, a, b, c.
 *
 *			[]] == ]   Match "]"
 *			[-]]	   Match only "-]".  This is a set ([-])
 *				   and a character (]).
 *			
 *			[z-a]      Nothing and is an error.
 *
 *      [5]     *       any regular expression form [1] to [4], followed by
 *                      closure char (*) matches zero or more matches of
 *                      that form.
 *
 *      [6]     +       same as [5], except it matches one or more.
 *
 *      [7]             a regular expression in the form [1] to [10], enclosed
 *                      as \(form\) matches what form matches. The enclosure
 *                      creates a set of tags, used for [8] and for
 *                      pattern substution. The tagged forms are numbered
 *			starting from 1.
 *
 *      [8]             a \ followed by a digit 1 to 9 matches whatever a
 *                      previously tagged regular expression ([7]) matched.
 *
 *	[9]	\<	a regular expression starting with a \< construct
 *		\>	and/or ending with a \> construct, restricts the
 *			pattern matching to the beginning of a word, and/or
 *			the end of a word. A word is defined to be a character
 *			string beginning and/or ending with the characters
 *			A-Z a-z 0-9 and _. It must also be preceded and/or
 *			followed by any character outside those mentioned.
 *
 *      [10]            a composite regular expression xy where x and y
 *                      are in the form [1] to [10] matches the longest
 *                      match of x followed by a match for y.
 *
 *      [11]	^	a regular expression starting with a ^ character
 *		$	and/or ending with a $ character, restricts the
 *                      pattern matching to the beginning of the line,
 *                      or the end of line. [anchors] Elsewhere in the
 *			pattern, ^ and $ are treated as ordinary characters.
 *
 * Acknowledgements:
 *   HCR's Hugh Redelmeier has been most helpful in various stages of
 *   development.  He convinced me to include BOW and EOW constructs,
 *   originally invented by Rob Pike at the University of Toronto.
 * References:
 *   Software tools		Kernighan & Plauger
 *   Software tools in Pascal	Kernighan & Plauger
 *   Grep [rsx-11 C dist]	David Conroy
 *   ed - text editor		Un*x Programmer's Manual
 *   Advanced editing on Un*x	B. W. Kernighan
 *   RegExp routines		Henry Spencer
 * Notes:
 *  This implementation uses a bit-set representation for character sets for
 *    speed and compactness.  Each character is represented by one bit in a
 *    N-bit block.  Thus, SET or NSET always takes a constant M bytes in the
 *    internal dfa, and re_exec does a single bit comparison to locate the
 *    character in the set.  N is 128 for 7 bits ASCII and 256 for 8 bit
 *    data.  Thus M is 16 or 32 bytes.
 *  Put CLO in front of what gets closed for ease of interpreting.
 *  Put END at end of what gets closed to limit recursion.
 * Examples:
 *	pattern:	foo*.*
 *	compile:	CHR f CHR o CLO CHR o END CLO ANY END END
 *	matches:	fo foo fooo foobar fobar foxx ...
 *
 *	pattern:	fo[ob]a[rz]	
 *	compile:	CHR f CHR o SET bitset CHR a SET bitset END
 *	matches:	fobar fooar fobaz fooaz
 *
 *	pattern:	foo\\+
 *	compile:	CHR f CHR o CHR o CHR \ CLO CHR \ END END
 *	matches:	foo\ foo\\ foo\\\  ...
 *
 *	pattern:	\(foo\)[1-3]\1	(same as foo[1-3]foo)
 *	compile:	BOT 1 CHR f CHR o CHR o EOT 1 SET bitset REF 1 END
 *	matches:	foo1foo foo2foo foo3foo
 *
 *	pattern:	\(fo.*\)-\1
 *	compile:	BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
 *	matches:	foo-foo fo-fo fob-fob foobar-foobar ...
 */

#include <char.h>
#include <const.h>

typedef unsigned char Char;

#define MAXDFA 768	/* amount of space for compiled RE */
#define MAXTAG  10

#define CHR	 1	/* character		:: CHR<character> */
#define ANY	 2	/* .			:: ANY */
#define SET	 3	/* set: [...]		:: SET bitset */
#define NSET	 4	/* not set: [^...]	:: SET bitset */
#define BOL	 5	/* beginning of line: ^ :: BOL */
#define EOL	 6	/* end of line: $	:: EOL */
#define BOT	 7	/* beginning of tag: \( */
#define EOT	 8	/* end of tag: \) */
#define BOW	 9	/* beginning of word: \< */
#define EOW	10	/* end of word: \> */
#define REF	11	/* tag reference: \1,...,\9 */
#define CLO	12	/* closure: +, *	:: CLO dfa END */
#define SPACE	13	/* ": ": match isspace() */
#define ALPHA	14	/* :a	match isalpha() */
#define DIGIT	15	/* :d 	match isdigit() */
#define ALNUM	16	/* :n	match isalnum() */
#define WORD	17	/* :w	match isword() */
#define NWORD	18	/* :W	match !isword() */

#define END	 0

/* ******************************************************************** */
/* **************************** Bit Tables **************************** */
/* ******************************************************************** */

/* 
 * Bit table:  a string of bits stored in an array of char
 *     .-----------------------------------.
 *     |01234567|89012345|67890123|45678901|
 *     `-----------------------------------'
 *    bits[0]      [1]      [2]      [3]
 * To find what bucket the nth bit is in (8 bits per bucket):
 *       bit_bucket(n) = bits[n/8]
 *   It might be a good idea to restrict n so it doesn't index outside its
 *     range (only works if number of bits is a power of 2):
 *       n = n & ((max_n - 1) & ~7)  where max_n is a power of 2
 *     The ~7 is just to get rid of the lower bits that won't do anything
 *	 anyway.
 * The nth bit in the bucket is n mod 8 (ie the lower 3 bits of n (0-7) are
 *   the bit position):
 *       bit_flag(n) = 1 << (n & 7)
 * To find the state of the nth bit (0 == off and !0 == on):
 *       bit_bucket(n) & bit_flag(n)
 * To set the nth bit:
 *       bits[bit_bucket(n)] |= bit_flag(n)
 * Notes:
 *   The bits are stored in a character array so that the array can be
 *     easily copied without worrying about alignment (ie can use a loop as
 *     well as memcpy()).
 *   This is based on two's complement math.
 */

/* The following defines are for character set size.  128 for straight
 *   ASCII, 256 for Euro ASCII (8 bit characters).
 */
#define MAXCHR	256		/*  128 or  256 */
#define BLKIND	0xf8		/* 0x78 or 0xf8 */

/*
 * The following defines are not meant to be changeable.
 * They are for readability only.
 */
#define CHRBIT	8
#define BITBLK	MAXCHR/CHRBIT
#define BITIND	0x7

static Char bittab[BITBLK];	/* bit table for SET */

    /* Add or check to see if a character is in the bit table (character
     *   set).
     * Note:
     *   When calling these routines, make sure c is an unsigned char (or
     *     int) so if it has the high bit set, casting it to an int won't
     *     make it a large negative number.
     */
#define ISINSET(bittab,c) ((bittab)[((c) & BLKIND)>>3] & (1<<((c) & BITIND)))
#define   CHSET(bittab,c)  (bittab)[((c) & BLKIND)>>3] |= 1<<((c) & BITIND)

static void chset(c) register int c; { CHSET(bittab,c); }

/* ******************************************************************** */

#define SLOP	50		/* dfa overflow protection */

static Char dfa[MAXDFA + SLOP];	/* automaton */
static int
  tagstk[MAXTAG],		/* subpat tag stack */
  pattern_compiled = FALSE;	/* status of lastpat */

#define BADPAT(msg)	return (*dfa = END, (Char *)msg)
#define STORE(x)	(*mp++ = x)  /* !!! should check for dfa overflow */
 
    /* Compile RE to internal format & store in dfa[]
     * Input:
     *   pat : pointer to regular expression string to compile.
     * Returns:
     *   NULL:  RE compiled OK.
     *   pointer to error message.
     */
Char *re_comp(pat) Char *pat;
{
  register Char
    *p,			/* pattern pointer */
    *mp = dfa,		/* dfa pointer */
    *lp,		/* saved pointer */
    *sp = dfa;		/* another one */
  register int
    tagi = 0,		/* tag stack index */
    tagc = 1,		/* actual tag count */
    n;

  if (pat == NULL || *pat == '\0')
    if (pattern_compiled) return NULL;
    else BADPAT("No previous regular expression");

  pattern_compiled = FALSE;

  *dfa = END;			/* init dfa for lp and sp */

  for (p = pat; *p; p++)
  {
    lp = mp;			/* start of next dfa state */
    switch(*p)
    {
      case '.': STORE(ANY); break;		/* match any character */
      case '^':					/* match beginning of line */
	if (p == pat) STORE(BOL);
	else { STORE(CHR); STORE(*p); }		/* match ^ */
	break;
      case '$':					/* match end of line */
	if (*(p+1) == '\0') STORE(EOL);
	else { STORE(CHR); STORE(*p); }		/* match $ */
	break;
      case '[':					/* match a set of characters */
	if (*++p == '^') { STORE(NSET); p++; } else STORE(SET);
	if (*p == ']') chset(*p++);		/* real bracket, match ] */
	if (*p == '-') chset(*p++);		/* real dash, match - */
	while (*p && *p != ']')
	{
	  if (*p == '-' && *(p+1) != '\0' && *(p+1) != ']')	/* [a-z] */
	  {
	    int c1, c2;

	    p++;
	    c1 = *(p-2);	/* 'a' */
	    c2 = *p++;		/* 'z' */
	    if (c1 > c2) BADPAT("Empty set");	/* something like [z-a] */
		/* remember that 'a' has already been put into bittab */
	    while (++c1 <= c2) chset(c1);	/* build bit table */
	  }
#ifdef EXTEND
	  else if (*p == '\\' && *(p+1)) { p++; chset(*p++); }
#endif
	  else chset(*p++);
	}
	if (*p == '\0') BADPAT("Missing ]");
	for (n = 0; n < BITBLK; bittab[n++] = '\0') STORE(bittab[n]);
	break;
      case '*':				/* match 0 or more of preceding RE */
      case '+':			/* match 1 or more.  Note: x+ == xx* */
	if (p == pat) BADPAT("Empty closure");
	lp = sp;		/* previous opcode */
	if (*lp == CLO) break;	/* equivalence: x** == x*  */
	switch(*lp)
	{
	  case BOL: case BOT: case EOT: case BOW: case EOW: case REF:
	    BADPAT("Illegal closure");
	}
	if (*p == '+') for (sp = mp; lp < sp; lp++) STORE(*lp);
	STORE(END); STORE(END); sp = mp;
	while (--mp > lp) *mp = mp[-1]; STORE(CLO);	/* open hole for CLO */
	mp = sp;
	break;
      case '\\':  		            /* tags, backrefs */
	switch(*++p)
	{
	  case '\0': BADPAT("Bad quote");
	  case '(':
	    if (tagc < MAXTAG)
	      { tagstk[++tagi] = tagc; STORE(BOT); STORE(tagc++); }
	    else BADPAT("Too many \\(\\) pairs");
	    break;
	  case ')':
	    if (*sp == BOT) BADPAT("Null pattern inside \\(\\)");
	    if (tagi > 0) { STORE(EOT); STORE(tagstk[tagi--]); }
	    else BADPAT("Unmatched \\)");
	    break;
	  case '<': STORE(BOW); break;
	  case '>':
	    if (*sp == BOW) BADPAT("Null pattern inside \\<\\>");
	    STORE(EOW);
	    break;
	  case '1': case '2': case '3': case '4': case '5': case '6': 
	  case '7': case '8': case '9':
	    n = *p - '0';
	    if (tagi > 0 && tagstk[tagi] == n) BADPAT("Cyclical reference");
	    if (tagc > n) { STORE(REF); STORE(n); }
	    else BADPAT("Undetermined reference");
	    break;
	  case ' ': STORE(SPACE); break;
	  case 'a': STORE(ALPHA); break;
	  case 'd': STORE(DIGIT); break;
	  case 'n': STORE(ALNUM); break;
	  case 'w': STORE(WORD);  break;
	  case 'W': STORE(NWORD); break;
#ifdef EXTEND
	  case 'b': STORE(CHR); STORE('\b'); break;
	  case 'n': STORE(CHR); STORE('\n'); break;
	  case 'f': STORE(CHR); STORE('\f'); break;
	  case 'r': STORE(CHR); STORE('\r'); break;
	  case 't': STORE(CHR); STORE('\t'); break;
#endif
	  default: STORE(CHR); STORE(*p);
	}
	break;
      default : STORE(CHR); STORE(*p); break;	/* an ordinary character */
    }
    sp = lp;		/* start of previous state */

    if (mp > dfa + MAXDFA) BADPAT("Pattern too long (braindead re_comp())");
  }

  if (tagi > 0) BADPAT("Unmatched \\(");

  STORE(END);

  pattern_compiled = TRUE;

  return NULL;
}

static Char *pmatch();

Char *bopat[MAXTAG], *eopat[MAXTAG];
int re_errorcode;	/* sleaze */

static Char *bol;

/* re_exec:  execute dfa to find a match.
 *
 * special cases: (dfa[0])	
 *  BOL
 *	Match only once, starting from the beginning.
 *  CHR
 *	First locate the character without calling pmatch(), and if found,
 *	call pmatch() for the remaining string.
 *  END
 *	re_comp() failed, poor luser did not check for it. Fail fast.
 *
 * If a match is found, bopat[0] and eopat[0] are set to the beginning and
 *   the end of the matched fragment, respectively.
 *
 * Input:
 *   lp: string to search
 *   SoL==TRUE if lp starts line
 *   move==TRUE if search the entire string for match
 * Returns:
 *   TRUE:  
 *   FALSE:  
 *   ????
 * Munges:
 *   re_errorcode: Set to FALSE, re_fail() might want to set it.
 * Notes:
 *   If SoL is FALSE, lp[-1] MUST be at valid!  A couple of REs will look
 *     there if they can.
 */
int re_exec(lp,SoL,move) register Char *lp; int SoL, move;
{
  register Char *ap = dfa, c;
  Char *ep = NULL;

  re_errorcode = FALSE;		/* assume no match */
  bol = (SoL ? lp : NULL);

  bopat[0] = bopat[1] = bopat[2] = bopat[3] = bopat[4] = bopat[5] = 
  bopat[6] = bopat[7] = bopat[8] = bopat[9] = NULL;

  switch(*ap)
  {
    case END: return FALSE;		/* munged automaton. fail always */
    case BOL:				/* anchored: match from BOL only */
      if (!SoL) return FALSE;		/* BoL can only be at front of dfa */
      ep = pmatch(lp,++ap);
      break;
    case CHR:				/* ordinary char: locate it fast */
      if (move)
      {
	c = *(ap+1);
	while (*lp && !ceq(*lp,c)) lp++;
	if (!*lp) return FALSE;		/* if EoS, fail. else fall thru */
      }
    default:				/* regular matching all the way. */
      if (!move) { ep = pmatch(lp,ap); break; }
#if 0
      while (*lp)
      {
	if ((ep = pmatch(lp,ap))) break;
	lp++;
      }
#else
      while (TRUE)
      {
	if ((ep = pmatch(lp,ap))) break;
	if ('\0' == *lp++) break;
      }
#endif
      break;
  }
  if (!ep) return re_errorcode;		/* only if pmatch() returns NULL */

  bopat[0] = lp; eopat[0] = ep; 

  return TRUE;
}

/* 
 * pmatch: internal routine for the hard part
 *
 * This code is mostly snarfed from an early grep written by David Conroy.
 *   The backref and tag stuff, and various other mods are by oZ.
 *
 * special cases: (dfa[n], dfa[n+1])
 *  CLO ANY
 *    We KNOW ".*" will match ANYTHING upto the end of line.  Thus, go to
 *    the end of line straight, without calling pmatch() recursively.  As in
 *    the other closure cases, the remaining pattern must be matched by
 *    moving backwards on the string recursively, to find a match for xy (x
 *    is ".*" and y is the remaining pattern) where the match satisfies the
 *    LONGEST match for x followed by a match for y.
 *  CLO CHR
 *    Scan forward matching the single char without recursion, and at the
 *    point of failure, we execute the remaining dfa recursively, as
 *    described above.
 *
 * At the end of a successful match, bopat[n] and eopat[n] are set to the
 *   beginning and end of subpatterns matched by tagged expressions (n = 1
 *   to 9).
 * 
 * Input:
 * Returns:
 *   NULL: No match, re_fail() may have been called.
 *   else: pointer to end of match.
 */

extern void re_fail();

	/* skip values for CLO XXX to skip past the closure */
#define ANYSKIP	2 		/* CLO ANY END ...	   */
#define CHRSKIP	3		/* CLO CHR chr END ...	   */
#define SETSKIP (2 +BITBLK)	/* CLO SET 16bytes END ... */

static Char *pmatch(lp,dfa) register Char *lp, *dfa;
{
  register Char
    *e,			/* extra pointer for CLO */
    *bp, *ep;		/* beginning and ending of subpat */
  Char *are;		/* to save the line ptr */
  register int op, c, n;

  while ((op = *dfa++) != END)
    switch(op)
    {
      case CHR:	if (!ceq(*lp++,*dfa++)) return NULL; break;
      case ANY: if (*lp++ == '\0')      return NULL; break;
      case SET:
        c = *lp++;
	if (!ISINSET(dfa,c)) return NULL;	/* ISINSET(dfa,0) is FALSE */
	dfa += BITBLK;
	break;
      case NSET:
	if ((c = *lp++) == '\0' || ISINSET(dfa,c)) return NULL;
	dfa += BITBLK;
	break;
      case BOT: bopat[*dfa++] = lp; break;
      case EOT: eopat[*dfa++] = lp; break;
      case EOL:   if (*lp != '\0')		    return NULL; break;
      case ALNUM: if (!isalnum(*lp++))		    return NULL; break;
      case ALPHA: if (!isalpha(*lp++)) 		    return NULL; break;
      case DIGIT: if (!isdigit(*lp++))		    return NULL; break;
      case SPACE: if (!isspace(*lp++))		    return NULL; break;
      case WORD:  if (!isword (*lp++))		    return NULL; break;
      case NWORD: if (*lp == '\0' || isword(*lp++)) return NULL; break;
      case BOW:
        if (!(lp != bol && isword(lp[-1])) && isword(*lp)) break;
	return NULL;
      case EOW:		/* 'w\0' is OK here */
        if ((lp != bol && isword(lp[-1])) && !isword(*lp)) break;
	return NULL;
      case REF:		/* !!! case_fold? */
        n = *dfa++; bp = bopat[n]; ep = eopat[n];
	while (bp < ep) if (*bp++ != *lp++) return NULL;  /* !!! recurse? */
	break;
      case CLO:
        are = lp; n = ANYSKIP;
	switch(*dfa)
	{
	  case ANY:   while (*lp)          lp++; break;
	  case ALNUM: while (isalnum(*lp)) lp++; break;
	  case ALPHA: while (isalpha(*lp)) lp++; break;
	  case DIGIT: while (isdigit(*lp)) lp++; break;
	  case SPACE: while (isspace(*lp)) lp++; break;
	  case WORD:  while (isword (*lp)) lp++; break;
	  case NWORD: while (*lp && !isword(*lp)) lp++; break;
	  case CHR:
	    c = *(dfa+1);		/* we know c!='\0' */
	    while (ceq(*lp,c)) lp++;
	    n = CHRSKIP;
	    break;
	  case SET: case NSET:
	    while (*lp && (e = pmatch(lp,dfa))) lp = e;
	    n = SETSKIP;
	    break;
	  default: re_fail("closure: bad dfa.",*dfa); return NULL;
	}
	dfa += n;
	while (lp >= are)	/* backup up till match next pattern */
	{
	  if (e = pmatch(lp,dfa)) return e;
	  --lp;
	}
	return NULL;
      default: re_fail("re_exec: bad dfa.",op); return NULL;
    }
  return lp;
}

/*
 * re_subs: substitute the matched portions of the src in dst.
 *	&	substitute the entire matched pattern.
 *	\digit	substitute a subpattern, with the given
 *		tag number. Tags are numbered from 1 to
 *		9. If the particular tagged subpattern
 *		does not exist, null is substituted.
 * 	!!!Note: if the line that was used re_exec() has gone byebye
 *	  then \digit will blow cookies since the tags point into the line.
 * Input:
 *   src:
 *   dst:
 * Returns:
 *   TRUE:   everything went as expected
 *   FALSE:  Bad src or no match.
 */
int re_subs(src,dst) register Char *src, *dst;
{
  register Char c, *bp, *ep;
  register int pin;

  if (!bopat[0]) return FALSE;

  while (c = *src++)
  {
    switch(c)
    {
      case '&': pin = 0; break;
      case '\\': 
        c = *src++;
	if (c >= '0' && c <= '9') { pin = c - '0'; break; }
      default: *dst++ = c; continue;
    }
    if ((bp = bopat[pin]) && (ep = eopat[pin]))
    {
      while (*bp && bp < ep) *dst++ = *bp++;
      if (bp < ep) return FALSE;
    }
  }
  *dst = '\0';

  return TRUE;
}

/* ******************************************************************** */
/* ************************* DEBUG ************************************ */
/* ******************************************************************** */

#ifdef DEBUG
/*
 * symbolic - produce a symbolic dump of the dfa
 */
symbolic(s) 
char *s;
{
	printf("pattern: %s\n", s);
	printf("dfacode:\n");
	dfadump(dfa);
}

static	
dfadump(dfa) Char *dfa;
{
  register int n;

  while (*dfa != END)
    switch(*dfa++)
    {
      case CLO:
        printf("CLOSURE");
	dfadump(dfa);
	switch(*dfa)
	{
	  case CHR: n = CHRSKIP; break;
	  case ANY: n = ANYSKIP; break;
	  case SET: case NSET: n = SETSKIP; break;
	}
	dfa += n;
	break;
      case CHR: printf("\tCHR %c\n",*dfa++); break;
      case ANY: printf("\tANY .\n"); break;
      case BOL: printf("\tBOL -\n"); break;
      case EOL: printf("\tEOL -\n"); break;
      case BOT: printf("BOT: %d\n",*dfa++); break;
      case EOT: printf("EOT: %d\n",*dfa++); break;
      case BOW: printf("BOW\n"); break;
      case EOW: printf("EOW\n"); break;
      case REF: printf("REF: %d\n",*dfa++); break;
      case SET:
        printf("\tSET [");
	for (n = 0; n < MAXCHR; n++)
	  if (ISINSET(dfa,n)) printf("%c",n);
	printf("]\n");
	dfa += BITBLK;
	break;
      case NSET:
        printf("\tNSET [");
	for (n = 0; n < MAXCHR; n++)
	  if (ISINSET(dfa,n)) printf("%c",n);
	printf("]\n"); dfa += BITBLK;
	break;
      default:
        printf("bad dfa. opcode %o\n", dfa[-1]);
	exit(1);
	break;
    }
}
#endif