#if 0 /* SAJ */ #include "compiler.h" #ifdef LATTICE #define void int #endif #endif /* SAJ */ #include "debug.h" /* sedcomp.c -- stream editor main and compilation phase The stream editor compiles its command input (from files or -e options) into an internal form using compile() then executes the compiled form using execute(). Main() just initializes data structures, interprets command line options, and calls compile() and execute() in appropriate sequence. The data structure produced by compile() is an array of compiled-command structures (type sedcmd). These contain several pointers into pool[], the regular-expression and text-data pool, plus a command code and g & p flags. In the special case that the command is a label the struct will hold a ptr into the labels array labels[] during most of the compile, until resolve() resolves references at the end. The operation of execute() is described in its source module. ==== Written for the GNU operating system by Eric S. Raymond ==== */ #include /* uses getc, fprintf, fopen, fclose */ #include "sed.h" /* command type struct and name defines */ /* imported functions */ extern int strcmp(); /* test strings for equality */ extern void execute(); /* execute compiled command */ /***** public stuff ******/ #define MAXCMDS 200 /* maximum number of compiled commands */ #define MAXLINES 256 /* max # numeric addresses to compile */ /* main data areas */ char linebuf[MAXBUF+1]; /* current-line buffer */ sedcmd cmds[MAXCMDS+1]; /* hold compiled commands */ long linenum[MAXLINES]; /* numeric-addresses table */ /* miscellaneous shared variables */ int nflag; /* -n option flag */ int eargc;/ 1yI99190@ 28 p990 q19q@0 8888x88888Șp"GVR67GVfb6FVfR4RQCMD,0, 0, 0, 0, 0, 0, H+CDCMD,0, 0, CGCMD, CHCMD, 0, 0, 0, 0, 0, CNCMD, 0, CPCMD, 0, 0, 0, H+CTCMD,0, 0, H+CWCMD, 0, 0, 0, 0, 0, 0, 0, 0, 0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD, HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0, PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD, XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0, }; static void compile() /* precompile sed commands out of a file */ { char ccode, *address(); PASS("compile(): entry"); for(;;) /* main compilation loop */ { if (*cp != ';') /* get a new command line */ if (cmdline(cp = linebuf) < 0) break; SKIPWS(cp); if (*cp=='\0' || *cp=='#') /* a comment */ continue; if (*cp == ';') /* ; separates cmds */ { cp++; continue; } /* compile first address */ if (fp > poolend) ABORT(TMTXT); else if ((fp = address(cmdp->addr1 = fp)) == BAD) ABORT(AGMSG); if (fp == cmdp->addr1) /* if empty RE was found */ { if (lastre) /* if there was previous RE */ cmdp->addr1 = lastre; /* use it */ else ABORT(FRENL); } else if (fp == NULL) /* if fp was NULL */ { fp = cmdp->addr1; /* use current pool location */ cmdp->addr1 = NULL; } else { lastre = cmdp->addr1; if (*cp == ',' || *cp == ';') /* there's 2nd addr */ { cp++; if (fp > poolend) ABORT(TMTXT); fp = address(cmdp->addr2 = fp); if (fp == BAD || fp == NULL) ABORT(AGMSG); if (fp == cmdp->addr2) cmdp->addr2 = lastre; else lastre = cmdp->addr2; } else cmdp->addr2 = NULL; /* no 2nd address */ } if (fp > poolend) ABORT(TMTXT); SKIPWS(cp); /* discard whitespace after address */ IFEQ(cp, '!') cmdp->flags.allbut = 1; SKIPWS(cp); /* get cmd char, range-check it */ if ((*cp < LOWCMD) || #1>>12161&'. '*')181212?$346412;0:412$477142412124212102941112224492 continue; /* skip next line read */ if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS); SKIPWS(cp); /* look for trailing stuff */ if (*cp != '\0') if (*++cp == ';') continue; elshOÌ@FO@OLHBpϞÁ@pπpp Oό Ì p@όOOOpόό Oext */ case 'i': /* insert text */ case 'r': /* read file into stream */ if (cmdp->addr2) ABORT(AD2NG); case 'c': /* change text */ if ((*cp == '\\') && (*++cp == '\n')) cp++; fp = gettext(cmdp->u.lhs = fp); break; case 'D': /* delete current line in hold space */ cmdp->u.link = cmds; break; case 's': /* substitute regular expression */ redelim = *cp++; /* get delimiter from 1st ch */ if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD) ABORT(CGMSG); if (fp == cmdp->u.lhs) /* if compiled RE zero len */ cmdp->u.lhs = lastre; /* use the previous one */ else /* otherwise */ lastre = cmdp->u.lhs; /* save the one just found */ if ((cmdp->rhs = fp) > poolend) ABORT(TMTXT); if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) ABORT(CGMSG); if (gflag) cmdp->flags.global++; while (*cp == 'g' || *cp == 'p' || *cp == 'P') { IFEQ(cp, 'g') cmdp->flags.global++; IFEQ(cp, 'p') cmdp->flags.print = 1; IFEQ(cp, 'P') cmdp->flags.print = 2; } case 'l': /* list pattern space */ if (*cp == 'w') cp++; /* and execute a w command! */ else break; /* s or l is done */ case 'w': /* write-pattern-space command */ case 'W': /* write-first-line command */ if (nwfiles >= WFILES) ABORT(TMWFI); fp=gettext(fname[nwfiles]=fp); /* filename will be in pool */ for(i = nwfiles-1; i >= 0; i--) /* match it in table */ if (strcmp(fname[nwfiles], fname[i]) == 0) { cmdp->fout = fout[i]; return(0); } /* if didn't find one, open new out file */ if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL) { fprintf(stderr, CCOFI, fname[nwfiles]); exit(2); } fout[nwfiles++] = cmdp->fout; break; case 'y': /* transliterate text */ fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */ if (fp == BAD) ABORT(CGMSG); /* fail on bad form */ if (fp > poolend) ABORT(TMTXT); /* fail on overflow */ break; } return(0); /* succeeded in interpreting one command */ } static char *rhscomp(rhsp, delim) /* uses bcount */ /* generate replacement string for substitute command right hand side */ register char *rhsp; /* place to compile expression to */ register char delim; /* regular-expression end-mark to look for */ { register char *p = cp; /* strictly for speed */ for(;;) if ((*rhsp = *p++) == '\\') /* copy; if it's a \, */ { *rhsp = *p++; /* copy escaped char */ /* check validity of pattern tag */ if (*rhsp > bcount + '0' && *rhsp <= '9') return(BAD); *rhsp++ |= 0x80; /* mark the good ones */ continue; / ТV6R 1~?R@@@@@@@@^T@@@X@\\\@T^@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@TVV@z@N`Nv@@@@@@@@@^T@@@@@T^@@@@@@@@@@@@@@@@@@@@@@@@@z@v@@@@@@@@@@@@@@@@@@@@@@@@PRv@@@@@@@@@@@^T@@@b@@@@T^@@@@@@@@@@@@@@@@@@@@@@@@@@@ else if (*rhsp++ == '\0') /* lasuόπτOσOpτÜ@pp O Èτ BO@πOόp@όOOψa   le it to */ char redelim; /) 0   ៟ ster char *ep = expbuf; /* current-compil` χpτόOO@τ@  χp c = '\n'; else if (c == 't') /* match a tab */ c = '\t'; else goto defchar; /* else match \c */ case '\0': /* ignore nuls */ continue; case '\n': /* trailing pattern delimiter is missing */ return(cp = sp, BAD); case '.': /* match any char except newline */ *ep++ = CDOT; continue; case '+': /* 1 to n repeats of previous pattern */ if (lastep == NULL) /* if + not first on line */ goto defchar; /* match a literal + */ if (*lastep == CKET) /* can't iterate a tag */ return(cp = sp, BAD); pp = ep; /* else save old ep */ while (lastep < pp) /* so we can blt the pattern */ *ep++ = *lastep++; *lastep |= STAR; /* flag the copy */ continue; case '*': /* 0..n repeats of previous pattern */ if (lastep == NULL) /* if * isn't first on line */ goto defchar; /* match a literal * */ if (*lastep == CKET) /* can't iterate a tag */ return(cp = sp, BAD); *lastep |= STAR; /* flag previous pattern */ continue; case '$': /* match only end-of-line */ if (*sp != redelim) /* if we're not at end of RE */ goto defchar; /* match a literal $ */ *ep++ = CDOL; /* insert end-symbol mark */ continue; case '[': /* begin character set pattern */ if (ep + 17 >= expbuf + RELIMIT) ABORT(REITL); *ep++ = CCL; /* insert class mark */ if (negclass = ((c = *sp++) == '^')) c = *sp++; svclass = sp; /* save ptr to class start */ do { if (c == '\0') ABORT(CGMSG); /* handle character ranges */ if (c == '-' && sp > svclass && *sp != ']') for (c = sp[-2]; c < *sp; c++) ep[c >> 3] |= bits[c & 7]; /* handle escape sequences in sets */ if (c == '\\') if ((c = *sp++) == 'n') c = '\n'; else if (c == 't') c = '\t'; /* enter (possibly translated) char in set */ ep[c >> 3] |= bits[c & 7]; } while ((c = *sp++) != ']'); /* invert the bitmask if all-but was specified */ if (negclass) for(classct = 0; classct < 16; classct++) ep[classct] ^= 0xFF; ep[0] &= 0xFE; /* never match ASCII 0 */ ep += 16; /* advance ep past set mask */ continue; defchar: /* match literal character */ default: /* which is what we'd do by default */ *ep++ = CCHR; /* insert character mark */ *ep++ = c; } } } static int cmdline(cbuf) /* uses eflag, eargc, cmdf */ /* read next command from -e argument or command file */ register char *cbuf; { register int inc; /* not char because must hold EOF */ cbuf--; /* so pre-increment points us at cbuf */ /* e command flag is on */ if (eflag) { register char *p; /* ptr to current -e argument */ static char *savep; /* saves previous value of p */ if (eflag > 0) /* there are pending -e arguments */ { eflag = -1; if (eargc-- <= 0) exit(2); /* if no arguments, barf */ /* else transcribe next e argument into cbuf */ p = *++eargv; while(*++cbuf = *p++) if (*cbuf == '\\') { if ((*++cbuf = ŕ '!8gD@aD'@#! 8g8g@@ 8g8gHY ؝YOH H ʈ[و HY[H ˆ ؝YH ˆ lno = lno*10 + *rcp++ - '0'; /* compute their value */ if (rcp > cp) /* if we caught a number... */ { *expbuf++ = CLNUM; /* put a numeric-address marker */ *expbuf++ = numl; /* and the address table index */ linenum[numl++] = lno; /* and set the table entry */ if (numl >= MAXLINES) /* oh-oh, address table overflow */ ABORT(TMLNR); /* abort with error message */ *expbuf++ = CEOF; /* write the end-of-address marker */ cp = rcp; /* point compile past the address */ return(expbuf); /* we're done */ } return(NULL); /* no legal address was found */ } static char *gettext(txp) /* uses global cp */ /* accept multiline input from *cp..., discarding leading whitespace */ register char *txp; /* where to put the text */ { register char *p = cp; /* this is for speed */ SKIPWS(p); /* discard whitespace */ do { if ((*txp = *p++) == '\\') /* handle escapes */ *txp = *p++; if (*txp == '\0') /* we're at end of input */ return(cp = --p, ++txp); else if (*txp == '\n') /* also SKIPWS after newline */ SKIPWS(p); } while (txp++); /* keep going till we find that nul */ } static label *search(ptr) /* uses global lablst */ /* find the label matching *ptr, return NULL if none */ register label *ptr; { register label *rp; for(rp = lablst; rp < ptr; rp++) if (strcmp(rp->name, ptr->name) == 0) return(rp); return(NULL); } static void resolve() /* uses global lablst */ /* write label links into the compiled-command space */ { register label *lptr; register sedcmd *rptr, *trptr; /* loop through the label table */ for(lptr = lablst; lptr < lab; lptr++) if (lptr->address == NULL) /* barf if not defined */ { fprintf(stderr, ULABL, lptr->name); exit(2); } else if (lptr->last) /* if last is non-null */ { rptr = lptr->last; /* chase it */ while(trptr = rptr->u.link) /* resolve refs */ { rptr->u.link = lptr->address; rptr = trptr; } rptr->u.link = lptr->address; } } static char *ycomp(ep, delim) /* compile a y (transliterate) command */ register char *ep; /* where to compile to */ char delim; /* end delimiter to look for */ { register char c, *tp, *sp; /* scan the 'from' section for invalid chars */ for(sp = tp = cp; *tp != delim; tp++) { if (*tp == '\\') tp++; if ((*tp == '\n') || (*tp == '\0')) return(BAD); } tp++; /* tp now points at first char of 'to' section */ /* now rescan the 'from' section */ while((c = *sp++ & 0x7F) != delim) { if (c == '\\' && *sp == 'n') { sp++; c = '\n'; } if ((ep[c] = *tp++) == '\\' && *tp == 'n') { ep[c] = '\n'; tp++; } if ((ep[c] == delim) || (ep[c] == '\0')) return(BAD); } if (*tp != delim) /* 'to', 'from' parts have unequal lengths */ return(BAD); cp = ++tp; /* point compile ptr past translit */ for(c = 0; c < 128; c++) /* fill in self-map entries in table */ if (ep[c] == 0) ep[c] = c; return(ep + 0x80); /* return first free location past table end */ } /* sedcomp.c ends here */