/**********************************************************************\ * * DGREP.C * * Main module for dgrep. Input, output, search strategy selection * etc. are done here. * For output we use fputs, because it makes possible to use own * putstr with Turbo C and MSC under MS-DOS (smaller exe-file). * * Author: Jarmo Ruuth 6-Feb-1988 * * Copyright (C) 1988-90 by Jarmo Ruuth * May be freely copied for any non-commercial usage. \**********************************************************************/ #include #include "system.h" #include "dfa.h" #include "dfaregex.h" #include "bm.h" #define VERSION "1.71" typedef enum { STOP, OK } status_t; typedef enum { NONE, /* no file name displayed */ BEFORE_LINE, /* name displayed before each matching line (in same line) */ BLOCK /* name displayed only once before matching lines */ } show_fname_t; /* flags and local variables */ #ifdef TEST int debug = FALSE; #endif static char nonmatch = FALSE; static char count = FALSE; static char names = FALSE; static char number = FALSE; static char ignorecase = FALSE; static char regmust_only = FALSE; static char exact = FALSE; static char silent = FALSE; static char no_boymoore = FALSE; static char touch = FALSE; static char block_mode = FALSE; static int leading_context = 0; static int trailing_context= 0; static ulong matchcount; static ulong linecount; static ulong* linecount_ptr = NULL; static int waiting_lines = 0; static int type_bits = RE_SEARCH; static char file_not_found = FALSE; static char touch_failed = FALSE; static char was_match = FALSE; static show_fname_t show_fname = NONE; static char fname_shown = FALSE; static char first_match = TRUE; static char* path = ""; /* current path and filename */ static int handle = 0; /* current file handle */ static char* expr = NULL; static char* regmust = NULL; static int regmustlen; static char use_normal; /* By default, use time() as a random number generator */ #define drandom() ((unsigned)time(NULL)) #if defined(__TURBOC__) /* Use faster direct dos system calls */ #define open(p,a) _open(p,O_RDONLY|O_DENYALL) #define close(h) _close(h) #define read(h,b,n) _read(h,b,n) #undef drandom #define drandom() get_sec() extern unsigned get_sec(void); #elif defined(M_I86) && defined(MSDOS) && !defined(OS2) /* Microsoft C */ /* Microsoft C defines MSDOS even in OS/2!, so you need to define OS2 yourself. */ #include static int msc_h, msc_cnt; #define open(p,a) (_dos_open(p,O_RDONLY,&msc_h)==0 ? msc_h : ERROR) #define close(h) _dos_close(h) #define read(h,b,n) (_dos_read(h,(void far *)(b),n,&msc_cnt)==0 \ ? msc_cnt : ERROR) #endif /* Funtext quotations are found from the pep[1-4]grep.doc texts written by James A. Woods. Texts came with the fastgrep archive. (Woods is also responsible for the original idea to combine bm-search with egrep.) */ static char* funtext[] = { "The chief defect of Henry King\n" "Was chewing little bits of string.\n" "\t-- Hilaire Belloc, Cautionary Tales [1907]\n", "Attempt the end, and never stand to doubt\n" "Nothing's so hard but search will find it out.\n" "\t-- Robert Herrick, Hesperides [1648]\n", "Gratiano speaks an infinite deal of nothing, more than any man in all\n" "of Venice. His reasons are as two grains of wheat hid in two bushels of\n" "chaff: you shall seek all day ere you find them, they are not worth\n" "the search.\n" "\t-- Shakespeare, Merchant of Venice\n", "Reach out and Boyer-Moore Egrep Someone\n" "\t-- James A. Woods\n", "I need very little,\n" " and of that little,\n" " I need very little.\n" "\t-- St. Francis of Assisi\n", "How long a time lies in one little word!\n" "\t-- Shakespeare, Richard II, I, iii\n", "Fine words butter no parsnips.\n" "\t-- Southern proverb\n" }; #define NFUNTEXT (sizeof(funtext) / sizeof(funtext[0])) static char* long_usage_line = { "Usage: dgrep [options] {-f expression file | [-e] expression} [file...]\n" }; static char* short_usage_line = { "Version " VERSION ", dgrep -h for full help\n" }; static char* options1 = { "Options: -An n lines after the matching line are printed\n" "\t -Bn n lines before the matching line are printed\n" "\t -b filename is displayed only once before matches\n" "\t -d only dfa is used for searching\n" "\t -c only a count of matching lines is printed\n" "\t -i case insensitive match\n" "\t -l only names of files with matching lines are printed\n" "\t -n each line is preceded by its relative line number in file\n" "\t -s silent mode, nothing is printed except error messages\n" }; /* split options for MSC */ static char* options2 = { #ifdef TOUCH "\t -t all files that contain matches are touched\n" #endif "\t -v all lines but those matching are printed\n" "\t -x exact, all characters in expression are taken literally\n" "\t -1-9 1-9 lines before and after the matching line are printed\n" "\t -e expression, useful when expression begins with -\n" "\t -f file that contains expression\n" }; static char* regexps = { "Regular expressions:\t\t\t" "." "\tany single character\n" "*" "\tzero or more repeats\t\t" "(...)" "\tgrouping\n" "+" "\tone or more repeats\t\t" "^" "\tbeginning of line\n" "?" "\tzero or one repeat\t\t" "$" "\tend of line\n" "[...]" "\tany character in set\t\t" "\\c" "\tquote special character c\n" "[^...]""\tany character not in set\t" "|" "\talternative (\"or\")\n" }; /********************************************************************** * * short_usage * * Exits with options help message and funtext. */ static void short_usage(void) { fputs(long_usage_line, stdout); fputs(short_usage_line, stdout); fputs(options1, stdout); fputs(options2, stdout); fputs(funtext[drandom() % NFUNTEXT], stdout); exit(2); } /********************************************************************** * * long_usage * * Exits with long help message. */ static void long_usage(void) { fputs(long_usage_line, stdout); fputs(options1, stdout); fputs(options2, stdout); fputs(regexps, stdout); exit(2); } /********************************************************************** * * quote_expr */ static char* quote_expr(REG1 char* expr, unsigned len) { REG2 char* p; REG3 char* ptr; p = malloc(2 * len + 1); if (p == NULL) error("Out of memory", 3); ptr = p; while (*expr) { *p++ = '\\'; *p++ = *expr++; } *p = '\0'; return ptr; } /********************************************************************** * * str_toupper */ static void str_toupper(REG1 uchar* s) { REG2 unsigned ch; for (; (ch = *s) != 0; s++) { if (islower(ch)) *s = toupper(ch); } } /********************************************************************** * * memrchr */ static char* memrchr(REG1 char* b, REG3 char c, REG2 unsigned n) { for (; n--; b--) { if (*b == c) return b; } return NULL; } /********************************************************************** * * show_number */ static void show_number(ulong number) { char number_buffer[10]; if (show_fname == BEFORE_LINE) fputs(":", stdout); fputs(ultoa(number, number_buffer, 10), stdout); } /********************************************************************** * * show_file */ #define show_file(p) fputs(p, stdout) /********************************************************************** * * show_line * * Shows line followed by '\n'. */ static void show_line(char* line, FILE *f) { fputs(line, f); fputs("\n", f); } #ifdef TOUCH /********************************************************************** * * touch_file */ static void touch_file(char* fname, int h) { if (!set_file_time(fname, h)) { fputs("Warning: Can't touch file ", stderr); show_line(fname, stderr); touch_failed = TRUE; } } #endif /* TOUCH */ /********************************************************************** * * print_line */ static char* print_line(char* beg, REG1 char* end, ulong lineno) { REG3 int c; c = *end; *end = '\0'; if (show_fname == BEFORE_LINE) show_file(path); else if (show_fname == BLOCK && !fname_shown) { fputs("*** File ", stdout); fputs(path, stdout); fputs(":\n", stdout); fname_shown = TRUE; } if (number) show_number(lineno); if (show_fname == BEFORE_LINE || number) fputs(":", stdout); show_line(beg, stdout); *end = c; return end + NEOL(end); } /********************************************************************** * * get_leading_bytes */ static int get_leading_bytes(char* bufbeg, char* bufend, int* nlines) { int nfound = 0; char* ptr = bufend; for (; ; ptr--) { if (ptr <= bufbeg) { *nlines = nfound; return bufend - bufbeg + 1; } if (*ptr == EOL1) { nfound++; if (nfound > *nlines) break; } } *nlines = nfound - 1; ptr += NEOL(ptr); return bufend - ptr + 1; } /********************************************************************** * * print_context */ static int print_context(char* bufbeg, char* bufend, int nlines, ulong lineno) { char* ptr; char* linebeg; ptr = linebeg = bufbeg; for (; nlines && ptr <= bufend; ptr++) if (*ptr == EOL1) { print_line(linebeg, ptr, lineno++); nlines--; linebeg = ptr + NEOL(ptr); } return nlines; } /********************************************************************** * * print_trailing_context */ #define print_trailing_context(p1, p2) \ print_context((p1), (p2), trailing_context, linecount+1) /********************************************************************** * * print_leading_context */ static void print_leading_context(char* bufbeg, char* bufend) { int bytes; int nlines; if (bufbeg >= bufend) return; nlines = leading_context; bytes = get_leading_bytes(bufbeg, bufend, &nlines); print_context(bufend-bytes+1, bufend, leading_context, linecount-nlines); } /********************************************************************** * * check flags */ static status_t check_flags(char* beg, char* end, char* bufend) { was_match = TRUE; #ifdef TOUCH if (touch) touch_file(path, handle); #endif if (names) { if (!silent) { show_file(path); fputs("\n", stdout); } return STOP; } if (count) { matchcount++; return OK; } if (silent) return OK; if (!first_match && (leading_context || trailing_context)) fputs("----------\n", stdout); first_match = FALSE; if (leading_context) print_leading_context(buffer, beg-1); print_line(beg, end, linecount); if (trailing_context) waiting_lines = print_trailing_context(end+NEOL(end), bufend); if (number) linecount++; return OK; } /********************************************************************** * * normal_dgrep * * When use_normal is TRUE, use this. This version separates * lines from buffer and then searches pattern from that line. */ static int normal_dgrep(char* buf, char* bufend, REG4 int bufsize) { REG3 char* line = buf; /* current line */ REG2 char* le; /* line end */ REG1 char* match; while ((le = memchr(line, EOL1, bufsize)) != NULL) { if (regmust) { match = boyer_moore(line,le,regmust,regmustlen); if (match != NULL && !regmust_only) match = reg_exec(line, le, NULL); } else match = reg_exec(line, le, NULL); if ((match && !nonmatch) || (!match && nonmatch)) { if (check_flags(line, le, bufend) == STOP) return -1; } else linecount++; le += NEOL(le); if ((bufsize -= (le-line)) <= 0) return 0; line = le; } return bufsize; } /********************************************************************** * * fast_dgrep * * Searches pattern from the whole buffer, and when a match is found, * makes a line from the match position. */ static int fast_dgrep(char* buf, REG3 char* bufend, int bufsize) { REG1 char* matchptr = buf; REG2 char* linebeg; /* beginning of current line */ for (;;) { if (regmust) matchptr=boyer_moore(matchptr,bufend,regmust,regmustlen); else { matchptr = reg_exec(matchptr, bufend, linecount_ptr); if (matchptr > buf && matchptr[0] == EOL1 && matchptr[-1] != EOL2) matchptr--; } if (matchptr == NULL) break; if ((linebeg=memrchr(matchptr, EOL2, matchptr-buf)) == NULL) linebeg = buf-1; ++linebeg; /* below matchptr points end of line */ matchptr = memchr(matchptr, EOL1, bufend-matchptr); if (matchptr == NULL) matchptr = bufend; if (regmust_only || !regmust /* match found or ...*/ || reg_exec(linebeg,matchptr,NULL) != NULL) /* verify ok */ if (check_flags(linebeg, matchptr, bufend) == STOP) return -1; matchptr += NEOL(matchptr); if (matchptr > bufend) break; } return (buf+bufsize)-(bufend+NEOL(bufend)); } /********************************************************************** * * dgrep_buffer */ static int dgrep_buffer(char* buf, int bufsize) { char* bufend; /* bufend is last EOL1 in buffer */ if ((bufend=memrchr(buf+bufsize-1, EOL1, bufsize)) == NULL) { fputs("Warning: No line separator in buffer", stderr); if (show_fname != NONE) { fputs(" in file ", stderr); show_line(path, stderr); } else fputs("\n", stderr); return bufsize; } if (waiting_lines) { print_context(buf, bufend, waiting_lines, linecount+trailing_context-waiting_lines+1); waiting_lines = 0; } return use_normal ? normal_dgrep(buf, bufend, bufsize) : fast_dgrep(buf, bufend, bufsize); } /********************************************************************** * * add_last_newline_if * * Checks that buffer ends with an EOL character. If it doesn't, adds one * at the end of buffer and returns 1. Otherwise returns 0. Routine * assemes that there is enough space for the added EOL character and * that bufsize is at least one. */ static int add_last_newline_if(char* buffer, int bufsize) { #if EOL1 == EOL2 if (buffer[bufsize-1] == EOL1) return 0; else { buffer[bufsize] = EOL1; return 1; } #else /* a more complicated case, check if last char is EOL1 */ if (buffer[bufsize-1] == EOL1) return 0; /* check if one char (that can't be EOL1) */ if (bufsize == 1) { buffer[bufsize] = EOL1; return 1; } /* check if EOL1-EOL2-pair at the end */ if (buffer[bufsize-2] == EOL1 && buffer[bufsize-1] == EOL2) return 0; /* otherwise there isn't terminating EOL1 */ buffer[bufsize] = EOL1; return 1; #endif } /********************************************************************** * * align * * If the buffer size is larger than ALIGN, align it. Using alignment we * can always read full I/O device blocks which potentially makes reading * faster. If ALIGN is 1 then no alignment is done. */ static int align(int bufsize) { if (bufsize < ALIGN) return bufsize; else return (bufsize / ALIGN) * ALIGN; } /********************************************************************** * * dgrep * * Greps previously opened handle h. */ static void dgrep(int h) { REG1 int bufsize; REG2 int nleftover = 0; REG3 int leading_bytes = 0; int nlines; int nread; linecount = 1L; /* first line number is 1 */ matchcount = 0L; waiting_lines = 0; fname_shown = FALSE; if (show_fname == BLOCK) /* reset context match flag */ first_match = TRUE; nread = align(maxbuf); while ((bufsize = read(h, buffer+leading_bytes+nleftover, nread)) > 0) { /* update nread to contain all bytes in the buffer */ nread += leading_bytes+nleftover; bufsize += nleftover; if (bufsize + leading_bytes < nread) /* not full buffer */ bufsize += add_last_newline_if(buffer, bufsize + leading_bytes); nleftover = dgrep_buffer(buffer+leading_bytes, bufsize); if (nleftover < 0) break; else if (nleftover == bufsize) nleftover = 0; else { bufsize += leading_bytes; if (leading_context && bufsize == nread) { nlines = leading_context; leading_bytes = get_leading_bytes( buffer, buffer+bufsize-nleftover-1, &nlines); } else leading_bytes = 0; memcpy(buffer, buffer+bufsize-leading_bytes-nleftover, leading_bytes+nleftover); } nread = align(maxbuf-leading_bytes-nleftover); } if (nleftover > 0) { buffer[nleftover++] = EOL1; dgrep_buffer(buffer, nleftover); } if (!silent && count && !names) { if (show_fname != NONE) show_file(path); show_number(matchcount); fputs("\n", stdout); } } /********************************************************************** * * dgrep_file * * Greps file defined in src. */ static void dgrep_file(REG1 char* src) { REG2 int h; /* file handle */ path = src; /* for filename display */ if ((h=open(src,O_RDONLY|O_BINARY)) != ERROR) { handle = h; dgrep(h); close(h); } else { fputs("Warning: Can't open file ", stderr); show_line(src, stderr); file_not_found = TRUE; } } /********************************************************************** * * read_exp * * Reads an expression from a file. */ static uchar* read_exp(REG1 char* str) { REG2 int bufsize; REG3 int h, len; if ((h = open(str, O_RDONLY|O_BINARY)) == ERROR) error("Can't open expression file",2); if ((bufsize = read(h, buffer, maxbuf)) == 0) error("Empty expression file",2); if (bufsize == ERROR) error("Error when reading expression file",3); buffer[bufsize] = '\0'; if ((str = memchr(buffer,EOL1,bufsize)) != NULL) *str = '\0'; len = strlen(buffer); if (len == 0) error("Empty expression in expression file",2); if ((str = malloc(len+1)) == NULL) error("Out of memory",3); return strcpy(str, buffer); } /********************************************************************** * * get_args */ static int get_args(REG1 int argc, char* argv[]) { REG2 int opt; extern int optind, opterr; extern char* optarg; extern int getopt(int argc, char* argv[], char* optionS); opterr = FALSE; /* handle errors ourselves */ while ((opt = getopt(argc, argv, "bdchilnrstvx123456789A:B:e:f:D")) != EOF) { switch (opt) { case 'A': trailing_context = (unsigned)atoi(optarg); break; case 'B': leading_context = (unsigned)atoi(optarg); break; case 'b': block_mode = TRUE; break; case 'd': no_boymoore = TRUE; break; case 'c': count = TRUE; break; case 'h': long_usage(); /* and exit */ case 'i': ignorecase = TRUE; type_bits |= RE_IGNORECASE; break; case 'l': names = TRUE; break; case 'n': number = TRUE; linecount_ptr = &linecount; break; case 's': silent = TRUE; break; case 't': touch = names = TRUE; break; case 'v': nonmatch = TRUE; break; case 'x': exact = TRUE; break; case 'e': expr = optarg; break; case 'f': expr = read_exp(optarg); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': leading_context = trailing_context = opt - '0'; break; #ifdef TEST case 'D': debug++; break; #endif default: error("Invalid command line option, " "dgrep -h for help", 2); } } if (count || names || silent || touch) leading_context = trailing_context = 0; if (names || touch || silent) block_mode = FALSE; return optind; } /********************************************************************** * * main */ int main(REG1 int argc, char* argv[]) { REG2 int optind; REG3 char* s; REG4 int len, nfile; #ifdef PROFILE prof_start(argv[0]); #endif if (argc == 1) short_usage(); alloc_buffer(); /* Allocate I/O buffer */ optind = get_args (argc, argv); if (optind >= argc && !expr) short_usage(); if (!expr) expr = argv[optind++]; #ifdef BSD /* It is faster to count lines with dfa in Bsd. (Bsd doesn't have system memchr, and my version is too slow?) In PC current method is faster. I don't know about other systems. */ if (number) no_boymoore = TRUE; #endif len = strlen(expr); if (exact && (no_boymoore || len > MAXREGMUST)) { /* Bm can handle only MAXREGMUST long patterns with N+M worst case. So we verify match with reg_exec, which does it in N+M time. (We use bm with the regmust pattern from reg_comp.) */ expr = quote_expr(expr, len); exact = FALSE; } if (!exact) { s = reg_comp_eol(expr, EOL1, EOL2, type_bits); if (s != NULL) error(s, 2); } if (!no_boymoore) regmust = exact ? expr : rbuf.regmust; if (regmust) { if (ignorecase) { str_toupper(regmust); str_toupper(expr); } regmust_only = (strcmp(regmust,expr) == 0); regmustlen = strlen(regmust); if (!regmust_only && regmustlen <= 1) regmust = NULL; else gosper(regmust, regmustlen, ignorecase); } use_normal = nonmatch || (regmust && number); nfile = argc - optind; if (nfile > 0) { if (block_mode) show_fname = BLOCK; else show_fname = BEFORE_LINE; while (optind < argc) dgrep_file(argv[optind++]); } else dgrep(fileno(stdin)); #ifdef TEST if (debug) { printf("maxbuf = %d\n", maxbuf); show_dfa_report(); show_boyer_moore_report(); fflush(stdout); } #endif return (file_not_found || touch_failed) ? 2 : was_match ? 0 : 1; }