/* boyerm - find lines containing given constant string */ /* A. Kotanski, 1989 */ /* For explanation of the method see the following articles : */ /* R.S.Boyer, J.S. Moore, "A fast string search algorithm" */ /* Comm. ACM 20, 762-772 (1977) */ /* D.E.Knuth, J.H.Morris and V.B.Pratt */ /* "Fast pattern matching in strings" */ /* SIAM J. Computing, 6, 323-350 (1977)*/ #include #define large 0x6000 #ifdef OSK #define max(a,b) ((a)>(b)?(a):(b)) #define min(a,b) ((a)>(b)?(b):(a)) #endif direct int except = 0, number = 0, ppos = 1; direct int patlen, stringlen; int delta0[128], delta2[80]; direct int c, argsused = 0; char string[256], pat[80], f[80]; extern char *optarg; extern int optind, opterr; main(argc, argv) /* find pattern from first argument */ int argc; char *argv[]; { int lineno = 0, match; register int i, t; while ((c = getopt(argc, argv, "nj:x")) != EOF ) { switch (c) { case 'n': number =1; argsused++; break; case 'j': ppos = atoi(optarg); argsused++; break; case 'x': except = 1; argsused++; break; case '?': fputs("Usage: boyerm \"pattern\" start comparing at column \n", stderr); fputs(" -x print lines that do not match\n\n", stderr); exit(0); } } if ( argc - argsused == 1 ) { setbuf(stderr, 0); fputs("boyerm: pattern = ", stderr); readln(2, pat, 80); pat[strlen(pat) - 1] = '\0'; } else strcpy(pat, argv[optind]); patlen = strlen(pat); for ( i = 0; i < 128; i++ ) delta0[i] = patlen; for ( i = 0; i < patlen; i++ ) delta0[pat[i]] = patlen - i - 1; delta0[pat[patlen - 1]] = large; for ( i = 0; i < patlen; i++) delta2[i] = patlen + patlen - i - 1; i = patlen - 1; t = patlen; while ( i >= 0 ) { f[i] = t; while ( ( t < patlen ) && ( pat[i] != pat[t] ) ) { delta2[t] = min(delta2[t], patlen-i-1); t = f[t]; } t--; i--; } for ( i = 0; i <= t; i++) delta2[i] = min(delta2[i], patlen + t - i); while( gets(string) ) { lineno++; stringlen = strlen(string); match = boyer(); if ( (match && !except) || (!match && except) ) { if (number) printf("%d: ", lineno); puts(string); } } } int boyer() { register int i, j; if ( (i = patlen + ppos - 2) >= stringlen ) return 0; for ( ; ; ) { while ( (i += delta0[string[i]]) < stringlen ) ; if ( i < large ) return 0; i -= large + 1; if ( (j = patlen - 2) < 0 ) return i + 2; while ( string[i] == pat[j] ) { i--; if ( --j < 0 ) return i + 2; } if ( string[i] == pat[patlen-1] ) i += delta2[j]; else i += max(delta0[string[i]], delta2[j]); } }