/************************************************************************
 * This program is Copyright (C) 1986 by Jonathan Payne.  JOVE is       *
 * provided to you without charge, and with no warranty.  You may give  *
 * away copies of JOVE, including sources, provided that this notice is *
 * included in all the files.                                           *
 ************************************************************************/

/* Contains commands for C mode.  Paren matching routines are in here. */

/* [TRH] Jun-90:
 * - incorporated advanced (4.14) c_indent
 * - cleaned up comment format
 * [TRH] 04-Nov-91:
 * - ignore (almost) all `backslashed' characters in {m,scan}_paren;
 * - make quotes mode-specific.
 * Wishlist:
 * - find out start state of quotes in {m,scan}_paren;
 * - handle ``# ... \n''-type comments (ie., // C++; # sh/awk/perl; % TeX)
 */

#include "jove.h"

RCS("$Id: c.c,v 14.30.0.5 1993/09/01 00:29:58 tom Exp tom $")

#include "re.h"
#include "ctype.h"

/*
 * Tell whether the character at cp is escaped.
 *
 * Starting just before cp and never going below base, count the
 * unbroken run of escape characters that immediately precedes cp.
 * The result is 1 when that run has odd length (cp is escaped) and
 * 0 when it is even (including empty).
 *
 * A backslash is always an escape character.  Perl considers
 * $" $' $\ $[ $] etc. legitimate variables, which really makes `$'
 * behave similar to backslash, so in Perl mode `$' is counted too.
 * NOTE: these can (and should) be mixed!  (consider "$\$"" where the
 * middle `"' ought to be found `backslashed'.)
 */
int
backslashed(base, cp)
register const char *base, *cp;
{
	register int escapes = 0;

	for (;;) {
		if (cp <= base)
			break;		/* ran into the start of the text */
		--cp;
		if (*cp == '\\' ||
		    if_PERL(*cp == '$' && MajorMode(PERLMODE))) {
			escapes++;
			continue;
		}
		break;			/* first non-escape ends the run */
	}
	return (escapes & 1);
}

/* The paren characters m_paren() knows about, as adjacent open/close pairs. */
private char p_types[] = "(){}[]";

/* Outcome of the most recent paren match: MP_OKAY or one of the texts below. */
private char *mp_kind;

#define MP_OKAY		NULL

private char
	MP_MISMATCH[] = "[Mismatched parentheses]",
	MP_UNBALANCED[] = "[Unbalanced parenthesis]",
	MP_INCOMMENT[] = "[Inside a comment]";

/*
 * Report the outcome recorded in mp_kind: when it is anything other
 * than MP_OKAY, show it with message() and ring the bell.
 */
void
mp_error()
{
	if (mp_kind != MP_OKAY) {
		message(mp_kind);
		rbell();
	}
}

/* Precompile the search strings for speedup; since these depend on the
   Major mode, recompile them when the Major mode changes. */

private short compflag = -1;	/* should we (p)recompile?
*/ #define compiled (compflag == curbuf->b_major) #define MATCH 0 #define NEXPR (1 + 4) /* # of precompiled strings */ #define SCAN_BASE (NEXPR - 3) /* offset for scan_paren */ private char re_buf[NEXPR][12+Bit(BPC-3)]; /* precompiled search strings */ private const char * const re_fmt[NEXPR] = { /* uncompiled search strings */ "[][(){}%s]", /* m_paren */ "\\{[]})%s],\\<\\}", /* scan_paren, !skip_words, BACKWARD */ "[]})%s]", /* scan_paren, skip_words, BACKWARD */ "\\{[[{(%s],.\\>\\}", /* scan_paren, !skip_words, FORWARD */ "[[{(%s]", /* scan_paren, skip_words, FORWARD */ }; private const char re_quote[NMAJORS][4] = { /* Mode-dependent parts. */ "\"'", /* Fundamental mode */ "", /* Text mode (no quotes for more accurate \TeX mode) */ "\"'/", /* C mode */ #ifdef LISP "\"", /* Lisp mode (' has special meaning) */ #endif #ifdef PERL "\"'`", /* Perl mode */ #endif }; private void precompile __(( void )); private void precompile() { register int i = NEXPR - 1; register const char *quotes = re_quote[compflag = curbuf->b_major]; do { REcompile(sprint(re_fmt[i], quotes), YES, re_buf[i]); } while (--i >= 0); } /* * Search from the position given by sp, or the current position if * sp==NULL, for the paren that matches p_type. Search in the direction * dir. If (mode & MP_CAN_MISMATCH) then it is okay to have mismatched * parens. If (mode & MP_CAN_STOP) then when an open paren is found at * the beginning of a line, it is assumed that there is no point in * backing up further. This is so when you hit tab or LineFeed outside, * in-between procedure/function definitions, it won't sit there searching * all the way to the beginning of the file for a match that doesn't * exist. {forward,backward}-{list,s-expression} are the only ones that * insist on getting the "true" story. 
*/
/*
 * m_paren - find the paren that balances p_type.
 *
 *   p_type  paren character to match; complain() is called when it is
 *           not one of the characters in p_types.
 *   dir     search direction.  It is used arithmetically (as an offset
 *           into p_types and into the line text, and to adjust the
 *           nesting balance), so it is assumed to be +1 or -1.
 *   mode    bit mask; MP_CAN_MISMATCH and MP_CAN_STOP are examined.
 *   sp      start position, passed straight to docompiled().
 *
 * Returns the position delivered by docompiled() for the balancing
 * paren, or NULL.  mp_kind is left describing the outcome:
 *   MP_OKAY        balancing paren found and it is the partner of p_type
 *   MP_MISMATCH    balancing paren found but of a different kind (the
 *                  position is still returned if MP_CAN_MISMATCH is set)
 *   MP_UNBALANCED  search ran out, or was cut short by MP_CAN_STOP
 *   MP_INCOMMENT   the search turned out to have started inside a comment
 */
Bufpos *
m_paren(p_type, dir, mode, sp)
char p_type;
register int dir;
int mode;
register Bufpos *sp;
{
	register char *cp, p_match, quote = '\0';
	register int c_pos, balance = 0, comment = -1; /* i.e, unknown */
	char *re_p = re_buf[0];		/* the m_paren pattern */

	if (!compiled)
		precompile();
	mp_kind = MP_UNBALANCED;	/* let's be pessimistic */
	if ((cp = index(p_types, p_type)) == NULL)
		complain("[Cannot match %c's]", p_type);
	/* p_types holds open/close pairs side by side, so the partner of
	   p_type is its neighbour in the search direction. */
	p_match = cp[dir];
	/*
	 * [TRH] well, use rewritten docompiled() that does not
	 * depend on any cheating with curline/curchar.
	 */
	while (balance >= 0) {
		if ((sp = docompiled(dir, re_p, sp, (Line *)0)) == NULL)
			return NULL;
		/* c_pos is the column of the matched character; for a forward
		   search the reported position is one past it. */
		c_pos = sp->p_char;
		if (dir > 0)
			c_pos--;
		cp = lcontents(sp->p_line) + c_pos;
		if (backslashed(cp - c_pos, cp) &&
		    /* Perl considers `$' followed by a single character
		       a legitimate variable. That is, EXCEPT `${' where
		       the `{' is still used for grouping. */
		    !if_PERL(*cp == '{' && cp[-1] == '$')) {
			/* (nothing to do: escaped characters are ignored.) */
		}
		else if (quote) {
			/* inside a quoted string: only the same quote ends it */
			if (*cp == quote)
				quote = '\0';
		}
		else if (comment > 0) {
			/* inside a comment: wait for the '/' that, together with
			   the '*' on the side we came from, ends it */
			if ((*cp == '/') && (c_pos - dir >= 0) && (cp[-dir] == '*'))
				comment = NO;
		}
		else if (isopenp(*cp)) {
			if ((balance += dir) >= 0 && (mode & MP_CAN_STOP) && (c_pos == 0))
				/*
				 * [TRH] although we stop prematurely, we
				 * still generate the error message, in the
				 * hope that it will be more informative than
				 * no message at all.
				 */
				return NULL;
		}
		else if (isclosep(*cp)) {
			balance -= dir;
		}
		else if (*cp == '/') { /* occurs in C mode only */
			/* a '*' on the far side of the '/' means we are
			   entering a comment in the search direction */
			if ((c_pos + dir >= 0) && (cp[dir] == '*'))
				comment = YES;
			else if ((comment < 0) && (c_pos - dir >= 0) && (cp[-dir] == '*')) {
				/* bail out if we started inside a comment */
				mp_kind = MP_INCOMMENT;
				return NULL;
			}
		}
		else {
			/* anything else the pattern matched is a quote character */
			quote = *cp;
		}
	}
	/*
	 * we get here only on balanced parens (which can still mismatch).
	 * Note that the algorithm does not check on NESTED mismatched
	 * parens.
	 */
	mp_kind = MP_OKAY;
	if (*cp != p_match) {
		mp_kind = MP_MISMATCH;
		if (!(mode & MP_CAN_MISMATCH))
			sp = NULL;
	}
	return sp;
}

/*
 * Scan for a suitable start parenthesis in direction dir, starting at
 * position bp.  If skipwords is YES intermediate words are ignored, else
 * the search is stopped if an intermediate word is found and 0 is
 * returned.  Quoted strings count as a single word.  If a paren is found,
 * it is returned.  In C mode comments are skipped.  The final location is
 * returned in *bp.
 *
 * When nothing suitable is found, *bp is set to the start of the first
 * line of the current buffer (backward search) or the end of its last
 * line (forward search) and 0 is returned.
 *
 * dir + skip_words selects one of the four scan_paren patterns in
 * re_buf, so dir is assumed to be +1 or -1 and skip_words 0 or 1.
 */
private int scan_paren __(( int _(dir), int _(skip_words), Bufpos *_(bp) ));

private int
scan_paren(dir, skip_words, bp)
register int dir;
int skip_words;
register Bufpos *bp;
{
	int comment = NO, quote = '\0';
	Bufpos *rp;
	char *re_p = re_buf[SCAN_BASE + dir + skip_words];

	if (!compiled)
		precompile();
	while (rp = docompiled(dir, re_p, bp, (Line *)0)) {
		register const char *cp;
		register int c_pos;

		*bp = *rp;
		/* column of the matched character (see m_paren) */
		c_pos = bp->p_char;
		if (dir > 0)
			c_pos--;
		cp = lcontents(bp->p_line) + c_pos;
		if (backslashed(cp - c_pos, cp) &&
		    /* Perl considers `$' followed by a single character
		       a legitimate variable. That is, EXCEPT `${' where
		       the `{' is still used for grouping. */
		    !if_PERL(*cp == '{' && cp[-1] == '$')) {
			/* (nothing to do: escaped characters are ignored.) */
		}
		else if (quote) {
			if (*cp == quote) {
				quote = '\0';
				/* a complete quoted string counts as a word */
				if (!skip_words)
					return 0;
			}
		}
		else if (comment) {
			if ((*cp == '/') && (c_pos - dir >= 0) && (cp[-dir] == '*'))
				comment = NO;
		}
		else if (has_syntax(*cp, _Op|_Cl)) {
			return *cp;
		}
		else if (isword(*cp)) {
			if (!skip_words)	/* in some modes: isword('\'') */
				return 0;
		}
		else if (*cp == '/') { /* occurs in C mode only */
			if ((c_pos + dir >= 0) && (cp[dir] == '*'))
				comment++;
		}
		else {
			/* anything else the pattern matched is a quote character */
			quote = *cp;
		}
	}
	/* if we get here we didn't find a good match in the buffer */
	{
		register Buffer *cb = curbuf;

		if (dir < 0) {
			bp->p_line = cb->b_first;
			bp->p_char = 0;
		}
		else {
			bp->p_char = length(bp->p_line = cb->b_last);
		}
	}
	return 0;
}

/* With these routines in hand, the move-{s-expression,list} routine
   is very simple indeed */

/*
 * FSexpr - move over `exp' expressions; a negative exp reverses the
 * direction.  Each step looks for the next paren with scan_paren() and,
 * if one is found, jumps to its partner with m_paren() (mismatches are
 * tolerated but reported through mp_error()).  Dot is set to the last
 * position reached, also when a step fails.
 */
DEF_CMD( "backward-list", FSexpr, NEGATE|ARG(YES) );
DEF_CMD( "backward-s-expression", FSexpr, NEGATE|ARG(NO) );
DEF_CMD( "forward-list", FSexpr, ARG(YES) );
DEF_CMD( "forward-s-expression", FSexpr, ARG(NO) )
{
	register int num, dir = FORWARD;
	Bufpos save;
	register Bufpos *bp = &save;

	if ((num = exp) < 0) {
		dir = BACKWARD;
		num = -num;
	}
	DOTsave(bp);
	while (--num >= 0) {
		register Bufpos *mp;
		register int paren;

		/* The -list commands have ARG(YES) in Type field set */
		if (paren = scan_paren(dir, ObjArg(LastCmd), bp)) {
			mp = m_paren(paren, dir, MP_CAN_MISMATCH, bp);
			mp_error();
			if (mp == NULL)
				break;
			*bp = *mp;	/* remember */
		}
	}
	SetDot(bp);
}

/*
 * UpList - move to the paren enclosing dot.  The paren kind handed to
 * m_paren() is a brace when C_like_Mode(curbuf) holds and a round
 * paren otherwise; a negative exp reverses the direction and uses the
 * corresponding close character.  The result of m_paren() is passed
 * to SetDot() unchecked.
 * NOTE(review): m_paren() can return NULL here; presumably SetDot()
 * copes with that - confirm.
 */
DEF_CMD( "backward-up-list", UpList, NEGATE );
DEF_CMD( "up-list", UpList, NO )
{
	register int dir = FORWARD;
	register int paren = C_like_Mode(curbuf) ? '{' : '(';

	if (exp < 0) {
		dir = -dir;
		paren = (paren == '{') ? '}' : ')';
	}
	/* [TRH] Allow mismatches */
	SetDot(m_paren(paren, dir, MP_CAN_MISMATCH|MP_CAN_STOP, (Bufpos *) 0));
	mp_error();
}

/*
 * FDownList - move forward to just after the next paren, provided it
 * is not a closing one; otherwise complain.  In Lisp mode only round
 * parens are searched for.
 */
DEF_CMD( "down-list", FDownList, NO )
{
	register Bufpos *mp;
#ifdef LISP
# define PARENS (MajorMode(LISPMODE) ? "[()]" : "[][(){}]")
#else
# define PARENS "[][(){}]"
#endif
	/* the character examined is the one just before the position
	   returned by the search */
	if ((mp = dosearch(PARENS, FORWARD, YES)) == NULL ||
	    isclosep(lcontents(mp->p_line)[mp->p_char - 1]))
		complain("[No contained expression]");
	SetDot(mp);
#undef PARENS
}

/*
 * Get the line which has the "right" indent for the matching start brace
 * (at bp).  If it is the first non-blank character on the line, we return
 * this line right away, else it is probably a construct like "do {" or
 * "if ( ... ) {", eventually stretched over several lines, so we backup
 * until we find a word or a close paren.
 *
 * *bp is used as scratch space by the backward scan but is restored
 * before returning.
 */
private Line *c_get_indent_line __(( Bufpos *_(bp) ));

private Line *
c_get_indent_line(bp)
register Bufpos *bp;
{
	{
		register const char *cp = lcontents(bp->p_line), *base = cp;

		/* walk back from the brace over white space; reaching the
		   start of the line means nothing precedes the brace */
		cp += bp->p_char;
		do {
			if (cp == base)
				return bp->p_line;
		} while (isspace(*--cp));
	}
	/* only get here if there was a non-blank before the brace */
	{
		Bufpos save;
		register Bufpos *mp;
		register int paren;
		register Line *line;

		save = *bp;	/* remember it so we can restore it later on */
		paren = scan_paren(BACKWARD, NO, bp);
		/* default: the line where the backward scan stopped */
		line = bp->p_line;
		/* stopped on a paren: use the line of its partner instead */
		if (paren && (mp = m_paren(paren, BACKWARD, MP_CAN_STOP, bp)))
			line = mp->p_line;
		*bp = save;	/* restore remembered location */
		return line;
	}
}

/* If CArgIndent == ALIGN_ARGS then the indentation routine will
   indent a continued line by lining it up with the first argument.
   Otherwise, it will indent CArgIndent characters past the indent
   of the first line of the procedure call.
*/
#define ALIGN_ARGS	0	/* [TRH] can't enter negative variables */

/* NOTE(review): the _IF(...) markers look like annotations for a
   build-time tool rather than ordinary C; confirm against jove.h. */
DEF_INT( "c-argument-indentation", CArgIndent, V_BASE10 ) = ALIGN_ARGS; _IF(def PRIVATE)

/*
 * c_indent - indent the current line for C code.
 *
 * Searches backward with m_paren() for the paren that balances a `}'.
 *  - Proper match (an open brace): the indent is taken from the line
 *    chosen by c_get_indent_line(); if that indent is not positive and
 *    the line is a #define, the column where the macro name starts is
 *    used.  When incrmt is non-zero and the current line does not start
 *    with `}', one tab increment is added.
 *  - Mismatch (some other open paren): the line is treated as a
 *    continued argument list and indented according to CArgIndent.
 *  - Nothing found: the indent is 0.
 *
 * The computed indent is applied with n_indent().  Returns the
 * position obtained from m_paren(), which may be NULL.
 */
Bufpos *
c_indent(incrmt)
int incrmt;
{
	register Bufpos *bp;
	register int indent = 0;

	if (bp = m_paren('}', BACKWARD, MP_CAN_MISMATCH|MP_CAN_STOP, (Bufpos *)0)) {
		if (mp_kind == MP_OKAY) {	/* good match */
			{
				register const char *cp;
				register Line *line = c_get_indent_line(bp);

				if ((indent = get_indent(line)) <= 0 &&
				    /* special treatment for #define statements;
				       line up with start of macro name */
				    (cp = lcontents(line))[0] == '#' &&
				    LookingAt("#[ \t]*\\{define[ \t]+,\\}", cp, 0))
					indent = calc_pos(cp, REeom);
			}
			{
				/* NOTE(review): tabstop is never read directly;
				   presumably the TabIncr() macro refers to it
				   (see "local tabstop!" below) - do not remove. */
				register int tabstop;

				if ((tabstop = incrmt) &&
				    /* keep close braces properly aligned */
				    (ToIndent(), linebuf[curchar] != '}'))
					indent += TabIncr(indent);	/* local tabstop! */
			}
		}
		else {
			register const char *cp = lcontents(bp->p_line), *base = cp;

			/* aligning: start counting just past the open paren;
			   otherwise count from the start of its line */
			if (CArgIndent == ALIGN_ARGS)
				cp += indent = bp->p_char + 1;
			/* skip the white space that follows */
			while (isspace(*cp++))
				indent++;
			indent = calc_pos(base, indent) + CArgIndent;
#if (ALIGN_ARGS != 0)
			if (CArgIndent == ALIGN_ARGS)
				indent -= ALIGN_ARGS;
#endif
		}
	}
	n_indent(indent);
	return bp;
}

/*
 * [TRH] StrLength was in misc.c - but I felt it belonged here.
 * rewritten to cope with escaped quotes and -newlines
 */
#ifndef TINY

private int do_strlen __(( _PI_(char) _(quote) ));

/*
 * do_strlen - count the characters of the string literal around dot.
 *
 * `quote' is the delimiter.  The opening quote is looked for backward
 * from dot, continuing onto a previous line only when that line ends
 * in an escaped newline; escaped quotes are skipped.  Characters are
 * then counted up to the next occurrence of the quote character.
 * An escape sequence counts as one character: a backslash followed by
 * `x' and up to two hex digits, by up to three octal digits, or by
 * any other single character.  An escaped newline is not counted.
 *
 * Returns the count, -1 when no opening quote is found, or -2 when the
 * string is not terminated.
 */
private int
do_strlen(quote)
char quote;
{
	register int n;
	register const char *lp = linebuf, *base = lp;
	register Line *line = curline;

	/* skip backward to start quote, if any */
	lp += curchar;
	do {
		if (lp == base) {
			/*
			 * quote not found on current line.
			 * see if previous line has escaped newline
			 */
			if ((line = line->l_prev) == NULL)
				return -1;
			base = lp = lcontents(line);
			/* move lp to the terminating NUL of that line */
			while (*lp++)
				;
			--lp;
			if (!backslashed(base, lp))
				return -1;
		}
	} while (*--lp != quote || backslashed(base, lp));
	/*
	 * if we get here, there is at least a start quote.
	 * keep counting characters until we encounter the end
	 * quote (or hit a non-escaped newline)
	 */
	for (n = 0, ++lp; *lp != quote; ) {
		if (*lp == '\0')	/* Arghh. an un-terminated string */
			return -2;
		if (*lp++ == '\\') {
			if (*lp == '\0') {	/* an escaped newline */
				if ((line = line->l_next) == NULL)
					return -2;	/* end of buffer */
				lp = lcontents(line);
				continue;	/* don't count */
			}
			else if (tolower(*lp) == 'x') {
				/* step over `x' and at most two hex digits */
				if ((++lp, ishex(*lp)) && (++lp, ishex(*lp)))
					++lp;
			}
			else if (isoctal(*lp++)) {
				/* up to two more octal digits; back up over the
				   first character that is not one */
				if (!isoctal(*lp++) || !isoctal(*lp++))
					--lp;
			}
		}
		n++;
	}
	return n;
}

/*
 * StrLength - report the length of the string literal around dot.
 * With exp_p set the delimiter is a single quote, otherwise a double
 * quote.  Complains when do_strlen() reports failure.
 */
DEF_CMD( "string-length", NonExisting, NO ) _IF(def TINY);
DEF_CMD( "string-length", StrLength, NO ) _IF(ndef TINY)
{
	register int len;

	if ((len = do_strlen(exp_p ? '\'' : '"')) < 0)
		complain("Where are the quotes?");
	s_mess("%d character%n.", len);
}

#endif /* TINY */

#ifdef CMT_FMT

/* Default format used by fill-comment; it is parsed by parse_cmt_fmt(). */
DEF_STR( "comment-format", CmtFmt, 80, V_STRING ) _IF(def CMT_FMT)_IF(def PRIVATE)
	= "/*%n%! * %c%!%n */";

/* Strip leading and trailing white space.  Skip over any imbedded '\n's.
   The result is stored, NUL-terminated, in `to'; its length is returned. */
private int strip_c __(( const char *_(from), char *_(to) ));

private int
strip_c(from, to)
const char *from;
char *to;
{
	register const char *s = from;
	register char *d = to, c;

	while (c = *s++) {
		/* the first character that is neither white space nor a
		   newline starts the text to keep */
		if (!isspace(c) && c != '\n') {
			/* copy the rest of the string, dropping newlines */
			do {
				if (c != '\n')
					*d++ = c;
			} while (c = *s++);
			/* back up over trailing white space */
			do {
				if (!isspace(*--d)) {
					++d;
					break;
				}
			} while (d > to);
			break;
		}
	}
	*d = '\0';
	return (int)(d - to);
}

/* One field of a parsed comment format: its text and the text's length. */
typedef struct {
	int cb_len;
	char cb_str[20];
} body_tp;

/* Fill in the data structure from the format string.  Don't return
   if there's trouble.
*/ private void parse_cmt_fmt __(( const char *_(str), body_tp _(body)[8], int *_(nlflags) )); private void parse_cmt_fmt(str, body, nlflags) const char *str; register body_tp body[8]; int *nlflags; { static const char fieldname[4][16] = { "open comment", "line header", "line trailer", "close comment" }; register const char *fmtp = str; register char *body_p; register int n; *nlflags = 0; { n = 1; } do { body_p = body->cb_str; body->cb_len = 0; while (*body_p++ = *fmtp) { if (++(body->cb_len) == sizeof(body->cb_str)) complain("[Too long %s field: %s]", fieldname[n-1], fmtp - 1); if (*fmtp++ != '%') continue; --body_p; /* backup over '%' */ switch(*fmtp++) { default: complain("[Unknown comment escape: %%%c]", *--fmtp); case 'n': if (n & 2) complain("[%%n not allowed in %s: %s]", fieldname[n-1], fmtp - 2); *nlflags |= n; *body_p++ = '\n'; continue; case 't': *body_p++ = '\t'; continue; case '%': /* "%" already there */ continue; case 'c': if (n != 2) complain("[misplaced comment body: %s]", fmtp - 2); break; case '!': if (n == 4) complain("[Too many fields in comment format %s]", fmtp - 2); break; } /* only get here on %! and %c */ body->cb_len--; *body_p = '\0'; break; } body[4].cb_len = strip_c(body->cb_str, body[4].cb_str); } while (++body, ++n <= 4); } DEF_CMD( "fill-comment", Comment, EDIT ) _IF(def CMT_FMT) { register Buffer *cb = curbuf; int saveRMargin, saveSaveReg = SaveRegion, indent_pos, close_at_dot = NO; register char *cp; Bufpos open_c_pt, close_c_pt; register Bufpos *match; register Mark *entry_mark; body_tp body[8]; int nlflags; #define NL_IN_OPEN_C (nlflags & 1) #define NL_IN_CLOSE_C (nlflags & 4) #define open_c body[0].cb_str /* the open comment format string */ #define l_header body[1].cb_str /* the prefix for each comment line */ #define l_trailer body[2].cb_str /* the suffix ... 
*/ #define close_c body[3].cb_str /* the close comment string */ #define open_pat body[4].cb_str /* search patterns for the above */ #define header_pat body[5].cb_str #define trailer_pat body[6].cb_str #define close_pat body[7].cb_str #define open_c_len body[0].cb_len #define l_head_len body[1].cb_len #define l_trail_len body[2].cb_len #define close_c_len body[3].cb_len #define open_p_len body[4].cb_len #define header_len body[5].cb_len #define trailer_len body[6].cb_len #define close_p_len body[7].cb_len parse_cmt_fmt((exp_p) ? ask(CmtFmt, ProcFmt) : CmtFmt, body, &nlflags); exp_p = NO; /* for SetMark(), DelNChar() */ /* figure out if we're "inside" a comment */ if ((match = dosearch(open_pat, BACKWARD, NO)) == NULL) complain("No opening %s to match to.", open_pat); open_c_pt = *match; if ((match = dosearch(close_pat, BACKWARD, NO)) && inorder(open_c_pt.p_line, open_c_pt.p_char, match->p_line, match->p_char)) complain("Must be between %s and %s to re-format.", open_pat, close_pat); /* Here's where we figure out whether to format from dot or from the close comment. Note that we've already searched backwards to find the open comment symbol for the comment we are formatting. The open symbol mentioned below refers to the possible existence of the next comment. There are 5 cases: 1) no open or close symbol ==> dot 2) open, but no close symbol ==> dot 3) close, but no open ==> close 4) open, close are inorder ==> dot 5) open, close are not inorder ==> close */ if ((match = dosearch(close_pat, FORWARD, NO)) == NULL || ((close_c_pt = *match, match = dosearch(open_pat, FORWARD, NO)) && inorder(match->p_line, match->p_char, close_c_pt.p_line, close_c_pt.p_char))) { close_at_dot++; DOTsave(&close_c_pt); } SetDot(&open_c_pt); indent_pos = calc_pos(linebuf, cb->b_char); set_mark(); /* Mark to start of comment */ SetDot(&close_c_pt); /* * [TRH May-89] save original comment on kill ring (and leave Mark at * end of comment when done) so that you can easily undo it. 
*/ if (True(SaveRegion)) SaveRegion = NO, /* to avoid save in RegJustify */ CopyRegion(); /* so you can undo it */ /* search for a close comment; delete it if it exits */ if (!close_at_dot) { exp = -close_p_len, DelNChar(); /* DoTimes(DelNChar(), -close_p_len); - we don't want it saved on the kill ring */ } entry_mark = MakeMark(cb->b_dot, cb->b_char, FLOATER); SetDot(&open_c_pt); /* always separate the comment body from anything preceding it */ LineInsert(1); del_whitespace(FORWARD + BACKWARD); Bol(); for (cp = open_c; *cp; cp++) { if (*cp == '\n') { if (!eolp(cb)) LineInsert(1); else line_move(FORWARD); } else if (isspace(*cp)) { if (linebuf[cb->b_char] != *cp) Insert(*cp); } else /* Since we matched the open comment string on this line, we don't need to worry about crossing line boundaries. */ cb->b_char++; } set_mark(); /* Mark start of region to be adjusted */ /* Strip each comment line of the open and close comment strings before reformatting it. */ do { Bol(); del_whitespace(FORWARD + BACKWARD); if (header_len && numcomp(linebuf, header_pat) == header_len) exp = header_len, DelNChar(); /* DoTimes(DelNChar(), header_len); - we don't want it saved on the kill ring */ if (trailer_len) { Eol(); del_whitespace(FORWARD + BACKWARD); if ((cb->b_char > trailer_len) && (numcomp(&linebuf[cb->b_char - trailer_len], trailer_pat) == trailer_len)) exp = -trailer_len, DelNChar(); /* DoTimes(DelPChar(), trailer_len); - we don't want it saved on the kill ring */ } } while (cb->b_dot != entry_mark->m_line && (line_move(FORWARD), YES)); ToMark(entry_mark); saveRMargin = RMargin; RMargin -= l_head_len + l_trail_len + indent_pos - 2; #if NO do_rfill(); #else /* * [TRH] I like this better; it preserves paragraphs. But we * need to remove trailing blank lines first. */ while (linebuf[0] == '\0') exp = BACKWARD, DelNChar(); /* i.e., DelPChar() */ RegJustify(); #endif SaveRegion = saveSaveReg; RMargin = saveRMargin; PopMark(); /* get back to the start of adjusted region. 
*/ if (cb->b_dot != open_c_pt.p_line->l_next) { n_indent(indent_pos); ins_str(l_header, NO); } while (Eol(), (cb->b_dot != entry_mark->m_line)) { ins_str(l_trailer, NO); line_move(FORWARD); n_indent(indent_pos); ins_str(l_header, NO); } DelMark(entry_mark); del_whitespace(FORWARD + BACKWARD); /* if the addition of the close symbol would cause the line to be too long, put the close symbol on the next line. But don't do it if the close comment contains newline(s). */ if (NL_IN_CLOSE_C) ins_str(l_trailer, NO); else if (close_c_len + calc_pos(linebuf, cb->b_char) > RMargin) { ins_str(l_trailer, NO); LineInsert(1); n_indent(indent_pos); } for (cp = close_c; *cp; cp++) { if (*cp == '\n') { LineInsert(1); n_indent(indent_pos); } else Insert(*cp); } PtToMark(); /* i.e., Point to start of comment, Mark to end */ Eol(); DelNChar(); /* remove line separator */ } #endif /* CMT_FMT */ /*====================================================================== * $Log: c.c,v $ * Revision 14.30.0.5 1993/09/01 00:29:58 tom * (m_paren, scan_paren): de-const-ify `re_p'. * * Revision 14.30 1993/02/06 00:48:32 tom * cleanup whitespace; some random optimizations. * * Revision 14.28 1992/10/05 13:25:08 tom * convert to "port{ansi,defs}.h" conventions. * * Revision 14.27 1992/09/21 22:48:04 tom * add "up-list" command. * * Revision 14.26 1992/08/26 23:56:50 tom * PRIVATE-ized some Variable defs; add RCS directives. * */