/************************************************************************
 * This program is Copyright (C) 1986 by Jonathan Payne.  JOVE is       *
 * provided to you without charge, and with no warranty.  You may give  *
 * away copies of JOVE, including sources, provided that this notice is *
 * included in all the files.                                           *
 ************************************************************************/

/*
 * Modified: 1988-91 by T.R.Hageman
 *	Sep-88 [TRH] revision of DoJustify, DoPara
 *	Apr-89 [TRH] revised get_indent, find_para
 *	Jul-90 [TRH] consider lines starting with % (TeX comment) as blank
 *	Jan-91 [TRH] add right-margin alignment
 */

#include "jove.h"

RCS("$Id: para.c,v 14.32.0.11 1994/06/03 04:02:02 tom Exp tom $")

#include "ctype.h"
#include "re.h"

/* Thanks to Brian Harvey for this paragraph boundary finding algorithm.
   It's really quite hairy figuring it out.  This deals with paragraphs that
   are separated by blank lines, lines beginning with a Period (assumed to
   be an nroff command), lines beginning with BackSlash (assumed to be TeX
   commands).  Also handles paragraphs that are separated by lines of
   different indent; and it deals with outdented paragraphs, too.  It's
   really quite nice.  Here's Brian's algorithm.

   Definitions:

   THIS means the line containing the cursor.
   PREV means the line above THIS.
   NEXT means the line below THIS.

   BLANK means empty, empty except for spaces and tabs, starts with a period
   or a backslash, or nonexistent (because the edge of the buffer is
   reached).  ((BH 12/24/85 A line starting with backslash is blank only if
   the following line also starts with backslash.  This is so that \noindent
   is part of a paragraph, but long strings of TeX commands don't get
   rearranged.  It still isn't perfect but it's better.))

   BSBLANK means BLANK or starts with a backslash.  (BH 12/24/85)

   HEAD means the first (nonblank) line of the paragraph containing THIS.
   BODY means all other (nonblank) lines of the paragraph.
   TAIL means the last (nb) line of the paragraph.  (TAIL is part of BODY.)

   HEAD INDENT means the indentation of HEAD.  M-J should preserve this.
   BODY INDENT means the indentation of BODY.  Ditto.

   Subprocedures:

   TAILRULE(BODYLINE)
   If BODYLINE is BLANK, the paragraph has only one line, and there is no
   BODY and therefore no TAIL.  Return.  Otherwise, starting from BODYLINE,
   move down until you find a line that either is BSBLANK or has a different
   indentation from BODYLINE.  The line above that different line is TAIL.
   Return.

   Rules:

   1.  If THIS is BLANK, which command are you doing?  If M-J or M-[, then go
       up to the first non-BLANK line and start over.  (If there is no
       non-BLANK line before THIS, ring the bell.)  If M-], then the first
       non-BLANK line below THIS is HEAD, and the second consecutive
       non-BSBLANK line (if any) is the beginning of BODY.  (If there is no
       non-BLANK line after THIS, ring the bell.)  Do
       TAILRULE(beginning-of-BODY).  Go to rule A.

   2.  If PREV is BLANK or THIS is BSBLANK, then THIS is HEAD, and NEXT (if
       not BSBLANK) is in BODY.  Do TAILRULE(NEXT).  Go to rule A.

   3.  If NEXT is BSBLANK, then THIS is TAIL, therefore part of BODY.  Go to
       rule 5 to find HEAD.

   4.  If either NEXT or PREV has the same indentation as THIS, then THIS is
       part of BODY.  Do TAILRULE(THIS).  Go to rule 5 to find HEAD.
       Otherwise, go to rule 6.

   5.  Go up until you find a line that is either BSBLANK or has a different
       indentation from THIS.  If that line is BLANK, the line below it is
       HEAD.  If that line is non-BLANK, then call that new line THIS for
       what follows.  If (the new) PREV has the same indent as THIS, then
       (the new) NEXT is HEAD.  If PREV has a different indent from THIS,
       then THIS is HEAD.  Go to rule A.

   6.  If you got here, then both NEXT and PREV are nonblank and are
       differently indented from THIS.  This is a tricky case and there is
       no guarantee that you're going to win.  The most straightforward
       thing to do is assume that we are not using hanging indentation.  In
       that case: whichever of PREV and THIS is indented further is HEAD.
       Do TAILRULE(HEAD+1).  Go to rule A.

   6+. A more complicated variant would be this: if THIS is indented further
       than PREV, we are using regular indentation and rule 6 applies.  If
       PREV is indented further than THIS, look at both NEXT and the line
       after NEXT.  If those two lines are indented equally, and more than
       THIS, then we are using hanging indent, THIS is HEAD, and NEXT is
       the first line of BODY.  Do TAILRULE(NEXT).  Otherwise, rule 6
       applies.

   A.  You now know where HEAD and TAIL are.  The indentation of HEAD is
       HEAD INDENT; the indentation of TAIL is BODY INDENT.

   B.  If you are trying to M-J, you are now ready to do it.

   C.  If you are trying to M-], leave point after the newline that ends
       TAIL.  In other words, leave the cursor at the beginning of the line
       after TAIL.  It is not possible for this to leave point where it
       started unless it was already at the end of the buffer.

   D.  If you are trying to M-[, if the line before HEAD is not BLANK, then
       leave point just before HEAD.  That is, leave the cursor at the
       beginning of HEAD.  If the line before HEAD is BLANK, then leave the
       cursor at the beginning of that line.  If the cursor didn't move, go
       up to the first earlier non-BLANK line and start over.

   End of Algorithm.  I implemented rule 6+ because it seemed nicer.  */

DEF_INT( "right-margin", RMargin, V_BASE10 ) = 78;
DEF_INT( "left-margin", LMargin, V_BASE10 ) ZERO;

/* Results of the most recent successful find_para() call. */
private Line *para_head, *para_tail;
private int head_indent, body_indent;

/* some defines for paragraph boundary checking */
#define I_EMPTY -1	/* line "looks" empty (spaces and tabs) */
#define I_PERIOD -2	/* unindented line begins with "." or "%", or with a
			   backslash when the next line does so too */
#define I_BUFEDGE -3	/* line is nonexistent (edge of buffer) */

private int bslash;	/* Nonzero if get_indent finds line starting with backslash */

/* NB: both macros call get_indent(), so they also update `bslash'. */
#define i_blank(lp) (get_indent(lp) < 0)
#define i_bsblank(lp) (i_blank(lp) || bslash)

/*
 * Classify line LP for the paragraph rules.
 *
 * Returns the visual indentation of LP (a space counts one column, a tab
 * advances by TabIncr() of the current column) when the line has text, or
 * one of the negative I_* codes when the line counts as BLANK:
 *	I_BUFEDGE	LP is NULL
 *	I_EMPTY		LP holds nothing but spaces and tabs
 *	I_PERIOD	unindented line starting with '.' or '%', or starting
 *			with a backslash while the next line does so too
 *
 * Side effect: `bslash' is cleared on entry and set nonzero only when the
 * line is unindented and starts with a backslash.
 */
int
get_indent(lp)
Line *lp;
{
	register int indent = 0;
	register char *lbuf;
	register Line *line;

	bslash = 0;
	if ((line = lp) == NULL)
		return I_BUFEDGE;
	lbuf = lcontents(line);
	for (;;) {	/* [TRH] scan line for blanks ourselves */
		if (*lbuf == '\0')
			return I_EMPTY;	/* end of line */
		if (*lbuf == ' ')
			indent++;
		else if (*lbuf == '\t')
			indent += TabIncr(indent);
		else
			break;
		lbuf++;
	}
	if (indent)
		return indent;
	/* indent == 0, so lbuf still points to start of line */
	if (lbuf[0] == '.' || lbuf[0] == '%')
		return I_PERIOD;
	if (lbuf[0] == '\\') {
		/* BH 12/24/85.  Backslash is BLANK only if next line
		   also starts with Backslash. */
		bslash++;
		if (line->l_next && lcontents(line->l_next)[0] == '\\')
			return I_PERIOD;
	}
	return 0;
}

/* Finds the beginning, end and indent of the current paragraph, and sets
   the above global variables (para_head, para_tail, head_indent,
   body_indent).  HOW says how to behave when we're between paragraphs.
   That is, it's either FORWARD or BACKWARD depending on which way we're
   favoring.

   [TRH Jul-91] now returns whether paragraph is found (YES/NO) instead of
   complaining.

   Side effects: sets `exp' to 1, and clobbers `bslash' through the
   get_indent() calls. */

private int find_para __(( int _(how) ));

private int
find_para(how)
{
	register int this_indent;
	int prev_indent, next_indent;
	register Line *this = curline, *head = NULL, *body = NULL, *tail = NULL;

	exp = 1;
	while ((this_indent = get_indent(this)) < 0 /*i_blank(this)*/) {
		/* rule 1 */
		if (how < 0)	/* BACKWARD */
			this = this->l_prev;
		else		/* FORWARD */
			this = this->l_next;
		if (this == NULL)
			return NO;
	}
	/* here: !i_blank(this) */
	if (bslash || (prev_indent = get_indent(this->l_prev)) < 0) {
		/* rule 2: (i_bsblank(this) || i_blank(this->l_prev)) */
		head = this;
		if (!i_bsblank(this->l_next))
			body = this->l_next;
	} else if ((next_indent = get_indent(this->l_next)) < 0 || bslash) {
		/* rule 3: (i_bsblank(this->l_next)) */
		tail = body = this;
	} else if (next_indent == this_indent || prev_indent == this_indent) {
		/* rule 4 */
		body = this;
	} else if (prev_indent > this_indent &&
		   /* next_indent >= 0, implies this->l_next exists */
		   next_indent == get_indent(this->l_next->l_next)) {
		/* rule 6+ */
		/* NOTE(review): the prose rule also asks for NEXT to be
		   indented more than THIS; that is not tested here. */
		head = this;
		body = this->l_next;
	} else {
		/* rule 6 */
		head = (this_indent > prev_indent) ? this : this->l_prev;
		body = head->l_next;
	}

	/* rule 5 -- find the missing parts */
	while (head == NULL) {
		if ((prev_indent = get_indent(this->l_prev)) < 0) {
			/* (i_blank(this->l_prev)) */
			head = this;
		} else if (bslash || prev_indent != this_indent) {
			/* prev_indent >= 0, implies this->l_prev exists */
			/* NOTE(review): this compares the line above PREV
			   with this_indent, not with prev_indent as the
			   prose of rule 5 suggests; kept as found. */
			if (this_indent == get_indent(this->l_prev->l_prev))
				head = this;
			else
				head = this->l_prev;
		} else if ((this = this->l_prev) == NULL) {
			return NO;
		}
	}

	if (body == NULL)	/* this must be a one line paragraph */
		body = head;

	/* TAILRULE: walk down while the indent stays the same. */
	if (tail == NULL && (this_indent = get_indent(tail = body)) >= 0)
		while ((get_indent(tail->l_next) == this_indent) && !bslash)
			/* BH line with backslash is head of next para */
			if ((tail = tail->l_next) == NULL)
				return NO;

#ifdef DEBUG
	/* this "shouldn't happen" */
	/* NOTE(review): pointers are handed to %d conversions here; confirm
	   that complain()'s formatter copes with that on the target. */
	if (tail == NULL || head == NULL || body == NULL)
		complain("BUG! tail(%d),head(%d),body(%d)!", tail, head, body);
#endif
	para_head = head;
	para_tail = tail;
	head_indent = get_indent(head);
	body_indent = get_indent(body);

	return YES;
}

#ifdef RALIGN_FILL
/*
 * Fill out current line, from start to Point, with blanks to the width
 * of "right-margin".  The blank spreading algorithm is stolen from
 * Minix's NROFF (by Stephen L. Browning, W.N. Paul and Bill
 * Rozenkranz) which is in turn based on the design in "Software Tools"
 * by Kernighan and Plauger.  The idea is to cumulate the excess spaces
 * alternately at either side of a line so that no big white holes
 * develop (surely there is a TeXnical term for this...) and the
 * complete paragraph looks better.  We keep track of the visual position
 * and the number of words in `DoJustify' in order to avoid scanning the
 * line again.  This comment is filled using this method.  Maybe you like
 * it...
 */

DEF_INT( "ragged-right-fill", RaggedRight, V_BOOL ) = YES; _IF(def RALIGN_FILL)_IF(def PRIVATE)

/* Direction used for the next padding pass; negated on every call of
   right_align() and reset to FORWARD by DoJustify() when not scrunching. */
private int zigzag = FORWARD;

private void right_align __(( int _(pos), int _(nwords) ));

/*
 * POS is the visual position reached on the current line, NWORDS the
 * number of words on it.  Nothing is inserted when the line already
 * reaches "right-margin" or when it has fewer than two words (no gaps);
 * `zigzag' is flipped in either case.  On return `exp' is 1 and the
 * buffer's character position has been advanced by the number of columns
 * that had to be added.
 */
private void
right_align(pos, nwords)
{
	register Buffer *cb = curbuf;
	register int n_add, ngaps, end_char;

	zigzag = -zigzag;
	if ((n_add = (RMargin - pos)) <= 0 || (ngaps = (nwords - 1)) <= 0)
		return;
	end_char = cb->b_char + n_add;
	if (zigzag > 0)
		cb->b_char = 0;
	do {
		exp = zigzag, ForWord();
		/* ceil(n_add / ngaps) blanks go into this gap */
		exp = (n_add - 1) / ngaps + 1, Insert(' ');
	} while ((n_add -= exp) && --ngaps);
	exp = 1;
	cb->b_char = end_char;
}
#endif /* RALIGN_FILL */

/*
 * Adjust spacing.
 * PRE: curchar points to first blank to adjust, or eolp().
 * POST: curchar point to first non-blank after adjusted blanks.
 * Number of spaces to insert:
 *	when not pointing at blank (or begin of line):	0
 *	after punctuation (? ! . :):			2+
 *	otherwise					1
 * (+ if already followed by two or more blanks, and `sentence-sep-regexp'
 *    does not match a single blank; otherwise 1)
 * note that ) ] } ' " are considered blank during punctuation scan.
 * returns: updated visual position.
 * NOTA BENE: TABS ARE LEFT ALONE.  The routine used to convert tabs to
 * spaces but I found this very inconvenient when I continually lost my
 * tables...
 */

private char sent_end_single_space;

/*
 * One-time setup for a series of do_space() calls:
 *
 * - Don't expand to two spaces after end of sentence if
 *   `sentence-sep-regexp' matches a single space.  This is to accommodate
 *   non-english spacing conventions.
 *
 * - Use `sentence-end-regexp' in order to determine whether we are indeed
 *   at the end of sentence.  Compile it here, so we only do it once.
 */
#define DO_SPACE_SETUP() do { \
	sent_end_single_space = LookingAt(SentSepRE, " A", 0); \
	REcompile(SentEndRE, YES, compbuf); \
	REdirection = BACKWARD; \
} while (0)

private int do_space __(( int _(pos) ));

private int
do_space(pos)
{
	register int diff, newpos = pos;
	register char *cp = &linebuf[curchar], *sp = cp;

	/* skip blanks and tabs, tracking the visual position */
	for (;;) {
		if (*cp == ' ') {
			newpos++;
			cp++;
		} else if (*cp == '\t') {
			newpos += TabIncr(newpos);
			cp++;
		} else
			break;
	}
	/* diff is minus the number of blank characters skipped */
	if ((diff = (sp - cp)) == 0)
		return newpos;
	curchar -= diff;
	if (diff == (pos - newpos)) {	/* i.e. no tabs */
		/* from here until the final addition, newpos counts the
		   blanks we want to end up with */
		newpos = 0;
		if ((cp = sp) > linebuf) {
			newpos++;
			if (diff <= -2 && !sent_end_single_space &&
			    /* See if we are at end of sentence.  At this
			       point `compbuf' contains the pre-compiled
			       value of `sentence-end-regexp'. */
			    re_sindex(linebuf, (int)(cp - linebuf), compbuf) &&
			    REeom == (int)(cp - linebuf))
				newpos++;
		}
		if (diff += newpos) {
			exp_p = NO;
			if ((exp = diff) < 0)
				DelNChar();	/* DoTimes(DelPchar(), diff); - we don't want it saved on the kill ring */
			else /* (diff > 0) */
				Insert(' ');	/* DoTimes(Insert(' '), diff); */
			exp = 1;
		}
		newpos += pos;
	}
	return newpos;
}

/*
 * Fill the text between L1/C1 and L2/C2 (L1/C1 must come first) so that
 * no line extends beyond "right-margin".
 *
 * C1, C2, SCRUNCH and INDENT are implicitly `int' (K&R declaration).
 * INDENT is the indentation given to every line that is started here; a
 * negative value means "use left-margin".  In that case, and only when
 * SCRUNCH is zero and the region starts at or before the first line's
 * indentation, the first line is re-indented to
 * LMargin + (head_indent - body_indent), using the values left behind by
 * find_para().
 * When SCRUNCH is nonzero, a line break inserted here is followed by
 * removal of the line separator if TwoBlank() holds (presumably the
 * auto-fill case -- confirm against the callers).
 *
 * Point is restored afterwards; `exp' is left at 1, `this_cmd' at 0, and
 * do_space() may have cleared `exp_p'.
 */
void
DoJustify(l1, c1, l2, c2, scrunch, indent)
Line *l1, *l2;
{
	register Buffer *cb = curbuf;
	Mark *savedot = MakeMark(cb->b_dot, cb->b_char, FLOATER);
	register Mark *endmark;
	register char *cp, *sp;
	register int pos, prev_char = 0;
#ifdef RALIGN_FILL
	/* prev_pos is handed to right_align() in the "line too long" branch
	   below, which can be reached before any word has fitted on the
	   line; start it at 0 so that path never reads an indeterminate
	   value (right_align() returns early there since nwords is 0). */
	register int nwords = 0, prev_pos = 0;

	if (!scrunch)
		zigzag = FORWARD;	/* so paragraphs are always right-aligned the same way. */
#endif
	exp = 1;
/*-	fixorder(&l1, &c1, &l2, &c2);-*/	/* l1/c1 will be before l2/c2 */
	endmark = MakeMark(l2, c2, FLOATER);	/* [TRH] BEFORE indent!! */

	/* Find indent of first line of region, and adjust it if necessary. */
	SetLine(l1);
	ToIndent();
	if (cb->b_char < c1) {
		/* Region starts after indent, so leave it alone. */
		cb->b_char = c1;
	} else if (!scrunch && indent < 0) {
		/* (indent < 0) means use LMargin, see below. */
		n_indent(LMargin + (head_indent - body_indent));
	}
	if (indent < 0) {
		indent = LMargin;
	}

	DO_SPACE_SETUP();

	/* [TRH] pseudo-code:
	 *
	 * invalidate remembered position
	 * while point < endmark
	 * do
	 *	skip over word
	 *	if rest of line is blank and this is not the last line of region
	 *		if first word on next line does not fit
	 *			fill out if not ragged-right
	 *			advance to next line
	 *			continue
	 *		fi
	 *		append next line
	 *	fi
	 *	if (visual) position <= right margin
	 *		remember point
	 *		if point < endmark
	 *			adjust spacing
	 *		fi
	 *	else
	 *		if remembered position is valid
	 *			set point to remembered position
	 *			invalidate remembered position
	 *		fi
	 *		if not at end-of-line
	 *			fill out if not ragged-right
	 *			break line at point
	 *		fi
	 *	fi
	 * od
	 *
	 * for the sake of efficiency, we keep track of current visual
	 * position ourselves instead of calculating it each time.
	 */

#define end_of_region() (cb->b_dot == endmark->m_line && \
			 cb->b_char >= endmark->m_char)

	WITH_TABLE(FUNDAMENTAL);	/* [TRH] in this table words consist of
					   anything but blanks (and control
					   chars) so we can use isword() */

	pos = calc_pos(linebuf, cb->b_char);

	while (!end_of_region()) {
		cp = &linebuf[cb->b_char];
		sp = cp;
		while (isword(*cp++))
			;	/* skip over word */
		--cp;		/* one too far */
		pos += (cp - sp);
		cb->b_char += (cp - sp);
		if (*cp && !isspace(*cp)) {
			/*
			 * this can only happen if the text contains
			 * weird stuff like control characters.
			 */
			do ++cp; while (*cp && !isspace(*cp));
			/* I'm lost, so recalculate */
			pos = calc_pos(linebuf, cb->b_char = (cp - linebuf));
		}

		if (blnkp(cp) && cb->b_dot != endmark->m_line) {
			/*
			 * lookahead to see if it's worth appending
			 * the next line.  This should save lots of temp.file
			 * accesses when reformatting an already justified
			 * section.  (this only matters on floppy-disk systems,
			 * of course; leave it out if you don't care...)
			 */
#ifndef TINY
			if (pos <= RMargin) {
				char *save = cp;

				if (pos == RMargin)
					goto move_to_next_line;	/* I'm getting lazy at my old age... */
				cp = lcontents(cb->b_dot->l_next);
				while (isspace(*cp++))
					;	/* skip indent */
				sp = --cp;
				while (isword(*cp++))
					;	/* skip over word */
				if (pos + (cp - sp) > RMargin) {
					/* next word (+ space) doesn't fit */
move_to_next_line:
#ifdef RALIGN_FILL
					if (False(RaggedRight)) {
						right_align(pos, nwords + 1);
						nwords = 0;
					}
#endif
					del_whitespace(FORWARD + BACKWARD);
					ForChar();	/* to next line */
					prev_char = 0;	/* reset */
					n_indent(pos = indent);
					continue;
				}
				cp = save;
			}
#endif /* TINY */
			Eol();
			DelNChar();	/* delete line sep. */
			del_whitespace(FORWARD + BACKWARD);	/* and blanks */
			/* NOTE(review): the comment says "spaces" and
			   do_space() only widens a sentence gap when it
			   sees two or more blanks, so this literal may have
			   held two spaces before whitespace was normalized
			   -- confirm against a pristine copy. */
			ins_str(" ", NO);	/* extra spaces */
			cb->b_char = (cp - linebuf);	/* restore curchar */
		}

		if (pos <= RMargin) {
			prev_char = cb->b_char;	/* remember position */
			if (!end_of_region()) {
#ifdef RALIGN_FILL
				prev_pos = pos;
				nwords++;
#endif
				pos = do_space(pos);	/* adjust spacing */
			}
		} else {
			if (prev_char > 0) {
				cb->b_char = prev_char;
				prev_char = 0;
			}
			/*
			 * end-of-line can only occur if this were the last
			 * line of the region, since in other cases we have
			 * already appended the next line (plus some extra
			 * blanks).  Also, end-of-line at the last line of
			 * the region implies end_of_region() (since it is
			 * the farthest possible position on the line), so
			 * there is no point in breaking the line here.
			 * Note also that a valid prev_char implies
			 * NOT end-of-line.
			 */
			if (!eolp(cb)) {
				/*
				 * If we get here, curchar always points to
				 * the first blank after a word.  So break here
				 * and then adjust indent.  This order should
				 * move point to beginning of next line when
				 * auto-filling in the middle of a line.
				 */
#ifdef RALIGN_FILL
				if (False(RaggedRight)) {
					right_align(prev_pos, nwords);
					nwords = 0;
				}
#endif
				LineInsert(1);
				if (scrunch && TwoBlank()) {
					Eol();
					DelNChar();
				}
				n_indent(pos = indent);
				/* TODO: handle correct mode-dependent indent if scrunch. */
			}
		}
	}

	END_TABLE();

	ToMark(savedot);	/* Back to where we were */
	DelMark(endmark);	/* Free up marks */
	DelMark(savedot);
	this_cmd = 0;		/* So everything is under control */
}

/*
 * Move over `exp' paragraphs: forward when it is positive, backward when
 * negative, not at all when zero.  Stops early as soon as find_para()
 * finds no further paragraph.
 */
DEF_CMD( "backward-paragraph", ForPara, NEGATE );
DEF_CMD( "forward-paragraph", ForPara, NO )
{
	register int num;

	if ((num = exp) == 0)
		return;
	if (num < 0) {	/* BACKWARD */
		do {
			/*
			 * skip one char backward in case cursor is at start
			 * of paragraph (it won't harm if at blank line)
			 */
			exp = BACKWARD, ForChar();
			if (!find_para(BACKWARD))
				break;
			SetLine(para_head);
#if NO	/* [TRH] I don't like it... */
			if (i_blank(cb->b_dot->l_prev))
				line_move(BACKWARD);
#endif
		} while (++num);
	} else {	/* FORWARD */
		do {
			if (!find_para(FORWARD))
				break;
			SetLine(para_tail);
			line_move(FORWARD);
		} while (--num);
	}
}

/*
 * Fill the paragraph around point (looking backward when point is between
 * paragraphs).  With a numeric argument (exp_p) the lines are indented to
 * "left-margin", otherwise to the paragraph's own body indent.
 */
DEF_CMD( "fill-paragraph", Justify, EDIT )
{
	if (!find_para(BACKWARD))
		complain((char *)0);
	DoJustify(para_head, 0, para_tail, length(para_tail), NO,
		  (exp_p) ? -1 : body_indent);
}

/*
 * Fill every paragraph (or part of one) that lies within the region.
 * exp_p is sampled once up front because do_space() clears it while
 * filling.
 */
DEF_CMD( "fill-region", RegJustify, EDIT )
{
	Bufpos r[2];
	register Line *l0, *l1, *next;
	register int c0, c1;
	register int swapped;
	register int use_lmargin = exp_p;

	if ((swapped = CurRegion(r)))
		PtToMark();	/* so mark is at end of region... */
	if (True(SaveRegion))
		CopyRegion();	/* so you can undo it */
	l0 = r[0].p_line, c0 = r[0].p_char;
	/* NOTE(review): r[] and l1 are plain line pointers that are kept
	   across DoJustify(), which joins and splits lines; confirm that
	   they cannot go stale. */
	do {
		DotTo(l0, c0);
		if (!find_para(FORWARD))
			break;
		/* clip the paragraph to the region at both ends */
		if (inorder(l0, c0, para_head, 0))
			l0 = para_head, c0 = 0;
		l1 = para_tail, c1 = length(l1);
		if (!inorder(l1, c1, r[1].p_line, r[1].p_char))
			l1 = r[1].p_line, c1 = r[1].p_char;
		next = l1->l_next;
		DoJustify(l0, c0, l1, c1, NO, (use_lmargin) ? -1 : body_indent);
	} while (r[1].p_line != l1 && (l0 = next) != NULL && l0 != r[1].p_line);
	DotTo(r[0].p_line, r[0].p_char);	/* to start of region... */
	if (swapped)
		PtToMark();	/* so dot is where it used to be... */
}

#ifdef CMT_FMT
# if 0	/* [TRH] obsolete... */
void
do_rfill()
{
	Bufpos r[2];

	CurRegion(r);
	DoJustify(r[0].p_line, r[0].p_char, r[1].p_line, r[1].p_char, NO,
		  (exp_p) ? -1 : 0);
}
# endif
#endif /* CMT_FMT */

/*======================================================================
 * $Log: para.c,v $
 * Revision 14.32.0.11 1994/06/03 04:02:02 tom
 * (RegJustify, do_rfill): use CurRegion().
 *
 * Revision 14.32.0.10 1994/05/09 08:47:50 tom
 * ("re.h"): new include; (do_space): use "sentence-end" regexp to determine
 * end-of-sentence, use "sentence-sep" to determine whether to insert one or
 * two spaces after the end of a sentence; (DO_SPACE_SETUP): new support macro.
 *
 * Revision 14.32 1993/06/11 15:48:01 tom
 * (use_lmargin): eliminate static variable;
 * (DoJustify): parameter `indent' < 0 now indicates what `use_lmargin' used to.
 *
 * Revision 14.31 1993/02/15 02:01:49 tom
 * remove (void) casts.
 *
 * Revision 14.30 1993/02/05 14:53:19 tom
 * cleanup whitespace; replace calls to DelWtSpace() with
 * del_whitespace(BACKWARD + FORWARD).
 *
 * Revision 14.26 1992/08/26 23:56:57 tom
 * PRIVATE-ized some Variable defs; add RCS directives.
 *
 */