/**********************************************************************\ * * CALCPOS.C * * Calculates firstpos, lastpos and followpos from syntax tree. * * Author: Jarmo Ruuth 15-Mar-1988 * * Copyright (C) 1988-90 by Jarmo Ruuth * May be freely copied for any non-commercial usage. \**********************************************************************/ #include "system.h" #include "dfa.h" /********************************************************************** * * nullable * * Sets nullable TRUE or FALSE for given node. * Nullable is TRUE for such nodes that are the roots of subexpressions * that generate languages that include the empty string. */ static void nullable(REG1 node_t* tnode) { switch (tnode->type) { case EMPTY_ID: tnode->nullable = TRUE; break; case CLASS_ID: case ID: tnode->nullable = FALSE; break; case OR: tnode->nullable = rbuf.tree[tnode->val.next.left].nullable || rbuf.tree[tnode->val.next.right].nullable; break; case CAT: tnode->nullable = rbuf.tree[tnode->val.next.left].nullable && rbuf.tree[tnode->val.next.right].nullable; break; case CLOSURE: tnode->nullable = TRUE; break; } } /********************************************************************** * * firstpos * * Fills firstpos-field in syntax tree. Here we suppose, that all * nodes below current node are already visited (depth-first traversal). * Firstpos is a set of positions that can match the first symbol of a * string generated by the subexpression rooted by node. */ static void firstpos(REG1 int node, REG2 node_t* tnode) { switch (tnode->type) { case EMPTY_ID: set_clear(tnode->firstpos, rbuf.setsize); break; case CLASS_ID: case ID: add_set(tnode->firstpos, node); break; case OR: set_union(tnode->firstpos, rbuf.tree[tnode->val.next.left].firstpos, rbuf.tree[tnode->val.next.right].firstpos, rbuf.setsize); break; case CAT: if (rbuf.tree[tnode->val.next.left].nullable) set_union(tnode->firstpos, rbuf.tree[tnode->val.next.left].firstpos, rbuf.tree[tnode->val.next.right].firstpos, rbuf.setsize); else set_copy(tnode->firstpos, rbuf.tree[tnode->val.next.left].firstpos, rbuf.setsize); break; case CLOSURE: set_copy(tnode->firstpos, rbuf.tree[tnode->val.next.left].firstpos, rbuf.setsize); break; } } /********************************************************************** * * lastpos * * Fills lastpos-field in syntax tree. Here we suppose, that all * nodes below current node are already visited (depth-first traversal). * Lastpos is a set of positions that can match the last symbol of a * string generated by the subexpression rooted by node. */ static void lastpos(int node, REG1 node_t* tnode) { switch (tnode->type) { case EMPTY_ID: set_clear(tnode->lastpos, rbuf.setsize); break; case CLASS_ID: case ID: add_set(tnode->lastpos, node); break; case OR: set_union(tnode->lastpos, rbuf.tree[tnode->val.next.left].lastpos, rbuf.tree[tnode->val.next.right].lastpos, rbuf.setsize); break; case CAT: if (rbuf.tree[tnode->val.next.right].nullable) set_union(tnode->lastpos, rbuf.tree[tnode->val.next.left].lastpos, rbuf.tree[tnode->val.next.right].lastpos, rbuf.setsize); else set_copy(tnode->lastpos, rbuf.tree[tnode->val.next.right].lastpos, rbuf.setsize); break; case CLOSURE: set_copy(tnode->lastpos, rbuf.tree[tnode->val.next.left].lastpos, rbuf.setsize); break; } } /********************************************************************** * * set_pos * * Sets nullable, firstpos and lastpos in whole tree. */ static bool set_pos(void) { REG1 int i; REG2 node_t* tnode = rbuf.tree; for (i = 0; i <= rbuf.root; i++, tnode++) { nullable(tnode); if ((tnode->firstpos = calloc(rbuf.setsize, 1)) == NULL) return FALSE; firstpos(i, tnode); if ((tnode->lastpos = calloc(rbuf.setsize, 1)) == NULL) return FALSE; lastpos(i, tnode); } return TRUE; } /********************************************************************** * * followpos * * Creates followpos array using depth-first traversal. * Followpos(i) is the set of positions j such that there is some input * string ...cd... such that i corresponds to this occurence of c and * j to this occurence of d. So for example for a subexpression cd each * lastpos of c is followed by firstpos of d, or for c* each lastpos of * c is followed by firstpos in c. */ static bool followpos(void) { REG1 int i; REG2 int j; REG3 node_t* inode; REG4 node_t* jnode; REG5 set_t** followptr; if ((followptr = calloc((rbuf.root+1) * sizeof(set_t *), 1)) == NULL) return FALSE; rbuf.follow_array = followptr; inode = rbuf.tree; for (i = 0; i <= rbuf.root; i++, inode++, followptr++) { if (inode->type != ID && inode->type != CLASS_ID ) { *followptr = NULL; continue; } if ((*followptr = calloc(rbuf.setsize, 1)) == NULL) return FALSE; jnode = &rbuf.tree[i+1]; for (j = i+1; j <= rbuf.root; j++, jnode++) { switch (jnode->type) { case CAT: if (in_set(rbuf.tree[jnode->val.next.left]. lastpos, i)) set_union( *followptr, *followptr, rbuf.tree[jnode->val.next.right].firstpos, rbuf.setsize); break; case CLOSURE: if (in_set(jnode->lastpos, i)) set_union( *followptr, *followptr, jnode->firstpos, rbuf.setsize); break; default: break; } } } return TRUE; } /********************************************************************** * * free_posmem * * Frees memory, that is not needed to create transition table. */ static void free_posmem(void) { REG1 int i; REG2 node_t* tnode = &rbuf.tree[rbuf.root]; d_free(&tnode->lastpos); --tnode; for (i = rbuf.root-1; i-- >= 0; tnode--) { d_free(&tnode->firstpos); d_free(&tnode->lastpos); } } #ifdef TEST extern int debug; /********************************************************************** * * print_tree */ static void print_tree(void) { REG1 int i,j; char* char_image(int c); for (i = 0; i <= rbuf.root; i++) { printf("%2d ",i); switch (rbuf.tree[i].type) { case CAT: printf("CAT %2d %2d", rbuf.tree[i].val.next.left, rbuf.tree[i].val.next.right); break; case OR: printf("OR %2d %2d", rbuf.tree[i].val.next.left, rbuf.tree[i].val.next.right); break; case CLOSURE: printf("CLO %2d ", rbuf.tree[i].val.next.left); break; case ID: printf("ID %3s ", char_image(rbuf.tree[i].val.item)); break; case EMPTY_ID: printf("EMPTY_ID "); break; case CLASS_ID: printf("CLASS ID "); break; } printf(" f"); for (j = 0; j <= rbuf.root; j++) if (in_set(rbuf.tree[i].firstpos, j)) printf(" %2d",j); printf("\tl"); for (j = 0; j <= rbuf.root; j++) if (in_set(rbuf.tree[i].lastpos, j)) printf(" %2d",j); printf("\n"); } fflush(stdout); } /********************************************************************** * * print_followpos */ static void print_followpos(void) { REG1 int i, j; printf("Followpositions:\n"); for (i = 0; i <= rbuf.root; i++) { if (rbuf.follow_array[i] == NULL) continue; printf("%2d:", i); for (j = 0; j <= rbuf.root; j++) if (in_set(rbuf.follow_array[i], j)) printf(" %d", j); printf("\n"); } fflush(stdout); } #endif /* TEST */ /********************************************************************** * * calcpos */ bool calcpos(void) { rbuf.setsize = set_bytesize(rbuf.root+1); if (!set_pos() || !followpos()) { free_posmem(); return FALSE; } #ifdef TEST if (debug > 1) { print_tree(); print_followpos(); } #endif free_posmem(); return TRUE; }