#include #include #include #include #include "global.h" #include "ztype.h" #define BASE10 1 #define BASE8 2 #define BASE16 3 #define GRANULARITY 256 extern char *next_c; static int tok_flags = 0; static TokenP tok_blocks, next_free_tok, pushback_list; /* alloc_token() -- return space for a Token, either from the free list or from freshly malloc()'ed memory */ TokenP alloc_token() { register TokenP T; register int i; if (!next_free_tok) { /* allocate several Token's at once for efficiency */ T = (TokenP) mallok(GRANULARITY * sizeof (Token)); /* the first pointer is used to chain blocks of Token's together */ T->next = tok_blocks; tok_blocks = T; /* string the rest together to form a free list */ for (i = 1; i < GRANULARITY; i++) { T[i].next = T + i + 1; T[i].pre_ws = T[i].txt = NULL; } T[GRANULARITY - 1].next = NULL; next_free_tok = T + 1; } T = next_free_tok; next_free_tok = T->next; T->hashval = T->val = T->type = T->subtype = T->flags = 0; T->next = NULL; return T; } /* free_token() -- return an allocated Token to the free list */ void free_token(T) TokenP T; { T->next = NULL; free_tlist(T); } /* free_tlist() -- return a list of Token's to the free list */ void free_tlist(T) register TokenP T; { register TokenP T1; for (T1 = T; T; T = T1) { T1 = T->next; free(T->pre_ws); T->pre_ws = NULL; free(T->txt); T->txt = NULL; T->next = next_free_tok; next_free_tok = T; } } /* copy_token() -- return a new Token that is a duplicate of the given token */ TokenP copy_token(T1) TokenP T1; { TokenP T2 = alloc_token(); *T2 = *T1; T2->pre_ws = strdup(T1->pre_ws); T2->txt = strdup(T1->txt); T2->next = NULL; return T2; } /* copy_tlist() -- create a duplicate of a list of Token's */ TokenP copy_tlist(T1) TokenP T1; { Token head; TokenP T2 = &head; for (T2->next = NULL; T1; T1 = T1->next, T2 = T2->next) T2->next = copy_token(T1); return head.next; } /* tok_shutdown() -- free all space allocated for Token's */ void tok_shutdown() { register TokenP T, T1; register int i; for (T1 = T = tok_blocks; T; T = T1) { T1 = T->next; for (i = 1; i < GRANULARITY; i++) { if (T[i].pre_ws) free(T[i].pre_ws); if (T[i].txt) free(T[i].txt); } free(T); } } /* push_tlist() -- "un-read" the list of Token's |T|; token() will return all of these tokens in order before reading another token from the input file */ void push_tlist(T) TokenP T; { register TokenP t; if (!T) return; t = T; while (t->next) t = t->next; t->next = pushback_list; pushback_list = T; } /* mk_eof() -- makes and returns an EOF_ token */ static TokenP mk_eof() { TokenP T = alloc_token(); T->type = EOF_; T->pre_ws = mallok(1); *T->pre_ws = '\0'; T->txt = mallok(1); *T->txt = '\0'; return T; } /* mk_eol() -- makes and returns an EOL token */ TokenP mk_eol(s, n) char *s; int n; { TokenP T = alloc_token(); T->pre_ws = mallok(n + 1); strncpy(T->pre_ws, s, n); T->pre_ws[n] = '\0'; T->txt = mallok(2); T->txt[0] = '\n'; T->txt[1] = '\0'; T->type == EOL; T->subtype = '\n'; return T; } /* mk_stopper() -- makes and returns a STOP token. See expand_tlist() for further information. */ TokenP mk_stopper() { TokenP T = alloc_token(); T->type = STOP; T->pre_ws = mallok(1); *T->pre_ws = '\0'; T->txt = mallok(1); *T->txt = '\0'; return T; } /* mk_unmarker() -- makes and returns a special token that informs the tokenizer to unmark the macro text associated with token |T|. See expand() for further information. */ TokenP mk_unmarker(T) TokenP T; { TokenP T1 = copy_token(T); T1->type = UNMARK; return T1; } /* flush_tokenizer() -- discard all Tokens pushed back by push_tlist() */ void flush_tokenizer() { free_tlist(pushback_list); pushback_list = NULL; } /* number() -- copies from |s| into the token |T| a string of characters denoting an integer or floating-point constant. Returns a pointer to the first uncopied character. */ static char *number(s, T) register char *s; TokenP T; { int numtype = BASE10, fpflag = 0; char *t; T->type = NUMBER; if (*s == '0') { /* check for octal or hexadecimal constant */ if ((s[1] == 'x' || s[1] == 'X') && isxdigit(s[2])) { numtype = BASE16; T->flags |= UNS_VAL; } else if (is_octal(s[1])) { numtype = BASE8; T->flags |= UNS_VAL; } } T->val = strtol(s, &t, 0); s = t; if (numtype != BASE10 || is_isuff(*s)) { /* if we're not in base 10, or the next characters are integer constant suffixes, this can't be a floating-point constant */ while (is_isuff(*s)) { if (*s == 'u' || *s == 'U') T->flags |= UNS_VAL; s++; } return s; } /* check to see if the number is actually floating point */ if (*s == '.') { fpflag = 1; do s++; while (isdigit(*s)); } if (*s == 'e' || *s == 'E') { register char *t = s; t++; if (*t == '-' || *t == '+') t++; if (isdigit(*t)) { fpflag = 1; do t++; while (isdigit(*t)); s = t; } } if (fpflag) { T->type = FP_NUM; if (is_fsuff(*s)) s++; } return s; } /* char_constant() -- copy from |s| into the token |T| a string of characters denoting a character constant. We do not translate escape sequences at this point, though we might need to */ static char *char_constant(s, T) register char *s; TokenP T; { T->type = CHAR_CON; for (; *s; s++) { if (*s == '\'') return s + 1; if (*s == '\\') s++; } error("unterminated character constant"); return s; } /* string_literal() -- copy from |s| into the token |T| a string of characters denoting a string literal. We do not translate escape sequences at this point, though we might need to */ static char *string_literal(s, T) register char *s; TokenP T; { T->type = STR_CON; for (; *s; s++) { if (*s == '"') return s + 1; if (*s == '\\') s++; } error("unterminated string literal"); return s; } /* include_name() -- copy from |s| into the token |T| a string of characters denoting an #include file specifier enclosed in <>. |s| points to the character after the '<'. */ static char *include_name(s, T) register char *s; TokenP T; { T->type = INC_NAM; for (; *s; s++) { if (*s == '>') return s + 1; } error("unterminated include file name"); } /* set_mode() -- set the tokenizer flags to |m| */ void set_mode(m) int m; { tok_flags = m; } /* change_mode() -- twiddle the tokenizer flags; in particular, set the flags specified in |raise| and clear the flags specified in |lower| */ void change_mode(raise, lower) { tok_flags |= raise; tok_flags &= (~lower); } /* get_mode() -- return the current value of the tokenizer flags */ int get_mode() { return tok_flags; } /* xlate_token() -- determines the type of the next preprocessor token in the string pointed to by |s|. Information about the token found is placed in the Token |T|. Returns a pointer to the first character not in the token read. */ static char *xlate_token(s, T) register char *s; TokenP T; { if (is_ctoks(*s)) { char *t; T->hashval = hash_id(s, &t); s = t; T->type = ID; return t; } else if (isdigit(*s)) return number(s, T); else switch (*s++) { case '.': T->subtype = '.'; if (*s == '.' && s[1] == '.') { s += 2; T->type = DONT_CARE; } else if (isdigit(*s)) s = number(s - 1, T); else T->type = DONT_CARE; break; case '#': if (*s == '#') { s++; T->type = TOK_CAT; } else T->type = POUND; break; case '&': T->subtype = '&'; if (*s == '&') { s++; T->type = L_AND_OP; } else if (*s == '=') { s++; T->type = DONT_CARE; } else T->type = B_AND_OP; break; case '|': T->subtype = '|'; if (*s == '|') { s++; T->type = L_OR_OP; } else if (*s == '=') { s++; T->type = DONT_CARE; } else T->type = B_OR_OP; break; case '+': T->subtype = '+'; if (*s == s[-1] || *s == '=') { s++; T->type = DONT_CARE; } else T->type = ADD_OP; break; case '~': T->type = UNARY_OP; T->subtype = '~'; break; case ',': T->type = COMMA; T->subtype = ','; break; case '(': T->type = LPAREN; T->subtype = '('; break; case ')': T->type = RPAREN; T->subtype = ')'; break; case '!': T->subtype = '!'; if (*s == '=') T->type = EQ_OP; else T->type = UNARY_OP; break; case '=': T->subtype = '='; if (*s == '=') T->type = EQ_OP; else T->type = DONT_CARE; break; case '*': case '/': case '%': T->subtype = s[-1]; if (*s == '=') { s++; T->type = DONT_CARE; } else T->type = MUL_OP; break; case '^': T->subtype = '^'; if (*s == '=') { s++; T->type = DONT_CARE; } else T->type = B_XOR_OP; break; case '-': T->subtype = '-'; if (*s == '-' || *s == '=' || *s == '>') { s++; T->type = DONT_CARE; } else T->type = ADD_OP; break; case '<': if (tok_flags & INCLUDE_LINE) { s = include_name(s, T); break; } /* else fall through */ case '>': T->subtype = s[-1]; T->type = REL_OP; if (*s == s[-1]) { s++; T->type = SHIFT_OP; } if (*s == '=') { s++; if (T->type == REL_OP) T->subtype = (T->subtype == '<' ? '(' : ')'); else T->type = DONT_CARE; } break; case '\'': s = char_constant(s, T); break; case '"': s = string_literal(s, T); break; case '[': case ']': case '{': case '}': case ';': case ':': case '?': T->type = DONT_CARE; break; default: T->type = UNKNOWN; } return s; } /* print_token() -- write token |T| to the output file */ void print_token(T) TokenP T; { if (T->type == STOP) bugchk("STOP token in output stream?"); fputs(T->pre_ws, outf); fputs(T->txt, outf); if (T->flags & TRAIL_SPC) fputc(' ', outf); } /* merge_tokens() -- Perform token pasting on Token's |T1| and |T2|. Returns the resulting token. */ TokenP merge_tokens(T1, T2) TokenP T1, T2; { TokenP T = alloc_token(); char *s, *t; T->pre_ws = strdup(T1->pre_ws); T->txt = mallok(strlen(T1->txt) + strlen(T2->txt) + 1); strcpy(T->txt, T1->txt); strcat(T->txt, T2->txt); t = xlate_token(T->txt, T); if (*t != '\0') { warning("Invalid token \"%s\" created by concatenation", t); T->type = UNKNOWN; } return T; } TokenP _one_token() { register char *s = next_c, *t, *u; int n; TokenP T = alloc_token(); t = suck_ws(s, &(T->pre_ws)); if (!t || !*t) { T->txt = mallok(2); T->txt[0] = '\n'; T->txt[1] = '\0'; T->type = EOL; T->subtype = '\n'; next_c = t; return T; } u = xlate_token(t, T); n = u - t; if (T->type == UNKNOWN && w_bad_chars) error("Unrecognized character 0x%02x='%c'", *t, *t); T->txt = mallok(n + 1); strncpy(T->txt, t, n); T->txt[n] = '\0'; next_c = u; return T; } void _tokenize_line() { Token head; TokenP T = &head; head.next = NULL; do { T = T->next = _one_token(); } while (T->type != EOL); push_tlist(head.next); } TokenP token() { TokenP T; register char *s; while (pushback_list) { T = pushback_list; pushback_list = T->next; T->next = NULL; if (T->type == UNMARK) { Macro *M; M = lookup(T->txt, T->hashval); if (!M) bugchk("UNMARK on non-macro token %s", T->txt); if (!(M->flags & MARKED)) bugchk("UNMARK on unmarked macro %s", T->txt); M->flags ^= MARKED; free_token(T); continue; } else { return T; } } /* if we get to here, the pushback list is empty, and we need to read in another line */ next_c = s = getline(); if (!s) return mk_eof(); T = _one_token(); if (T->type == EOL) { return T; } if (T->type != POUND || get_mode() & SLURP) _tokenize_line(); return T; } TokenP exp_token() { TokenP T = token(); Macro *M; if (T->type == ID && !(T->flags & BLUEPAINT) && (M = lookup(T->txt, T->hashval))) { expand(T, M); return exp_token(); } else return T; }