/* ------------------------------------------------------------------------ *
 *                            K R 2 A N S I . C                             *
 * ------------------------------------------------------------------------ *
 *                                                          9/23/91
 *  Author: Harry Karayiannis
 *              ______________________E-MAIL:_____________________
 *              INTERnet:               |   BITnet:
 *              harryk@bucsf.bu.edu     |   cscrzcc@buacca.bu.edu
 *              --------------------------------------------------
 *
 * Purpose: Read a C-code source file written in Kernighan-Ritchie's
 *          programming style, and produce ANSI prototypes for all
 *          functions. (Hint: using output redirection you can create
 *          a file with prototypes of all the functions in your K&R C
 *          source-file. The program is automagically putting the word
 *          "extern" in front of the prototype-line, so you can #include
 *          the file in your current source code and compile it with an
 *          ANSI compiler).
 *
 * Usage:   kr2ansi [-p] [ [-r file1] | [usr_def1 ... usr_defn] ] file2
 *              -p       : show parameters
 *              -r file1 : read user-defined types from 'file1'
 *              usr_def1 ... usr_defn :
 *                         user-defined types present in 'file2'
 *              file2    : the file to read C-code from
 *
 * Notes:   This program is useful for people (like myself) who like
 *          K&R programming style, but want to take advantage of the
 *          parameter-checking, during compilation, typically done by
 *          ANSI compilers (e.g. gcc, Turbo-C, Prospero-C, etc).
 *          Others simply want their K&R code to compile under an ANSI
 *          compiler without watching all those "irritating" warnings
 *          saying: "Function call without prototyping".
 *          For either case, kr2ansi can prove very handy.....
 *
 * Caveats: The program is not bug-free. I tried to make it work with
 *          files that follow the programming style presented in
 *          "The C Programming Language (1st Edition)", by Kernighan
 *          and Ritchie.
* In other words, it expects K&R function declarations to * be in the following form: * (NOTE: I write comments with backslashes '\' cause * MWC does not parse nested comments) * * FUNCTION-TYPE FUNCTION-NAME(P1,P2,...,PN) \* comments *\ * \* also you can have ... *\ * \* ...more comments here *\ * PARAMETER-TYPE P1,P2; \* comments *\ * \* ... or here *\ * PARAMETER-TYPE P3; \* more comments *\ * PARAMETER-TYPE P4,...,PN; \* yet more comments *\ * { \* ... or even here *\ * function-body * } * * The program gets confused when it finds 1) comments inside the * parameter list, 2) semicolons placed after comments, 3) comments * that open in one line and close in a different one, 4) curly * braces '{' that do not appear as the first char in a separate * line, and perhaps in some more case I've not figured out yet. * But you shouldn't use such a bad programming style anyways 8*) * (Hint: I think that unix-like preprocessors (like "cpp" in * MWc, gcc, etc) take a flag that causes comments to be * removed from the source-file...check it out) * * However, it is almost guaranteed to work with files that follow * K & R, or Rochkind's programming style (except in one case...) * * * Bugs: kr2ansi fails to find integer functions that do not include * the data-type in the beginning of the function-declaration: * e.g. * main(argc,argv) * int argc; * char *argv[]; * { * ... * } * * The reason is that the program identifies a K&R function header * by checking the first word of the line, which *must* be a valid * data-type (standard or user-defined). If you come up with a bet- * ter algorithm please let me know. 
(Please don't tell me to write * a complete C parser, cause I won't) */ #include /* don't tell me you don't know this one */ #include "kr2ansi.h" /* constants, macros & user-defined types */ char *progname; /* the name of the program */ char *data_types[]={ DATA_TYPES }; /* array of valid data-types */ unsigned options = NONE; /* bit map with command-line options */ /* ======================================================================== * * main * ======================================================================== */ int main(argc, argv) int argc; char *argv[]; { char fname_in[MAXNAME]; /* name of input file */ GLOBAL char *data_types[]; /* array of valid data-types */ void kr2ansi(); BOOLEAN parse(); /* return FALSE on command_line error */ extern void usage(); /* in file: ERROR.C */ progname = argv[0]; /* set the program's name */ if ( !parse(argc, argv, data_types, fname_in) ) usage("[-p] [ [-r file1] | [usr_def1 ... usr_def10] ] file2"); else kr2ansi(fname_in); return(0); } /* ======================================================================== * * parse * ======================================================================== */ BOOLEAN parse(argc, argv, data_types, fname_in) int argc; /* number of args in command-line */ char *argv[]; /* the arguments themselves */ char *data_types[]; /* array of valid data-types */ char *fname_in; /* name of the input file */ { /* Check the validity of the command line, * make valid specified user-defined data-types, * and assign the specified filename to : fname_in */ register int i,u; /* integer counters */ void set_dtypes(); /* read user-def. 
types from a file */ GLOBAL unsigned options; /* bit map with command-line options */ STD_CLIB char *strcpy(); /* part of the standard C-library */ if (argc < 2) /* too few arguments */ return(FALSE); i=1; /* you can add your own options in this loop */ while ( argv[i][0] == '-' ) { if ( !strcmp(argv[i],"-p") ) /* option -p: */ { /* show parameters */ if (i == argc-1) return(FALSE); options |= SHOW_PARA; } else if ( !strcmp(argv[i],"-r") ) /* option -r: */ { /* read user-defined types from file */ if (i != argc-3) return(FALSE); options |= RD_TYPES; set_dtypes(argv[i+1], data_types); } else if ( !strcmp(argv[i],"") ) options |= UNUSED4; /* unused slot */ else if ( !strcmp(argv[i],"") ) options |= UNUSED5; /* unused slot */ else if ( !strcmp(argv[i],"") ) options |= UNUSED6; /* unused slot */ else if ( !strcmp(argv[i],"") ) options |= UNUSED7; /* unused slot */ else if ( !strcmp(argv[i],"") ) options |= UNUSED8; /* unused slot */ else /* invalid option */ return(FALSE); /* return FALSE */ i++; } if ( !(options & RD_TYPES) ) /* get user-def types from command-line */ for (u=i; (u<=N_DT_USR && u 0){ /* no ';' allowed in the header */ if ( header[i] == ';' ) return(FALSE); i--; } /* C o n d i t i o n # 3: */ i = 0; /* a. remove potential comments */ while (header[i] != '\0' && header[i] != '/') i++; header[i] = '\0'; i = strlen(header)-1; /* b. 
last non-blank char must be ')' */ while ( IS_BLANK(header[i]) && i>0 ) i--; if ( header[i] != ')' ) return(FALSE); /* M o d i f y L i n e: */ header[++i] = ';'; /* append a semicolon, and */ header[++i] = '\n'; /* a newline character */ header[++i] = '\0'; return(TRUE); } /* ======================================================================== * * get_1st_word * ======================================================================== */ char *get_1st_word(line) char *line; { /* Return the first word in the parameter string * If the word is longer than MAXWORD, * the function returns W_TOO_LONG * NOTE: * W_TOO_LONG should contain _at most_ MAXWORD chars in the quotes */ char word[MAXLINE]; /* reserve space for MAXLINE chars (see below) */ register int i; /* just a counter */ STD_CLIB char *strcpy(); /* part of the standard C-library */ i=0; /* skip leading blanks and... */ while (line[i] != '\0' && IS_BLANK(line[i])) i++; strcpy(word, &line[i]); /* ...put result in: 'word' */ /* NOTE: 'word' has room for MAXLINE chars */ i=0; /* keep only the 1st word */ while (word[i] != '\0' && !IS_BLANK(word[i])) i++; word[i] = '\0'; /* return 'word' (or W_TOO_LONG) */ return( (strlen(word) > MAXWORD) ? W_TOO_LONG : word ); } /* ======================================================================== * * valid * ======================================================================== */ BOOLEAN valid(word) char *word; { /* Compare the string 'word' against all * strings listed in the array 'data_types'. * Return TRUE on the first match, or FALSE * if 'word' is not listed in 'data_types'. 
*/ register int i; /* just a counter */ GLOBAL char *data_types[]; /* array with valid data-types */ STD_CLIB int strcmp(); /* part of the standard C-library */ for (i=DT_STD1; i<=(LAST_DT_STD+N_DT_USR); i++) if ( !strcmp(word, data_types[i]) ) return(TRUE); return(FALSE); } /* ======================================================================== * * make_ansi * ======================================================================== */ void make_ansi(fp, proto) FILE *fp; /* pointer to file: fname_in */ char *proto; /* ANSI-prototyping to be produced */ { /* This function takes the string 'proto' and converts it to * a valid ANSI function prototype: * * First it inserts the word "extern" into the string 'line', * just in front of the function's data-type. * Then it checks if the parameter-list is empty, via the * function: has_param(), and removes all the chars after the * opening parenthesis (i.e. 'proto' becomes: * "extern fn_type fn_name(" ). * Now, if the parameter list was empty, the string: "void);\n" * is appended to 'proto' and the function returns. Otherwise it * is expecting to find the parameter-declarations between the * function-declaration and the first '{' character. For each * such line, it removes potential comments and checks the first * word (parameter-type) against all valid data-types. * If everything is ok, the parameter-type along with the * parameter itself are appended to 'proto' (via the function: * append_param() ). Otherwise the parameter-type is "assumed" * to be invalid and the string constant: UNDEFINED_DATA_TYPE * is used instead. * BUG: The above algorithm fails when a comment is opened in * one line and is closed in a different line. * The result is that in the output line commented words * will appear as a parameters of type UNDEFINED_DATA_TYPE. 
*/ char *cp; char par_decl[MAXLINE]; /* parameter-declaration line */ char par_type[MAXWORD]; /* parameter's data-type */ char *get_1st_word(); BOOLEAN no_err = TRUE; BOOLEAN append_param(); /* see below */ BOOLEAN has_params(); /* see below */ BOOLEAN valid(); STD_CLIB int strlen(); /* part of the standard C-library */ STD_CLIB char *strcpy(); /* part of the standard C-library */ STD_CLIB char *strcat(); /* part of the standard C-library */ STD_CLIB char *strchr(); /* part of the standard C-library */ strcpy(par_decl, "extern "); /* put "extern" in the front */ strcat(par_decl, proto); /* (note: here we use 'par_decl' */ strcpy(proto, par_decl); /* as temporary string storage) */ if ( !has_params(proto) ) /* check & remove parameter-list */ { strcat(proto, "void);\n"); return; } /* get next 'par_decl' */ while ( fgets(par_decl, MAXLINE, fp) != NULL && no_err) { char *ptr; if ( ptr = strchr(par_decl,'/') ) /* remove potential comments */ *ptr = '\0'; strcpy(par_type, get_1st_word(par_decl)); /* get the parameter-type */ if (par_type[0] == '{') /* if we hit a '{' we stop */ break; if ( !valid(par_type) ) /* check for valid 'par_type' */ strcpy(par_type, UNDEFINED_DATA_TYPE); /* append ANSI parameter-list */ no_err = append_param(proto, par_type, par_decl); } /* The function append_param() converts 'proto' to the following form: */ /* "extern fn_type fn_name(ptype p1, ptype p2, ..., ptype pn, " */ /* So we need to fix 'proto''s tail by 1. erasing the last two chars */ /* (namely ' ' and ',') and 2. 
appending the string: ");\n" */ cp = proto + (strlen(proto)-2); /* go two chars back */ *cp = ')'; *(cp+1) = ';'; *(cp+2) = '\n'; *(cp+3) = '\0'; } /* ======================================================================== * * has_params * ======================================================================== */ BOOLEAN has_params(header) char *header; /* the function-header line */ { /* This function checks if the parameter-list is empty, * and removes all chars after the opening parenthesis. * Its task is to modify 'header' and to return TRUE if * the parameter-list was empty. * NOTICE that the parameter-list is considered empty * when either the char ')' comes right after char '(' * or it consists of white(BLANK) characters. */ char *cp1, *cp2; /* temporary pointers */ BOOLEAN param_yes = FALSE; /* what the function returns */ STD_CLIB char *strchr(); /* part of the standard C-library */ cp1 = cp2 = strchr(header,'('); /* save the start of param-list in cp2 */ if ( *(cp1+1) == ')' ) /* if ')' comes right after '(' */ { /* Remove all charactes coming */ *(cp1+1) = '\0'; /* right after '(', and... */ return(FALSE); /* ...return FALSE */ } /* check for BLANK parameter-list */ while ( *cp2 != '\0' && IS_BLANK(*cp2) ) cp2++; if (*cp2 != ')') param_yes = TRUE; *(cp1+1) = '\0'; return(param_yes); } /* ======================================================================== * * append_param * ======================================================================== */ BOOLEAN append_param(proto, par_type, par_decl) char *proto; /* the output ANSI-prototype */ char *par_type; /* the data-type of the parameter */ char *par_decl; /* the parameter-declaration line */ { /* Get a parameter-declaration line, construct the appropriate * ANSI-prototyped-declaration string, and append it to the ANSI * prototype. * *** I m p o r t a n t *** * 'par_decl' has been ensured (by function make_ansi()), to be * valid (i.e. the 1st word is a valid data-type). 
So the string * coming after the 1st word(='par_type') should be a list of * parameters. BUT if 'par_type' is one of the strings "unsigned", * short" or "long", then the 2nd word might be "int" (which should * not be treated as a parameter, but as part of the parameters' * data-type). Furthermore, 'par_type' may be "register", in which * case we only the 2nd word only (or the string "register" if the * 2nd word is not a valid data-type). */ char *s, *param; char _str[MAXWORD]; register int i; void construct_ANSI_declaration(); BOOLEAN valid(); STD_CLIB int *strcmp(); STD_CLIB char *strcat(), *strtok(); i = 0; /* skip the first word of 'par_decl' */ while (par_decl[i] != '\0' && IS_BLANK(par_decl[i])) i++; while (par_decl[i] != '\0' && !IS_BLANK(par_decl[i])) i++; s = &par_decl[i]; /* check for: register data-type,* * or unsigned/short/long int */ strcpy(_str, get_1st_word(s)); if ( valid(_str) ) { if ( !strcmp(par_type,"register") ) /* handle "register" cases */ strcpy(par_type, _str); else /* handle unsigned/long/short */ { strcat(par_type, " "); /* cat " int" in 'par_type'*/ strcat(par_type, _str); } while( *s != '\0' && IS_BLANK(*s) ) s++; /* skip the 2nd word: "int" */ while( *s != '\0' && !IS_BLANK(*s) ) s++; } /* append ANSI param-declaration to 'proto' */ while ( (param = strtok(s, ",; \t\n")) != NULL ) { if ( strlen(proto) >= MAXLINE-strlen(par_type)-strlen(param)-4 ) { /* error-check for "output line too long" */ strcat(proto, "<...>, "); return(FALSE); } construct_ANSI_declaration(proto, par_type, param); s = (char *)NULL; } return(TRUE); } /* ======================================================================== * * construct_ANSI_declaration * ======================================================================== */ void construct_ANSI_declaration(proto, par_type, param) char *proto; /* the functin prototype */ char *par_type; /* parameter's data-type */ char *param; /* the parameter itself */ { /* Construct the ANSI parameter-declaration, * so it 
can be appended in the parameter-list * (if DONT_SHOW_PARAMETERS is not defined, the * parameter itself is also included) */ register int i; /* just a counter */ GLOBAL unsigned options; /* bit map with command-line options */ STD_CLIB int strlen(); /* part of the standard C-library */ STD_CLIB char *strcat(); /* part of the standard C-library */ strcat(proto, par_type); /* append the parameter's data-type */ strcat(proto, " "); if (options & SHOW_PARA) /* include parameter in parameter-list */ strcat(proto, param); else /* exclude parameter from parameter-list */ { for (i=0; param[i] != '\0'; i++) /* look for any pointers */ if (param[i] == '[' || param[i] == '*') strcat(proto, "*"); } i = strlen(proto)-1; /* separate parameters with ", " */ if ( proto[i] == ' ' ) proto[i] = '\0'; strcat(proto, ", "); }