Mercurial > ~dholland > hg > ag > index.cgi
view tests/agcl/oldagsrc/ts.syn @ 8:ec2b657edf13
Add explicit lint-comment-style fallthrough annotations.
GCC now assumes that if you don't have these you're making a mistake,
which is annoying.
XXX: This changeset updates the AG output files only (by hand) and is
XXX: abusive - rebuilding them will erase the change. However, I need
XXX: to get things to build before I can try to get AG to issue the
XXX: annotations itself, so this seems like a reasonable expedient.
author | David A. Holland |
---|---|
date | Mon, 30 May 2022 23:51:43 -0400 |
parents | 13d2b8934445 |
children |
line wrap: on
line source
{ /* AnaGram, a System for Syntax Directed Programming C Macro preprocessor and parser Copyright (c) 1993, Parsifal Software. All Rights Reserved. TS.SYN: Token Scanner Module */ #include "mpp.h" // context structure for diagnostics struct location { unsigned line, column;}; } // Configuration section [ context type = location // request context tracking ~allow macros // function defs for red procs auto resynch line numbers // #line statements in output error trace // build trace on syntax error ~test range // not necessary ~declare pcb ~error frame subgrammar { simple token, expanded token, initial arg element, ws, eol, macro definition header, } parser file name = "#.cpp" ] // Character Set Definitions any text char = ~eof - newline - '\\' ascii = 1..126 blank = ' ' + '\t' + '\r' + '\f' + '\v' digit = '0-9' eof = -1 + 0 hex digit = '0-9' + 'A-F' + 'a-f' newline = '\n' letter = 'a-z' + 'A-Z' + '_' not punctuation = '#' + blank + letter + digit + '\'' + '"' + newline + '\\' punctuation = ascii - not punctuation simple char = ~eof - ('\'' + '\\' + '\n') string char = ~eof - ('"' + '\\' + '\n') // Grammar, or Start token input file $ // Grammar Token -> [section | eol]/..., eof // Alternating sequence eol -> newline, [newline | space]... // Conditional Compilation Control section -> expanded token... =*scanner_sink << op('\n'); -> control line -> conditional block conditional block -> true if section, eol, endif line -> true if section, eol, skip else section, eol, endif line -> false if section, eol, endif line -> false if section, eol, else section, eol, endif line true if section -> true condition -> true if section, eol, section -> false if section, eol, true else condition false if section -> false condition -> false if section, eol, skip section -> false if section, eol, false else condition else section -> '#', ws?, "else", ws? -> else section, eol, section endif line -> '#', ws?, "endif", ws? skip section -> skip line -> skip if section, eol, endif line skip if section -> '#', ws?, {"if" | "ifdef" | "ifndef"}, any text?... -> skip if section, eol, skip section -> skip if section, eol, skip else line skip else section -> skip else line -> skip else section, eol, skip else line -> skip else section, eol, skip section skip else line -> '#', ws?, "elif", any text?... -> '#', ws?, "else", ws? skip line -> '#', ws?, [{"define" | "undefine" | "include" | "line" | "error" | "pragma"}, any text?...] -> not control mark, any text?... any text -> any text char -> '\\', ~eof not control mark -> any text char - '#' -> '\\', ~eof // Conditional Control Lines true condition, false condition -> '#', ws?, "ifdef", ws, name string, ws? =check_defined(1); -> '#', ws?, "ifndef", ws, name string, ws? =check_defined(0); -> '#', ws?, if header, expanded token... =eval_if(); true else condition, false else condition -> '#', ws?, else if header, expanded token... =eval_elif(); if header -> "if", ws =init_condition(); else if header -> "elif", ws =init_condition(); // Other Control Lines control line -> include header, expanded token... =include_file(); -> '#', ws?, "undef", ws, name string, ws? =undefine(); -> '#', ws?, [{"line" | "error" | "pragma"}, any text?...] -> macro definition header:id, simple token?... =save_macro_body(id); include header -> '#', ws?, "include" =save_sink << scanner_sink, scanner_sink = &++ta; // Macro Definitions (int) macro definition header -> '#', ws?, "define", ws, name string =init_macro_def(0,0); -> '#', ws?, "define", ws, name string, '(', ws?, parameter list:n, ')' =init_macro_def(n,1); (int) parameter list -> =0; -> names, ws? (int) names -> name string =1; -> names:n, ws?, ',', ws?, name string =n+1; // Unexpanded text (for macro definitions simple token -> space:c =*scanner_sink << space_op(c); -> word -> separator -> '#' =*scanner_sink << op('#'); -> qualified real -> integer constant word -> name string =*scanner_sink << name_token(); // Expanded text expanded token -> expanded word -> separator -> space -> qualified real -> integer constant expanded word -> variable:t =*scanner_sink << t; -> simple macro:t =expand(t,0); -> macro:t, ws? =*scanner_sink << t; -> macro:t, ws?, '(', ws?, macro arg list:n, ')' =expand(t,n); -> defined, ws?, '(', ws?, name string, ws?, ')' =*scanner_sink << defined(); -> defined, ws, name string =*scanner_sink << defined(); // Name classification (token) variable, simple macro, macro, defined -> name string =id_macro(); // Macro Arguments (int) macro arg list -> =0; -> !save_sink << scanner_sink, scanner_sink = &ta;, macro args:n = save_sink >> scanner_sink, n; (int) macro args -> !++ta;, arg elements =1; -> macro args:n, ',', ws?, !++ta;, arg elements =n+1; arg elements -> initial arg element -> arg elements, arg element arg element -> space:c =*scanner_sink << space_op(c); -> initial arg element initial arg element -> name string =*scanner_sink << name_token(); -> qualified real -> integer constant -> string literal =*scanner_sink << tkn(STRINGliteral); -> character constant =*scanner_sink << tkn(CHARACTERconstant); -> operator -> punctuation - '(' - ',' - ')':p =*scanner_sink << op(p); -> nested elements, ')':t =*scanner_sink << op(t); nested elements -> '(':t =*scanner_sink << op(t); -> nested elements, arg element -> nested elements, ',':t =*scanner_sink << op(t); // Basic syntactic elements separator -> string literal =*scanner_sink << tkn(STRINGliteral); -> character constant =*scanner_sink << tkn(CHARACTERconstant); -> operator -> punctuation:p =*scanner_sink << op(p); -> '\\', '\n' (int) space -> blank -> comment =' '; ws = space... comment -> comment head, "*/" comment head -> "/*" -> comment head, ~eof comment, comment head -> comment head, comment ={if (nest_comments) CHANGE_REDUCTION(comment_head);} operator -> '&', '&' =*scanner_sink << op(ANDAND); -> '&', '=' =*scanner_sink << op(ANDassign); -> '-', '>' =*scanner_sink << op(ARROW); -> '#', '#' =*scanner_sink << op(CONCAT); -> '-', '-' =*scanner_sink << op(DECR); -> '/', '=' =*scanner_sink << op(DIVassign); -> '.', '.', '.' =*scanner_sink << op(ELLIPSIS); -> '=', '=' =*scanner_sink << op(EQ); -> '^', '=' =*scanner_sink << op(ERassign); -> '>', '=' =*scanner_sink << op(GE); -> '+', '+' =*scanner_sink << op(ICR); -> '<', '=' =*scanner_sink << op(LE); -> '<', '<' =*scanner_sink << op(LS); -> '<', '<', '=' =*scanner_sink << op(LSassign); -> '%', '=' =*scanner_sink << op(MODassign); -> '-', '=' =*scanner_sink << op(MINUSassign); -> '*', '=' =*scanner_sink << op(MULTassign); -> '!', '=' =*scanner_sink << op(NE); -> '|', '=' =*scanner_sink << op(ORassign); -> '|', '|' =*scanner_sink << op(OROR); -> '+', '=' =*scanner_sink << op(PLUSassign); -> '>', '>' =*scanner_sink << op(RS); -> '>', '>', '=' =*scanner_sink << op(RSassign); // Numeric constants qualified real -> real constant, floating qualifier =*scanner_sink << tkn(FLOATconstant); real constant -> real floating qualifier -> -> 'f' + 'F' =sa << 'F'; -> 'l' + 'L' =sa << 'L'; real -> simple real -> simple real, exponent -> confusion, exponent -> decimal integer, exponent simple real -> confusion, '.' =sa << '.'; -> octal integer, '.' -> decimal integer, '.' =sa << '.'; -> '.', '0-9':d =++sa << '.' << d; -> simple real, '0-9':d =sa << d; confusion -> octal integer, '8-9':d =sa << d; -> confusion, '0-9':d =sa << d; exponent -> 'e' + 'E', '-', '0-9':d =sa << '-' << d; -> 'e' + 'E', '+'?, '0-9':d =sa << '+' << d; -> exponent, '0-9':d =sa << d; integer qualifier -> 'u' + 'U' =sa << 'U'; -> 'l' + 'L' =sa << 'L'; integer constant -> octal constant =*scanner_sink << tkn(OCTconstant); -> decimal constant =*scanner_sink << tkn(DECconstant); -> hex constant =*scanner_sink << tkn(HEXconstant); octal constant -> octal integer -> octal constant, integer qualifier octal integer -> '0' =++sa << '0'; -> octal integer, '0-7':d =sa << d; hex constant -> hex integer -> hex constant, integer qualifier hex integer -> '0', 'x' + 'X', hex digit:d =++sa << "0X" << d; -> hex integer, hex digit:d =sa << d; decimal constant -> decimal integer -> decimal constant, integer qualifier decimal integer -> '1-9':d =++sa << d; -> decimal integer, '0-9':d =sa << d; // String Literals and Character Constants string literal -> string chars, '"' =sa << '"'; string chars -> '"' =++sa << '"'; -> string chars, string char:c =sa << c; -> string chars, '\\', ~eof - '\n':c =sa << '\\' << c; -> string chars, '\\', '\n' // Character constants character constant -> simple chars, '\'' =sa << '\''; simple chars -> '\'' =++sa << '\''; -> simple chars, simple char:c = sa << c; -> simple chars, '\\', ~eof - '\n': c = sa << '\\' << c; -> simple chars, '\\', '\n' // Identifiers name string -> letter:c =++sa << c; -> name string, letter+digit:c =sa << c; { // Embedded C #include "array.h" // \AnaGram\classlib\include\array.h #include "stack.h" // \AnaGram\classlib\include\stack.h #include <io.h> // If not found, not necessary #include <sys/types.h> // If not found, not necessary #include <sys/stat.h> #include <fcntl.h> // Macro Definitions #define SYNTAX_ERROR syntax_error_scanning(PCB.error_message) #define GET_CONTEXT (CONTEXT.line = PCB.line, CONTEXT.column = PCB.column) #define GET_INPUT (PCB.input_code = getc(input.file)) #define PCB input.pcb // Structure Definition struct file_descriptor { char *name; // name of file FILE *file; // source of input characters ts_pcb_type pcb; // parser control block for file }; // Static Data Declarations static char *error_modifier = ""; static file_descriptor input; static stack<token_sink *> save_sink(5); // Syntax Error Reporting /* syntax_error() provides an error diagnostic procedure for those parsers which are called by the token scanner. error_modifier is set by expand() so that an error encountered during a macro expansion will be so described. Otherwise, the diagnostic will not make sense. Since all other parsers are called from reduction procedures, the line and column number of the token they are dealing with is given by the context of the token scanner production that is being reduced. */ void syntax_error(char *msg) { printf("%s: Line %d, Column %d: %s%s\n", input.name, CONTEXT.line, CONTEXT.column, msg, error_modifier); } /* syntax_error_scanning() provides an error diagnostic procedure for the token scanner itself. The locus of the error is given by the current line and column number of the token scan, as given in the parser control block. */ static void syntax_error_scanning(char *msg) { printf("%s: Line %d, Column %d: %s\n", input.name, PCB.line, PCB.column, msg); } // Support for Reduction Procedures /* name_token() looks up the name string in the string accumulator, identifies it in the token dictionary, checks to see if it is a reserved word, and creates a token. */ static token name_token(void) { token t; t.id = NAME; t.handle = td << sa; --sa; if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id; return t; } /* op() creates a token for a punctuation character. */ static token op(unsigned x) { token t; t.id = (token_id) x; t.handle = token_handles[x]; return t; } /* space_op() creates a token for a space character. Note that a space could be a tab, vertical tab, or form feed character as well as a blank. */ static token space_op(unsigned x) { token t; t.id = (token_id) ' '; t.handle = token_handles[x]; return t; } /* tkn() creates a token with a specified id for the string on the top of the string accumulator */ static token tkn(token_id id) { token t; t.id = id; t.handle = td << sa; --sa; return t; } // Macro Processing Procedures /* check_defined() looks up the name on the string accumulator to see if it is the name of a macro. It then selects a reduction token according to the outcome of the test and an input flag. */ static void check_defined(int flag) { unsigned id = macro_id[td[sa]]; --sa; flag ^= id != 0; if (flag) CHANGE_REDUCTION(false_condition); else CHANGE_REDUCTION(true_condition); } /* defined() returns a decimal constant token equal to one or zero depending on whether the token named on the string accumulator is or is not defined as a macro */ static token defined(void) { unsigned id = macro_id[td[sa]]; token t; t.id = DECconstant; t.handle = id ? one_value : zero_value; --sa; return t; } /* expand() expands and outputs a macro. t.handle is the token dictionary index of the macro name. n is the number of arguments found. Since it is possible that scanner sink is pointing to ta, it is necessary to pop the expanded macro from ta before passing it on to scanner_sink. Otherwise, we would have effectively ta << ta, a situation which causes an infinite loop. */ static void expand(token t, unsigned n) { error_modifier = " in macro expansion"; // fix error diagnostic expand_macro(t,n); // Defined in MAS.SYN if (size(ta)) { array<token> x(ta,size(ta) + 1); --ta; *scanner_sink << x; } else --ta; error_modifier = ""; } /* Look up the name string on the string accumulator. Determine whether it is a reserved word, or a simple identifier. Then determine whether it is the name of a macro. */ static token id_macro(void) { token t; unsigned id; t.id = NAME; t.handle = td << sa; --sa; if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id; if (if_clause && t.handle == defined_value) { CHANGE_REDUCTION(defined); return t; } id = macro_id[t.handle]; if (id == 0) return t; if (macro[id].parens) CHANGE_REDUCTION(macro); else CHANGE_REDUCTION(simple_macro); return t; } /* Start a macro definition. This procedure defines all but the body of the macro. nargs is the count of parameters that were found. flag is set if the macro was defined with parentheses. The parameter names are on the string accumulator, with the last name on the top of the stack, so they must be popped off, identified and stored in reverse order. The name of the macro is beneath the parameter names on the string accumulator. Before returning, this procedure saves the current value of scanner_sink, increments the level on the token stack and sets scanner_sink so that subsequent tokens produced by the token scanner will accumulate on the token stack. These tokens comprise the body of the macro. When the end of the macro body is encountered, the procedure save_macro_body will remove them from the token stack and restore the value of scanner_sink. */ static int init_macro_def(int nargs, int flag) { int k; int id = ++n_macros; unsigned name; unsigned *arg_list = nargs ? new unsigned[nargs] : NULL; assert(id < N_MACROS); for (k = nargs; k--;) { arg_list[k] = td << sa; --sa; } macro[id].arg_names = arg_list; macro[id].n_args = nargs; macro[id].name = name = td << sa; --sa; macro_id[name] = id; macro[id].busy_flag = 0; macro[id].parens = flag ; save_sink << scanner_sink; scanner_sink = &++ta; return id; } /* save_macro_body() finishes the definition of a macro by making a permanent copy of the token string on the token accumulator. It then restores the scanner_sink to the value it had when the macro definition was encountered. */ static void save_macro_body(int id) { macro[id].body = size(ta) ? copy(ta) : NULL; --ta; save_sink >> scanner_sink; } /* undefine() deletes the macro definition for the macro whose name is on the top of the string accumulator. If there is no macro with the given name, undefine simply returns. Otherwise, it frees the storage associated with the macro. It then fills the resulting hole in the table with the last macro in the table. The macro_id table is updated appropriately. */ static void undefine(void) { unsigned name = td << sa; int id = macro_id[name]; --sa; if (id == 0) return; macro_id[name] = 0; if (macro[id].arg_names) delete [] macro[id].arg_names; if (macro[id].body) delete [] macro[id].body; macro[id] = macro[n_macros--]; macro_id[macro[id].name] = id; } // Include file procedures /* file_name() interprets the file name provided by an #include statement. If the file name is enclosed in <> brackets it scans the directory list in paths to try to find the file. If it finds it, it prefixes the path to the file name. If the file name is enclosed in "" quotation marks, file_name() simply strips the quotation marks. If file_name() succeeds, it returns 1 and provides path-name in the string accumulator, otherwise it returns 0 and nothing in the string accumulator. Note that file name uses a temporary string accumulator, lsa. */ static int file_name(char *file) { int c; int tc; string_accumulator lsa(100); // for temporary storage of name while (*file == ' ') file++; tc = *file++; if (tc == '<') tc = '>'; else if (tc != '"') return 0; while ((c = *file++) != 0 && c != tc) lsa << c; if (c != tc) return 0; if (tc == '>') { int k, n; n = size(paths); for (k = 0; k < n; k++) { FILE *f; ++sa << paths[k]; if (sa[0] != '\\' || sa[0] != '/') sa << '/'; sa << lsa; f = fopen(sa,"rt"); if (f != NULL) { fclose(f); return 1; } --sa; } return 0; } ++sa << lsa; return 1; } /* include_file() is called in response to a #include statement. First, it saves the file_descriptor for the current input. Then it restores the scanner_sink which was saved prior to accumulating macro expanded tokens on the token_accumulator. When include_file() is called, the argument of the #include statement exists in the form of tokens on the token accumulator. These tokens are passed to a token_translator which turns the tokens into a string on the string accumulator. file_name() is then called to distinguish between "" and <> files. In the latter case, file_name() prefixes a directory path to the name. The name is then in the string accumulator. scan_input() is then called to scan the include file. Finally, before returning, the previous file_descriptor is restored. */ static void include_file(void) { file_descriptor save_input = input; // save input state int flag; save_sink >> scanner_sink; // restore scanner_sink token_translator tt(&++sa); tt << ta; // recover string from tokens --ta; // discard token string array<char> file(sa, size(sa)+1); // local copy of string --sa; flag = file_name(file); if (!flag) { fprintf(stderr, "Bad include file name: %s\n", (char *) file); return; } array<char> path(sa, size(sa) + 1); --sa; scan_input(path); // recursive call to ts() input = save_input; // restore input state return; } // Conditional compilation procedures /* init_condition() prepares for evaluation the condition expression in #if and #elif statements. It protects scanner_sink by pushing it onto the save_sink stack. Then it resets the expression evaluatior, condition, and sets scanner_sink to point to it. Finally it sets the if_clause flag so that defined() will be handled properly. */ static void init_condition(void) { save_sink << scanner_sink; scanner_sink = &reset(condition); if_clause = 1; } /* eval_condition() is called to deal with #if and #elif statements. The init_condition() procedure has redirected scanner output to the expression evaluator, so eval_condition() restores the previous scanner destination. It then sends an eof token to the expression evaluator, resets if_clause and reads the value of the condition. Remember that (long) condition returns the value of the expression. */ static int eval_condition(void) { save_sink >> scanner_sink; condition << op(0); // eof to exp evaluator if_clause = 0; return condition != 0L; } /* In eval_if() and eval_elif() note the use of CHANGE_REDUCTION to select the appropriate reduction token depending on the outcome of the condition. */ static void eval_elif(void) { if (eval_condition()) CHANGE_REDUCTION(true_else_condition); else CHANGE_REDUCTION(false_else_condition); } static void eval_if(void) { if (eval_condition()) CHANGE_REDUCTION(true_condition); else CHANGE_REDUCTION(false_condition); } // Do token scan /* scan_input() 1) opens the specified file, if possible 2) calls the parser 3) closes the input file */ void scan_input(char *path) { input.file = fopen(path, "rt"); input.name = path; if (input.file == NULL) { fprintf(stderr,"Cannot open %s\n", (char *) path); return; } ts(); fclose(input.file); } } // End of Embedded C