Mercurial > ~dholland > hg > ag > index.cgi
diff tests/agcl/oldagsrc/ts.syn @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/agcl/oldagsrc/ts.syn Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,859 @@ +{ +/* + AnaGram, a System for Syntax Directed Programming + C Macro preprocessor and parser + + Copyright (c) 1993, Parsifal Software. + All Rights Reserved. + + TS.SYN: Token Scanner Module +*/ + +#include "mpp.h" + + +// context structure for diagnostics + +struct location { unsigned line, column;}; + +} + + +// Configuration section + +[ + context type = location // request context tracking + ~allow macros // function defs for red procs + auto resynch + line numbers // #line statements in output + error trace // build trace on syntax error + ~test range // not necessary + ~declare pcb + ~error frame + subgrammar { + simple token, + expanded token, + initial arg element, + ws, + eol, + macro definition header, + } + parser file name = "#.cpp" +] + + +// Character Set Definitions + +any text char = ~eof - newline - '\\' +ascii = 1..126 +blank = ' ' + '\t' + '\r' + '\f' + '\v' +digit = '0-9' +eof = -1 + 0 +hex digit = '0-9' + 'A-F' + 'a-f' +newline = '\n' +letter = 'a-z' + 'A-Z' + '_' +not punctuation = '#' + blank + letter + digit + '\'' + '"' + newline + '\\' +punctuation = ascii - not punctuation +simple char = ~eof - ('\'' + '\\' + '\n') +string char = ~eof - ('"' + '\\' + '\n') + + +// Grammar, or Start token + +input file $ // Grammar Token + -> [section | eol]/..., eof // Alternating sequence + +eol + -> newline, [newline | space]... + + +// Conditional Compilation Control + +section + -> expanded token... =*scanner_sink << op('\n'); + -> control line + -> conditional block + +conditional block + -> true if section, eol, endif line + -> true if section, eol, skip else section, eol, endif line + -> false if section, eol, endif line + -> false if section, eol, else section, eol, endif line + +true if section + -> true condition + -> true if section, eol, section + -> false if section, eol, true else condition + +false if section + -> false condition + -> false if section, eol, skip section + -> false if section, eol, false else condition + +else section + -> '#', ws?, "else", ws? + -> else section, eol, section + +endif line + -> '#', ws?, "endif", ws? + +skip section + -> skip line + -> skip if section, eol, endif line + +skip if section + -> '#', ws?, {"if" | "ifdef" | "ifndef"}, any text?... + -> skip if section, eol, skip section + -> skip if section, eol, skip else line + +skip else section + -> skip else line + -> skip else section, eol, skip else line + -> skip else section, eol, skip section + +skip else line + -> '#', ws?, "elif", any text?... + -> '#', ws?, "else", ws? + +skip line + -> '#', ws?, [{"define" | "undefine" | "include" | "line" | + "error" | "pragma"}, any text?...] + -> not control mark, any text?... + +any text + -> any text char + -> '\\', ~eof + +not control mark + -> any text char - '#' + -> '\\', ~eof + + +// Conditional Control Lines + +true condition, false condition + -> '#', ws?, "ifdef", ws, name string, ws? =check_defined(1); + -> '#', ws?, "ifndef", ws, name string, ws? =check_defined(0); + -> '#', ws?, if header, expanded token... =eval_if(); + +true else condition, false else condition + -> '#', ws?, else if header, expanded token... =eval_elif(); + +if header + -> "if", ws =init_condition(); + +else if header + -> "elif", ws =init_condition(); + + +// Other Control Lines + +control line + -> include header, expanded token... =include_file(); + -> '#', ws?, "undef", ws, name string, ws? =undefine(); + -> '#', ws?, [{"line" | "error" | "pragma"}, any text?...] + -> macro definition header:id, simple token?... =save_macro_body(id); + +include header + -> '#', ws?, "include" =save_sink << scanner_sink, scanner_sink = &++ta; + + +// Macro Definitions + +(int) macro definition header + -> '#', ws?, "define", ws, name string =init_macro_def(0,0); + -> '#', ws?, "define", ws, name string, + '(', ws?, parameter list:n, ')' =init_macro_def(n,1); + +(int) parameter list + -> =0; + -> names, ws? + +(int) names + -> name string =1; + -> names:n, ws?, ',', ws?, name string =n+1; + + +// Unexpanded text (for macro definitions + +simple token + -> space:c =*scanner_sink << space_op(c); + -> word + -> separator + -> '#' =*scanner_sink << op('#'); + -> qualified real + -> integer constant + +word + -> name string =*scanner_sink << name_token(); + + +// Expanded text + +expanded token + -> expanded word + -> separator + -> space + -> qualified real + -> integer constant + +expanded word + -> variable:t =*scanner_sink << t; + -> simple macro:t =expand(t,0); + -> macro:t, ws? =*scanner_sink << t; + -> macro:t, ws?, '(', ws?, macro arg list:n, ')' =expand(t,n); + -> defined, ws?, '(', ws?, name string, ws?, ')' =*scanner_sink << defined(); + -> defined, ws, name string =*scanner_sink << defined(); + + +// Name classification + +(token) variable, simple macro, macro, defined + -> name string =id_macro(); + + +// Macro Arguments + +(int) macro arg list + -> =0; + -> !save_sink << scanner_sink, scanner_sink = &ta;, macro args:n = + save_sink >> scanner_sink, n; + +(int) macro args + -> !++ta;, arg elements =1; + -> macro args:n, ',', ws?, !++ta;, arg elements =n+1; + +arg elements + -> initial arg element + -> arg elements, arg element + +arg element + -> space:c =*scanner_sink << space_op(c); + -> initial arg element + +initial arg element + -> name string =*scanner_sink << name_token(); + -> qualified real + -> integer constant + -> string literal =*scanner_sink << tkn(STRINGliteral); + -> character constant =*scanner_sink << tkn(CHARACTERconstant); + -> operator + -> punctuation - '(' - ',' - ')':p =*scanner_sink << op(p); + -> nested elements, ')':t =*scanner_sink << op(t); + +nested elements + -> '(':t =*scanner_sink << op(t); + -> nested elements, arg element + -> nested elements, ',':t =*scanner_sink << op(t); + + +// Basic syntactic elements + +separator + -> string literal =*scanner_sink << tkn(STRINGliteral); + -> character constant =*scanner_sink << tkn(CHARACTERconstant); + -> operator + -> punctuation:p =*scanner_sink << op(p); + -> '\\', '\n' + +(int) space + -> blank + -> comment =' '; + +ws = space... + +comment + -> comment head, "*/" + +comment head + -> "/*" + -> comment head, ~eof + +comment, comment head + -> comment head, comment ={if (nest_comments) CHANGE_REDUCTION(comment_head);} + +operator + -> '&', '&' =*scanner_sink << op(ANDAND); + -> '&', '=' =*scanner_sink << op(ANDassign); + -> '-', '>' =*scanner_sink << op(ARROW); + -> '#', '#' =*scanner_sink << op(CONCAT); + -> '-', '-' =*scanner_sink << op(DECR); + -> '/', '=' =*scanner_sink << op(DIVassign); + -> '.', '.', '.' =*scanner_sink << op(ELLIPSIS); + -> '=', '=' =*scanner_sink << op(EQ); + -> '^', '=' =*scanner_sink << op(ERassign); + -> '>', '=' =*scanner_sink << op(GE); + -> '+', '+' =*scanner_sink << op(ICR); + -> '<', '=' =*scanner_sink << op(LE); + -> '<', '<' =*scanner_sink << op(LS); + -> '<', '<', '=' =*scanner_sink << op(LSassign); + -> '%', '=' =*scanner_sink << op(MODassign); + -> '-', '=' =*scanner_sink << op(MINUSassign); + -> '*', '=' =*scanner_sink << op(MULTassign); + -> '!', '=' =*scanner_sink << op(NE); + -> '|', '=' =*scanner_sink << op(ORassign); + -> '|', '|' =*scanner_sink << op(OROR); + -> '+', '=' =*scanner_sink << op(PLUSassign); + -> '>', '>' =*scanner_sink << op(RS); + -> '>', '>', '=' =*scanner_sink << op(RSassign); + + +// Numeric constants + +qualified real + -> real constant, floating qualifier =*scanner_sink << tkn(FLOATconstant); + +real constant + -> real + +floating qualifier + -> + -> 'f' + 'F' =sa << 'F'; + -> 'l' + 'L' =sa << 'L'; + +real + -> simple real + -> simple real, exponent + -> confusion, exponent + -> decimal integer, exponent + +simple real + -> confusion, '.' =sa << '.'; + -> octal integer, '.' + -> decimal integer, '.' =sa << '.'; + -> '.', '0-9':d =++sa << '.' << d; + -> simple real, '0-9':d =sa << d; + +confusion + -> octal integer, '8-9':d =sa << d; + -> confusion, '0-9':d =sa << d; + +exponent + -> 'e' + 'E', '-', '0-9':d =sa << '-' << d; + -> 'e' + 'E', '+'?, '0-9':d =sa << '+' << d; + -> exponent, '0-9':d =sa << d; + +integer qualifier + -> 'u' + 'U' =sa << 'U'; + -> 'l' + 'L' =sa << 'L'; + +integer constant + -> octal constant =*scanner_sink << tkn(OCTconstant); + -> decimal constant =*scanner_sink << tkn(DECconstant); + -> hex constant =*scanner_sink << tkn(HEXconstant); + +octal constant + -> octal integer + -> octal constant, integer qualifier + +octal integer + -> '0' =++sa << '0'; + -> octal integer, '0-7':d =sa << d; + +hex constant + -> hex integer + -> hex constant, integer qualifier + +hex integer + -> '0', 'x' + 'X', hex digit:d =++sa << "0X" << d; + -> hex integer, hex digit:d =sa << d; + +decimal constant + -> decimal integer + -> decimal constant, integer qualifier + +decimal integer + -> '1-9':d =++sa << d; + -> decimal integer, '0-9':d =sa << d; + + +// String Literals and Character Constants + +string literal + -> string chars, '"' =sa << '"'; + +string chars + -> '"' =++sa << '"'; + -> string chars, string char:c =sa << c; + -> string chars, '\\', ~eof - '\n':c =sa << '\\' << c; + -> string chars, '\\', '\n' + + +// Character constants + +character constant + -> simple chars, '\'' =sa << '\''; + +simple chars + -> '\'' =++sa << '\''; + -> simple chars, simple char:c = sa << c; + -> simple chars, '\\', ~eof - '\n': c = sa << '\\' << c; + -> simple chars, '\\', '\n' + + +// Identifiers + +name string + -> letter:c =++sa << c; + -> name string, letter+digit:c =sa << c; + + +{ // Embedded C +#include "array.h" // \AnaGram\classlib\include\array.h +#include "stack.h" // \AnaGram\classlib\include\stack.h +#include <io.h> // If not found, not necessary +#include <sys/types.h> // If not found, not necessary +#include <sys/stat.h> +#include <fcntl.h> + + +// Macro Definitions + +#define SYNTAX_ERROR syntax_error_scanning(PCB.error_message) +#define GET_CONTEXT (CONTEXT.line = PCB.line, CONTEXT.column = PCB.column) +#define GET_INPUT (PCB.input_code = getc(input.file)) +#define PCB input.pcb + + +// Structure Definition + +struct file_descriptor { + char *name; // name of file + FILE *file; // source of input characters + ts_pcb_type pcb; // parser control block for file +}; + + +// Static Data Declarations + +static char *error_modifier = ""; +static file_descriptor input; +static stack<token_sink *> save_sink(5); + + +// Syntax Error Reporting +/* + syntax_error() provides an error diagnostic procedure for those + parsers which are called by the token scanner. error_modifier is set + by expand() so that an error encountered during a macro expansion + will be so described. Otherwise, the diagnostic will not make + sense. + + Since all other parsers are called from reduction procedures, the + line and column number of the token they are dealing with is given + by the context of the token scanner production that is being + reduced. +*/ + +void syntax_error(char *msg) { + printf("%s: Line %d, Column %d: %s%s\n", + input.name, CONTEXT.line, CONTEXT.column, msg, error_modifier); +} + +/* + syntax_error_scanning() provides an error diagnostic procedure for + the token scanner itself. The locus of the error is given by the + current line and column number of the token scan, as given in the + parser control block. +*/ + +static void syntax_error_scanning(char *msg) { + printf("%s: Line %d, Column %d: %s\n", + input.name, PCB.line, PCB.column, msg); +} + + +// Support for Reduction Procedures +/* + name_token() looks up the name string in the string accumulator, + identifies it in the token dictionary, checks to see if it is a + reserved word, and creates a token. +*/ + +static token name_token(void) { + token t; + t.id = NAME; + t.handle = td << sa; + --sa; + if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id; + return t; +} + +/* + op() creates a token for a punctuation character. +*/ + +static token op(unsigned x) { + token t; + t.id = (token_id) x; + t.handle = token_handles[x]; + return t; +} + +/* + space_op() creates a token for a space character. Note that a space + could be a tab, vertical tab, or form feed character as well as a + blank. +*/ + +static token space_op(unsigned x) { + token t; + t.id = (token_id) ' '; + t.handle = token_handles[x]; + return t; +} + +/* + tkn() creates a token with a specified id for the string on the top + of the string accumulator +*/ + +static token tkn(token_id id) { + token t; + t.id = id; + t.handle = td << sa; + --sa; + return t; +} + + +// Macro Processing Procedures + +/* + check_defined() looks up the name on the string accumulator to see if + it is the name of a macro. It then selects a reduction token according + to the outcome of the test and an input flag. +*/ + +static void check_defined(int flag) { + unsigned id = macro_id[td[sa]]; + --sa; + flag ^= id != 0; + if (flag) CHANGE_REDUCTION(false_condition); + else CHANGE_REDUCTION(true_condition); +} + +/* + defined() returns a decimal constant token equal to one or zero + depending on whether the token named on the string accumulator is or + is not defined as a macro +*/ + +static token defined(void) { + unsigned id = macro_id[td[sa]]; + token t; + t.id = DECconstant; + t.handle = id ? one_value : zero_value; + --sa; + return t; +} + +/* + expand() expands and outputs a macro. t.handle is the token dictionary + index of the macro name. n is the number of arguments found. + + Since it is possible that scanner sink is pointing to ta, it is + necessary to pop the expanded macro from ta before passing it on to + scanner_sink. Otherwise, we would have effectively ta << ta, a + situation which causes an infinite loop. +*/ + +static void expand(token t, unsigned n) { + error_modifier = " in macro expansion"; // fix error diagnostic + expand_macro(t,n); // Defined in MAS.SYN + if (size(ta)) { + array<token> x(ta,size(ta) + 1); + --ta; + *scanner_sink << x; + } else --ta; + error_modifier = ""; +} + +/* + Look up the name string on the string accumulator. Determine whether + it is a reserved word, or a simple identifier. Then determine + whether it is the name of a macro. +*/ + +static token id_macro(void) { + token t; + unsigned id; + + t.id = NAME; + t.handle = td << sa; + --sa; + if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id; + + if (if_clause && t.handle == defined_value) { + CHANGE_REDUCTION(defined); + return t; + } + id = macro_id[t.handle]; + if (id == 0) return t; + + if (macro[id].parens) CHANGE_REDUCTION(macro); + else CHANGE_REDUCTION(simple_macro); + return t; +} + +/* + Start a macro definition. This procedure defines all but the body of + the macro. + + nargs is the count of parameters that were found. flag is set if + the macro was defined with parentheses. + + The parameter names are on the string accumulator, with the last + name on the top of the stack, so they must be popped off, identified + and stored in reverse order. + + The name of the macro is beneath the parameter names on the string + accumulator. + + Before returning, this procedure saves the current value of + scanner_sink, increments the level on the token stack and sets + scanner_sink so that subsequent tokens produced by the token scanner + will accumulate on the token stack. These tokens comprise the body + of the macro. When the end of the macro body is encountered, the + procedure save_macro_body will remove them from the token stack and + restore the value of scanner_sink. +*/ + +static int init_macro_def(int nargs, int flag) { + int k; + int id = ++n_macros; + unsigned name; + unsigned *arg_list = nargs ? new unsigned[nargs] : NULL; + + assert(id < N_MACROS); + for (k = nargs; k--;) { + arg_list[k] = td << sa; + --sa; + } + + macro[id].arg_names = arg_list; + macro[id].n_args = nargs; + + macro[id].name = name = td << sa; + --sa; + + macro_id[name] = id; + + macro[id].busy_flag = 0; + macro[id].parens = flag ; + + save_sink << scanner_sink; + scanner_sink = &++ta; + return id; +} + +/* + save_macro_body() finishes the definition of a macro by making a + permanent copy of the token string on the token accumulator. It then + restores the scanner_sink to the value it had when the macro + definition was encountered. +*/ + +static void save_macro_body(int id) { + macro[id].body = size(ta) ? copy(ta) : NULL; + --ta; + save_sink >> scanner_sink; +} + +/* + undefine() deletes the macro definition for the macro whose name is + on the top of the string accumulator. If there is no macro with the + given name, undefine simply returns. + + Otherwise, it frees the storage associated with the macro. It then + fills the resulting hole in the table with the last macro in the + table. The macro_id table is updated appropriately. +*/ + +static void undefine(void) { + unsigned name = td << sa; + int id = macro_id[name]; + --sa; + if (id == 0) return; + macro_id[name] = 0; + if (macro[id].arg_names) delete [] macro[id].arg_names; + if (macro[id].body) delete [] macro[id].body; + macro[id] = macro[n_macros--]; + macro_id[macro[id].name] = id; +} + + +// Include file procedures + +/* + file_name() interprets the file name provided by an #include + statement. If the file name is enclosed in <> brackets it scans the + directory list in paths to try to find the file. If it finds it, it + prefixes the path to the file name. + + If the file name is enclosed in "" quotation marks, file_name() + simply strips the quotation marks. + + If file_name() succeeds, it returns 1 and provides path-name in the + string accumulator, otherwise it returns 0 and nothing in the string + accumulator. + + Note that file name uses a temporary string accumulator, lsa. +*/ + +static int file_name(char *file) { + int c; + int tc; + string_accumulator lsa(100); // for temporary storage of name + + while (*file == ' ') file++; + tc = *file++; + if (tc == '<') tc = '>'; + else if (tc != '"') return 0; + while ((c = *file++) != 0 && c != tc) lsa << c; + if (c != tc) return 0; + if (tc == '>') { + int k, n; + n = size(paths); + for (k = 0; k < n; k++) { + FILE *f; + ++sa << paths[k]; + if (sa[0] != '\\' || sa[0] != '/') sa << '/'; + sa << lsa; + f = fopen(sa,"rt"); + if (f != NULL) { + fclose(f); + return 1; + } + --sa; + } + return 0; + } + ++sa << lsa; + return 1; +} + +/* + include_file() is called in response to a #include statement. + + First, it saves the file_descriptor for the current input. Then it + restores the scanner_sink which was saved prior to accumulating + macro expanded tokens on the token_accumulator. + + When include_file() is called, the argument of the #include + statement exists in the form of tokens on the token accumulator. + These tokens are passed to a token_translator which turns the tokens + into a string on the string accumulator. + + file_name() is then called to distinguish between "" and <> files. + In the latter case, file_name() prefixes a directory path to the name. + The name is then in the string accumulator. + + scan_input() is then called to scan the include file. + + Finally, before returning, the previous file_descriptor is restored. +*/ + +static void include_file(void) { + file_descriptor save_input = input; // save input state + int flag; + + save_sink >> scanner_sink; // restore scanner_sink + + token_translator tt(&++sa); + tt << ta; // recover string from tokens + --ta; // discard token string + + array<char> file(sa, size(sa)+1); // local copy of string + --sa; + + flag = file_name(file); + + if (!flag) { + fprintf(stderr, "Bad include file name: %s\n", (char *) file); + return; + } + array<char> path(sa, size(sa) + 1); + --sa; + scan_input(path); // recursive call to ts() + input = save_input; // restore input state + return; +} + + +// Conditional compilation procedures + +/* + init_condition() prepares for evaluation the condition expression in + #if and #elif statements. + + It protects scanner_sink by pushing it onto the save_sink stack. + Then it resets the expression evaluatior, condition, and sets + scanner_sink to point to it. + + Finally it sets the if_clause flag so that defined() will be handled + properly. +*/ + +static void init_condition(void) { + save_sink << scanner_sink; + scanner_sink = &reset(condition); + if_clause = 1; +} + +/* + eval_condition() is called to deal with #if and #elif statements. The + init_condition() procedure has redirected scanner output to the + expression evaluator, so eval_condition() restores the previous + scanner destination. + + It then sends an eof token to the expression evaluator, resets + if_clause and reads the value of the condition. Remember that + (long) condition returns the value of the expression. +*/ + +static int eval_condition(void) { + save_sink >> scanner_sink; + condition << op(0); // eof to exp evaluator + if_clause = 0; + return condition != 0L; +} + +/* + In eval_if() and eval_elif() note the use of CHANGE_REDUCTION to + select the appropriate reduction token depending on the outcome of + the condition. +*/ + +static void eval_elif(void) { + if (eval_condition()) CHANGE_REDUCTION(true_else_condition); + else CHANGE_REDUCTION(false_else_condition); +} + +static void eval_if(void) { + if (eval_condition()) CHANGE_REDUCTION(true_condition); + else CHANGE_REDUCTION(false_condition); +} + + +// Do token scan + +/* + scan_input() + 1) opens the specified file, if possible + 2) calls the parser + 3) closes the input file +*/ + +void scan_input(char *path) { + input.file = fopen(path, "rt"); + input.name = path; + if (input.file == NULL) { + fprintf(stderr,"Cannot open %s\n", (char *) path); + return; + } + ts(); + fclose(input.file); +} + +} // End of Embedded C