Mercurial > ~dholland > hg > ag > index.cgi
view tests/agcl/oldagsrc/pgg24ts.syn @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line source
{/* AnaGram Syntax Analyzer. Copyright (c) Jerome T. Holland, 1989, 1990, 1991, 1992 All Rights Reserved. */

/*
 * C prologue: headers and module id used by the embedded C code and the
 * reduction procedures in this syntax file.
 */

//#define INCLUDE_LOGGING

#include "assert.h"   //ASSERT
#include "config.h"
#include "csexp.h"
#include "data.h"     //DATA
#include "error.h"
#include "log.hpp"
#include "myalloc.h"
#include "rproc.h"
#include "stk.h"      //STK
#include "symbol.h"
#include "token.h"
#include "tree.h"
#include "tsd.h"
#include "ut.h"       //UT

#include <ctype.h>    //CTYPE
#include <stdio.h>    //STDIO
#include <stdlib.h>   //STDLIB
#include <string.h>   //STRING

#include "pgg24.h"    //PGG

#undef  MODULE
#define MODULE 3
}

// Configuration section: one parameter per line.
[
  auto resynch
  context type = cint
  default reductions
  ~declare pcb
  diagnose errors
  lines and columns
  error frame
  line numbers
  //~allow macros
  nest comments
  parser stack size = 128
  extend pcb {
    IPrivateResource mutex;
    int x;
    int y;
    int widget(int x) { return x; }
  }
  wrapper {Expression}
  pointer input
  ~test range
  token names
  parser name = pgg
  test file mask = "*.syn"
  //reentrant parser
  distinguish keywords {'.'}
  parser file name = "#.cpp"
]

// Character set definitions.
any digit          = digit + hex letter
anything           = ~eof
backslash          = '\\'
blank char         = ' ' + tab
c literal elem     = ~(single quote + double quote + eol chars + backslash + eof)
digit              = '0-9'
double quote       = '"'
eof                = 0 + 26
eol chars          = carriage return + newline
hex letter         = 'a-f' + 'A-F'
letter             = 'a-z' + 'A-Z' + '_'
newline            = '\n'
nonoctal digit     = any digit - octal digit
octal digit        = '0-7'
carriage return= '\r'
simple c char      = ~('{' + '}' + single quote + double quote + eof)
simple string char = ~eof - (any digit + double quote + backslash + eol chars)
single quote       = '\''
tab                = '\t'
vertical space     = '\f' + '\v'

// Punctuation tokens; each one also absorbs the white space that follows it.
arrow
 -> "->", blank?...

comma
 -> ',', space?

ellipsis
 -> "...", blank?...

equals
 -> '=', space?

bang
 -> '!', space?

left brace
 -> '{', space?

left bracket
 -> '[', space?

left parenthesis
 -> '(', space?

minus
 -> '-', space?

plus
 -> '+', space?

right bracket
 -> ']', blank?...

right brace
 -> '}', blank?...

right parenthesis
 -> ')', blank?...

right quote
 -> single quote, blank?...

tilde
 -> '~', space?

vertical bar
 -> '|', space?

// Top level: a syntax file is a sequence of statements terminated by eof.
(void) syntax definition $
 -> blank?..., end of line?, complete statements, statement?, eof

complete statements
 -> production:t, end of line                       =production(t);
 -> definition, end of line
 -> embedded c, end of line                         =nPrologueSegments = 1;
 -> configuration section, end of line
 -> complete statements, complete statement

(void) complete statement
 -> production:t, end of line                       =production(t);
 -> simple statement, end of line

(void) statement
 -> production:t                                    =production(t);
 -> simple statement

(int) production
 -> left hand side:type                             =iws(),type;
 -> left hand side, right hand side
 -> production:type, end of line, right hand side   =concat_list(),type;
 -> production, end of line, additional rule spec...

(void) right hand side
 -> arrow, rule specs

(void) simple statement
 -> definition
 -> embedded c
 -> configuration section

(int) left hand side
 -> token name list                                 =0;
 -> type definition:d, token name list              =head_list_3(d);

(void) token name list  //left hand side of productions
 -> token name:x                                    =sws(head_list_2(x));
 -> token name list, comma, token name:x            =aws(head_list_2(x));

(int) token name
 -> name                                            =0;
 -> name, '$', blank?...                            =1;

(int) type definition
 -> left parenthesis, data type, ')', space?        =Cast::create();

// Rule specifications inside virtual productions.
(void) proper vp rule specs
 -> vp rule spec:n                                  =sws(n);
 -> proper vp rule specs, additional vp rule spec

(void) additional vp rule spec
 -> vertical bar, vp rule spec:n                    =aws(n);

(void) vp rule specs
 -> reduction procedure:pn                          =sws((iws(),vp_form3(pn)));
 -> vp rule specs, additional vp rule spec

(void) vp rules
 -> vp rule specs | proper vp rule specs

(int) vp rule spec
 -> grammar rule, reduction procedure:s             =vp_form3(s);

// Rule specifications on the right hand side of ordinary productions.
(void) rule specs
 -> reduction procedure:n                           =sws(form_spec_2(form1(),n));
 -> rule spec:n                                     =sws(n);
 -> rule specs, additional rule spec

(void) additional rule spec
 -> vertical bar, rule spec:n                       =aws(n);

(int) rule spec
 -> grammar rule, reduction procedure:s             =form_spec_2(makeRule(ruleElementStack.pop()),s);

(void) grammar rule
 -> init grammar rule, rule element:e, parameter name:p   =ruleElementStack.top().push(RuleElement(e,p));
 -> grammar rule, comma, rule element:e, parameter name:p =ruleElementStack.top().push(RuleElement(e,p));

init grammar rule
 ->                                                 =ruleElementStack.push(AgStack<RuleElement>());

(int) reduction procedure
 ->                                                 =0;
 -> equals, embedded c                              =proc_spec_4(0);
 -> equals, c expression, ';', blank?...            =proc_spec_4(1);

// Data types as written in type definitions and configuration parameters.
(void) data type
 -> type name
 -> type name, template field
 -> type name, template field?, abstract declarator =concat_string();

(void) type name
 -> name
 -> "::", !sss("::");, blank?..., name              =concat_string();
 -> type name, "::", !ass("::");, blank?..., name   =concat_string();

(void) abstract declarator
 -> indirect data type
 -> direct abstract declarator
 -> indirect data type, direct abstract declarator  =concat_string();

(void) template field
 -> template field head, data type, '>', blank?...  =acs('>'), concat_string();

template field head
 -> '<', blank?...                                  = acs('<');
 -> template field head, data type, ',', blank?...  =acs(','), concat_string();

(void) direct abstract declarator
 -> {left parenthesis =scs('(');}, abstract declarator, right parenthesis =concat_string(), acs(')');

(void) star
 -> '*', blank?...                                  =sss(" *");

(void) pointer
 -> star
 -> star, name                                      =concat_string();

(void) indirect data type
 -> pointer
 -> indirect data type, pointer                     =concat_string();

// Names; embedded runs of blanks are collected as a single space.
(void) name string
 -> letter:c                                        =scs(c);
 -> name string, letter + digit :c                  =acs(c);
 -> name string, blank..., letter + digit :c        =acs(' '), acs(c);

(void) enum fix
 -> "enum", blank..., letter + digit : c            =sss("enum "), acs(c);
 -> enum fix, letter + digit:c                      =acs(c);
 -> enum fix, blank..., letter + digit:c            =acs(' '), acs(c);

(void) name
 -> name string, blank?...

// White space, line ends and comments.
(void) blank
 -> blank char
 -> c comment

(void) space
 -> blank...
 -> blank..., continuation
 -> continuation

(void) continuation
 -> comment, next line
 -> next line

(void) next line
 -> carriage return?, newline
 -> carriage return?, newline, blank...

(void) white
 -> blank
 -> carriage return?, newline
 -> comment, carriage return?, newline

(void) end of line
 -> comment, carriage return?, newline
 -> carriage return?, newline
 -> end of line, white
 -> end of line, vertical space                     //form feed

(void) comment
 -> "//", ~eol chars & ~eof?...

// Character codes and numbers.
(int) character
 -> signed number
 -> '^', 33..126:x, blank?...                       =x & 0x1f;

(int) quoted character
 -> single quote, char const:c, right quote         =(character_seen=1),c;

(int) signed number
 -> sign:s, simple number:n, blank?...              =s*n;

(int) sign
 -> plus?                                           =1;
 -> minus                                           =-1;

(int) simple number
 -> decimal number
 -> '0', octal number:n                             =n;
 -> {"0x" | "0X"}, hex number:n                     =n;

(int) decimal number
 -> '1-9':d                                         =d - '0';
 -> decimal number:n, '0-9':d                       =10*n + d - '0';

(int) octal number
 ->                                                 =0;
 -> octal number:n, '0-7':d                         =8*n + d - '0';

(int) hex number
 -> hex digit:d                                     =d;
 -> hex number:n, hex digit:d                       =16*n + d;

(int) hex digit
 -> '0-9':d                                         =d-'0';
 -> 'a-f' + 'A-F':d                                 =(d&7)+9;

// Rule elements and character set expressions.
(int) rule element
 -> token
 -> mid rule action:p                               =vp_8a(p);

(int) token
 -> union:n                                         =form_element_1(n);
 -> virtual production

(Expression) union
 -> intersection
 -> union:u, plus, intersection:i                   =new CharSetUnion(u,i);
 -> union:u, minus, intersection:i                  =new CharSetDifference(u,i);

(Expression) intersection
 -> negation
 -> intersection:i, '&', space?, negation:n         =new CharSetIntersection(i,n);

(Expression) negation
 -> simple set
 -> tilde, negation:n                               =new CharSetComplement(n);

(int) char const
// -> anything -(single quote + backslash + eol chars):c =
//      case_sensitive ? (c) : agToUpper((char) c);
 -> anything -(single quote + backslash + eol chars)
 -> escape sequence
 -> octal escape
 -> three octal
 -> hex escape

(Expression) simple set
 -> character range
 -> character:c                                     =new IndividualCode(c);
 -> quoted character:c                              =new IndividualChar(c);
 -> name                                            =ss2();
 -> left parenthesis, union:x, right parenthesis    =x;

(Expression) character range
 -> single quote, char const:l, '-', char const:r, right quote =(character_seen=1),new CharRange(l,r);
 -> character:l, "..", space?, character:r          =new CodeRange(l,r);

// Definitions and enum statements.
(void) definition
 -> name, equals, union:n                           =definition_1(n);
 -> name, equals, virtual production:p              =definition_2(p);
 -> name, equals, mid rule action:p                 =definition_2(vp_8a(p));

(void) enum definition
 -> name                                            =definition_3(enum_base++);
 -> name, equals, character:n                       =definition_3(enum_base = n), enum_base++;
 -> name, equals, quoted character:n                =definition_3(enum_base = n), enum_base++;

(void) enum statement
 -> "enum", space, left brace, enum definition
 -> enum statement, comma, enum definition

// Keyword strings and their escape sequences.
(void) keyword string
 -> keyword string head, string, double quote, blank?...

(void) string
 -> string A | string B | string C

(void) keyword string head
 -> double quote                                    =ics();

(int) string char
 -> simple string char
 -> escape sequence
 -> three octal:n                                   =null_warning(n);

(int) escape sequence
 -> "\\a"                                           ='\a';
 -> "\\b"                                           ='\b';
 -> "\\f"                                           ='\f';
 -> "\\n"                                           ='\n';
 -> "\\r"                                           ='\r';
 -> "\\t"                                           ='\t';
 -> "\\v"                                           ='\v';
 -> "\\\\"                                          ='\\';
 -> "\\?"                                           = '\?';
 -> "\\'"                                           ='\'';
 -> "\\\""                                          ='"';

{
/*
 * Pass a character code through unchanged, logging an error when it is
 * zero (a null character inside a keyword string).
 * NOTE(review): warning_here is declared here but never called.
 */
int null_warning(int n) {
  extern void warning_here(const char *,...);
  if (n == 0) log_error("Null character in keyword string");
  return n;
}
}

(int) one octal
 -> backslash, '0-7':n                              =n&7;

(int) two octal
 -> one octal:n, '0-7':d                            =n*8 + (d&7);

(int) three octal
 -> two octal:n, '0-7':d                            =n*8 + (d&7);

(int) octal escape
 -> one octal | two octal

(int) hex escape
 -> "\\x", hex number:x                             =x;

(void) string A
 -> string char:c                                   =acs(c);
 -> any digit:c                                     =acs(c);
 -> string, string char:c                           =acs(c);
 -> string A, any digit:c                           =acs(c);
 -> string B, nonoctal digit:c                      =acs(c);

(void) string B
 -> octal escape:c                                  =acs(null_warning(c));
 -> string, octal escape:c                          =acs(null_warning(c));

(void) string C
 -> hex escape:c                                    =acs(null_warning(c));
 -> string, hex escape:c                            =acs(null_warning(c));

(int) parameter name
 ->                                                 =0;
 -> ':', space?, c name                             =cVariableList << buildAgString();

(void) simple name
 -> letter:c                                        =scs(c);
 -> simple name, letter + digit:c                   =acs(c);

(void) c name
 -> simple name, blank?...

// Configuration sections.
(void) configuration section
 -> '[', configuration parameters, right bracket

(void) configuration parameters
 -> blank?..., end of line?, [configuration parameter list, end of line?]

(void) configuration parameter list
 -> configuration parameter
 -> configuration parameter list, end of line, configuration parameter

(void) configuration parameter
 -> name                                            =ConfigParam::set(1, pggErrorHandler); //gp4(1);
 -> tilde, name                                     =ConfigParam::set(0, pggErrorHandler); //gp4(0);
 -> name, equals, data type                         =ConfigParam::set(pggErrorHandler); //gp2();
 -> name, equals, keyword string                    =ConfigParam::set(pggErrorHandler); //gp3();
 -> enum fix, blank?..., equals, keyword string     =ConfigParam::set(pggErrorHandler); //gp3();
 -> name, equals, signed number:n                   =ConfigParam::set(n, pggErrorHandler); //gp5(n);
 -> attribute statement
 -> keyword kluge, name                             =concat_string(), ConfigParam::set(1, pggErrorHandler); //gp4(1);

keyword kluge
 -> "distinguish", blank...                         =sss("distinguish ");

(void) attribute statement
 -> "left", token list                              =set_prec(1,0);
 -> "right", token list                             =set_prec(0,1);
 -> "nonassoc", token list                          =set_prec(0,0);
 -> "sticky", token list                            =set_sticky();
 -> "subgrammar", token list                        =set_subgrammar();
 -> "hidden", token list                            =set_hidden();
 -> new reserve statement, list end                 //=new_reserve();
 -> enum statement, list end
 -> "disregard", blank..., token:t                  =disregard(t);
 -> "lexeme", token list                            =set_lexeme();
 -> "reserve", blank..., "keywords", token list     =set_reserved_words();
 -> "rename", blank..., "macro",blank..., macro name:oldName, blank..., macro name:newName =at(rename_macro_list,oldName,newName);

(int) macro name
 -> letter                                          =(PCB).pointer-input_base-1;
 -> macro name, letter + digit

list end
 -> [comma | continuation], right brace

(void) new reserve statement
 -> "distinguish", blank..., "keywords", blank..., left brace, union:n =distinguishSets.push(ParseTree(n));
 -> new reserve statement, comma, union:n           =distinguishSets.push(ParseTree(n));

(void) token list
 -> blank..., left brace, tokens, list end

(void) tokens
 -> token:t                                         =sws(t);
 -> tokens, comma, token:t                          =aws(t);

// Virtual productions and mid-rule actions.
(int) virtual production
 -> keyword string                                  =vp_s();
 -> keyword string, '?', blank?...                  =vp_5(vp_s());
 -> left brace, vp rules, right brace               =vp_1();
 -> left brace, vp rules, "}...", blank?...         =vp_2();
 -> left bracket, proper vp rule specs, right bracket          =vp_3();
 -> left bracket, proper vp rule specs, "]...", blank?...      =vp_4();
 -> union:n, '?', blank?...                         =vp_5(form_element_1(n));
 -> union:n, "?...", blank?...                      =vp_6(form_element_1(n));
 -> union:n, ellipsis                               =vp_7(form_element_1(n));
 -> left brace, proper vp rule specs, "}/...", blank?...       =vp_9();
 -> left bracket, proper vp rule specs, "]/...", blank?...     =vp_10();

(int) mid rule action
 -> bang, embedded c                                =mid_line(proc_spec_4(0));
 -> bang, c expression, ';', blank?...              =mid_line(proc_spec_4(1));

// Embedded C blocks and C expressions used as reduction procedures.
(void) embedded c
 -> embedded c head, c code:y, right brace          =cSegmentStack.top().end = y;

(void) embedded c head
 -> '{', [carriage return?, newline]...             =cSegmentStack.push(CSegment()); // =copyon();

(unsigned char *) c code
 ->                                                 =PCB.pointer;
 -> c code first, c text                            =PCB.pointer;

(void) c code first
 -> simple c char - eol chars
 -> c comment
 -> comment, carriage return?, newline
 -> c character constant, single quote
 -> c string constant, double quote

(void) c text
 ->
 -> c text, c char

(void) c char  //c char represents the content of embedded c
 -> simple c char
 -> '{', c text, '}'
 -> c comment
 -> comment, carriage return?, newline
 -> c character constant, single quote
 -> c string constant, double quote

(void) c expression head
 ->                                                 =cSegmentStack.push(CSegment());

(void) c expression
 -> c expression head, c chars                      =cSegmentStack.top().end = PCB.pointer;

(void) c chars
 -> c expression char
 -> c chars, c expression char
 -> c chars, blank

(void) c expression char  //c char represents the content of embedded C
 -> simple c char - ';' - eol chars - blank char
 -> "\\\n"
 -> c character constant, single quote
 -> c string constant, double quote

(void) c comment
 -> c comment head, "*/"

(void) c comment head
 -> "/*"
 -> c comment head, ~eof

(void) c comment, c comment head
 -> c comment head, c comment = {if (nest_comments) PCB.reduction_token = pgg_c_comment_head_token;}

(void) c string constant
 -> double quote, [c literal elem | single quote | backslash, anything]...

(void) c character constant
 -> single quote, [c literal elem | double quote | backslash, anything]...

[
  hidden {
    arrow, comma, ellipsis, equals, left brace, left bracket,
    left parenthesis, minus, plus, right bracket, list end, right brace,
    right parenthesis, right quote, star, tilde, vertical bar,
    simple statement, token name, rule spec, pointer, indirect data type,
    name string, blank, space, next line, white, end of line, sign,
    simple number, simple set, one octal, two octal, three octal,
    string A, string B, string C, simple name, c text, c char, c chars,
    c expression char, c expression head, c comment head, c code, c name,
    bang, complete statement, rule specs, vp rule specs,
    additional rule spec, data type, template field, abstract declarator,
    direct abstract declarator, pointer, name string, proper vp rule specs,
    additional vp rule spec, new reserve statement, anything, union,
    intersection, negation, enum fix
  }
]

{
pgg_pcb_type pgcb;
//#define PCB pgcb

/* Hooks referenced by name from the macros below. */
#define PARSER_STACK_OVERFLOW parse_stack_overflow()
#define SYNTAX_ERROR log_syntax_error()
#define GET_CONTEXT CONTEXT.y = PCB.line, CONTEXT.x = PCB.column;
#define REDUCTION_TOKEN_ERROR reduction_token_error()

extern AgStack< AgStack<RuleElement> > ruleElementStack;
AgStack<CSegment> cSegmentStack;
int nPrologueSegments;

/*
 * A new segment starts out empty: begin and end are both the current
 * input pointer, and line is the current line number.
 */
CSegment::CSegment()
  : begin(PCB.pointer), end(PCB.pointer), line(PCB.line)
{
  LOGSECTION("CSegment::CSegment");
  // Nothing to do
}

/* Routes bad-parameter messages from ConfigParam to log_error(). */
class PggErrorHandler : public ConfigParam::ErrorHandler {
  void badParam(const char *s){log_error(s);}
} pggErrorHandler;

/*
 * Expansion of REDUCTION_TOKEN_ERROR: dumps parser state when logging
 * is compiled in, then asserts unconditionally.
 */
void reduction_token_error(void) {
#ifdef INCLUDE_LOGGING
  LOGSECTION("reduction_token_error");
  LOGV(PCB.sn) LCV(PCB.ssx) LCV(PCB.reduction_token);
  for (int i = 0; i < PCB.ssx; i++) LOGV(i) LCV(PCB.ss[i]);
#endif
  assert(0);
}

extern AgString infile_name;

/*
 * Build an error record for infile_name.  When contextFlag is nonzero
 * the position is taken from ERROR_CONTEXT, otherwise from the current
 * PCB line and column.
 */
Error::Error(AgString msg, int contextFlag)
  : file(infile_name),
    line(contextFlag? ERROR_CONTEXT.y : PCB.line),
    column(contextFlag? ERROR_CONTEXT.x : PCB.column),
    key(warn),
    message(msg)
{
  LOGSECTION("Error::Error(AgString, int)");
  LOGV(PCB.line) LCV(PCB.column) LCV(contextFlag) LCV(msg.pointer());
  LOGV(ERROR_CONTEXT.x) LCV(ERROR_CONTEXT.y);
  // Nothing here
}

/* Build an error record for infile_name at an explicit line and column. */
Error::Error(int l, int c, AgString msg)
  : file(infile_name), line(l), column(c), key(warn), message(msg)
{
  LOGSECTION("Error::Error(int, int, AgString)");
  // Nothing here
}

extern tsd *rename_macro_list;
extern int parse_abort_flag;

/*
 * Expansion of PARSER_STACK_OVERFLOW: logs a fatal error, sets
 * parse_abort_flag and stops the parser with AG_SEMANTIC_ERROR_CODE.
 */
void parse_stack_overflow(void) {
  reset_stk();
  log_error("Nesting too deep. Analysis aborted.");
  errorList.top().setFatal();
  parse_abort_flag = 1;
  PCB.exit_flag = AG_SEMANTIC_ERROR_CODE;
}

extern int character_seen;
extern int enum_base;
extern unsigned char *input_base;
extern int precedence_level;
extern int syntax_error_flag;

/* Helpers called from the reduction procedures above. */
Expression ss2(void);
void acs(int);
void aws(int);
void atkn(Token);
void definition_1(Expression);
void definition_2(int);
void definition_3(int);
int form_element_1(Expression);
int form_spec_2(int, int);
int form1(void);
int form2(void);
int head_list_1(int);
int head_list_2(int);
static int head_list_3(int);
void ics(void);
int identify_node(Expression);
void iws(void);
int makeRule(const AgIndexedContainer<RuleElement> &);
int mid_line(int);
int null_warning(int);
int proc_spec_4(int);
void production(int);
void range(int, int);
void scs(int);
static void set_prec(int,int);
void sws(int);
int vp_form3(int);
int vp_s(void);
int vp_1(void);
int vp_2(void);
int vp_3(void);
int vp_4(void);
int vp_5(int);
int vp_6(int);
int vp_7(int);
int vp_8(int);
int vp_8a(int);
int vp_9(void);
int vp_10(void);

int *disregard_list = NULL;
int n_disregard_list = 0;

/*
 * Rebuild disregard_list: start a fresh list, copy in the entries of the
 * old list (which is then deallocated), add tn with xws(), and store the
 * result of build_list() and fis() as the new list and its count.
 */
static void disregard(int tn) {
  LOGSECTION("disregard");
  iws();
  if (n_disregard_list) {
    int *p = disregard_list;
    while (n_disregard_list--) aws(*p++);
    DEALLOCATE(disregard_list);
  }
  xws(tn);
  disregard_list = build_list();
  n_disregard_list = fis();
}

/*
 * Expansion of SYNTAX_ERROR: pushes a fatal "<message> in <token name>"
 * error and increments syntax_error_flag.  The error uses ERROR_CONTEXT
 * for its position only when the error frame is a C comment or embedded
 * C block and the current token is eof.  Once errorList holds 50 or
 * more entries the parse is stopped with AG_SYNTAX_ERROR_CODE.
 */
static void log_syntax_error(void) {
  LOGSECTION("log_syntax_error");
#ifdef INCLUDE_LOGGING
  {
    LOGV(PCB.sn) LCV(PCB.ssx) LCV(PCB.token_number);
    for (int i = 0; i < PCB.ssx; i++) LOGV(i) LCV(PCB.ss[i]);
  }
#endif
  int flag = PCB.error_frame_token == pgg_c_comment_token
    || PCB.error_frame_token == pgg_embedded_c_token;
  LOGV(flag);
  reset_stk();
#ifdef INCLUDE_LOGGING
  {
    LOGV(PCB.sn) LCV(PCB.ssx) LCV(PCB.token_number);
    for (int i = 0; i < PCB.ssx; i++) LOGV(i) LCV(PCB.ss[i]);
  }
#endif
  ssprintf("%s in %s", PCB.error_message,
           pgg_token_names[PCB.error_frame_token]);
  LOGS(string_base);
  errorList.push(Error(string_base,
                       flag && PCB.token_number == pgg_eof_token));
  rcs();
#ifdef INCLUDE_LOGGING
  {
    LOGV(PCB.sn) LCV(PCB.ssx) LCV(PCB.token_number);
    for (int i = 0; i < PCB.ssx; i++) LOGV(i) LCV(PCB.ss[i]);
  }
#endif
  LOGS("error logged");
  errorList.top().setFatal();
  LOGS("fatal flag set");
  syntax_error_flag++;
#ifdef INCLUDE_LOGGING
  {
    LOGV(PCB.sn) LCV(PCB.ssx) LCV(PCB.token_number);
    for (int i = 0; i < PCB.ssx; i++) LOGV(i) LCV(PCB.ss[i]);
  }
#endif
  if (errorList.size() < 50) return;
  log_error("Too many errors. Parse aborted.");
  errorList.top().setFatal();
  PCB.exit_flag = AG_SYNTAX_ERROR_CODE;
}

/*
 * Give every token in the current list (list_base[0 .. tis()-1]) the
 * value type `type`, logging a "Type Redefinition" error for any token
 * that already had a different nonzero type.  Returns `type`.
 */
static int head_list_3(int type) {
  LOGSECTION("head_list_3");
  LOGV(type);
  int i, n = tis();
  for (i = 0; i < n; i ++) {
    Token token(list_base[i]);
    int ptt = token->value_type;
    LOGV(ptt) LCV(type);
    if (ptt && ptt != type) {
      ssprintf("Type Redefinition of T%03d: ", (int) token);
      atkn(token);
      log_error();
    }
    token->value_type = type;
    /*
    if (ptt == 0 && token->rp_arg && type == void_token_type) {
      //ssprintf("Void token, %s, used as parameter", dict_str(tkn_dict,token->token_name));
      ssprintf("Void token, %s, used as parameter", token->token_name->string.pointer());
      log_error();
    }
    */
  }
  return type;
}

/*
 * Advance precedence_level and apply it, together with the given
 * associativity flags, to every token in the current list.
 */
static void set_prec(int left, int right){
  LOGSECTION("set_prec");
  int n = tis();
  precedence_level++;
  while (n--) {
    Token token(list_base[n]);
    token->left_associative = left;
    token->right_associative = right;
    token->precedence_level = precedence_level;
  }
  rws();
}

/* Set the sticky flag on every token in the current list. */
static void set_sticky(void){
  LOGSECTION("set_sticky");
  int n = tis();
  while (n--) {
    Token token(list_base[n]);
    token->sticky = 1;
  }
  rws();
}

/* Set the subgrammar flag on every token in the current list. */
static void set_subgrammar(void){
  LOGSECTION("set_subgrammar");
  int n = tis();
  while (n--) {
    Token token = list_base[n];
    token->subgrammar = 1;
  }
  rws();
}

/*
 * Set fine_structure on every token in the current list.  Unlike its
 * siblings this reads list_base before calling tis() and walks forward.
 */
void set_hidden(void) {
  LOGSECTION("set_hidden");
  int *lb = list_base;
  int n = tis();
  //while (n--) map_token_number[*lb++].fine_structure = 1;
  while (n--) Token(*lb++)->fine_structure = 1;
  rws();
}

/* Set the lexeme flag on every token in the current list. */
void set_lexeme(void) {
  LOGSECTION("set_lexeme");
  int n = tis();
  while (n--) {
    Token token = list_base[n];
    token->lexeme = 1;
  }
  rws();
}

/* Set the reserved_word flag on every token in the current list. */
void set_reserved_words(void) {
  LOGSECTION("set_reserved_words");
  int n = tis();
  while (n--) {
    Token(list_base[n])->reserved_word = 1;
  }
  rws();
}

/* Entry point: run the pgg parser over the text starting at input_base. */
void parse(void) {
  pgcb.pointer = input_base;
  pgg(&pgcb);
}
}