Mercurial > ~dholland > hg > ag > index.cgi
diff examples/mpp/ct.syn @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/mpp/ct.syn Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,237 @@
+{
+/*
+ * AnaGram, a System for Syntax Directed Programming
+ * C Macro preprocessor
+ * Token Classifier Module
+ *
+ * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include "mpp.h"
+
+}
+
+// Configuration section
+
+[
+  // far tables                 // uncomment for 16 bit environment
+  ~allow macros                 // to simplify debugging
+  line numbers                  // put #line directives in parser
+  //escape backslashes          // uncomment if using MSVC++
+  ~diagnose errors              // no diagnostics
+  pointer input                 // input is an array in memory
+  ~lines and columns            // not needed
+  ~test range                   // not needed
+  default token type = token_id // saves a lot of explicit defs
+  parser file name = "#.cpp"
+]
+
+// Character set definitions
+
+any text char = ~eof - '\\'
+ascii = 1..126 - eof
+digit = '0-9'
+eof = 0                         // presumably the NUL ending the input string -- confirm
+hex digit = '0-9' + 'A-F' + 'a-f'
+letter = 'a-z' + 'A-Z' + '_'
+punctuation = 1..126 -(letter + digit + '\'' + '"')
+simple char = ~eof - ('\'' + '\\' + '\n')
+string char = ~eof - ('"' + '\\' + '\n')
+
+
+// C token grammar
+
+grammar
+  -> token, eof
+
+token
+  -> name string                        =id_name();   // reserved word id or NAME
+  -> qualified real
+  -> integer constant
+  -> string literal                     =STRINGliteral;
+  -> character constant                 =CHARACTERconstant;
+  -> operator
+  -> punctuation:p                      =(token_id) p;   // the character code itself is the token id
+
+operator
+  -> '&', '&'                           =ANDAND;
+  -> '&', '='                           =ANDassign;
+  -> '-', '>'                           =ARROW;
+  -> '#', '#'                           =CONCAT;
+  -> '-', '-'                           =DECR;
+  -> '/', '='                           =DIVassign;
+  -> '.', '.', '.'                      =ELLIPSIS;
+  -> '=', '='                           =EQ;
+  -> '^', '='                           =ERassign;
+  -> '>', '='                           =GE;
+  -> '+', '+'                           =ICR;
+  -> '<', '='                           =LE;
+  -> '<', '<'                           =LS;
+  -> '<', '<', '='                      =LSassign;
+  -> '%', '='                           =MODassign;
+  -> '-', '='                           =MINUSassign;
+  -> '*', '='                           =MULTassign;
+  -> '!', '='                           =NE;
+  -> '|', '='                           =ORassign;
+  -> '|', '|'                           =OROR;
+  -> '+', '='                           =PLUSassign;
+  -> '>', '>'                           =RS;
+  -> '>', '>', '='                      =RSassign;
+
+
+// Floating point number syntax
+
+qualified real
+  -> real constant, floating qualifier  =FLOATconstant;
+
+real constant
+  -> real
+
+floating qualifier
+  ->                                    // empty: no qualifier present
+  -> 'f' + 'F'
+  -> 'l' + 'L'
+
+real
+  -> simple real
+  -> simple real, exponent
+  -> confusion, exponent
+  -> decimal integer, exponent
+
+simple real
+  -> confusion, '.'
+  -> octal integer, '.'
+  -> decimal integer, '.'
+  -> '.', '0-9'
+  -> simple real, '0-9'
+
+confusion                               // leading-0 digit string containing an 8 or 9; used only by real and simple real
+  -> octal integer, '8-9'
+  -> confusion, '0-9'
+
+exponent
+  -> 'e' + 'E', '-', '0-9'
+  -> 'e' + 'E', '+'?, '0-9'
+  -> exponent, '0-9'
+
+
+// Integer Constant Syntax
+
+integer constant
+  -> octal constant                     =OCTconstant;
+  -> decimal constant                   =DECconstant;
+  -> hex constant                       =HEXconstant;
+
+octal constant
+  -> octal integer
+  -> octal constant, integer qualifier
+
+octal integer
+  -> '0'
+  -> octal integer, '0-7'
+
+hex constant
+  -> hex integer
+  -> hex constant, integer qualifier
+
+hex integer
+  -> '0', 'x' + 'X', hex digit
+  -> hex integer, hex digit
+
+decimal constant
+  -> decimal integer
+  -> decimal constant, integer qualifier
+
+decimal integer
+  -> '1-9'
+  -> decimal integer, '0-9'
+
+integer qualifier
+  -> 'u' + 'U'
+  -> 'l' + 'L'
+
+
+// String Literals
+
+string literal
+  -> string chars, '"'
+
+string chars
+  -> '"'
+  -> string chars, string char
+  -> string chars, '\\', ~eof&~'\n'
+  -> string chars, '\\', '\n'
+
+
+// Character constants
+
+character constant
+  -> simple chars, '\''
+
+simple chars
+  -> '\''
+  -> simple chars, simple char
+  -> simple chars, '\\', ~eof&~'\n'
+  -> simple chars, '\\', '\n'
+
+(void) name string                      // id_name() takes no argument; it reads input_string instead
+  -> letter
+  -> name string, letter+digit
+
+
+
+{ // Embedded C
+
+#define SYNTAX_ERROR // no diagnostic needed
+
+static char *input_string;              // string being classified; set by classify_token()
+
+/*
+id_name() is invoked by the "token -> name string" rule. It enters
+input_string (the text passed to classify_token()) into the token
+dictionary td and, when the returned handle is <= n_reserved_words,
+returns the id stored in reserved_words[handle]. Any other string is
+classified simply as a NAME.
+*/
+
+static token_id id_name(void) {
+  unsigned handle = td << input_string;
+  token_id id;
+  if (handle <= n_reserved_words) id = reserved_words[handle].id;
+  else id = NAME;
+  return id;
+}
+
+/*
+classify_token() is the interface function for the grammar. It saves the
+string in input_string for use by id_name(), points the parser control
+block (PCB) at the string, and calls the parser, ct(). It returns the
+token_id determined by the parser (ct_value()) if PCB.exit_flag is
+AG_SUCCESS_CODE, and returns UNRECOGNIZED if there was an error.
+
+*/
+
+token_id classify_token(char *string) {
+  input_string = string;
+  PCB.pointer = (unsigned char *) string;
+  ct();                                 // run the generated parser over the string
+  return PCB.exit_flag == AG_SUCCESS_CODE ? ct_value() : UNRECOGNIZED;
+}
+
+} // End of Embedded C