diff examples/mpp/ct.syn @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/mpp/ct.syn	Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,237 @@
+{
+/*
+ * AnaGram, a System for Syntax Directed Programming
+ * C Macro preprocessor
+ * Token Classifier Module
+ *
+ * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include "mpp.h"
+
+}
+
+// Configuration section
+
+[
+  // far tables                       // uncomment for 16 bit environment
+ ~allow macros                        // to simplify debugging
+  line numbers                        // put #line directives in parser
+  //escape backslashes                // uncomment if using MSVC++
+ ~diagnose errors                     // no diagnostics: classify_token() only reports success or UNRECOGNIZED
+  pointer input                       // input is an array in memory
+ ~lines and columns                   // not needed
+ ~test range                          // not needed
+  default token type = token_id       // saves a lot of explicit defs
+  parser file name = "#.cpp"          // parser is emitted as a .cpp file; id_name() below applies << to td
+]
+
+// Character set definitions
+
+any text char = ~eof - '\\'           // everything except eof and backslash; no rule in this file uses it
+ascii         = 1..126 - eof          // codes 1..126 (eof is 0, so nothing is removed); no rule in this file uses it
+digit         = '0-9'                 // decimal digits
+eof           = 0                     // the terminating 0 byte of the input string
+hex digit     = '0-9' + 'A-F' + 'a-f' // hexadecimal digits, either case
+letter        = 'a-z' + 'A-Z' + '_'   // underscore counts as a letter
+punctuation   = 1..126 -(letter + digit + '\'' + '"')  // codes 1..126 except letters, digits and both quote characters
+simple char   = ~eof - ('\'' + '\\' + '\n')  // unescaped character inside a character constant
+string char   = ~eof - ('"' + '\\' + '\n')   // unescaped character inside a string literal
+
+
+// C token grammar
+
+grammar                                // the input must be exactly one token followed by eof
+ -> token, eof
+
+token                                  // one alternative per token class
+ -> name string               =id_name();              // reserved word or NAME, decided by id_name()
+ -> qualified real                                      // floating constant with optional suffix
+ -> integer constant                                    // octal, decimal or hex constant
+ -> string literal            =STRINGliteral;
+ -> character constant        =CHARACTERconstant;
+ -> operator                                            // operators of two or three characters
+ -> punctuation:p             =(token_id) p;            // single character: its code is used as the token_id
+
+operator                               // multi-character operators only; single characters match punctuation in token
+ -> '&', '&'                  =ANDAND;
+ -> '&', '='                  =ANDassign;
+ -> '-', '>'                  =ARROW;
+ -> '#', '#'                  =CONCAT;
+ -> '-', '-'                  =DECR;
+ -> '/', '='                  =DIVassign;
+ -> '.', '.', '.'             =ELLIPSIS;
+ -> '=', '='                  =EQ;
+ -> '^', '='                  =ERassign;
+ -> '>', '='                  =GE;
+ -> '+', '+'                  =ICR;
+ -> '<', '='                  =LE;
+ -> '<', '<'                  =LS;
+ -> '<', '<', '='             =LSassign;
+ -> '%', '='                  =MODassign;
+ -> '-', '='                  =MINUSassign;
+ -> '*', '='                  =MULTassign;
+ -> '!', '='                  =NE;
+ -> '|', '='                  =ORassign;
+ -> '|', '|'                  =OROR;
+ -> '+', '='                  =PLUSassign;
+ -> '>', '>'                  =RS;
+ -> '>', '>', '='             =RSassign;
+
+
+// Floating point number syntax
+
+qualified real                         // a real constant plus its optional suffix
+ -> real constant, floating qualifier  =FLOATconstant;
+
+real constant
+ -> real
+
+floating qualifier                     // optional type suffix
+ ->                                    // no suffix
+ -> 'f' + 'F'
+ -> 'l' + 'L'
+
+real                                   // a floating constant without its f/F/l/L suffix
+ -> simple real                        // has a decimal point, no exponent
+ -> simple real, exponent              // has a decimal point and an exponent
+ -> confusion, exponent                // leading 0 with an 8 or 9 in it, e.g. 08e1
+ -> octal integer, exponent            // leading 0 with only octal digits, e.g. 0e0 or 007e1
+ -> decimal integer, exponent          // leading 1-9, e.g. 1e10
+
+simple real                            // digits containing a decimal point, with a digit on at least one side
+ -> confusion, '.'
+ -> octal integer, '.'
+ -> decimal integer, '.'
+ -> '.', '0-9'                         // a leading '.' must be followed by a digit
+ -> simple real, '0-9'
+
+confusion                              // digit string that starts like an octal integer but contains an 8 or 9;
+  -> octal integer, '8-9'              // it appears only inside real and simple real, never as an integer constant
+  -> confusion, '0-9'
+
+exponent                               // e or E, optional sign, at least one digit
+  -> 'e' + 'E', '-', '0-9'
+  -> 'e' + 'E', '+'?, '0-9'
+  -> exponent, '0-9'
+
+
+// Integer Constant Syntax
+
+integer constant                       // the radix picks the token_id
+ -> octal constant                          =OCTconstant;
+ -> decimal constant                        =DECconstant;
+ -> hex constant                            =HEXconstant;
+
+octal constant
+ -> octal integer
+ -> octal constant, integer qualifier       // left recursive: any run of u/U/l/L suffixes is accepted
+
+octal integer
+  -> '0'                                    // a lone 0 is an octal integer
+  -> octal integer, '0-7'
+
+hex constant
+ -> hex integer
+ -> hex constant, integer qualifier         // same suffix handling as octal constant
+
+hex integer
+  -> '0', 'x' + 'X', hex digit              // at least one hex digit must follow 0x or 0X
+  -> hex integer, hex digit
+
+decimal constant
+ -> decimal integer
+ -> decimal constant, integer qualifier     // same suffix handling as octal constant
+
+decimal integer
+  -> '1-9'                                  // never starts with 0
+  -> decimal integer, '0-9'
+
+integer qualifier                      // one suffix character
+  -> 'u' + 'U'
+  -> 'l' + 'L'
+
+
+// String Literals
+
+string literal                         // string chars plus the closing quote
+  -> string chars, '"'
+
+string chars                           // the opening quote and everything read after it so far
+ -> '"'
+ -> string chars, string char
+ -> string chars, '\\', ~eof&~'\n'     // backslash followed by any character other than eof or newline
+ -> string chars, '\\', '\n'           // backslash followed by newline
+
+
+// Character constants
+
+character constant                     // simple chars plus the closing quote
+ -> simple chars, '\''
+
+simple chars                           // the opening quote and everything read after it so far
+ -> '\''
+ -> simple chars, simple char
+ -> simple chars, '\\', ~eof&~'\n'     // backslash followed by any character other than eof or newline
+ -> simple chars, '\\', '\n'           // backslash followed by newline
+
+(void) name string                     // declared void rather than the default token_id; token takes its value from id_name()
+  -> letter                            // first character: a letter or underscore
+  -> name string, letter+digit         // later characters may also be digits
+
+
+
+{                                      // Embedded C
+
+#define SYNTAX_ERROR                   // no diagnostic needed
+
+static char *input_string;             // text being classified: set by classify_token(), read by id_name()
+
+/*
+
+id_name() enters input_string into the token dictionary td and examines
+the handle that comes back. A handle that falls within the reserved word
+table yields that entry's token_id; any other handle is a plain NAME.
+
+*/
+
+static token_id id_name(void) {
+  const unsigned handle = td << input_string;
+  // NOTE(review): the bound is inclusive, so reserved_words[] must have an
+  // entry at index n_reserved_words -- confirm against its definition.
+  return handle <= n_reserved_words ? reserved_words[handle].id : NAME;
+}
+
+/*
+
+classify_token() is the interface function for this grammar. The string
+must end in a 0 byte, which the grammar treats as eof. The function aims
+the parser control block's input pointer at the string, remembers the
+string for id_name(), and runs the parser ct(). It returns the parser's
+value when the parse succeeded and UNRECOGNIZED when it did not.
+
+*/
+
+token_id classify_token(char *string) {
+  PCB.pointer = (unsigned char *) string;
+  input_string = string;
+  ct();
+  if (PCB.exit_flag != AG_SUCCESS_CODE) return UNRECOGNIZED;
+  return ct_value();
+}
+
+}                                      // End of Embedded C