comparison examples/mpp/ct.syn @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 {
2 /*
3 * AnaGram, a System for Syntax Directed Programming
4 * C Macro preprocessor
5 * Token Classifier Module
6 *
7 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the authors be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute it
15 * freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must not
18 * claim that you wrote the original software. If you use this software
19 * in a product, an acknowledgment in the product documentation would be
20 * appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must not be
22 * misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source distribution.
24 */
25
26 #include "mpp.h"
27
28 }
29
30 // Configuration section
31
32 [
33 // far tables // uncomment for 16 bit environment
34 ~allow macros // to simplify debugging
35 line numbers // put #line directives in parser
36 //escape backslashes // uncomment if using MSVC++
37 ~diagnose errors // no diagnostics; SYNTAX_ERROR is defined empty in the embedded C below
38 pointer input // input is an array in memory, reached through PCB.pointer
39 ~lines and columns // not needed
40 ~test range // not needed
41 default token type = token_id // saves a lot of explicit defs
42 parser file name = "#.cpp" // the generated parser gets a .cpp extension
43 ]
44
45 // Character set definitions
46
47 any text char = ~eof - '\\' // everything except eof and backslash; not referenced by the rules visible here
48 ascii = 1..126 - eof // codes 1..126; eof (0) is already outside that range; not referenced by the rules visible here
49 digit = '0-9'
50 eof = 0 // character code 0 ends the input (presumably the NUL terminator of the string)
51 hex digit = '0-9' + 'A-F' + 'a-f'
52 letter = 'a-z' + 'A-Z' + '_' // underscore counts as a letter
53 punctuation = 1..126 -(letter + digit + '\'' + '"') // codes 1..126 that are not letters, digits or quote characters
54 simple char = ~eof - ('\'' + '\\' + '\n') // unescaped character inside a character constant
55 string char = ~eof - ('"' + '\\' + '\n') // unescaped character inside a string literal
56
57
58 // C token grammar
59
60 grammar
61 -> token, eof // the whole input must be exactly one token
62
63 token
64 -> name string =id_name(); // identifier: id_name() decides between a reserved word and NAME
65 -> qualified real // value is set by the FLOATconstant reduction
66 -> integer constant // value is OCTconstant, DECconstant or HEXconstant
67 -> string literal =STRINGliteral;
68 -> character constant =CHARACTERconstant;
69 -> operator // multi-character operators
70 -> punctuation:p =(token_id) p; // single character: its character code is used as the token_id
71
72 operator // multi-character operators; a single punctuation character goes through the punctuation rule of token
73 -> '&', '&' =ANDAND;
74 -> '&', '=' =ANDassign;
75 -> '-', '>' =ARROW;
76 -> '#', '#' =CONCAT;
77 -> '-', '-' =DECR;
78 -> '/', '=' =DIVassign;
79 -> '.', '.', '.' =ELLIPSIS;
80 -> '=', '=' =EQ;
81 -> '^', '=' =ERassign;
82 -> '>', '=' =GE;
83 -> '+', '+' =ICR;
84 -> '<', '=' =LE;
85 -> '<', '<' =LS;
86 -> '<', '<', '=' =LSassign; // same prefix as LS, told apart by the third character
87 -> '%', '=' =MODassign;
88 -> '-', '=' =MINUSassign;
89 -> '*', '=' =MULTassign;
90 -> '!', '=' =NE;
91 -> '|', '=' =ORassign;
92 -> '|', '|' =OROR;
93 -> '+', '=' =PLUSassign;
94 -> '>', '>' =RS;
95 -> '>', '>', '=' =RSassign; // same prefix as RS, told apart by the third character
96
97
98 // Floating point number syntax
99
100 qualified real
101 -> real constant, floating qualifier =FLOATconstant; // every real, with or without suffix, is a FLOATconstant
102
103 real constant
104 -> real
105
106 floating qualifier
107 -> // empty rule: the suffix is optional
108 -> 'f' + 'F'
109 -> 'l' + 'L'
110
111 real // a real has a decimal point, an exponent, or both
112 -> simple real
113 -> simple real, exponent
114 -> confusion, exponent
115 -> octal integer, exponent // 0e5, 00e1: a leading zero followed directly by an exponent is still a real
116 -> decimal integer, exponent
117
118 simple real // digits with a decimal point
119 -> confusion, '.'
120 -> octal integer, '.'
121 -> decimal integer, '.'
122 -> '.', '0-9'
123 -> simple real, '0-9'
124
125 confusion // leading 0 with an 8 or 9 among the digits: not an octal integer, accepted only as the start of a real
126 -> octal integer, '8-9'
127 -> confusion, '0-9'
128
129 exponent
130 -> 'e' + 'E', '-', '0-9'
131 -> 'e' + 'E', '+'?, '0-9' // the plus sign is optional
132 -> exponent, '0-9'
133
134 // Integer Constant Syntax
135
136 integer constant
137 -> octal constant =OCTconstant;
138 -> decimal constant =DECconstant;
139 -> hex constant =HEXconstant;
140
141 octal constant
142 -> octal integer
143 -> octal constant, integer qualifier // recursive: any sequence of u and l suffixes is accepted
144
145 octal integer
146 -> '0' // a lone 0 is classified as octal
147 -> octal integer, '0-7'
148
149 hex constant
150 -> hex integer
151 -> hex constant, integer qualifier // recursive: any sequence of u and l suffixes is accepted
152
153 hex integer
154 -> '0', 'x' + 'X', hex digit // at least one hex digit must follow the prefix
155 -> hex integer, hex digit
156
157 decimal constant
158 -> decimal integer
159 -> decimal constant, integer qualifier // recursive: any sequence of u and l suffixes is accepted
160
161 decimal integer
162 -> '1-9' // never starts with 0; that case belongs to octal integer
163 -> decimal integer, '0-9'
164
165 integer qualifier
166 -> 'u' + 'U'
167 -> 'l' + 'L'
168
169
170 // String Literals
171
172 string literal
173 -> string chars, '"' // closing quote
174
175 string chars
176 -> '"' // opening quote
177 -> string chars, string char
178 -> string chars, '\\', ~eof&~'\n' // backslash escape: the following character is accepted unchecked
179 -> string chars, '\\', '\n' // backslash immediately followed by a newline
180
181
182 // Character constants
183
184 character constant
185 -> simple chars, '\'' // closing quote
186
187 simple chars
188 -> '\'' // opening quote
189 -> simple chars, simple char // any number of characters is accepted between the quotes
190 -> simple chars, '\\', ~eof&~'\n' // backslash escape: the following character is accepted unchecked
191 -> simple chars, '\\', '\n' // backslash immediately followed by a newline
192 // Identifiers; declared (void) because id_name() reads the text from input_string
193 (void) name string
194 -> letter
195 -> name string, letter+digit
196
197
198
199 { // Embedded C
200
201 #define SYNTAX_ERROR // no diagnostic needed
202
203 static char *input_string;
204
205 /*
206
207 id_name() enters the token text held in input_string into the token
208 dictionary, td, and checks whether the resulting handle corresponds to
209 a reserved word. Otherwise the string is classified simply as a NAME.
210
211 */
212
213 static token_id id_name(void) {
214 const unsigned dict_handle = td << input_string;
215
216 // NOTE(review): assumes reserved_words[] can be indexed up to and including n_reserved_words - confirm
217 if (dict_handle > n_reserved_words) return NAME;
218 return reserved_words[dict_handle].id;
219 }
220
221 /*
222
223 classify_token() is the interface function for this grammar. It saves
224 the input string for id_name(), points the parser control block at it,
225 and runs the parser. The parser's token_id is returned on success;
226 "UNRECOGNIZED" is returned if the parser did not finish successfully.
227 */
228
229 token_id classify_token(char *string) {
230 PCB.pointer = (unsigned char *) string;
231 input_string = string;
232 ct();
233 if (PCB.exit_flag != AG_SUCCESS_CODE) return UNRECOGNIZED;
234 return ct_value();
235 }
236
237 } // End of Embedded C