Mercurial > ~dholland > hg > ag > index.cgi
comparison examples/mpp/ts.syn @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:13d2b8934445 |
---|---|
1 { | |
2 /* | |
3 * AnaGram, a System for Syntax Directed Programming | |
4 * C Macro preprocessor and parser | |
5 * TS.SYN: Token Scanner Module | |
6 * | |
7 * Copyright 1993-2000 Parsifal Software. All Rights Reserved. | |
8 * | |
9 * This software is provided 'as-is', without any express or implied | |
10 * warranty. In no event will the authors be held liable for any damages | |
11 * arising from the use of this software. | |
12 * | |
13 * Permission is granted to anyone to use this software for any purpose, | |
14 * including commercial applications, and to alter it and redistribute it | |
15 * freely, subject to the following restrictions: | |
16 * | |
17 * 1. The origin of this software must not be misrepresented; you must not | |
18 * claim that you wrote the original software. If you use this software | |
19 * in a product, an acknowledgment in the product documentation would be | |
20 * appreciated but is not required. | |
21 * 2. Altered source versions must be plainly marked as such, and must not be | |
22 * misrepresented as being the original software. | |
23 * 3. This notice may not be removed or altered from any source distribution. | |
24 */ | |
25 | |
26 #include "mpp.h" | |
27 | |
28 | |
29 // context structure for diagnostics | |
30 | |
31 struct location { unsigned line, column; }; | |
32 | |
33 } | |
34 | |
35 | |
36 // Configuration section | |
37 | |
38 [ | |
39 // far tables // uncomment for 16 bit environment | |
40 context type = location // request context tracking | |
41 ~allow macros // function defs for red procs | |
42 auto resynch | |
43 line numbers // #line statements in output | |
44 error trace // build trace on syntax error | |
45 ~test range // not necessary | |
46 ~declare pcb // pcb declared manually | |
47 ~error frame // not wanted for diagnostics | |
48 | |
49 subgrammar { // this subgrammar statement | |
50 simple token, // will be removed and replaced | |
51 expanded token, // with "disregard ws" and | |
52 initial arg element, // lexeme statements in the | |
53 ws, // next revision | |
54 eol, | |
55 macro definition header, | |
56 } | |
57 parser file name = "#.cpp" | |
58 ] | |
59 | |
60 | |
61 // Character Set Definitions | |
62 | |
63 any text char = ~eof - newline - '\\' | |
64 ascii = 1..126 | |
65 blank = ' ' + '\t' + '\r' + '\f' + '\v' | |
66 digit = '0-9' | |
67 eof = -1 + 0 | |
68 hex digit = '0-9' + 'A-F' + 'a-f' | |
69 newline = '\n' | |
70 letter = 'a-z' + 'A-Z' + '_' | |
71 not punctuation = '#' + blank + letter + digit + '\'' + '"' + newline + '\\' | |
72 punctuation = ascii - not punctuation | |
73 simple char = ~eof - ('\'' + '\\' + '\n') | |
74 string char = ~eof - ('"' + '\\' + '\n') | |
75 | |
76 | |
77 // Grammar, or Start token | |
78 | |
79 /* | |
80 The macro/#include structure of a C/C++ program is line oriented, so the | |
81 main grammar treats the input file as logical sections, separated by | |
82 any number of new lines. | |
83 | |
84 eol is defined so that it accepts any number of blank lines and any leading | |
85 spaces on the first following nonblank line. Lines containing only comments | |
86 are considered blank lines. | |
87 | |
88 Input text, as it is recognized, is sunk through the scanner_sink pointer. | |
89 scanner_sink is normally the output of the scanner, but when it is necessary | |
90 to accumulate text, as for a macro definition, scanner_sink is switched to | |
91 direct text to a buffer. When the end of the macro definition is encountered, | |
92 the scanner_sink is switched back to its previous setting. | |
93 */ | |
94 | |
95 input file $ // Grammar Token | |
96 -> [section | eol]/..., eof // Alternating sequence | |
97 | |
98 eol | |
99 -> newline, [newline | space]... | |
100 | |
101 | |
102 /* | |
103 Conditional Compilation Control | |
104 | |
105 This is the portion of the grammar that parses #if/#ifdef/#ifndef/#elif/#else/#endif | |
106 and determines which lines of text are to be passed on for further processing | |
107 and which are to be simply ignored. | |
108 | |
109 A "section" is any nonblank line of input, or an if/endif block of lines | |
110 that should be passed on to the C compiler. | |
111 | |
112 A "skip_section" is a nonblank line of input or an if/endif block of lines | |
113 that should be passed over and ignored. | |
114 | |
115 "expanded token" represents the _result_ of macro substitution | |
116 | |
117 "control line" is any line beginning with # that is not an if/elif/else/endif | |
118 line. | |
119 | |
120 A conditional block is everything from an #if, #ifdef, #ifndef to the | |
121 matching #endif | |
122 */ | |
123 | |
124 section | |
125 -> expanded token... =*scanner_sink << op('\n'); | |
126 -> control line | |
127 -> conditional block | |
128 | |
129 /* | |
130 There are basically two syntaxes for the body of any block of text | |
131 controlled by an #if statement: One syntax, "true if section", to be used | |
132 if the if condition is true and one, "false if section", to be used to skip | |
133 over it if the condition is false. In like manner, there are two syntaxes | |
134 for the body of the else block: "skip else section" to be used when the | |
135 if condition is true and "else section", to be used when the if condition | |
136 is false. | |
137 | |
138 The syntax for "conditional block" enumerates all possible combinations. | |
139 | |
140 This simple analysis is complicated by the existence of the #elif statement. | |
141 This complication occasions a moderately complex cross recursion between | |
142 "true if section" and "false if section". | |
143 | |
144 Note that a "false if section" is a false #if line followed by all | |
145 statements up to an #else statement or a true #elif line. A "true if | |
146 section" consists of a true #if line followed by everything up to the | |
147 next matching #elif or #else line, or it consists of false if sections | |
148 followed eventually by a true #elif line and then subsequent lines | |
149 up to a following #else or #elif line. | |
150 | |
151 "skip section" is syntax to skip over any text including matched | |
152 #if/#ifdef/#ifndef, #endif pairs. | |
153 */ | |
154 | |
155 conditional block | |
156 -> true if section, eol, endif line | |
157 -> true if section, eol, skip else section, eol, endif line | |
158 -> false if section, eol, endif line | |
159 -> false if section, eol, else section, eol, endif line | |
160 | |
161 true if section | |
162 -> true condition | |
163 -> true if section, eol, section | |
164 -> false if section, eol, true else condition | |
165 | |
166 false if section | |
167 -> false condition | |
168 -> false if section, eol, skip section | |
169 -> false if section, eol, false else condition | |
170 | |
171 /* | |
172 "else section" handles lines of text (and nested #if/#endif sections) | |
173 starting with an #else line. "else section" should always be followed | |
174 in any syntactic use by eol, endif line to terminate the looping. | |
175 */ | |
176 | |
177 else section | |
178 -> '#', ws?, "else", ws? | |
179 -> else section, eol, section | |
180 | |
181 endif line | |
182 -> '#', ws?, "endif", ws? | |
183 | |
184 /* | |
185 "skip section" skips a single line, or an entire if/endif block | |
186 */ | |
187 | |
188 skip section | |
189 -> skip line | |
190 -> skip if section, eol, endif line | |
191 | |
192 /* | |
193 "skip if section" can be terminated only by an "endif line" | |
194 Note that it simply skips over #else and #elif lines, since | |
195 they are immaterial in context. | |
196 */ | |
197 | |
198 skip if section | |
199 -> '#', ws?, {"if" | "ifdef" | "ifndef"}, any text?... | |
200 -> skip if section, eol, skip section | |
201 -> skip if section, eol, skip else line | |
202 | |
203 /* | |
204 "skip else section" begins with an #elif or #else line and continues | |
205 to a terminating #endif line. | |
206 */ | |
207 | |
208 skip else section | |
209 -> skip else line | |
210 -> skip else section, eol, skip else line | |
211 -> skip else section, eol, skip section | |
212 | |
213 skip else line | |
214 -> '#', ws?, "elif", any text?... | |
215 -> '#', ws?, "else", ws? | |
216 | |
217 /* | |
218 "skip line" parses over and ignores any line that is not an | |
219 #if, #elif, #else, or #endif line (for example, #define or #undef). | |
220 */ | |
221 | |
222 skip line | |
223 -> '#', ws?, [{"define" | "undef" | "include" | "line" | | |
224 "error" | "pragma"}, any text?...] | |
225 -> not control mark, any text?... | |
226 | |
227 any text | |
228 -> any text char | |
229 -> '\\', ~eof | |
230 | |
231 not control mark | |
232 -> any text char - '#' | |
233 -> '\\', ~eof | |
234 | |
235 | |
236 /* | |
237 Conditional Control Lines | |
238 | |
239 A semantically determined production is used to determine whether | |
240 an #if, #ifdef, or #ifndef line should be treated as a true | |
241 condition or a false condition. #ifdef and #ifndef can be | |
242 resolved simply by determining whether a symbol has or has | |
243 not been defined. #if is more complex and requires | |
244 evaluation of a constant expression. It does this by passing | |
245 the expanded argument string to the expression evaluator | |
246 in ex.syn | |
247 */ | |
248 | |
249 true condition, false condition | |
250 -> '#', ws?, "ifdef", ws, name string, ws? =check_defined(1); | |
251 -> '#', ws?, "ifndef", ws, name string, ws? =check_defined(0); | |
252 -> '#', ws?, if header, expanded token... =eval_if(); | |
253 | |
254 true else condition, false else condition | |
255 -> '#', ws?, else if header, expanded token... =eval_elif(); | |
256 | |
257 /* | |
258 "if header" and "else if header" are simple wrapper tokens to | |
259 provide a hook for marking the beginning of the text that | |
260 is to be used by the expression evaluator. The init_condition() | |
261 function handles this task. | |
262 */ | |
263 | |
264 if header | |
265 -> "if", ws =init_condition(); | |
266 | |
267 else if header | |
268 -> "elif", ws =init_condition(); | |
269 | |
270 | |
271 /* | |
272 Other Control Lines | |
273 | |
274 Relative to the complexity of the if/elif/else/endif logic, other | |
275 control lines are moderately straightforward. | |
276 | |
277 The #include file is handled simply by stacking the current file | |
278 position and opening the indicated file. When end of file is | |
279 encountered on the newly opened file, the current file position | |
280 will be unstacked and parsing will continue as though nothing | |
281 had happened. | |
282 | |
283 Note that there is nothing that requires that an #if/#ifdef/#ifndef | |
284 and the matching #endif be in the same file. | |
285 | |
286 #undef is trivial | |
287 | |
288 #line, #error, #pragma lines are simply ignored | |
289 | |
290 #define has substantial structure. It begins with the header portion | |
291 that identifies the macro to be defined and continues with the | |
292 body of the macro definition. "macro definition header" identifies | |
293 the name and type of the macro and initializes the accumulation of | |
294 the tokens which comprise the body of the macro. | |
295 */ | |
296 | |
297 control line | |
298 -> include header, expanded token... =include_file(); | |
299 -> '#', ws?, "undef", ws, name string, ws? =undefine(); | |
300 -> '#', ws?, [{"line" | "error" | "pragma"}, any text?...] | |
301 -> macro definition header:id, simple token?... =save_macro_body(id); | |
302 | |
303 include header | |
304 -> '#', ws?, "include" =save_sink << scanner_sink, scanner_sink = &++ta; | |
305 | |
306 | |
307 /* | |
308 Macro Definitions | |
309 | |
310 There are two types of macros: those with arguments and those without. | |
311 They are rather different in the way they have to be handled. | |
312 */ | |
313 | |
314 (int) macro definition header | |
315 -> '#', ws?, "define", ws, name string =init_macro_def(0,0); | |
316 -> '#', ws?, "define", ws, name string, | |
317 '(', ws?, parameter list:n, ')' =init_macro_def(n,1); | |
318 | |
319 (int) parameter list | |
320 -> =0; | |
321 -> names, ws? | |
322 | |
323 (int) names | |
324 -> name string =1; | |
325 -> names:n, ws?, ',', ws?, name string =n+1; | |
326 | |
327 /* | |
328 Unexpanded text (for macro definitions) | |
329 | |
330 If there are macro invocations in the body of a macro, they are | |
331 supposed to be expanded only when the macro itself is invoked, not | |
332 when the macro is defined. This means that the processing of | |
333 the body of the macro definition has to be different from the | |
334 processing of ordinary text. | |
335 | |
336 simple token is a token as it appears in the input stream. | |
337 | |
338 expanded token is the result of passing a token through macro | |
339 expansion. | |
340 */ | |
341 | |
342 simple token | |
343 -> space:c =*scanner_sink << space_op(c); | |
344 -> word | |
345 -> separator | |
346 -> '#' =*scanner_sink << op('#'); | |
347 -> qualified real | |
348 -> integer constant | |
349 | |
350 word | |
351 -> name string =*scanner_sink << name_token(); | |
352 | |
353 | |
354 // Expanded text | |
355 | |
356 expanded token | |
357 -> expanded word | |
358 -> separator | |
359 -> space | |
360 -> qualified real | |
361 -> integer constant | |
362 | |
363 /* | |
364 The semantically determined production below classifies a name string | |
365 into the categories variable, simple macro, macro or defined (as in #if defined(x)) | |
366 so that the parser can do appropriate follow up parsing. | |
367 | |
368 expand() is called to expand macros. Note that a macro that is defined with | |
369 a parameter list (whether or not the list is empty) is not expanded unless | |
370 it is invoked with a parameter list. | |
371 */ | |
372 | |
373 expanded word | |
374 -> variable:t =*scanner_sink << t; | |
375 -> simple macro:t =expand(t,0); | |
376 -> macro:t, ws? =*scanner_sink << t; | |
377 -> macro:t, ws?, '(', ws?, macro arg list:n, ')' =expand(t,n); | |
378 -> defined, ws?, '(', ws?, name string, ws?, ')' =*scanner_sink << defined(); | |
379 -> defined, ws, name string =*scanner_sink << defined(); | |
380 | |
381 | |
382 // Name classification | |
383 | |
384 (token) variable, simple macro, macro, defined | |
385 -> name string =id_macro(); | |
386 | |
387 | |
388 /* | |
389 Macro Arguments | |
390 | |
391 Macro arguments are accumulated as separate token strings on the | |
392 token accumulator stack. | |
393 */ | |
394 | |
395 | |
396 (int) macro arg list | |
397 -> =0; | |
398 -> !save_sink << scanner_sink, scanner_sink = &ta;, macro args:n = | |
399 save_sink >> scanner_sink, n; | |
400 | |
401 (int) macro args | |
402 -> increment ta, arg elements =1; | |
403 -> macro args:n, ',', ws?, increment ta, arg elements =n+1; | |
404 | |
405 // increment ta could be replaced with an "immediate action" | |
406 | |
407 (void) increment ta | |
408 -> /* Null Production */ =++ta; | |
409 | |
410 /* | |
411 The following is somewhat complex partly to skip leading space. | |
412 */ | |
413 | |
414 arg elements | |
415 -> initial arg element | |
416 -> arg elements, arg element | |
417 | |
418 arg element | |
419 -> space:c =*scanner_sink << space_op(c); | |
420 -> initial arg element | |
421 | |
422 initial arg element | |
423 -> name string =*scanner_sink << name_token(); | |
424 -> qualified real | |
425 -> integer constant | |
426 -> string literal =*scanner_sink << tkn(STRINGliteral); | |
427 -> character constant =*scanner_sink << tkn(CHARACTERconstant); | |
428 -> operator | |
429 -> punctuation - '(' - ',' - ')':p =*scanner_sink << op(p); | |
430 -> nested elements, ')':t =*scanner_sink << op(t); | |
431 | |
432 nested elements | |
433 -> '(':t =*scanner_sink << op(t); | |
434 -> nested elements, arg element | |
435 -> nested elements, ',':t =*scanner_sink << op(t); | |
436 | |
437 | |
438 /* | |
439 Basic lexical elements | |
440 | |
441 The remainder of the syntax file consists of the definitions of the | |
442 basic lexical elements of C. | |
443 | |
444 The basic lexical elements are simply copied to the scanner_sink as | |
445 they are encountered. Note that it is not the character string itself | |
446 that goes to the scanner_sink but rather a token which consists of | |
447 a type identification and a handle that can be used to recover the | |
448 string from a dictionary. | |
449 */ | |
450 | |
451 separator | |
452 -> string literal =*scanner_sink << tkn(STRINGliteral); | |
453 -> character constant =*scanner_sink << tkn(CHARACTERconstant); | |
454 -> operator | |
455 -> punctuation:p =*scanner_sink << op(p); | |
456 -> '\\', '\n' | |
457 | |
458 (int) space | |
459 -> blank | |
460 -> comment =' '; | |
461 | |
462 ws = space... | |
463 | |
464 comment | |
465 -> comment head, "*/" | |
466 | |
467 comment head | |
468 -> "/*" | |
469 -> comment head, ~eof | |
470 | |
471 comment, comment head | |
472 -> comment head, comment ={if (nest_comments) CHANGE_REDUCTION(comment_head);} | |
473 | |
474 operator | |
475 -> '&', '&' =*scanner_sink << op(ANDAND); | |
476 -> '&', '=' =*scanner_sink << op(ANDassign); | |
477 -> '-', '>' =*scanner_sink << op(ARROW); | |
478 -> '#', '#' =*scanner_sink << op(CONCAT); | |
479 -> '-', '-' =*scanner_sink << op(DECR); | |
480 -> '/', '=' =*scanner_sink << op(DIVassign); | |
481 -> '.', '.', '.' =*scanner_sink << op(ELLIPSIS); | |
482 -> '=', '=' =*scanner_sink << op(EQ); | |
483 -> '^', '=' =*scanner_sink << op(ERassign); | |
484 -> '>', '=' =*scanner_sink << op(GE); | |
485 -> '+', '+' =*scanner_sink << op(ICR); | |
486 -> '<', '=' =*scanner_sink << op(LE); | |
487 -> '<', '<' =*scanner_sink << op(LS); | |
488 -> '<', '<', '=' =*scanner_sink << op(LSassign); | |
489 -> '%', '=' =*scanner_sink << op(MODassign); | |
490 -> '-', '=' =*scanner_sink << op(MINUSassign); | |
491 -> '*', '=' =*scanner_sink << op(MULTassign); | |
492 -> '!', '=' =*scanner_sink << op(NE); | |
493 -> '|', '=' =*scanner_sink << op(ORassign); | |
494 -> '|', '|' =*scanner_sink << op(OROR); | |
495 -> '+', '=' =*scanner_sink << op(PLUSassign); | |
496 -> '>', '>' =*scanner_sink << op(RS); | |
497 -> '>', '>', '=' =*scanner_sink << op(RSassign); | |
498 | |
499 | |
500 // Numeric constants | |
501 | |
502 qualified real | |
503 -> real constant, floating qualifier =*scanner_sink << tkn(FLOATconstant); | |
504 | |
505 real constant | |
506 -> real | |
507 | |
508 floating qualifier | |
509 -> | |
510 -> 'f' + 'F' =sa << 'F'; | |
511 -> 'l' + 'L' =sa << 'L'; | |
512 | |
513 real | |
514 -> simple real | |
515 -> simple real, exponent | |
516 -> confusion, exponent | |
517 -> decimal integer, exponent | |
518 | |
519 simple real // every alternative that consumes '.' must append it to sa | |
520 -> confusion, '.' =sa << '.'; | |
521 -> octal integer, '.' =sa << '.'; | |
522 -> decimal integer, '.' =sa << '.'; | |
523 -> '.', '0-9':d =++sa << '.' << d; | |
524 -> simple real, '0-9':d =sa << d; | |
525 | |
526 confusion | |
527 -> octal integer, '8-9':d =sa << d; | |
528 -> confusion, '0-9':d =sa << d; | |
529 | |
530 exponent // the 'e'/'E' marker is stored as 'E', followed by the sign and digits | |
531 -> 'e' + 'E', '-', '0-9':d =sa << 'E' << '-' << d; | |
532 -> 'e' + 'E', '+'?, '0-9':d =sa << 'E' << '+' << d; | |
533 -> exponent, '0-9':d =sa << d; | |
534 | |
535 integer qualifier | |
536 -> 'u' + 'U' =sa << 'U'; | |
537 -> 'l' + 'L' =sa << 'L'; | |
538 | |
539 integer constant | |
540 -> octal constant =*scanner_sink << tkn(OCTconstant); | |
541 -> decimal constant =*scanner_sink << tkn(DECconstant); | |
542 -> hex constant =*scanner_sink << tkn(HEXconstant); | |
543 | |
544 octal constant | |
545 -> octal integer | |
546 -> octal constant, integer qualifier | |
547 | |
548 octal integer | |
549 -> '0' =++sa << '0'; | |
550 -> octal integer, '0-7':d =sa << d; | |
551 | |
552 hex constant | |
553 -> hex integer | |
554 -> hex constant, integer qualifier | |
555 | |
556 hex integer | |
557 -> '0', 'x' + 'X', hex digit:d =++sa << "0X" << d; | |
558 -> hex integer, hex digit:d =sa << d; | |
559 | |
560 decimal constant | |
561 -> decimal integer | |
562 -> decimal constant, integer qualifier | |
563 | |
564 decimal integer | |
565 -> '1-9':d =++sa << d; | |
566 -> decimal integer, '0-9':d =sa << d; | |
567 | |
568 | |
569 // String Literals and Character Constants | |
570 | |
571 string literal | |
572 -> string chars, '"' =sa << '"'; | |
573 | |
574 string chars | |
575 -> '"' =++sa << '"'; | |
576 -> string chars, string char:c =sa << c; | |
577 -> string chars, '\\', ~eof - '\n':c =sa << '\\' << c; | |
578 -> string chars, '\\', '\n' | |
579 | |
580 | |
581 // Character constants | |
582 | |
583 character constant | |
584 -> simple chars, '\'' =sa << '\''; | |
585 | |
586 simple chars | |
587 -> '\'' =++sa << '\''; | |
588 -> simple chars, simple char:c = sa << c; | |
589 -> simple chars, '\\', ~eof - '\n': c = sa << '\\' << c; | |
590 -> simple chars, '\\', '\n' | |
591 | |
592 | |
593 // Identifiers | |
594 | |
595 name string | |
596 -> letter:c =++sa << c; | |
597 -> name string, letter+digit:c =sa << c; | |
598 | |
599 | |
600 { // Embedded C | |
601 #include "array.h" // \AnaGram\classlib\include\array.h | |
602 #include "stack.h" // \AnaGram\classlib\include\stack.h | |
603 #if defined(__MSDOS__) || defined(__WIN32__) | |
604 #include <io.h> // If not found, not necessary | |
605 #endif | |
606 #include <sys/types.h> // If not found, not necessary | |
607 #include <sys/stat.h> | |
608 #include <fcntl.h> | |
609 | |
610 | |
611 // Macro Definitions | |
612 | |
613 #define SYNTAX_ERROR syntax_error_scanning(PCB.error_message) | |
614 #define GET_CONTEXT (CONTEXT.line = PCB.line, CONTEXT.column = PCB.column) | |
615 #define GET_INPUT (PCB.input_code = getc(input.file)) | |
616 #define PCB input.pcb | |
617 | |
618 | |
619 // Structure Definition | |
620 | |
621 struct file_descriptor { | |
622 char *name; // name of file | |
623 FILE *file; // source of input characters | |
624 ts_pcb_type pcb; // parser control block for file | |
625 }; | |
626 | |
627 typedef stack<file_descriptor> file_descriptor_stack; | |
628 | |
629 // Static Data Declarations | |
630 | |
631 static const char *error_modifier = ""; | |
632 static file_descriptor input; | |
633 static stack<token_sink *> save_sink(5); | |
634 static file_descriptor_stack active_files(20); | |
635 | |
636 // Syntax Error Reporting | |
637 /* | |
638 syntax_error() provides an error diagnostic procedure for those | |
639 parsers which are called by the token scanner. error_modifier is set | |
640 by expand() so that an error encountered during a macro expansion | |
641 will be so described. Otherwise, the diagnostic will not make | |
642 sense. | |
643 | |
644 Since all other parsers are called from reduction procedures, the | |
645 line and column number of the token they are dealing with is given | |
646 by the context of the token scanner production that is being | |
647 reduced. The context fields are unsigned, hence the %u conversions. | |
648 */ | |
649 | |
650 void syntax_error(const char *msg) { | |
651 printf("%s: Line %u, Column %u: %s%s\n", | |
652 input.name, CONTEXT.line, CONTEXT.column, msg, error_modifier); | |
653 } | |
654 | |
655 /* | |
656 syntax_error_scanning() provides an error diagnostic procedure for | |
657 the token scanner itself. The locus of the error is given by the | |
658 current line and column number of the token scan, as given in the | |
659 parser control block. | |
660 */ | |
661 | |
662 static void syntax_error_scanning(const char *msg) { | |
663 printf("%s: Line %d, Column %d: %s\n", | |
664 input.name, PCB.line, PCB.column, msg); | |
665 } | |
666 | |
667 | |
668 // Support for Reduction Procedures | |
669 /* | |
670 name_token() looks up the name string in the string accumulator, | |
671 identifies it in the token dictionary, checks to see if it is a | |
672 reserved word, and creates a token. | |
673 */ | |
674 | |
675 static token name_token(void) { | |
676 token t; | |
677 t.id = NAME; | |
678 t.handle = td << sa.top(); | |
679 --sa; | |
680 if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id; | |
681 return t; | |
682 } | |
683 | |
684 /* | |
685 op() creates a token for a punctuation character. | |
686 */ | |
687 | |
688 static token op(unsigned x) { | |
689 token t; | |
690 t.id = (token_id) x; | |
691 t.handle = token_handles[x]; | |
692 return t; | |
693 } | |
694 | |
695 /* | |
696 space_op() creates a token for a space character. Note that a space | |
697 could be a tab, vertical tab, or form feed character as well as a | |
698 blank. | |
699 */ | |
700 | |
701 static token space_op(unsigned x) { | |
702 token t; | |
703 t.id = (token_id) ' '; | |
704 t.handle = token_handles[x]; | |
705 return t; | |
706 } | |
707 | |
708 /* | |
709 tkn() creates a token with a specified id for the string on the top | |
710 of the string accumulator | |
711 */ | |
712 | |
713 static token tkn(token_id id) { | |
714 token t; | |
715 t.id = id; | |
716 t.handle = td << sa.top(); | |
717 --sa; | |
718 return t; | |
719 } | |
720 | |
721 | |
722 // Macro Processing Procedures | |
723 | |
724 /* | |
725 check_defined() looks up the name on the string accumulator to see if | |
726 it is the name of a macro. It then selects a reduction token according | |
727 to the outcome of the test and an input flag. | |
728 */ | |
729 | |
730 static void check_defined(int flag) { | |
731 unsigned id = macro_id[td[sa.top()]]; | |
732 --sa; | |
733 flag ^= id != 0; | |
734 if (flag) CHANGE_REDUCTION(false_condition); | |
735 else CHANGE_REDUCTION(true_condition); | |
736 } | |
737 | |
738 /* | |
739 defined() returns a decimal constant token equal to one or zero | |
740 depending on whether the token named on the string accumulator is or | |
741 is not defined as a macro | |
742 */ | |
743 | |
744 static token defined(void) { | |
745 unsigned id = macro_id[td[sa.top()]]; | |
746 token t; | |
747 t.id = DECconstant; | |
748 t.handle = id ? one_value : zero_value; | |
749 --sa; | |
750 return t; | |
751 } | |
752 | |
753 /* | |
754 expand() expands and outputs a macro. t.handle is the token dictionary | |
755 index of the macro name. n is the number of arguments found. | |
756 | |
757 Since it is possible that scanner sink is pointing to ta, it is | |
758 necessary to pop the expanded macro from ta before passing it on to | |
759 scanner_sink. Otherwise, we would have effectively ta << ta, a | |
760 situation which causes an infinite loop. | |
761 */ | |
762 | |
763 static void expand(token t, unsigned n) { | |
764 error_modifier = " in macro expansion"; // fix error diagnostic | |
765 expand_macro(t,n); // Defined in MAS.SYN | |
766 if (size(ta)) { | |
767 array<token> x(ta,size(ta) + 1); | |
768 --ta; | |
769 *scanner_sink << x; | |
770 } else --ta; | |
771 error_modifier = ""; | |
772 } | |
773 | |
774 /* | |
775 Look up the name string on the string accumulator. Determine whether | |
776 it is a reserved word, or a simple identifier. Then determine | |
777 whether it is the name of a macro. | |
778 */ | |
779 | |
780 static token id_macro(void) { | |
781 token t; | |
782 unsigned id; | |
783 | |
784 t.id = NAME; | |
785 t.handle = td << sa.top(); | |
786 --sa; | |
787 if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id; | |
788 | |
789 if (if_clause && t.handle == defined_value) { | |
790 CHANGE_REDUCTION(defined); | |
791 return t; | |
792 } | |
793 id = macro_id[t.handle]; | |
794 if (id == 0) return t; | |
795 | |
796 if (macro[id].parens) CHANGE_REDUCTION(macro); | |
797 else CHANGE_REDUCTION(simple_macro); | |
798 return t; | |
799 } | |
800 | |
801 /* | |
802 Start a macro definition. This procedure defines all but the body of | |
803 the macro. | |
804 | |
805 nargs is the count of parameters that were found. flag is set if | |
806 the macro was defined with parentheses. | |
807 | |
808 The parameter names are on the string accumulator, with the last | |
809 name on the top of the stack, so they must be popped off, identified | |
810 and stored in reverse order. | |
811 | |
812 The name of the macro is beneath the parameter names on the string | |
813 accumulator. | |
814 | |
815 Before returning, this procedure saves the current value of | |
816 scanner_sink, increments the level on the token stack and sets | |
817 scanner_sink so that subsequent tokens produced by the token scanner | |
818 will accumulate on the token stack. These tokens comprise the body | |
819 of the macro. When the end of the macro body is encountered, the | |
820 procedure save_macro_body will remove them from the token stack and | |
821 restore the value of scanner_sink. | |
822 */ | |
823 | |
824 static int init_macro_def(int nargs, int flag) { | |
825 int k; | |
826 int id = ++n_macros; | |
827 unsigned name; | |
828 unsigned *arg_list = nargs ? new unsigned[nargs] : NULL; | |
829 | |
830 assert(id < N_MACROS); | |
831 for (k = nargs; k--;) { | |
832 arg_list[k] = td << sa.top(); | |
833 --sa; | |
834 } | |
835 | |
836 macro[id].arg_names = arg_list; | |
837 macro[id].n_args = nargs; | |
838 | |
839 macro[id].name = name = td << sa.top(); | |
840 --sa; | |
841 | |
842 macro_id[name] = id; | |
843 | |
844 macro[id].busy_flag = 0; | |
845 macro[id].parens = flag ; | |
846 | |
847 save_sink << scanner_sink; | |
848 scanner_sink = &++ta; | |
849 return id; | |
850 } | |
851 | |
852 /* | |
853 save_macro_body() finishes the definition of a macro by making a | |
854 permanent copy of the token string on the token accumulator. It then | |
855 restores the scanner_sink to the value it had when the macro | |
856 definition was encountered. | |
857 */ | |
858 | |
859 static void save_macro_body(int id) { | |
860 macro[id].body = size(ta) ? copy(ta) : NULL; | |
861 --ta; | |
862 save_sink >> scanner_sink; | |
863 } | |
864 | |
865 /* | |
866 undefine() deletes the macro definition for the macro whose name is on | |
867 the top of the string accumulator; if there is none it simply returns. | |
868 Otherwise it frees the macro's storage and, unless the macro held the | |
869 last table slot, moves the last macro into the hole and updates macro_id. | |
870 */ | |
871 | |
872 static void undefine(void) { | |
873 unsigned name = td << sa.top(); | |
874 int id = macro_id[name]; | |
875 --sa; | |
876 if (id == 0) return; | |
877 macro_id[name] = 0; | |
878 if (macro[id].arg_names) delete [] macro[id].arg_names; | |
879 if (macro[id].body) delete [] macro[id].body; | |
880 if (id != n_macros) { // moving the last slot onto itself would re-register name | |
881 macro[id] = macro[n_macros]; | |
882 macro_id[macro[id].name] = id; | |
883 } | |
884 --n_macros; | |
885 } | |
886 | |
887 | |
888 // Include file procedures | |
889 | |
890 /* | |
891 file_name() interprets the file name provided by an #include | |
892 statement. If the file name is enclosed in <> brackets it scans the | |
893 directory list in paths to try to find the file. If it finds it, it | |
894 prefixes the path to the file name. | |
895 | |
896 If the file name is enclosed in "" quotation marks, file_name() | |
897 simply strips the quotation marks. | |
898 | |
899 If file_name() succeeds, it returns 1 and provides path-name in the | |
900 string accumulator, otherwise it returns 0 and nothing in the string | |
901 accumulator. | |
902 | |
903 Note that file_name() uses a temporary string accumulator, lsa. | |
904 */ | |
905 | |
906 static int file_name(char *file) { | |
907 int c; | |
908 int tc; | |
909 string_accumulator lsa(100); // for temporary storage of name | |
910 | |
911 while (*file == ' ') file++; | |
912 tc = *file++; | |
913 if (tc == '<') tc = '>'; | |
914 else if (tc != '"') return 0; | |
915 while ((c = *file++) != 0 && c != tc) lsa << c; | |
916 if (c != tc) return 0; | |
917 if (tc == '"') { | |
918 int k, n; | |
919 active_files << input; | |
920 n = size(active_files); | |
921 | |
922 while (n--) { | |
923 FILE *f; | |
924 #ifdef _MSC_VER //Cope with peculiarity of MSVC++ | |
925 char *cp; | |
926 int junk; | |
927 | |
928 ++sa << ((file_descriptor *)active_files)[n].name; | |
929 k = size(sa); | |
930 cp = (char *)sa; | |
931 while (k-- && cp[k] != '\\' && cp[k] != '/') { sa >> junk;} | |
932 #else | |
933 ++sa << active_files[n].name; | |
934 while (size(sa) && sa[0] != '\\' && sa[0] != '/') { | |
935 sa >> k; // strip off current file name to leave only path | |
936 } | |
937 #endif | |
938 sa << lsa; // append desired file name | |
939 f = fopen(sa.top(),"rt"); | |
940 if (f != NULL) { | |
941 fclose(f); | |
942 active_files >> input; | |
943 return 1; | |
944 } | |
945 --sa; | |
946 } | |
947 active_files >> input; | |
948 } | |
949 int k, n; | |
950 n = size(paths); | |
951 for (k = 0; k < n; k++) { | |
952 FILE *f; | |
953 | |
954 #ifdef _MSC_VER //Cope with peculiarity of MSVC++ | |
955 ++sa << ((char **) paths)[k]; | |
956 char c = ((char *)sa)[size(sa)-1]; | |
957 if (size(sa) && c != '\\' && c != '/') sa << '/'; | |
958 #else | |
959 ++sa << paths[k]; | |
960 if (size(sa) && sa[0] != '\\' && sa[0] != '/') sa << '/'; | |
961 #endif | |
962 sa << lsa; | |
963 f = fopen(sa.top(),"rt"); | |
964 if (f != NULL) { | |
965 fclose(f); | |
966 return 1; | |
967 } | |
968 --sa; | |
969 } | |
970 return 0; | |
971 } | |
972 | |
973 /* | |
974 include_file() is called in response to a #include statement. | |
975 | |
976 First, it saves the file_descriptor for the current input. Then it | |
977 restores the scanner_sink which was saved prior to accumulating | |
978 macro expanded tokens on the token_accumulator. | |
979 | |
980 When include_file() is called, the argument of the #include | |
981 statement exists in the form of tokens on the token accumulator. | |
982 These tokens are passed to a token_translator which turns the tokens | |
983 into a string on the string accumulator. | |
984 | |
985 file_name() is then called to distinguish between "" and <> files. | |
986 In the latter case, file_name() prefixes a directory path to the name. | |
987 The name is then in the string accumulator. | |
988 | |
989 scan_input() is then called to scan the include file. | |
990 | |
991 Finally, before returning, the previous file_descriptor is restored. | |
992 */ | |
993 | |
994 static void include_file(void) { | |
995 int flag; | |
996 | |
997 save_sink >> scanner_sink; // restore scanner_sink | |
998 | |
999 token_translator tt(&++sa); | |
1000 tt << ta; // recover string from tokens | |
1001 --ta; // discard token string | |
1002 | |
1003 array<char> file(sa.top(), size(sa)+1); // local copy of string | |
1004 --sa; | |
1005 | |
1006 flag = file_name(file); | |
1007 | |
1008 if (!flag) { | |
1009 fprintf(stderr, "Bad include file name: %s\n", (char *) file); | |
1010 return; | |
1011 } | |
1012 array<char> path(sa.top(), size(sa) + 1); | |
1013 --sa; | |
1014 active_files << input; // Save current file | |
1015 scan_input(path); // recursive call to ts() | |
1016 active_files >> input; // Restore previous file | |
1017 return; | |
1018 } | |
1019 | |
1020 | |
1021 // Conditional compilation procedures | |
1022 | |
1023 /* | |
1024 init_condition() prepares for evaluation the condition expression in | |
1025 #if and #elif statements. | |
1026 | |
1027 It protects scanner_sink by pushing it onto the save_sink stack. | |
1028 Then it resets the expression evaluatior, condition, and sets | |
1029 scanner_sink to point to it. | |
1030 | |
1031 Finally it sets the if_clause flag so that defined() will be handled | |
1032 properly. | |
1033 */ | |
1034 | |
1035 static void init_condition(void) { | |
1036 save_sink << scanner_sink; | |
1037 scanner_sink = &reset(condition); | |
1038 if_clause = 1; | |
1039 } | |
1040 | |
1041 /* | |
1042 eval_condition() is called to deal with #if and #elif statements. The | |
1043 init_condition() procedure has redirected scanner output to the | |
1044 expression evaluator, so eval_condition() restores the previous | |
1045 scanner destination. | |
1046 | |
1047 It then sends an eof token to the expression evaluator, resets | |
1048 if_clause and reads the value of the condition. Remember that | |
1049 (long) condition returns the value of the expression. | |
1050 */ | |
1051 | |
1052 static int eval_condition(void) { | |
1053 save_sink >> scanner_sink; | |
1054 condition << op(0); // eof to exp evaluator | |
1055 if_clause = 0; | |
1056 return condition != 0L; | |
1057 } | |
1058 | |
1059 /* | |
1060 In eval_if() and eval_elif() note the use of CHANGE_REDUCTION to | |
1061 select the appropriate reduction token depending on the outcome of | |
1062 the condition. | |
1063 */ | |
1064 | |
1065 static void eval_elif(void) { | |
1066 if (eval_condition()) CHANGE_REDUCTION(true_else_condition); | |
1067 else CHANGE_REDUCTION(false_else_condition); | |
1068 } | |
1069 | |
1070 static void eval_if(void) { | |
1071 if (eval_condition()) CHANGE_REDUCTION(true_condition); | |
1072 else CHANGE_REDUCTION(false_condition); | |
1073 } | |
1074 | |
1075 | |
1076 // Do token scan | |
1077 | |
1078 /* | |
1079 scan_input() | |
1080 1) opens the specified file, if possible | |
1081 2) calls the parser | |
1082 3) closes the input file | |
1083 */ | |
1084 | |
1085 void scan_input(char *path) { | |
1086 input.file = fopen(path, "rt"); | |
1087 input.name = path; | |
1088 if (input.file == NULL) { | |
1089 fprintf(stderr,"Cannot open %s\n", (char *) path); | |
1090 return; | |
1091 } | |
1092 ts(); | |
1093 fclose(input.file); | |
1094 } | |
1095 | |
1096 } // End of Embedded C |