comparison examples/mpp/ts.syn @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 {
2 /*
3 * AnaGram, a System for Syntax Directed Programming
4 * C Macro preprocessor and parser
5 * TS.SYN: Token Scanner Module
6 *
7 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the authors be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute it
15 * freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must not
18 * claim that you wrote the original software. If you use this software
19 * in a product, an acknowledgment in the product documentation would be
20 * appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must not be
22 * misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source distribution.
24 */
25
26 #include "mpp.h"
27
28
29 // context structure for diagnostics
30
// Source position (line and column) used as the parser's context type
// when reporting diagnostics.
struct location {
  unsigned line;
  unsigned column;
};
32
33 }
34
35
36 // Configuration section
37
38 [
39 // far tables // uncomment for 16 bit environment
40 context type = location // request context tracking
41 ~allow macros // function defs for red procs
42 auto resynch
43 line numbers // #line statements in output
44 error trace // build trace on syntax error
45 ~test range // not necessary
46 ~declare pcb // pcb declared manually
47 ~error frame // not wanted for diagnostics
48
49 subgrammar { // this subgrammar statement
50 simple token, // will be removed and replaced
51 expanded token, // with "disregard ws" and
52 initial arg element, // lexeme statements in the
53 ws, // next revision
54 eol,
55 macro definition header,
56 }
57 parser file name = "#.cpp"
58 ]
59
60
61 // Character Set Definitions
62
63 any text char = ~eof - newline - '\\'
64 ascii = 1..126
65 blank = ' ' + '\t' + '\r' + '\f' + '\v'
66 digit = '0-9'
67 eof = -1 + 0
68 hex digit = '0-9' + 'A-F' + 'a-f'
69 newline = '\n'
70 letter = 'a-z' + 'A-Z' + '_'
71 not punctuation = '#' + blank + letter + digit + '\'' + '"' + newline + '\\'
72 punctuation = ascii - not punctuation
73 simple char = ~eof - ('\'' + '\\' + '\n')
74 string char = ~eof - ('"' + '\\' + '\n')
75
76
77 // Grammar, or Start token
78
79 /*
80 The macro/#include structure of a C/C++ program is line oriented, so the
81 main grammar treats the input file as logical sections, separated by
82 any number of new lines.
83
84 eol is defined so that it accepts any number of blank lines and any leading
85 spaces on the first following nonblank line. Lines containing only comments
86 are considered blank lines.
87
88 Input text, as it is recognized, is sunk through the scanner_sink pointer.
89 scanner_sink is normally the output of the scanner, but when it is necessary
90 to accumulate text, as for a macro definition, scanner_sink is switched to
91 direct text to a buffer. When the end of the macro definition is encountered,
92 the scanner_sink is switched back to its previous setting.
93 */
94
95 input file $ // Grammar Token
96 -> [section | eol]/..., eof // Alternating sequence
97
98 eol
99 -> newline, [newline | space]...
100
101
102 /*
103 Conditional Compilation Control
104
105 This is the portion of the grammar that parses #if/#ifdef/#ifndef/#elif/#else/#endif
106 and determines which lines of text are to be passed on for further processing
107 and which are to be simply ignored.
108
109 A "section" is any nonblank line of input, or an if/endif block of lines
110 that should be passed on to the C compiler.
111
112 A "skip_section" is a non blank line of input or an if/endif block of lines
113 that should be passed over and ignored.
114
115 "expanded token" represents the _result_ of macro substitution
116
117 "control line" is any line beginning with # that is not an if/elif/else/endif
118 line.
119
120 A conditional block is everything from an #if, #ifdef, #ifndef to the
121 matching #endif
122 */
123
124 section
125 -> expanded token... =*scanner_sink << op('\n');
126 -> control line
127 -> conditional block
128
129 /*
130 There are basically two syntaxes for the body of any block of text
131 controlled by an #if statement: One syntax, "true if section", to be used
132 if the if condition is true and one, "false if section", to be used to skip
133 over it if the condition is false. In like manner, there are two syntaxes
134 for the body of the else block: "skip else section" to be used when the
135 if condition is true and "else section", to be used when the if condition
136 is false.
137
138 The syntax for "conditional block" enumerates all possible combinations.
139
140 This simple analysis is complicated by the existence of the #elif statement.
141 This complication occasions a moderately complex cross recursion between
142 "true if section" and "false if section".
143
144 Note that a "false if section" is a false #if line followed by all
145 statements up to an #else statement or a true #elif line. A "true if
146 section" consists of a true #if line followed by everything up to the
147 next matching #elif or #else line, or it consists of false if sections
148 followed eventually by a true #elif line and then subsequent lines
149 up to a following #else or #elif line.
150
151 "skip section" is syntax to skip over any text including matched
152 #if/#ifdef/#ifndef, #endif pairs.
153 */
154
155 conditional block
156 -> true if section, eol, endif line
157 -> true if section, eol, skip else section, eol, endif line
158 -> false if section, eol, endif line
159 -> false if section, eol, else section, eol, endif line
160
161 true if section
162 -> true condition
163 -> true if section, eol, section
164 -> false if section, eol, true else condition
165
166 false if section
167 -> false condition
168 -> false if section, eol, skip section
169 -> false if section, eol, false else condition
170
171 /*
172 "else section" handles lines of text (and nested #if/#endif sections)
173 starting with an #else line. "else section" should always be followed
174 in any syntactic use by eol, endif line to terminate the looping.
175 */
176
177 else section
178 -> '#', ws?, "else", ws?
179 -> else section, eol, section
180
181 endif line
182 -> '#', ws?, "endif", ws?
183
184 /*
185 "skip section" skips a single line, or an entire if/endif block
186 */
187
188 skip section
189 -> skip line
190 -> skip if section, eol, endif line
191
192 /*
193 "skip if section" can be terminated only by an "endif line"
194 Note that it simply skips over #else and #elif lines, since
195 they are immaterial in context.
196 */
197
198 skip if section
199 -> '#', ws?, {"if" | "ifdef" | "ifndef"}, any text?...
200 -> skip if section, eol, skip section
201 -> skip if section, eol, skip else line
202
203 /*
204 "skip else section" begins with an #elif or #else line and continues
205 to a terminating #endif line.
206 */
207
208 skip else section
209 -> skip else line
210 -> skip else section, eol, skip else line
211 -> skip else section, eol, skip section
212
213 skip else line
214 -> '#', ws?, "elif", any text?...
215 -> '#', ws?, "else", ws?
216
217 /*
218 "skip line" parses over and ignores any line that is not an
219 #if, #elif, #else, or #endif line.
220 */
221
// "skip line" matches one line inside a skipped region that is not an
// #if, #elif, #else or #endif line. The directive keyword is "undef"
// (as in "control line"); since "any text?..." follows the keyword,
// anything previously matched through "undefine" is still accepted.
skip line
 -> '#', ws?, [{"define" | "undef" | "include" | "line" |
                "error" | "pragma"}, any text?...]
 -> not control mark, any text?...
226
227 any text
228 -> any text char
229 -> '\\', ~eof
230
231 not control mark
232 -> any text char - '#'
233 -> '\\', ~eof
234
235
236 /*
237 Conditional Control Lines
238
239 A semantically determined production is used to determine whether
240 an #if, #ifdef, or #ifndef line should be treated as a true
241 condition or a false condition. #ifdef and #ifndef can be
242 resolved simply by determining whether a symbol has or has
243 not been defined. #if is more complex and requires
244 evaluation of a constant expression. It does this by passing
245 the expanded argument string to the expression evaluator
246 in ex.syn
247 */
248
249 true condition, false condition
250 -> '#', ws?, "ifdef", ws, name string, ws? =check_defined(1);
251 -> '#', ws?, "ifndef", ws, name string, ws? =check_defined(0);
252 -> '#', ws?, if header, expanded token... =eval_if();
253
254 true else condition, false else condition
255 -> '#', ws?, else if header, expanded token... =eval_elif();
256
257 /*
258 "if header" and "else if header" are simple wrapper tokens to
259 provide a hook for marking the beginning of the text that
260 is to be used by the expression evaluator. The init_condition()
261 function handles this task.
262 */
263
264 if header
265 -> "if", ws =init_condition();
266
267 else if header
268 -> "elif", ws =init_condition();
269
270
271 /*
272 Other Control Lines
273
274 Relative to the complexity of the if/elif/else/endif logic, other
275 control lines are moderately straightforward.
276
277 The #include file is handled simply by stacking the current file
278 position and opening the indicated file. When end of file is
279 encountered on the newly opened file, the current file position
280 will be unstacked and parsing will continue as though nothing
281 had happened.
282
283 Note that there is nothing that requires that an #if/#ifdef/#ifndef
284 and the matching #endif be in the same file.
285
286 #undef is trivial
287
288 #line, #error, #pragma lines are simply ignored
289
290 #define has substantial structure. It begins with the header portion
291 that identifies the macro to be defined and continues with the
292 body of the macro definition. "macro definition header" identifies
293 the name and type of the macro and initializes the accumulation of
294 the tokens which comprise the body of the macro.
295 */
296
297 control line
298 -> include header, expanded token... =include_file();
299 -> '#', ws?, "undef", ws, name string, ws? =undefine();
300 -> '#', ws?, [{"line" | "error" | "pragma"}, any text?...]
301 -> macro definition header:id, simple token?... =save_macro_body(id);
302
303 include header
304 -> '#', ws?, "include" =save_sink << scanner_sink, scanner_sink = &++ta;
305
306
307 /*
308 Macro Definitions
309
310 There are two types of macros: those with arguments and those without.
311 They are rather different in the way they have to be handled.
312 */
313
314 (int) macro definition header
315 -> '#', ws?, "define", ws, name string =init_macro_def(0,0);
316 -> '#', ws?, "define", ws, name string,
317 '(', ws?, parameter list:n, ')' =init_macro_def(n,1);
318
319 (int) parameter list
320 -> =0;
321 -> names, ws?
322
323 (int) names
324 -> name string =1;
325 -> names:n, ws?, ',', ws?, name string =n+1;
326
327 /*
328 Unexpanded text (for macro definitions)
329
330 If there are macro invocations in the body of a macro, they are
331 supposed to be expanded only when the macro itself is invoked, not
332 when the macro is defined. This means that the processing of
333 the body of the macro definition has to be different from the
334 processing of ordinary text.
335
336 simple token is a token as it appears in the input stream.
337
338 expanded token is the result of passing a token through macro
339 expansion.
340 */
341
342 simple token
343 -> space:c =*scanner_sink << space_op(c);
344 -> word
345 -> separator
346 -> '#' =*scanner_sink << op('#');
347 -> qualified real
348 -> integer constant
349
350 word
351 -> name string =*scanner_sink << name_token();
352
353
354 // Expanded text
355
356 expanded token
357 -> expanded word
358 -> separator
359 -> space
360 -> qualified real
361 -> integer constant
362
363 /*
364 The semantically determined production below classifies a name string
365 into the categories variable, simple macro, macro or defined (as in #if defined(x))
366 so that the parser can do appropriate follow up parsing.
367
368 expand() is called to expand macros. Note that a macro that is defined with
369 a parameter list (whether or not the list is empty) is not expanded unless
370 it is invoked with a parameter list.
371 */
372
373 expanded word
374 -> variable:t =*scanner_sink << t;
375 -> simple macro:t =expand(t,0);
376 -> macro:t, ws? =*scanner_sink << t;
377 -> macro:t, ws?, '(', ws?, macro arg list:n, ')' =expand(t,n);
378 -> defined, ws?, '(', ws?, name string, ws?, ')' =*scanner_sink << defined();
379 -> defined, ws, name string =*scanner_sink << defined();
380
381
382 // Name classification
383
384 (token) variable, simple macro, macro, defined
385 -> name string =id_macro();
386
387
388 /*
389 Macro Arguments
390
391 Macro arguments are accumulated as separate token strings on the
392 token accumulator stack.
393 */
394
395
396 (int) macro arg list
397 -> =0;
398 -> !save_sink << scanner_sink, scanner_sink = &ta;, macro args:n =
399 save_sink >> scanner_sink, n;
400
401 (int) macro args
402 -> increment ta, arg elements =1;
403 -> macro args:n, ',', ws?, increment ta, arg elements =n+1;
404
405 // increment ta could be replaced with an "immediate action"
406
407 (void) increment ta
408 -> /* Null Production */ =++ta;
409
410 /*
411 The following is somewhat complex partly to skip leading space.
412 */
413
414 arg elements
415 -> initial arg element
416 -> arg elements, arg element
417
418 arg element
419 -> space:c =*scanner_sink << space_op(c);
420 -> initial arg element
421
422 initial arg element
423 -> name string =*scanner_sink << name_token();
424 -> qualified real
425 -> integer constant
426 -> string literal =*scanner_sink << tkn(STRINGliteral);
427 -> character constant =*scanner_sink << tkn(CHARACTERconstant);
428 -> operator
429 -> punctuation - '(' - ',' - ')':p =*scanner_sink << op(p);
430 -> nested elements, ')':t =*scanner_sink << op(t);
431
432 nested elements
433 -> '(':t =*scanner_sink << op(t);
434 -> nested elements, arg element
435 -> nested elements, ',':t =*scanner_sink << op(t);
436
437
438 /*
439 Basic lexical elements
440
441 The remainder of the syntax file consists of the definitions of the
442 basic lexical elements of C.
443
444 The basic lexical elements are simply copied to the scanner_sink as
445 they are encountered. Note that it is not the character string itself
446 that goes to the scanner_sink but rather a token which consists of
447 a type identification and a handle that can be used to recover the
448 string from a dictionary.
449 */
450
451 separator
452 -> string literal =*scanner_sink << tkn(STRINGliteral);
453 -> character constant =*scanner_sink << tkn(CHARACTERconstant);
454 -> operator
455 -> punctuation:p =*scanner_sink << op(p);
456 -> '\\', '\n'
457
458 (int) space
459 -> blank
460 -> comment =' ';
461
462 ws = space...
463
464 comment
465 -> comment head, "*/"
466
467 comment head
468 -> "/*"
469 -> comment head, ~eof
470
471 comment, comment head
472 -> comment head, comment ={if (nest_comments) CHANGE_REDUCTION(comment_head);}
473
474 operator
475 -> '&', '&' =*scanner_sink << op(ANDAND);
476 -> '&', '=' =*scanner_sink << op(ANDassign);
477 -> '-', '>' =*scanner_sink << op(ARROW);
478 -> '#', '#' =*scanner_sink << op(CONCAT);
479 -> '-', '-' =*scanner_sink << op(DECR);
480 -> '/', '=' =*scanner_sink << op(DIVassign);
481 -> '.', '.', '.' =*scanner_sink << op(ELLIPSIS);
482 -> '=', '=' =*scanner_sink << op(EQ);
483 -> '^', '=' =*scanner_sink << op(ERassign);
484 -> '>', '=' =*scanner_sink << op(GE);
485 -> '+', '+' =*scanner_sink << op(ICR);
486 -> '<', '=' =*scanner_sink << op(LE);
487 -> '<', '<' =*scanner_sink << op(LS);
488 -> '<', '<', '=' =*scanner_sink << op(LSassign);
489 -> '%', '=' =*scanner_sink << op(MODassign);
490 -> '-', '=' =*scanner_sink << op(MINUSassign);
491 -> '*', '=' =*scanner_sink << op(MULTassign);
492 -> '!', '=' =*scanner_sink << op(NE);
493 -> '|', '=' =*scanner_sink << op(ORassign);
494 -> '|', '|' =*scanner_sink << op(OROR);
495 -> '+', '=' =*scanner_sink << op(PLUSassign);
496 -> '>', '>' =*scanner_sink << op(RS);
497 -> '>', '>', '=' =*scanner_sink << op(RSassign);
498
499
500 // Numeric constants
501
502 qualified real
503 -> real constant, floating qualifier =*scanner_sink << tkn(FLOATconstant);
504
505 real constant
506 -> real
507
508 floating qualifier
509 ->
510 -> 'f' + 'F' =sa << 'F';
511 -> 'l' + 'L' =sa << 'L';
512
513 real
514 -> simple real
515 -> simple real, exponent
516 -> confusion, exponent
517 -> decimal integer, exponent
518
// "simple real" is the mantissa of a real constant: digits, a decimal
// point, and optional fraction digits. Every alternative that consumes
// the '.' must also append it to the string accumulator; otherwise an
// input such as 0.5 would be accumulated as "05".
simple real
 -> confusion, '.'                        =sa << '.';
 -> octal integer, '.'                    =sa << '.';
 -> decimal integer, '.'                  =sa << '.';
 -> '.', '0-9':d                          =++sa << '.' << d;
 -> simple real, '0-9':d                  =sa << d;
525
526 confusion
527 -> octal integer, '8-9':d =sa << d;
528 -> confusion, '0-9':d =sa << d;
529
530 exponent
531 -> 'e' + 'E', '-', '0-9':d =sa << '-' << d;
532 -> 'e' + 'E', '+'?, '0-9':d =sa << '+' << d;
533 -> exponent, '0-9':d =sa << d;
534
535 integer qualifier
536 -> 'u' + 'U' =sa << 'U';
537 -> 'l' + 'L' =sa << 'L';
538
539 integer constant
540 -> octal constant =*scanner_sink << tkn(OCTconstant);
541 -> decimal constant =*scanner_sink << tkn(DECconstant);
542 -> hex constant =*scanner_sink << tkn(HEXconstant);
543
544 octal constant
545 -> octal integer
546 -> octal constant, integer qualifier
547
548 octal integer
549 -> '0' =++sa << '0';
550 -> octal integer, '0-7':d =sa << d;
551
552 hex constant
553 -> hex integer
554 -> hex constant, integer qualifier
555
556 hex integer
557 -> '0', 'x' + 'X', hex digit:d =++sa << "0X" << d;
558 -> hex integer, hex digit:d =sa << d;
559
560 decimal constant
561 -> decimal integer
562 -> decimal constant, integer qualifier
563
564 decimal integer
565 -> '1-9':d =++sa << d;
566 -> decimal integer, '0-9':d =sa << d;
567
568
569 // String Literals and Character Constants
570
571 string literal
572 -> string chars, '"' =sa << '"';
573
574 string chars
575 -> '"' =++sa << '"';
576 -> string chars, string char:c =sa << c;
577 -> string chars, '\\', ~eof - '\n':c =sa << '\\' << c;
578 -> string chars, '\\', '\n'
579
580
581 // Character constants
582
583 character constant
584 -> simple chars, '\'' =sa << '\'';
585
586 simple chars
587 -> '\'' =++sa << '\'';
588 -> simple chars, simple char:c = sa << c;
589 -> simple chars, '\\', ~eof - '\n': c = sa << '\\' << c;
590 -> simple chars, '\\', '\n'
591
592
593 // Identifiers
594
595 name string
596 -> letter:c =++sa << c;
597 -> name string, letter+digit:c =sa << c;
598
599
600 { // Embedded C
601 #include "array.h" // \AnaGram\classlib\include\array.h
602 #include "stack.h" // \AnaGram\classlib\include\stack.h
603 #if defined(__MSDOS__) || defined(__WIN32__)
604 #include <io.h> // If not found, not necessary
605 #endif
606 #include <sys/types.h> // If not found, not necessary
607 #include <sys/stat.h>
608 #include <fcntl.h>
609
610
611 // Macro Definitions
612
// Parser control block in use: the one embedded in the current input file.
#define PCB input.pcb
// Read the next character of the current file into the control block.
#define GET_INPUT (PCB.input_code = getc(input.file))
// Record the current scan position (line, column) as the parser context.
#define GET_CONTEXT (CONTEXT.line = PCB.line, CONTEXT.column = PCB.column)
// Route the parser's own error message to the scanner-level reporter.
#define SYNTAX_ERROR syntax_error_scanning(PCB.error_message)
617
618
619 // Structure Definition
620
621 struct file_descriptor {
622 char *name; // name of file
623 FILE *file; // source of input characters
624 ts_pcb_type pcb; // parser control block for file
625 };
626
627 typedef stack<file_descriptor> file_descriptor_stack;
628
629 // Static Data Declarations
630
631 static const char *error_modifier = "";
632 static file_descriptor input;
633 static stack<token_sink *> save_sink(5);
634 static file_descriptor_stack active_files(20);
635
636 // Syntax Error Reporting
637 /*
638 syntax_error() provides an error diagnostic procedure for those
639 parsers which are called by the token scanner. error_modifier is set
640 by expand() so that an error encountered during a macro expansion
641 will be so described. Otherwise, the diagnostic will not make
642 sense.
643
644 Since all other parsers are called from reduction procedures, the
645 line and column number of the token they are dealing with is given
646 by the context of the token scanner production that is being
647 reduced.
648 */
649
650 void syntax_error(const char *msg) {
651 printf("%s: Line %d, Column %d: %s%s\n",
652 input.name, CONTEXT.line, CONTEXT.column, msg, error_modifier);
653 }
654
655 /*
656 syntax_error_scanning() provides an error diagnostic procedure for
657 the token scanner itself. The locus of the error is given by the
658 current line and column number of the token scan, as given in the
659 parser control block.
660 */
661
662 static void syntax_error_scanning(const char *msg) {
663 printf("%s: Line %d, Column %d: %s\n",
664 input.name, PCB.line, PCB.column, msg);
665 }
666
667
668 // Support for Reduction Procedures
669 /*
670 name_token() looks up the name string in the string accumulator,
671 identifies it in the token dictionary, checks to see if it is a
672 reserved word, and creates a token.
673 */
674
675 static token name_token(void) {
676 token t;
677 t.id = NAME;
678 t.handle = td << sa.top();
679 --sa;
680 if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id;
681 return t;
682 }
683
684 /*
685 op() creates a token for a punctuation character.
686 */
687
688 static token op(unsigned x) {
689 token t;
690 t.id = (token_id) x;
691 t.handle = token_handles[x];
692 return t;
693 }
694
695 /*
696 space_op() creates a token for a space character. Note that a space
697 could be a tab, vertical tab, or form feed character as well as a
698 blank.
699 */
700
701 static token space_op(unsigned x) {
702 token t;
703 t.id = (token_id) ' ';
704 t.handle = token_handles[x];
705 return t;
706 }
707
708 /*
709 tkn() creates a token with a specified id for the string on the top
710 of the string accumulator
711 */
712
713 static token tkn(token_id id) {
714 token t;
715 t.id = id;
716 t.handle = td << sa.top();
717 --sa;
718 return t;
719 }
720
721
722 // Macro Processing Procedures
723
724 /*
725 check_defined() looks up the name on the string accumulator to see if
726 it is the name of a macro. It then selects a reduction token according
727 to the outcome of the test and an input flag.
728 */
729
730 static void check_defined(int flag) {
731 unsigned id = macro_id[td[sa.top()]];
732 --sa;
733 flag ^= id != 0;
734 if (flag) CHANGE_REDUCTION(false_condition);
735 else CHANGE_REDUCTION(true_condition);
736 }
737
738 /*
739 defined() returns a decimal constant token equal to one or zero
740 depending on whether the token named on the string accumulator is or
741 is not defined as a macro
742 */
743
744 static token defined(void) {
745 unsigned id = macro_id[td[sa.top()]];
746 token t;
747 t.id = DECconstant;
748 t.handle = id ? one_value : zero_value;
749 --sa;
750 return t;
751 }
752
753 /*
754 expand() expands and outputs a macro. t.handle is the token dictionary
755 index of the macro name. n is the number of arguments found.
756
757 Since it is possible that scanner sink is pointing to ta, it is
758 necessary to pop the expanded macro from ta before passing it on to
759 scanner_sink. Otherwise, we would have effectively ta << ta, a
760 situation which causes an infinite loop.
761 */
762
763 static void expand(token t, unsigned n) {
764 error_modifier = " in macro expansion"; // fix error diagnostic
765 expand_macro(t,n); // Defined in MAS.SYN
766 if (size(ta)) {
767 array<token> x(ta,size(ta) + 1);
768 --ta;
769 *scanner_sink << x;
770 } else --ta;
771 error_modifier = "";
772 }
773
774 /*
775 Look up the name string on the string accumulator. Determine whether
776 it is a reserved word, or a simple identifier. Then determine
777 whether it is the name of a macro.
778 */
779
780 static token id_macro(void) {
781 token t;
782 unsigned id;
783
784 t.id = NAME;
785 t.handle = td << sa.top();
786 --sa;
787 if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id;
788
789 if (if_clause && t.handle == defined_value) {
790 CHANGE_REDUCTION(defined);
791 return t;
792 }
793 id = macro_id[t.handle];
794 if (id == 0) return t;
795
796 if (macro[id].parens) CHANGE_REDUCTION(macro);
797 else CHANGE_REDUCTION(simple_macro);
798 return t;
799 }
800
801 /*
802 Start a macro definition. This procedure defines all but the body of
803 the macro.
804
805 nargs is the count of parameters that were found. flag is set if
806 the macro was defined with parentheses.
807
808 The parameter names are on the string accumulator, with the last
809 name on the top of the stack, so they must be popped off, identified
810 and stored in reverse order.
811
812 The name of the macro is beneath the parameter names on the string
813 accumulator.
814
815 Before returning, this procedure saves the current value of
816 scanner_sink, increments the level on the token stack and sets
817 scanner_sink so that subsequent tokens produced by the token scanner
818 will accumulate on the token stack. These tokens comprise the body
819 of the macro. When the end of the macro body is encountered, the
820 procedure save_macro_body will remove them from the token stack and
821 restore the value of scanner_sink.
822 */
823
824 static int init_macro_def(int nargs, int flag) {
825 int k;
826 int id = ++n_macros;
827 unsigned name;
828 unsigned *arg_list = nargs ? new unsigned[nargs] : NULL;
829
830 assert(id < N_MACROS);
831 for (k = nargs; k--;) {
832 arg_list[k] = td << sa.top();
833 --sa;
834 }
835
836 macro[id].arg_names = arg_list;
837 macro[id].n_args = nargs;
838
839 macro[id].name = name = td << sa.top();
840 --sa;
841
842 macro_id[name] = id;
843
844 macro[id].busy_flag = 0;
845 macro[id].parens = flag ;
846
847 save_sink << scanner_sink;
848 scanner_sink = &++ta;
849 return id;
850 }
851
852 /*
853 save_macro_body() finishes the definition of a macro by making a
854 permanent copy of the token string on the token accumulator. It then
855 restores the scanner_sink to the value it had when the macro
856 definition was encountered.
857 */
858
859 static void save_macro_body(int id) {
860 macro[id].body = size(ta) ? copy(ta) : NULL;
861 --ta;
862 save_sink >> scanner_sink;
863 }
864
865 /*
866 undefine() deletes the macro definition for the macro whose name is
867 on the top of the string accumulator. If there is no macro with the
868 given name, undefine simply returns.
869
870 Otherwise, it frees the storage associated with the macro. It then
871 fills the resulting hole in the table with the last macro in the
872 table. The macro_id table is updated appropriately.
873 */
874
875 static void undefine(void) {
876 unsigned name = td << sa.top();
877 int id = macro_id[name];
878 --sa;
879 if (id == 0) return;
880 macro_id[name] = 0;
881 if (macro[id].arg_names) delete [] macro[id].arg_names;
882 if (macro[id].body) delete [] macro[id].body;
883 macro[id] = macro[n_macros--];
884 macro_id[macro[id].name] = id;
885 }
886
887
888 // Include file procedures
889
890 /*
891 file_name() interprets the file name provided by an #include
892 statement. If the file name is enclosed in <> brackets it scans the
893 directory list in paths to try to find the file. If it finds it, it
894 prefixes the path to the file name.
895
896 If the file name is enclosed in "" quotation marks, file_name()
897 simply strips the quotation marks.
898
899 If file_name() succeeds, it returns 1 and provides path-name in the
900 string accumulator, otherwise it returns 0 and nothing in the string
901 accumulator.
902
903 Note that file_name() uses a temporary string accumulator, lsa.
904 */
905
906 static int file_name(char *file) {
907 int c;
908 int tc;
909 string_accumulator lsa(100); // for temporary storage of name
910
911 while (*file == ' ') file++;
912 tc = *file++;
913 if (tc == '<') tc = '>';
914 else if (tc != '"') return 0;
915 while ((c = *file++) != 0 && c != tc) lsa << c;
916 if (c != tc) return 0;
917 if (tc == '"') {
918 int k, n;
919 active_files << input;
920 n = size(active_files);
921
922 while (n--) {
923 FILE *f;
924 #ifdef _MSC_VER //Cope with peculiarity of MSVC++
925 char *cp;
926 int junk;
927
928 ++sa << ((file_descriptor *)active_files)[n].name;
929 k = size(sa);
930 cp = (char *)sa;
931 while (k-- && cp[k] != '\\' && cp[k] != '/') { sa >> junk;}
932 #else
933 ++sa << active_files[n].name;
934 while (size(sa) && sa[0] != '\\' && sa[0] != '/') {
935 sa >> k; // strip off current file name to leave only path
936 }
937 #endif
938 sa << lsa; // append desired file name
939 f = fopen(sa.top(),"rt");
940 if (f != NULL) {
941 fclose(f);
942 active_files >> input;
943 return 1;
944 }
945 --sa;
946 }
947 active_files >> input;
948 }
949 int k, n;
950 n = size(paths);
951 for (k = 0; k < n; k++) {
952 FILE *f;
953
954 #ifdef _MSC_VER //Cope with peculiarity of MSVC++
955 ++sa << ((char **) paths)[k];
956 char c = ((char *)sa)[size(sa)-1];
957 if (size(sa) && c != '\\' && c != '/') sa << '/';
958 #else
959 ++sa << paths[k];
960 if (size(sa) && sa[0] != '\\' && sa[0] != '/') sa << '/';
961 #endif
962 sa << lsa;
963 f = fopen(sa.top(),"rt");
964 if (f != NULL) {
965 fclose(f);
966 return 1;
967 }
968 --sa;
969 }
970 return 0;
971 }
972
973 /*
974 include_file() is called in response to a #include statement.
975
976 First, it restores the scanner_sink which was saved prior to
977 accumulating macro expanded tokens on the token_accumulator. The
978 file_descriptor for the current input is saved just before the scan.
979
980 When include_file() is called, the argument of the #include
981 statement exists in the form of tokens on the token accumulator.
982 These tokens are passed to a token_translator which turns the tokens
983 into a string on the string accumulator.
984
985 file_name() is then called to distinguish between "" and <> files.
986 In the latter case, file_name() prefixes a directory path to the name.
987 The name is then in the string accumulator.
988
989 scan_input() is then called to scan the include file.
990
991 Finally, before returning, the previous file_descriptor is restored.
992 */
993
994 static void include_file(void) {
995 int flag;
996
997 save_sink >> scanner_sink; // restore scanner_sink
998
999 token_translator tt(&++sa);
1000 tt << ta; // recover string from tokens
1001 --ta; // discard token string
1002
1003 array<char> file(sa.top(), size(sa)+1); // local copy of string
1004 --sa;
1005
1006 flag = file_name(file);
1007
1008 if (!flag) {
1009 fprintf(stderr, "Bad include file name: %s\n", (char *) file);
1010 return;
1011 }
1012 array<char> path(sa.top(), size(sa) + 1);
1013 --sa;
1014 active_files << input; // Save current file
1015 scan_input(path); // recursive call to ts()
1016 active_files >> input; // Restore previous file
1017 return;
1018 }
1019
1020
1021 // Conditional compilation procedures
1022
1023 /*
1024 init_condition() prepares for evaluation the condition expression in
1025 #if and #elif statements.
1026
1027 It protects scanner_sink by pushing it onto the save_sink stack.
1028 Then it resets the expression evaluator, condition, and sets
1029 scanner_sink to point to it.
1030
1031 Finally it sets the if_clause flag so that defined() will be handled
1032 properly.
1033 */
1034
1035 static void init_condition(void) {
1036 save_sink << scanner_sink;
1037 scanner_sink = &reset(condition);
1038 if_clause = 1;
1039 }
1040
1041 /*
1042 eval_condition() is called to deal with #if and #elif statements. The
1043 init_condition() procedure has redirected scanner output to the
1044 expression evaluator, so eval_condition() restores the previous
1045 scanner destination.
1046
1047 It then sends an eof token to the expression evaluator, resets
1048 if_clause and reads the value of the condition. Remember that
1049 (long) condition returns the value of the expression.
1050 */
1051
1052 static int eval_condition(void) {
1053 save_sink >> scanner_sink;
1054 condition << op(0); // eof to exp evaluator
1055 if_clause = 0;
1056 return condition != 0L;
1057 }
1058
1059 /*
1060 In eval_if() and eval_elif() note the use of CHANGE_REDUCTION to
1061 select the appropriate reduction token depending on the outcome of
1062 the condition.
1063 */
1064
1065 static void eval_elif(void) {
1066 if (eval_condition()) CHANGE_REDUCTION(true_else_condition);
1067 else CHANGE_REDUCTION(false_else_condition);
1068 }
1069
1070 static void eval_if(void) {
1071 if (eval_condition()) CHANGE_REDUCTION(true_condition);
1072 else CHANGE_REDUCTION(false_condition);
1073 }
1074
1075
1076 // Do token scan
1077
1078 /*
1079 scan_input()
1080 1) opens the specified file, if possible
1081 2) calls the parser
1082 3) closes the input file
1083 */
1084
1085 void scan_input(char *path) {
1086 input.file = fopen(path, "rt");
1087 input.name = path;
1088 if (input.file == NULL) {
1089 fprintf(stderr,"Cannot open %s\n", (char *) path);
1090 return;
1091 }
1092 ts();
1093 fclose(input.file);
1094 }
1095
1096 } // End of Embedded C