view examples/mpp/ts.syn @ 16:f9e4689b837d

Some minor updates for 15 years later.
author David A. Holland
date Tue, 31 May 2022 01:45:26 -0400
parents 13d2b8934445
children
line wrap: on
line source

{
/*
 * AnaGram, a System for Syntax Directed Programming
 * C Macro preprocessor and parser
 * TS.SYN: Token Scanner Module
 *
 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

#include "mpp.h"


// context structure for diagnostics

struct location { unsigned line, column; };

}


// Configuration section

[
  // far tables                       // uncomment for 16 bit environment
  context type = location             // request context tracking
 ~allow macros                        // function defs for red procs
  auto resynch
  line numbers                        // #line statements in output
  error trace                         // build trace on syntax error
 ~test range                          // not necessary
 ~declare pcb                         // pcb declared manually
 ~error frame                         // not wanted for diagnostics

  subgrammar {                        // this subgrammar statement
    simple token,                     // will be removed and replaced
    expanded token,                   // with "disregard ws" and
    initial arg element,              // lexeme statements in the
    ws,                               // next revision
    eol,
    macro definition header,
  }
  parser file name = "#.cpp"
]


// Character Set Definitions

any text char   = ~eof - newline - '\\'
ascii           = 1..126
blank           = ' ' + '\t' + '\r' + '\f' + '\v'
digit           = '0-9'
eof             = -1 + 0
hex digit       = '0-9' + 'A-F' + 'a-f'
newline         = '\n'
letter          = 'a-z' + 'A-Z' + '_'
not punctuation = '#' + blank + letter + digit + '\'' + '"' + newline + '\\'
punctuation     = ascii - not punctuation
simple char     = ~eof - ('\'' + '\\' + '\n')
string char     = ~eof - ('"' + '\\' + '\n')


// Grammar, or Start token

/*
 The macro/#include structure of a C/C++ program is line oriented, so the
 main grammar treats the input file as logical sections, separated by
 any number of new lines.

 eol is defined so that it accepts any number of blank lines and any leading
 spaces on the first following nonblank line. Lines containing only comments
 are considered blank lines.

 Input text, as it is recognized is sunk through the scanner_sink pointer.
 scanner_sink is normally the output of the scanner, but when it is necessary
 to accumulate text, as for a macro definition, scanner_sink is switched to
 direct text to a buffer. When the end of the macro definition is encountered,
 the scanner_sink is switched back to its previous setting.
*/

input file $                                // Grammar Token
 -> [section | eol]/..., eof                // Alternating sequence

eol
 -> newline, [newline | space]...


/*
 Conditional Compilation Control

 This is the portion of the grammar that parsers #if/#ifdef#ifndef/#elif/#else/#endif
 and determines which lines of text are to be passed on for further processing
 and which are to be simply ignored.

 A "section" is any nonblank line of input, or an if/endif block of lines
 that should be passed on to the C compiler.

 A "skip_section" is a non blank line of input or and if/endif block of lines
 that should be passed over and ignored.

 "expanded token" represents the _result_ of macro substitution

 "control line" is any line beginning with # that is not an if/elsif/endif
 line.

 A conditional block is everything from an #if, #ifdef, #ifndef to the
 matching #endif
*/

section
 -> expanded token...                       =*scanner_sink << op('\n');
 -> control line
 -> conditional block

/*
 There are basically two syntaxes for the body of any block of text
 controlled by an #if statement: One syntax, "true if section", to be used
 if the if condition is true and one, "false if section", to be used to skip
 over it if the condition is false. In like manner, there are two syntaxes
 for the body of the else block: "skip else section" to be used when the
 if condition is true and "else section", to be used when the if condition
 is false.

 The syntax for "conditional block" enumerates all possible combinations.

This simple analysis is complicated by the existence of the #elif statement.
This complication occasions a moderately complex cross recursion between
"true if section" and "false if section".

Note that a "false if section" is a false #if line followed by all
statements up to an #else statement or a true #elif line. A "true if
section" consists of a true #if line followed by everything up to the
next matching #elif or #else line, or it consists of false if sections
followed eventually by a true #elif line and then subsequent lines
up to a following #else or #elif line.

"skip section" is syntax to skip over any text including matched
#if/#ifdef/#ifndef, #endif pairs.
*/

conditional block
 -> true if section, eol, endif line
 -> true if section, eol, skip else section, eol, endif line
 -> false if section, eol, endif line
 -> false if section, eol, else section, eol, endif line

true if section
 -> true condition
 -> true if section, eol, section
 -> false if section, eol, true else condition

false if section
 -> false condition
 -> false if section, eol, skip section
 -> false if section, eol, false else condition

/*
 "else section" handles lines of text (and nested #if/#endif sections
 starting with an #else line. "else section" should always be followed
 in any syntactic use by eol, endif line to terminate the looping.
*/

else section
 -> '#', ws?, "else", ws?
 -> else section, eol, section

endif line
 -> '#', ws?, "endif", ws?

/*
 "skip section" skips a single line, or an entire if/endif block
*/

skip section
 -> skip line
 -> skip if section, eol, endif line

/*
 "skip if section" can be terminated only by an "endif line"
 Note that it simply skips over #else and #elif lines, since
 they are immaterial in context.
*/

skip if section
 -> '#', ws?, {"if" | "ifdef" | "ifndef"}, any text?...
 -> skip if section, eol, skip section
 -> skip if section, eol, skip else line

/*
 "skip else section" begins with an #elif or #else line and continues
 to a terminating #endif line.
*/

skip else section
 -> skip else line
 -> skip else section, eol, skip else line
 -> skip else section, eol, skip section

skip else line
 -> '#', ws?, "elif", any text?...
 -> '#', ws?, "else", ws?

/*
 "skip line" parses over and ignores any line that is not an
 #if, #elif, #else, or #endif line.
*/

skip line
 -> '#', ws?, [{"define" | "undefine" | "include" | "line" |
                    "error" | "pragma"}, any text?...]
 -> not control mark, any text?...

any text
 -> any text char
 -> '\\', ~eof

not control mark
 -> any text char - '#'
 -> '\\', ~eof


/*
 Conditional Control Lines

 A semantically determined production is used to determine whether
 an #if, #ifdef, or #ifndef line should be treated as a true
 condition or a false condition. #ifdef and #ifndef can be
 resolved simply by determining whether a symbol has or has
 not been defined. #if is more complex and requires
 evaluation of a constant expression. It does this by passing
 the expanded argument string to the expression evaluator
 in ex.syn
*/

true condition, false condition
 -> '#', ws?, "ifdef", ws, name string, ws?           =check_defined(1);
 -> '#', ws?, "ifndef", ws, name string, ws?          =check_defined(0);
 -> '#', ws?, if header, expanded token...            =eval_if();

true else condition, false else condition
 -> '#', ws?, else if header, expanded token...       =eval_elif();

/*
 "if header" and "else if header" are simple wrapper tokens to
 provide a hook for marking the beginning of the text that
 is to be used by the expression evaluator. The init_condition()
 function handles this task.
*/

if header
 -> "if", ws                                          =init_condition();

else if header
 -> "elif", ws                                        =init_condition();


/*
 Other Control Lines

 Relative to the complexity of the if/elif/else/endif logic, other
 control lines are moderately straightforward.

 The #include file is handled simply by stacking the current file
 position and opening the indicated file. When end of file is
 encountered on the newly opened file, the current file position
 will be unstacked and parsing will continue as though nothing
 had happened.

 Note that there is nothing that requires that an #if/#ifdef#ifndef
 and the matching #endif be in the same file.

 #undef is trivial

 #line, #error, #pragma lines are simply ignored

 #define has substantial struture. It begins with the header portion
 that identifies the macro to be defined and coninues with the
 body of the macro definition. "macro definition header" identifies
 the name and type of the macro and initializes the accumulation of
 the tokens which comprise the body of the macro.
*/

control line
 -> include header, expanded token...                 =include_file();
 -> '#', ws?, "undef", ws, name string, ws?           =undefine();
 -> '#', ws?, [{"line" | "error" | "pragma"}, any text?...]
 -> macro definition header:id, simple token?...      =save_macro_body(id);

include header
 -> '#', ws?, "include" =save_sink << scanner_sink, scanner_sink = &++ta;


/*
 Macro Definitions

 There are two types of macros: those with arguments and those without.
 They are rather different in the way they have to be handled.
*/

(int) macro definition header
 -> '#', ws?, "define", ws, name string         =init_macro_def(0,0);
 -> '#', ws?, "define", ws, name string,
      '(', ws?, parameter list:n, ')'          =init_macro_def(n,1);

(int) parameter list
 ->                                       =0;
 -> names, ws?

(int) names
 -> name string                           =1;
 -> names:n, ws?, ',', ws?, name string   =n+1;

/*
 Unexpanded text (for macro definitions)

 If there are macro invocations in the body of a macro, they are
 supposed to be expanded only when the macro itself is invoked, not
 when the macro is defined. This means that the processing of
 the body of the macro definition has to be different from the
 processing of ordinary text.

 simple token is a token as it appears in the input stream.

 expanded token is the result of passing a token through macro
 expansion.
*/

simple token
 -> space:c                               =*scanner_sink << space_op(c);
 -> word
 -> separator
 -> '#'                                   =*scanner_sink << op('#');
 -> qualified real
 -> integer constant

word
 -> name string                           =*scanner_sink << name_token();


// Expanded text

expanded token
 -> expanded word
 -> separator
 -> space
 -> qualified real
 -> integer constant

/*
 The semantically determined production below classifies a name string
 into the categories variable, simple macro, macro or defined (as in #if defined(x))
 so that the parser can do appropriate follow up parsing.

 expand() is called to expand macros. Note that a macro that is defined with
 an parameter list (whether or not the list is empty) is not expanded unless
 it is invoked with a parameter list.
*/

expanded word
 -> variable:t                                    =*scanner_sink << t;
 -> simple macro:t                                =expand(t,0);
 -> macro:t, ws?                                  =*scanner_sink << t;
 -> macro:t, ws?, '(', ws?, macro arg list:n, ')' =expand(t,n);
 -> defined, ws?, '(', ws?, name string, ws?, ')' =*scanner_sink << defined();
 -> defined, ws, name string                      =*scanner_sink << defined();


// Name classification

(token) variable, simple macro, macro, defined
 -> name string                              =id_macro();


/*
 Macro Arguments

 Macro arguments are accumulated as separate token strings on the
 token accumulator stack.
*/


(int) macro arg list
 ->                                          =0;
 -> !save_sink << scanner_sink, scanner_sink = &ta;, macro args:n =
     save_sink >> scanner_sink, n;

(int) macro args
 -> increment ta, arg elements                              =1;
 -> macro args:n, ',', ws?, increment ta, arg elements      =n+1;

// increment ta could be replaced with an "immediate action"

(void) increment ta
 ->  /* Null Production */ =++ta;

/*
 The following is somewhat complex partly to skip leading space.
*/

arg elements
 -> initial arg element
 -> arg elements, arg element

arg element
 -> space:c                              =*scanner_sink << space_op(c);
 -> initial arg element

initial arg element
 -> name string                          =*scanner_sink << name_token();
 -> qualified real
 -> integer constant
 -> string literal                  =*scanner_sink << tkn(STRINGliteral);
 -> character constant              =*scanner_sink << tkn(CHARACTERconstant);
 -> operator
 -> punctuation - '(' - ',' - ')':p      =*scanner_sink << op(p);
 -> nested elements, ')':t               =*scanner_sink << op(t);

nested elements
 -> '(':t                                =*scanner_sink << op(t);
 -> nested elements, arg element
 -> nested elements, ',':t               =*scanner_sink << op(t);


/*
 Basic lexical elements

 The remainder of the syntax file consists of the definitions of the
 basic lexical elements of C.

 The basic lexical elements are simply copied to the scanner_sink as
 they are encountered. Note that it is not the character string itself
 that goes to the scanner_sink but rather a token which consists of
 a type identification and a handle that can be used to recover the
 string from a dictionary.
*/

separator
 -> string literal        =*scanner_sink << tkn(STRINGliteral);
 -> character constant    =*scanner_sink << tkn(CHARACTERconstant);
 -> operator
 -> punctuation:p         =*scanner_sink << op(p);
 -> '\\', '\n'

(int) space
 -> blank
 -> comment                                  =' ';

ws = space...

comment
 -> comment head, "*/"

comment head
 -> "/*"
 -> comment head, ~eof

comment, comment head
 -> comment head, comment ={if (nest_comments) CHANGE_REDUCTION(comment_head);}

operator
 -> '&', '&'                  =*scanner_sink << op(ANDAND);
 -> '&', '='                  =*scanner_sink << op(ANDassign);
 -> '-', '>'                  =*scanner_sink << op(ARROW);
 -> '#', '#'                  =*scanner_sink << op(CONCAT);
 -> '-', '-'                  =*scanner_sink << op(DECR);
 -> '/', '='                  =*scanner_sink << op(DIVassign);
 -> '.', '.', '.'             =*scanner_sink << op(ELLIPSIS);
 -> '=', '='                  =*scanner_sink << op(EQ);
 -> '^', '='                  =*scanner_sink << op(ERassign);
 -> '>', '='                  =*scanner_sink << op(GE);
 -> '+', '+'                  =*scanner_sink << op(ICR);
 -> '<', '='                  =*scanner_sink << op(LE);
 -> '<', '<'                  =*scanner_sink << op(LS);
 -> '<', '<', '='             =*scanner_sink << op(LSassign);
 -> '%', '='                  =*scanner_sink << op(MODassign);
 -> '-', '='                  =*scanner_sink << op(MINUSassign);
 -> '*', '='                  =*scanner_sink << op(MULTassign);
 -> '!', '='                  =*scanner_sink << op(NE);
 -> '|', '='                  =*scanner_sink << op(ORassign);
 -> '|', '|'                  =*scanner_sink << op(OROR);
 -> '+', '='                  =*scanner_sink << op(PLUSassign);
 -> '>', '>'                  =*scanner_sink << op(RS);
 -> '>', '>', '='             =*scanner_sink << op(RSassign);


// Numeric constants

qualified real
 -> real constant, floating qualifier =*scanner_sink << tkn(FLOATconstant);

real constant
 -> real

floating qualifier
 ->
 -> 'f' + 'F'             =sa << 'F';
 -> 'l' + 'L'             =sa << 'L';

real
 -> simple real
 -> simple real, exponent
 -> confusion, exponent
 -> decimal integer, exponent

simple real
 -> confusion, '.'                         =sa << '.';
 -> octal integer, '.'
 -> decimal integer, '.'                   =sa << '.';
 -> '.', '0-9':d                           =++sa << '.' << d;
 -> simple real, '0-9':d                   =sa << d;

confusion
  -> octal integer, '8-9':d                =sa << d;
  -> confusion, '0-9':d                    =sa << d;

exponent
  -> 'e' + 'E', '-', '0-9':d               =sa << '-' << d;
  -> 'e' + 'E', '+'?, '0-9':d              =sa << '+' << d;
  -> exponent, '0-9':d                     =sa << d;

integer qualifier
  -> 'u' + 'U'                       =sa << 'U';
  -> 'l' + 'L'                       =sa << 'L';

integer constant
 -> octal constant                        =*scanner_sink << tkn(OCTconstant);
 -> decimal constant                      =*scanner_sink << tkn(DECconstant);
 -> hex constant                          =*scanner_sink << tkn(HEXconstant);

octal constant
 -> octal integer
 -> octal constant, integer qualifier

octal integer
  -> '0'                                 =++sa << '0';
  -> octal integer, '0-7':d              =sa << d;

hex constant
 -> hex integer
 -> hex constant, integer qualifier

hex integer
  -> '0', 'x' + 'X', hex digit:d         =++sa << "0X" << d;
  -> hex integer, hex digit:d            =sa << d;

decimal constant
 -> decimal integer
 -> decimal constant, integer qualifier

decimal integer
  -> '1-9':d                               =++sa << d;
  -> decimal integer, '0-9':d              =sa << d;


// String Literals and Character Constants

string literal
  -> string chars, '"'                     =sa << '"';

string chars
 -> '"'                                    =++sa << '"';
 -> string chars, string char:c            =sa << c;
 -> string chars, '\\', ~eof - '\n':c      =sa << '\\' << c;
 -> string chars, '\\', '\n'


// Character constants

character constant
 -> simple chars, '\''                     =sa << '\'';

simple chars
 -> '\''                                   =++sa << '\'';
 -> simple chars, simple char:c            = sa << c;
 -> simple chars, '\\', ~eof - '\n': c     = sa << '\\' << c;
 -> simple chars, '\\', '\n'


// Identifiers

name string
  -> letter:c                              =++sa << c;
  -> name string, letter+digit:c           =sa << c;


{                                      // Embedded C
#include "array.h"                     // \AnaGram\classlib\include\array.h
#include "stack.h"                     // \AnaGram\classlib\include\stack.h
#if defined(__MSDOS__) || defined(__WIN32__)
#include <io.h>                        // If not found, not necessary
#endif
#include <sys/types.h>                 // If not found, not necessary
#include <sys/stat.h>
#include <fcntl.h>


// Macro Definitions

#define SYNTAX_ERROR syntax_error_scanning(PCB.error_message)
#define GET_CONTEXT (CONTEXT.line = PCB.line, CONTEXT.column = PCB.column)
#define GET_INPUT (PCB.input_code = getc(input.file))
#define PCB input.pcb


// Structure Definition

struct file_descriptor {
  char *name;                          // name of file
  FILE *file;                          // source of input characters
  ts_pcb_type pcb;                     // parser control block for file
};

typedef stack<file_descriptor> file_descriptor_stack;

// Static Data Declarations

static const char            *error_modifier = "";
static file_descriptor        input;
static stack<token_sink *>    save_sink(5);
static file_descriptor_stack  active_files(20);

// Syntax Error Reporting
/*
 syntax_error() provides an error diagnostic procedure for those
 parsers which are called by the token scanner. error_modifier is set
 by expand() so that an error encountered during a macro expansion
 will be so described.  Otherwise, the diagnostic will not make
 sense.

 Since all other parsers are called from reduction procedures, the
 line and column number of the token they are dealing with is given
 by the context of the token scanner production that is being
 reduced.
*/

void syntax_error(const char *msg)  {
  printf("%s: Line %d, Column %d: %s%s\n",
   input.name, CONTEXT.line, CONTEXT.column, msg, error_modifier);
}

/*
 syntax_error_scanning() provides an error diagnostic procedure for
 the token scanner itself. The locus of the error is given by the
 current line and column number of the token scan, as given in the
 parser control block.
*/

static void syntax_error_scanning(const char *msg)  {
  printf("%s: Line %d, Column %d: %s\n",
    input.name, PCB.line, PCB.column, msg);
}


// Support for Reduction Procedures
/*
 name_token() looks up the name string in the string accumulator,
 identifies it in the token dictionary, checks to see if it is a
 reserved word, and creates a token.
*/

static token name_token(void) {
  token t;
  t.id = NAME;
  t.handle = td << sa.top();
  --sa;
  if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id;
  return t;
}

/*
 op() creates a token for a punctuation character.
*/

static token op(unsigned x) {
  token t;
  t.id = (token_id) x;
  t.handle = token_handles[x];
  return t;
}

/*
 space_op() creates a token for a space character. Note that a space
 could be a tab, vertical tab, or form feed character as well as a
 blank.
*/

static token space_op(unsigned x) {
  token t;
  t.id = (token_id) ' ';
  t.handle = token_handles[x];
  return t;
}

/*
 tkn() creates a token with a specified id for the string on the top
 of the string accumulator
*/

static token tkn(token_id id) {
  token t;
  t.id = id;
  t.handle = td << sa.top();
  --sa;
  return t;
}


// Macro Processing Procedures

/*
 check_defined() looks up the name on the string accumulator to see if
 it is the name of a macro. It then selects a reduction token according
 to the outcome of the test and an input flag.
*/

static void check_defined(int flag) {
  unsigned id = macro_id[td[sa.top()]];
  --sa;
  flag ^= id != 0;
  if (flag) CHANGE_REDUCTION(false_condition);
  else CHANGE_REDUCTION(true_condition);
}

/*
 defined() returns a decimal constant token equal to one or zero
 depending on whether the token named on the string accumulator is or
 is not defined as a macro
*/

static token defined(void) {
  unsigned id = macro_id[td[sa.top()]];
  token t;
  t.id = DECconstant;
  t.handle = id ? one_value : zero_value;
  --sa;
  return t;
}

/*
 expand() expands and outputs a macro. t.handle is the token dictionary
 index of the macro name. n is the number of arguments found.

 Since it is possible that scanner sink is pointing to ta, it is
 necessary to pop the expanded macro from ta before passing it on to
 scanner_sink. Otherwise, we would have effectively ta << ta, a
 situation which causes an infinite loop.
*/

static void expand(token t, unsigned n) {
  error_modifier = " in macro expansion"; // fix error diagnostic
  expand_macro(t,n);                      // Defined in MAS.SYN
  if (size(ta)) {
    array<token> x(ta,size(ta) + 1);
    --ta;
    *scanner_sink << x;
  } else --ta;
  error_modifier = "";
}

/*
 Look up the name string on the string accumulator. Determine whether
 it is a reserved word, or a simple identifier. Then determine
 whether it is the name of a macro.
*/

static token id_macro(void) {
  token t;
  unsigned id;

  t.id = NAME;
  t.handle = td << sa.top();
  --sa;
  if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id;

  if (if_clause && t.handle == defined_value) {
    CHANGE_REDUCTION(defined);
    return t;
  }
  id = macro_id[t.handle];
  if (id == 0) return t;

  if (macro[id].parens) CHANGE_REDUCTION(macro);
  else CHANGE_REDUCTION(simple_macro);
  return t;
}

/*
 Start a macro definition. This procedure defines all but the body of
 the macro.

 nargs is the count of parameters that were found.  flag is set if
 the macro was defined with parentheses.

 The parameter names are on the string accumulator, with the last
 name on the top of the stack, so they must be popped off, identified
 and stored in reverse order.

 The name of the macro is beneath the parameter names on the string
 accumulator.

 Before returning, this procedure saves the current value of
 scanner_sink, increments the level on the token stack and sets
 scanner_sink so that subsequent tokens produced by the token scanner
 will accumulate on the token stack. These tokens comprise the body
 of the macro. When the end of the macro body is encountered, the
 procedure save_macro_body will remove them from the token stack and
 restore the value of scanner_sink.
*/

static int init_macro_def(int nargs, int flag) {
  int k;
  int id = ++n_macros;
  unsigned name;
  unsigned *arg_list = nargs ? new unsigned[nargs] : NULL;

  assert(id < N_MACROS);
  for (k = nargs; k--;) {
    arg_list[k] = td << sa.top();
    --sa;
  }

  macro[id].arg_names = arg_list;
  macro[id].n_args = nargs;

  macro[id].name = name = td << sa.top();
  --sa;

  macro_id[name] = id;

  macro[id].busy_flag = 0;
  macro[id].parens = flag ;

  save_sink << scanner_sink;
  scanner_sink = &++ta;
  return id;
}

/*
 save_macro_body() finishes the definition of a macro by making a
 permanent copy of the token string on the token accumulator. It then
 restores the scanner_sink to the value it had when the macro
 definition was encountered.
*/

static void save_macro_body(int id) {
  macro[id].body = size(ta) ? copy(ta) : NULL;
  --ta;
  save_sink >> scanner_sink;
}

/*
 undefine() deletes the macro definition for the macro whose name is
 on the top of the string accumulator. If there is no macro with the
 given name, undefine simply returns.

 Otherwise, it frees the storage associated with the macro. It then
 fills the resulting hole in the table with the last macro in the
 table. The macro_id table is updated appropriately.
*/

static void undefine(void) {
  unsigned name = td << sa.top();
  int id = macro_id[name];
  --sa;
  if (id == 0) return;
  macro_id[name] = 0;
  if (macro[id].arg_names) delete [] macro[id].arg_names;
  if (macro[id].body) delete [] macro[id].body;
  macro[id] = macro[n_macros--];
  macro_id[macro[id].name] = id;
}


// Include file procedures

/*
 file_name() interprets the file name provided by an #include
 statement. If the file name is enclosed in <> brackets it scans the
 directory list in paths to try to find the file. If it finds it, it
 prefixes the path to the file name.

 If the file name is enclosed in "" quotation marks, file_name()
 simply strips the quotation marks.

 If file_name() succeeds, it returns 1 and provides path-name in the
 string accumulator, otherwise it returns 0 and nothing in the string
 accumulator.

 Note that file name uses a temporary string accumulator, lsa.
*/

static int file_name(char *file) {
  int c;
  int tc;
  string_accumulator lsa(100);       // for temporary storage of name

  while (*file == ' ') file++;
  tc = *file++;
  if (tc == '<') tc = '>';
  else if (tc != '"') return 0;
  while ((c = *file++) != 0 && c != tc) lsa << c;
  if (c != tc) return 0;
  if (tc == '"') {
    int k, n;
    active_files << input;
    n = size(active_files);

    while (n--) {
      FILE *f;
#ifdef _MSC_VER                  //Cope with peculiarity of MSVC++
      char *cp;
      int junk;

      ++sa << ((file_descriptor *)active_files)[n].name;
      k = size(sa);
      cp = (char *)sa;
      while (k-- && cp[k] != '\\' && cp[k] != '/') { sa >> junk;}
#else
      ++sa << active_files[n].name;
      while (size(sa) && sa[0] != '\\' && sa[0] != '/') {
         sa >> k;   // strip off current file name to leave only path
      }
#endif
      sa << lsa;    // append desired file name
      f = fopen(sa.top(),"rt");
      if (f != NULL) {
        fclose(f);
        active_files >> input;
        return 1;
      }
      --sa;
    }
    active_files >> input;
  }
  int k, n;
  n = size(paths);
  for (k = 0; k < n; k++) {
    FILE *f;

#ifdef _MSC_VER                  //Cope with peculiarity of MSVC++
    ++sa << ((char **) paths)[k];
    char c = ((char *)sa)[size(sa)-1];
    if (size(sa) && c != '\\' && c != '/') sa << '/';
#else
    ++sa << paths[k];
    if (size(sa) && sa[0] != '\\' && sa[0] != '/') sa << '/';
#endif
    sa << lsa;
    f = fopen(sa.top(),"rt");
    if (f != NULL) {
      fclose(f);
      return 1;
    }
    --sa;
  }
  return 0;
}

/*
 include_file() is called in response to a #include statement.

 First, it saves the file_descriptor for the current input. Then it
 restores the scanner_sink which was saved prior to accumulating
 macro expanded tokens on the token_accumulator.

 When include_file() is called, the argument of the #include
 statement exists in the form of tokens on the token accumulator.
 These tokens are passed to a token_translator which turns the tokens
 into a string on the string accumulator.

 file_name() is then called to distinguish between "" and <> files.
 In the latter case, file_name() prefixes a directory path to the name.
 The name is then in the string accumulator.

 scan_input() is then called to scan the include file.

 Finally, before returning, the previous file_descriptor is restored.
*/

static void include_file(void) {
  int flag;

  save_sink >> scanner_sink;               // restore scanner_sink

  token_translator tt(&++sa);
  tt << ta;                                // recover string from tokens
  --ta;                                    // discard token string

  array<char> file(sa.top(), size(sa)+1);  // local copy of string
  --sa;

  flag = file_name(file);

  if (!flag) {
    fprintf(stderr, "Bad include file name: %s\n", (char *) file);
    return;
  }
  array<char> path(sa.top(), size(sa) + 1);
  --sa;
  active_files << input;                     // Save current file
  scan_input(path);                          // recursive call to ts()
  active_files >> input;                     // Restore previous file
  return;
}


// Conditional compilation procedures

/*
 init_condition() prepares for evaluation the condition expression in
 #if and #elif statements.

 It protects scanner_sink by pushing it onto the save_sink stack.
 Then it resets the expression evaluatior, condition, and sets
 scanner_sink to point to it.

 Finally it sets the if_clause flag so that defined() will be handled
 properly.
*/

static void init_condition(void) {
  save_sink << scanner_sink;
  scanner_sink = &reset(condition);
  if_clause = 1;
}

/*
 eval_condition() is called to deal with #if and #elif statements. The
 init_condition() procedure has redirected scanner output to the
 expression evaluator, so eval_condition() restores the previous
 scanner destination.

 It then sends an eof token to the expression evaluator, resets
 if_clause and reads the value of the condition. Remember that
 (long) condition returns the value of the expression.
*/

static int eval_condition(void) {
  save_sink >> scanner_sink;
  condition << op(0);                      // eof to exp evaluator
  if_clause = 0;
  return condition != 0L;
}

/*
 In eval_if() and eval_elif() note the use of CHANGE_REDUCTION to
 select the appropriate reduction token depending on the outcome of
 the condition.
*/

static void eval_elif(void) {
  if (eval_condition()) CHANGE_REDUCTION(true_else_condition);
  else CHANGE_REDUCTION(false_else_condition);
}

static void eval_if(void) {
  if (eval_condition()) CHANGE_REDUCTION(true_condition);
  else CHANGE_REDUCTION(false_condition);
}


// Do token scan

/*
 scan_input()
   1) opens the specified file, if possible
   2) calls the parser
   3) closes the input file
*/

void scan_input(char *path) {
  input.file = fopen(path, "rt");
  input.name = path;
  if (input.file == NULL) {
    fprintf(stderr,"Cannot open %s\n", (char *) path);
    return;
  }
  ts();
  fclose(input.file);
}

}                                      // End of Embedded C