Mercurial > ~dholland > hg > ag > index.cgi

{
/*
 AnaGram, a System for Syntax Directed Programming
 C Macro preprocessor and parser

 Copyright (c) 1993, Parsifal Software.
 All Rights Reserved.

 TS.SYN: Token Scanner Module
*/

#include "mpp.h"


// context structure for diagnostics

struct location { unsigned line, column;};

}


// Configuration section

[
  context type = location             // request context tracking
 ~allow macros                        // function defs for red procs
  auto resynch
  line numbers                        // #line statements in output
  error trace                         // build trace on syntax error
 ~test range                          // not necessary
 ~declare pcb
 ~error frame
  subgrammar {
    simple token,
    expanded token,
    initial arg element,
    ws,
    eol,
    macro definition header,
  }
  parser file name = "#.cpp"
]


// Character Set Definitions

any text char   = ~eof - newline - '\\'
ascii           = 1..126
blank           = ' ' + '\t' + '\r' + '\f' + '\v'
digit           = '0-9'
eof             = -1 + 0
hex digit       = '0-9' + 'A-F' + 'a-f'
newline         = '\n'
letter          = 'a-z' + 'A-Z' + '_'
not punctuation = '#' + blank + letter + digit + '\'' + '"' + newline + '\\'
punctuation     = ascii - not punctuation
simple char     = ~eof - ('\'' + '\\' + '\n')
string char     = ~eof - ('"' + '\\' + '\n')


// Grammar, or Start token

input file $                                // Grammar Token
 -> [section | eol]/..., eof                // Alternating sequence

eol
 -> newline, [newline | space]...


// Conditional Compilation Control

section
 -> expanded token...                       =*scanner_sink << op('\n');
 -> control line
 -> conditional block

conditional block
 -> true if section, eol, endif line
 -> true if section, eol, skip else section, eol, endif line
 -> false if section, eol, endif line
 -> false if section, eol, else section, eol, endif line

true if section
 -> true condition
 -> true if section, eol, section
 -> false if section, eol, true else condition

false if section
 -> false condition
 -> false if section, eol, skip section
 -> false if section, eol, false else condition

else section
 -> '#', ws?, "else", ws?
 -> else section, eol, section

endif line
 -> '#', ws?, "endif", ws?

skip section
 -> skip line
 -> skip if section, eol, endif line

skip if section
 -> '#', ws?, {"if" | "ifdef" | "ifndef"}, any text?...
 -> skip if section, eol, skip section
 -> skip if section, eol, skip else line

skip else section
 -> skip else line
 -> skip else section, eol, skip else line
 -> skip else section, eol, skip section

skip else line
 -> '#', ws?, "elif", any text?...
 -> '#', ws?, "else", ws?

skip line
 -> '#', ws?, [{"define" | "undefine" | "include" | "line" |
                    "error" | "pragma"}, any text?...]
 -> not control mark, any text?...

any text
 -> any text char
 -> '\\', ~eof

not control mark
 -> any text char - '#'
 -> '\\', ~eof


// Conditional Control Lines

true condition, false condition
 -> '#', ws?, "ifdef", ws, name string, ws?           =check_defined(1);
 -> '#', ws?, "ifndef", ws, name string, ws?          =check_defined(0);
 -> '#', ws?, if header, expanded token...            =eval_if();

true else condition, false else condition
 -> '#', ws?, else if header, expanded token...       =eval_elif();

if header
 -> "if", ws                                          =init_condition();

else if header
 -> "elif", ws                                        =init_condition();


// Other Control Lines

control line
 -> include header, expanded token...                 =include_file();
 -> '#', ws?, "undef", ws, name string, ws?           =undefine();
 -> '#', ws?, [{"line" | "error" | "pragma"}, any text?...]
 -> macro definition header:id, simple token?...      =save_macro_body(id);

include header
 -> '#', ws?, "include" =save_sink << scanner_sink, scanner_sink = &++ta;


// Macro Definitions

(int) macro definition header
 -> '#', ws?, "define", ws, name string         =init_macro_def(0,0);
 -> '#', ws?, "define", ws, name string,
      '(', ws?, parameter list:n, ')'          =init_macro_def(n,1);

(int) parameter list
 ->                                       =0;
 -> names, ws?

(int) names
 -> name string                           =1;
 -> names:n, ws?, ',', ws?, name string   =n+1;


// Unexpanded text (for macro definitions

simple token
 -> space:c                               =*scanner_sink << space_op(c);
 -> word
 -> separator
 -> '#'                                   =*scanner_sink << op('#');
 -> qualified real
 -> integer constant

word
 -> name string                           =*scanner_sink << name_token();


// Expanded text

expanded token
 -> expanded word
 -> separator
 -> space
 -> qualified real
 -> integer constant

expanded word
 -> variable:t                               =*scanner_sink << t;
 -> simple macro:t                           =expand(t,0);
 -> macro:t, ws?                             =*scanner_sink << t;
 -> macro:t, ws?, '(', ws?, macro arg list:n, ')' =expand(t,n);
 -> defined, ws?, '(', ws?, name string, ws?, ')' =*scanner_sink << defined();
 -> defined, ws, name string                 =*scanner_sink << defined();


// Name classification

(token) variable, simple macro, macro, defined
 -> name string                              =id_macro();


// Macro Arguments

(int) macro arg list
 ->                                          =0;
 -> !save_sink << scanner_sink, scanner_sink = &ta;, macro args:n =
     save_sink >> scanner_sink, n;

(int) macro args
 -> !++ta;, arg elements                              =1;
 -> macro args:n, ',', ws?, !++ta;, arg elements      =n+1;

arg elements
 -> initial arg element
 -> arg elements, arg element

arg element
 -> space:c                              =*scanner_sink << space_op(c);
 -> initial arg element

initial arg element
 -> name string                          =*scanner_sink << name_token();
 -> qualified real
 -> integer constant
 -> string literal                  =*scanner_sink << tkn(STRINGliteral);
 -> character constant              =*scanner_sink << tkn(CHARACTERconstant);
 -> operator
 -> punctuation - '(' - ',' - ')':p      =*scanner_sink << op(p);
 -> nested elements, ')':t               =*scanner_sink << op(t);

nested elements
 -> '(':t                                =*scanner_sink << op(t);
 -> nested elements, arg element
 -> nested elements, ',':t               =*scanner_sink << op(t);


// Basic syntactic elements

separator
 -> string literal        =*scanner_sink << tkn(STRINGliteral);
 -> character constant    =*scanner_sink << tkn(CHARACTERconstant);
 -> operator
 -> punctuation:p         =*scanner_sink << op(p);
 -> '\\', '\n'

(int) space
 -> blank
 -> comment                                  =' ';

ws = space...

comment
 -> comment head, "*/"

comment head
 -> "/*"
 -> comment head, ~eof

comment, comment head
 -> comment head, comment ={if (nest_comments) CHANGE_REDUCTION(comment_head);}

operator
 -> '&', '&'                  =*scanner_sink << op(ANDAND);
 -> '&', '='                  =*scanner_sink << op(ANDassign);
 -> '-', '>'                  =*scanner_sink << op(ARROW);
 -> '#', '#'                  =*scanner_sink << op(CONCAT);
 -> '-', '-'                  =*scanner_sink << op(DECR);
 -> '/', '='                  =*scanner_sink << op(DIVassign);
 -> '.', '.', '.'             =*scanner_sink << op(ELLIPSIS);
 -> '=', '='                  =*scanner_sink << op(EQ);
 -> '^', '='                  =*scanner_sink << op(ERassign);
 -> '>', '='                  =*scanner_sink << op(GE);
 -> '+', '+'                  =*scanner_sink << op(ICR);
 -> '<', '='                  =*scanner_sink << op(LE);
 -> '<', '<'                  =*scanner_sink << op(LS);
 -> '<', '<', '='             =*scanner_sink << op(LSassign);
 -> '%', '='                  =*scanner_sink << op(MODassign);
 -> '-', '='                  =*scanner_sink << op(MINUSassign);
 -> '*', '='                  =*scanner_sink << op(MULTassign);
 -> '!', '='                  =*scanner_sink << op(NE);
 -> '|', '='                  =*scanner_sink << op(ORassign);
 -> '|', '|'                  =*scanner_sink << op(OROR);
 -> '+', '='                  =*scanner_sink << op(PLUSassign);
 -> '>', '>'                  =*scanner_sink << op(RS);
 -> '>', '>', '='             =*scanner_sink << op(RSassign);


// Numeric constants

qualified real
 -> real constant, floating qualifier =*scanner_sink << tkn(FLOATconstant);

real constant
 -> real

floating qualifier
 ->
 -> 'f' + 'F'             =sa << 'F';
 -> 'l' + 'L'             =sa << 'L';

real
 -> simple real
 -> simple real, exponent
 -> confusion, exponent
 -> decimal integer, exponent

simple real
 -> confusion, '.'                         =sa << '.';
 -> octal integer, '.'
 -> decimal integer, '.'                   =sa << '.';
 -> '.', '0-9':d                           =++sa << '.' << d;
 -> simple real, '0-9':d                   =sa << d;

confusion
  -> octal integer, '8-9':d                =sa << d;
  -> confusion, '0-9':d                    =sa << d;

exponent
  -> 'e' + 'E', '-', '0-9':d               =sa << '-' << d;
  -> 'e' + 'E', '+'?, '0-9':d              =sa << '+' << d;
  -> exponent, '0-9':d                     =sa << d;

integer qualifier
  -> 'u' + 'U'                       =sa << 'U';
  -> 'l' + 'L'                       =sa << 'L';

integer constant
 -> octal constant                        =*scanner_sink << tkn(OCTconstant);
 -> decimal constant                      =*scanner_sink << tkn(DECconstant);
 -> hex constant                          =*scanner_sink << tkn(HEXconstant);

octal constant
 -> octal integer
 -> octal constant, integer qualifier

octal integer
  -> '0'                                 =++sa << '0';
  -> octal integer, '0-7':d              =sa << d;

hex constant
 -> hex integer
 -> hex constant, integer qualifier

hex integer
  -> '0', 'x' + 'X', hex digit:d         =++sa << "0X" << d;
  -> hex integer, hex digit:d            =sa << d;

decimal constant
 -> decimal integer
 -> decimal constant, integer qualifier

decimal integer
  -> '1-9':d                               =++sa << d;
  -> decimal integer, '0-9':d              =sa << d;


// String Literals and Character Constants

string literal
  -> string chars, '"'                     =sa << '"';

string chars
 -> '"'                                    =++sa << '"';
 -> string chars, string char:c            =sa << c;
 -> string chars, '\\', ~eof - '\n':c      =sa << '\\' << c;
 -> string chars, '\\', '\n'


// Character constants

character constant
 -> simple chars, '\''                     =sa << '\'';

simple chars
 -> '\''                                   =++sa << '\'';
 -> simple chars, simple char:c            = sa << c;
 -> simple chars, '\\', ~eof - '\n': c     = sa << '\\' << c;
 -> simple chars, '\\', '\n'


// Identifiers

name string
  -> letter:c                              =++sa << c;
  -> name string, letter+digit:c           =sa << c;


{                                      // Embedded C
#include "array.h"                     // \AnaGram\classlib\include\array.h
#include "stack.h"                     // \AnaGram\classlib\include\stack.h
#include <io.h>                        // If not found, not necessary
#include <sys/types.h>                 // If not found, not necessary
#include <sys/stat.h>
#include <fcntl.h>


// Macro Definitions

#define SYNTAX_ERROR syntax_error_scanning(PCB.error_message)
#define GET_CONTEXT (CONTEXT.line = PCB.line, CONTEXT.column = PCB.column)
#define GET_INPUT (PCB.input_code = getc(input.file))
#define PCB input.pcb


// Structure Definition

struct file_descriptor {
  char *name;                          // name of file
  FILE *file;                          // source of input characters
  ts_pcb_type pcb;                     // parser control block for file
};


// Static Data Declarations

static char                  *error_modifier = "";
static file_descriptor        input;
static stack<token_sink *>    save_sink(5);


// Syntax Error Reporting
/*
 syntax_error() provides an error diagnostic procedure for those
 parsers which are called by the token scanner. error_modifier is set
 by expand() so that an error encountered during a macro expansion
 will be so described.  Otherwise, the diagnostic will not make
 sense.

 Since all other parsers are called from reduction procedures, the
 line and column number of the token they are dealing with is given
 by the context of the token scanner production that is being
 reduced.
*/

void syntax_error(char *msg)  {
  printf("%s: Line %d, Column %d: %s%s\n",
   input.name, CONTEXT.line, CONTEXT.column, msg, error_modifier);
}

/*
 syntax_error_scanning() provides an error diagnostic procedure for
 the token scanner itself. The locus of the error is given by the
 current line and column number of the token scan, as given in the
 parser control block.
*/

static void syntax_error_scanning(char *msg)  {
  printf("%s: Line %d, Column %d: %s\n",
    input.name, PCB.line, PCB.column, msg);
}


// Support for Reduction Procedures
/*
 name_token() looks up the name string in the string accumulator,
 identifies it in the token dictionary, checks to see if it is a
 reserved word, and creates a token.
*/

static token name_token(void) {
  token t;
  t.id = NAME;
  t.handle = td << sa;
  --sa;
  if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id;
  return t;
}

/*
 op() creates a token for a punctuation character.
*/

static token op(unsigned x) {
  token t;
  t.id = (token_id) x;
  t.handle = token_handles[x];
  return t;
}

/*
 space_op() creates a token for a space character. Note that a space
 could be a tab, vertical tab, or form feed character as well as a
 blank.
*/

static token space_op(unsigned x) {
  token t;
  t.id = (token_id) ' ';
  t.handle = token_handles[x];
  return t;
}

/*
 tkn() creates a token with a specified id for the string on the top
 of the string accumulator
*/

static token tkn(token_id id) {
  token t;
  t.id = id;
  t.handle = td << sa;
  --sa;
  return t;
}


// Macro Processing Procedures

/*
 check_defined() looks up the name on the string accumulator to see if
 it is the name of a macro. It then selects a reduction token according
 to the outcome of the test and an input flag.
*/

static void check_defined(int flag) {
  unsigned id = macro_id[td[sa]];
  --sa;
  flag ^= id != 0;
  if (flag) CHANGE_REDUCTION(false_condition);
  else CHANGE_REDUCTION(true_condition);
}

/*
 defined() returns a decimal constant token equal to one or zero
 depending on whether the token named on the string accumulator is or
 is not defined as a macro
*/

static token defined(void) {
  unsigned id = macro_id[td[sa]];
  token t;
  t.id = DECconstant;
  t.handle = id ? one_value : zero_value;
  --sa;
  return t;
}

/*
 expand() expands and outputs a macro. t.handle is the token dictionary
 index of the macro name. n is the number of arguments found.

 Since it is possible that scanner sink is pointing to ta, it is
 necessary to pop the expanded macro from ta before passing it on to
 scanner_sink. Otherwise, we would have effectively ta << ta, a
 situation which causes an infinite loop.
*/

static void expand(token t, unsigned n) {
  error_modifier = " in macro expansion"; // fix error diagnostic
  expand_macro(t,n);                      // Defined in MAS.SYN
  if (size(ta)) {
    array<token> x(ta,size(ta) + 1);
    --ta;
    *scanner_sink << x;
  } else --ta;
  error_modifier = "";
}

/*
 Look up the name string on the string accumulator. Determine whether
 it is a reserved word, or a simple identifier. Then determine
 whether it is the name of a macro.
*/

static token id_macro(void) {
  token t;
  unsigned id;

  t.id = NAME;
  t.handle = td << sa;
  --sa;
  if (t.handle <= n_reserved_words) t.id = reserved_words[t.handle].id;

  if (if_clause && t.handle == defined_value) {
    CHANGE_REDUCTION(defined);
    return t;
  }
  id = macro_id[t.handle];
  if (id == 0) return t;

  if (macro[id].parens) CHANGE_REDUCTION(macro);
  else CHANGE_REDUCTION(simple_macro);
  return t;
}

/*
 Start a macro definition. This procedure defines all but the body of
 the macro.

 nargs is the count of parameters that were found.  flag is set if
 the macro was defined with parentheses.

 The parameter names are on the string accumulator, with the last
 name on the top of the stack, so they must be popped off, identified
 and stored in reverse order.

 The name of the macro is beneath the parameter names on the string
 accumulator.

 Before returning, this procedure saves the current value of
 scanner_sink, increments the level on the token stack and sets
 scanner_sink so that subsequent tokens produced by the token scanner
 will accumulate on the token stack. These tokens comprise the body
 of the macro. When the end of the macro body is encountered, the
 procedure save_macro_body will remove them from the token stack and
 restore the value of scanner_sink.
*/

static int init_macro_def(int nargs, int flag) {
  int k;
  int id = ++n_macros;
  unsigned name;
  unsigned *arg_list = nargs ? new unsigned[nargs] : NULL;

  assert(id < N_MACROS);
  for (k = nargs; k--;) {
    arg_list[k] = td << sa;
    --sa;
  }

  macro[id].arg_names = arg_list;
  macro[id].n_args = nargs;

  macro[id].name = name = td << sa;
  --sa;

  macro_id[name] = id;

  macro[id].busy_flag = 0;
  macro[id].parens = flag ;

  save_sink << scanner_sink;
  scanner_sink = &++ta;
  return id;
}

/*
 save_macro_body() finishes the definition of a macro by making a
 permanent copy of the token string on the token accumulator. It then
 restores the scanner_sink to the value it had when the macro
 definition was encountered.
*/

static void save_macro_body(int id) {
  macro[id].body = size(ta) ? copy(ta) : NULL;
  --ta;
  save_sink >> scanner_sink;
}

/*
 undefine() deletes the macro definition for the macro whose name is
 on the top of the string accumulator. If there is no macro with the
 given name, undefine simply returns.

 Otherwise, it frees the storage associated with the macro. It then
 fills the resulting hole in the table with the last macro in the
 table. The macro_id table is updated appropriately.
*/

static void undefine(void) {
  unsigned name = td << sa;
  int id = macro_id[name];
  --sa;
  if (id == 0) return;
  macro_id[name] = 0;
  if (macro[id].arg_names) delete [] macro[id].arg_names;
  if (macro[id].body) delete [] macro[id].body;
  macro[id] = macro[n_macros--];
  macro_id[macro[id].name] = id;
}


// Include file procedures

/*
 file_name() interprets the file name provided by an #include
 statement. If the file name is enclosed in <> brackets it scans the
 directory list in paths to try to find the file. If it finds it, it
 prefixes the path to the file name.

 If the file name is enclosed in "" quotation marks, file_name()
 simply strips the quotation marks.

 If file_name() succeeds, it returns 1 and provides path-name in the
 string accumulator, otherwise it returns 0 and nothing in the string
 accumulator.

 Note that file name uses a temporary string accumulator, lsa.
*/

static int file_name(char *file) {
  int c;
  int tc;
  string_accumulator lsa(100);       // for temporary storage of name

  while (*file == ' ') file++;
  tc = *file++;
  if (tc == '<') tc = '>';
  else if (tc != '"') return 0;
  while ((c = *file++) != 0 && c != tc) lsa << c;
  if (c != tc) return 0;
  if (tc == '>') {
    int k, n;
    n = size(paths);
    for (k = 0; k < n; k++) {
      FILE *f;
      ++sa << paths[k];
      if (sa[0] != '\\' || sa[0] != '/') sa << '/';
      sa << lsa;
      f = fopen(sa,"rt");
      if (f != NULL) {
        fclose(f);
        return 1;
      }
      --sa;
    }
     return 0;
  }
  ++sa << lsa;
  return 1;
}

/*
 include_file() is called in response to a #include statement.

 First, it saves the file_descriptor for the current input. Then it
 restores the scanner_sink which was saved prior to accumulating
 macro expanded tokens on the token_accumulator.

 When include_file() is called, the argument of the #include
 statement exists in the form of tokens on the token accumulator.
 These tokens are passed to a token_translator which turns the tokens
 into a string on the string accumulator.

 file_name() is then called to distinguish between "" and <> files.
 In the latter case, file_name() prefixes a directory path to the name.
 The name is then in the string accumulator.

 scan_input() is then called to scan the include file.

 Finally, before returning, the previous file_descriptor is restored.
*/

static void include_file(void) {
  file_descriptor save_input = input;      // save input state
  int flag;

  save_sink >> scanner_sink;               // restore scanner_sink

  token_translator tt(&++sa);
  tt << ta;                                // recover string from tokens
  --ta;                                    // discard token string

  array<char> file(sa, size(sa)+1);  // local copy of string
  --sa;

  flag = file_name(file);

  if (!flag) {
    fprintf(stderr, "Bad include file name: %s\n", (char *) file);
    return;
  }
  array<char> path(sa, size(sa) + 1);
  --sa;
  scan_input(path);                        // recursive call to ts()
  input = save_input;                      // restore input state
  return;
}


// Conditional compilation procedures

/*
 init_condition() prepares for evaluation the condition expression in
 #if and #elif statements.

 It protects scanner_sink by pushing it onto the save_sink stack.
 Then it resets the expression evaluatior, condition, and sets
 scanner_sink to point to it.

 Finally it sets the if_clause flag so that defined() will be handled
 properly.
*/

static void init_condition(void) {
  save_sink << scanner_sink;
  scanner_sink = &reset(condition);
  if_clause = 1;
}

/*
 eval_condition() is called to deal with #if and #elif statements. The
 init_condition() procedure has redirected scanner output to the
 expression evaluator, so eval_condition() restores the previous
 scanner destination.

 It then sends an eof token to the expression evaluator, resets
 if_clause and reads the value of the condition. Remember that
 (long) condition returns the value of the expression.
*/

static int eval_condition(void) {
  save_sink >> scanner_sink;
  condition << op(0);                      // eof to exp evaluator
  if_clause = 0;
  return condition != 0L;
}

/*
 In eval_if() and eval_elif() note the use of CHANGE_REDUCTION to
 select the appropriate reduction token depending on the outcome of
 the condition.
*/

static void eval_elif(void) {
  if (eval_condition()) CHANGE_REDUCTION(true_else_condition);
  else CHANGE_REDUCTION(false_else_condition);
}

static void eval_if(void) {
  if (eval_condition()) CHANGE_REDUCTION(true_condition);
  else CHANGE_REDUCTION(false_condition);
}


// Do token scan

/*
 scan_input()
   1) opens the specified file, if possible
   2) calls the parser
   3) closes the input file
*/

void scan_input(char *path) {
  input.file = fopen(path, "rt");
  input.name = path;
  if (input.file == NULL) {
    fprintf(stderr,"Cannot open %s\n", (char *) path);
    return;
  }
  ts();
  fclose(input.file);
}

}                                      // End of Embedded C
author	David A. Holland
date	Mon, 30 May 2022 23:51:43 -0400
parents	13d2b8934445
children