view tests/agcl/oldagsrc/mas.syn @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

{
/*
 AnaGram, a System for Syntax Directed Programming
 C Macro preprocessor

 Copyright (c) 1993, Parsifal Software.
 All Rights Reserved.

 Macro argument substitution module
*/

#include "mpp.h"

}


// Configuration Section

// Parser configuration. A switch written with a leading '~' is turned off.
// Input is read through a token pointer ("pointer input",
// "pointer type = token *") and every input item is a token
// ("default input type = token"). "~declare pcb" leaves the parser control
// block to the embedded C, which defines PCB as (*mas_pcb).
[
 ~allow macros
  line numbers
  pointer input
  pointer type = token *
  input values
 ~error frame
  error trace
 ~lines and columns
 ~backtrack
 ~test range
  default input type = token
 ~declare pcb
  subgrammar {
    parse unit
  }
  enum {
    // Token codes. eof is 0 and SPACE is the blank character itself.
    eof               =0,
    SPACE             =' ',
    // Multi-character operators, character constants and string literals
    // are numbered consecutively starting at 'A'.
    ANDAND            ='A', // "&&"
    ANDassign,              // "&="
    ARROW,                  // "->"
    CONCAT,                 // "##"
    DECR,                   // "--"
    DIVassign,              // "/="
    ELLIPSIS,               // "..."
    EQ,                     // "=="
    ERassign,               // "^="
    GE,                     // ">="
    ICR,                    // "++"
    LE,                     // "<="
    LS,                     // "<<"
    LSassign,               // "<<="
    MODassign,              // "%="
    MINUSassign,            // "-="
    MULTassign,             // "*="
    NE,                     // "!="
    ORassign,               // "|="
    OROR,                   // "||"
    PLUSassign,             // "+="
    RS,                     // ">>"
    RSassign,               // ">>="
    CHARACTERconstant,      // character constant
    STRINGliteral,          // character string
    // Numeric constants, NAME, the keywords and UNRECOGNIZED are numbered
    // consecutively starting at 129.
    HEXconstant       =129,
    OCTconstant,
    DECconstant,
    FLOATconstant,          // real
    NAME,
    AUTO,                   // "auto"
    BREAK,                  // "break"
    CASE,                   // "case"
    CHAR,                   // "char"
    CONSTANT,               // "const"
    CONTINUE,               // "continue"
    DEFAULT,                // "default"
    DO,                     // "do"
    DOUBLE,                 // "double"
    ELSE,                   // "else"
    ENUM,                   // "enum"
    EXTERN,                 // "extern"
    FLOAT,                  // "float"
    FOR,                    // "for"
    GOTO,                   // "goto"
    IF,                     // "if"
    INT,                    // "int"
    LONG,                   // "long"
    REGISTER,               // "register"
    RETURN,                 // "return"
    SHORT,                  // "short"
    SIGNED,                 // "signed"
    SIZEOF,                 // "sizeof"
    STATIC,                 // "static"
    STRUCT,                 // "struct"
    SWITCH,                 // "switch"
    TYPEDEF,                // "typedef"
    UNION,                  // "union"
    UNSIGNED,               // "unsigned"
    VOIDkey,                // "void"
    VOLATILE,               // "volatile"
    WHILE,                  // "while"
    UNRECOGNIZED,
  }
  // NOTE(review): '#' presumably stands for the base name of this syntax
  // file, giving a C++ parser file -- confirm against the AnaGram manual.
  parser file name = "#.cpp"
]

// Goal of the parse: optional leading space, then zero or more parse
// units, then the eof token that terminates every token string.
grammar
 ->space, parse unit?..., eof


// Accumulate optional space
//
// The empty production resets space_stack. Each following ' ' token is
// appended to space_stack only while args_only is set; otherwise it is
// matched but not recorded.

space
 ->                   =reset(space_stack);
 -> space, ' ':s      ={if (args_only) space_stack << s;}


// Basic parse units
//
// Each unit writes its output to the token accumulator ta and then
// appends whatever trailing space was collected in space_stack. The
// last "parse unit" rule copies a macro name unchanged when it is not
// followed by a parenthesized argument list.

parse unit
 -> parameter expansion
 -> simple parse unit, space                =ta << space_stack;
 -> concatenation, space                    =ta << space_stack;
 -> macro:t, space                          =ta << t << space_stack;

// A unit that may also appear as the left operand of "##". In order:
//   - any token other than eof, NAME, CONCAT, '#' or ' ' is copied;
//   - '#' followed by a parameter name is turned into a string literal;
//   - a NAME classified as "variable" is copied;
//   - a simple macro is expanded with no arguments;
//   - a macro followed by an argument list is expanded with n arguments;
//   - "defined" followed by a macro name yields the token built by
//     defined().
simple parse unit
 -> ~eof - NAME - CONCAT - '#'- ' ':t       =ta << t;
 -> '#', parameter name:n                   =ta << make_string(n.handle);
 -> variable:t                              =ta << t;
 -> simple macro:t                          =expand_macro(t,0), concat(ta);
 -> macro:t, space, '(', macro arg list:n, ')' =expand_macro(t,n), concat(ta);
 -> defined, macro name:n                   =ta << defined(n.handle);

// Operand of "defined": a NAME, optionally enclosed in parentheses, with
// space allowed around it. The value is the NAME token itself.
(token) macro name
 -> space, NAME:n                             =n;
 -> space, '(', space, NAME:n, space, ')'     =n;

// Every NAME is classified by id_macro(), which selects one of the five
// tokens on the left by calling CHANGE_REDUCTION.
// NOTE(review): when id_macro() makes no CHANGE_REDUCTION call the NAME
// presumably reduces to the first token listed, "variable" -- confirm.
(token) variable, parameter name, simple macro, macro, defined
 -> NAME:n                                  =id_macro(n);

// A macro parameter outside "#" and "##": substitute the fully expanded
// argument via expand_arg().
parameter expansion
 -> parameter name:name, space    =expand_arg(name.handle), ta << space_stack;


// Implementation of "##" operator
//
// "left side" writes the left operand to ta and then executes ++ta, so
// the right operand is collected separately; concatenate() then splices
// the two together. A parameter on either side of "##" is taken from
// args[] directly, without the expand_arg() call used by "parameter
// expansion".

concatenation
 -> left side, space,
      parameter name:name             =ta << args[name.handle], concatenate();
 -> left side, space, right side      =concatenate();

// Left operand followed by "##". A concatenation may itself be a left
// operand, so chains such as a ## b ## c are accepted.
left side
 -> parameter name:n, space, CONCAT         =ta << args[n.handle], ++ta;
 -> simple parse unit, space, CONCAT        =++ta;
 -> macro:t, space, CONCAT                  =ta << t, ++ta;
 -> concatenation, space, CONCAT            =++ta;

// Right operand that is not a parameter name. Unlike "simple parse
// unit", macros found here are copied, not expanded.
right side
 -> ~eof - NAME - CONCAT - '#'- ' ':t       =ta << t;
 -> '#', parameter name:n                   =ta << make_string(n.handle);
 -> not parameter:t                         =ta << t;

// Any NAME classification other than "parameter name".
(token) not parameter
 -> variable
 -> simple macro
 -> macro
 -> defined


// Gather Macro Arguments
//
// The value of "macro arg list" is the number of arguments found: 0 for
// an empty list, 1 for the first argument, plus one for each further
// comma-separated argument.

(int) macro arg list
 -> space                                   = 0;
 -> space, arg elements                     = 1;
 -> macro arg list:n, ',', space, arg elements =n+1;

// First token of an argument. ++ta is executed before the token is
// written, so each argument is collected on its own. An argument that
// begins with '(' relies on the ++ta done by "nested elements" instead.
initial arg element
 -> ~eof - ',' - '(' - ')' - SPACE:t        =++ta << t;
 -> nested elements, ')':t                  =ta << t;

// Any later token of an argument. A parenthesized group was collected
// separately by "nested elements"; concat(ta) is applied before the
// closing ')' is appended.
// NOTE(review): concat() presumably merges the group back into the
// argument being collected -- confirm against its definition.
arg element
 -> ~eof - ',' - '(' - ')':t                =ta << t;
 -> nested elements, ')':t                  =concat(ta) << t;

arg elements
 -> initial arg element
 -> arg elements, arg element

// Contents of a parenthesized group inside an argument, starting with
// its '('. Commas inside the group are ordinary tokens and do not
// separate arguments.
nested elements
 -> '(':t                                   =++ta << t;
 -> nested elements, arg element
 -> nested elements, ',':t                  =ta << t;


{                                      // Embedded C
#include "array.h"                     // AnaGram\CLASSLIB\INCLUDE\array.h
#include "stack.h"                     // AnaGram\CLASSLIB\INCLUDE\stack.h


// Macro Definitions

// Input code of a token: its id field.
#define INPUT_CODE(T) (T).id
// The parser control block in use, reached through the mas_pcb pointer
// (the configuration section specifies "~declare pcb").
#define PCB (*mas_pcb)
// Reports a syntax error through syntax_error() with the message held in
// the control block. Note that the expansion already ends in ';'.
#define SYNTAX_ERROR syntax_error(PCB.error_message);


// Static variables
//
// active_macros  ids pushed by id_macro() for every macro it selects;
//                expand_text() uses them to set and clear busy_flag.
// args           argument token strings of the expansion in progress.
// args_only      nonzero while expand_text() is expanding arguments;
//                id_macro() then recognizes parameter names only.
// mas_pcb        control block of the parse in progress (see PCB).
// n_concats      incremented by concatenate() when a spliced token is the
//                name of a macro; nonzero makes expand_text() rescan.
// n_args         number of entries in args and params.
// params         dictionary indices of the parameter names.
// space_stack    space tokens collected by the "space" production.

static stack<unsigned>    active_macros(200,20);
static token            **args;
static int                args_only = 0;
static mas_pcb_type      *mas_pcb;
static int                n_concats = 0;
static int                n_args;
static unsigned          *params;
static token_accumulator  space_stack(100);


/*

expand_text() is a shell procedure which calls the mas parser a
number of times. It is used to expand arguments before substituting
them into a macro, and to expand the body of a macro. Notice that
expand_text() is recursive, since macros encountered during an
expansion process may themselves need to be expanded.

expand_text() takes three explicit arguments:
  token *text:
    points to a string of tokens, terminated by an eof token.

  int n:
    specifies the number of arguments. Defaults to 0. The arguments
    themselves are token strings on the token accumulator stack.
    expand_text() makes copies of them and stores pointers to them in
    the args array.

  unsigned *p:
    An array of n dictionary indices which gives the names of the
    parameters for which the arguments are to be substituted. p
    defaults to NULL.

global switches
  Two global switches affect the expansion of text: if_clause and
  args_only.  Setting if_clause affects the treatment of the token
  "defined". Setting args_only causes only macro parameters to be
  expanded.

*/

void expand_text(token *text, int n, unsigned *p) {
  mas_pcb_type pcb;                  // control block for the parses run here

// Save old status, so that a nested call leaves the caller's globals intact
  mas_pcb_type *save_pcb = mas_pcb;
  int save_n_args = n_args;
  token **save_args = args;
  unsigned *save_params = params;
  int save_switch = args_only;

// Pop args from accumulator stack and expand them.
// k counts down from n-1, so the argument taken first from the stack is
// stored in the last slot of new_args. While the arguments are parsed,
// args, n_args and params still hold the caller's values (they are
// replaced only further down) and args_only is 1.
  args_only = 0;
  token **new_args;
  int k = n;
  if (n) {
    new_args = new token*[n];
    args_only = 1;
    while (k--) {
      token t;
      while (ta[0].id == SPACE) ta >> t;           //trim space on right
      array<token> arg_tokens(ta, size(ta) + 1);   //private copy to parse from
      token *tp = arg_tokens;
      while (tp->id == SPACE) tp++;                //trim space on left
      --ta;                                        //drop the raw argument
      mas_pcb = &pcb;
      pcb.pointer = tp;
      ++ta;                                        //collect the parse output
      mas();
      new_args[k] = copy(ta);                      //freed below with delete []
      --ta;
    }
    args_only = 0;
  }
  else new_args = NULL;

// Expand text, with the new arguments and parameter names installed
  args = new_args;
  n_args = n;
  params = p;
  pcb.pointer = text;
  mas_pcb = &pcb;
  ++ta;
  ++active_macros;
  n_concats = 0;
  mas();

// If any new tokens were created by concatenation, rescan.
// The previous output is copied and parsed again as input. Every macro
// recorded in active_macros is flagged busy first, so id_macro() leaves
// those names alone during the rescan. From here on n is only a scratch
// counter; the argument count is kept in n_args.
  while (n_concats) {
    array<token> expansion(ta,size(ta) + 1);
    --ta;
    pcb.pointer = expansion;
    ++ta;
    n_concats = 0;
    n = size(active_macros);
    while (n--) macro[active_macros[n]].busy_flag = 1;
    mas();
  }
// Clear the busy flags again and drop the macros recorded by this call
  n = size(active_macros);
  while (n--) macro[active_macros[n]].busy_flag = 0;
  --active_macros;

// Discard argument strings

  n = n_args;
  while (n--) delete [] args[n];
  if (n_args) delete [] args;

// Restore old status

  args_only = save_switch;
  args = save_args;
  n_args = save_n_args;
  params = save_params;
  mas_pcb = save_pcb;
}

/*

expand_macro() expands one macro invocation. t is the NAME token of the
macro and n_args the number of arguments the parser gathered for it;
an assert checks that this count agrees with the macro's definition.

A macro that has a body is passed to expand_text() together with its
parameter names. A macro without a body has nothing to expand: --ta is
applied once per argument, followed by a single ++ta.

*/

void expand_macro(token t, unsigned n_args) {
  unsigned id = macro_id[t.handle];
  token *body = macro[id].body;
  assert(n_args == macro[id].n_args);

  if (body != NULL) {
    expand_text(body, n_args, macro[id].arg_names);
    return;
  }

  // No body: discard the gathered arguments
  for (unsigned remaining = n_args; remaining != 0; remaining--) --ta;
  ++ta;
}

/*

expand_arg() is another shell procedure for expand_text() which does
a complete expansion of a single macro argument.

*/

static void expand_arg(unsigned n) {
  expand_text(args[n]);
  concat(ta);
}

/*

id_macro() classifies a NAME token and tells the parser, by means of
CHANGE_REDUCTION, which grammar token the NAME stands for. It is very
nearly the same as id_macro() in TS.SYN, except that it works on tokens
where the other works on character strings.

The checks are made in this order:
  1. a parameter of the macro being expanded: reduce to parameter_name
     and replace the handle by the parameter's index;
  2. args_only set: nothing else is recognized;
  3. "defined" while if_clause is set: reduce to defined;
  4. a macro that is not flagged busy: record it in active_macros and
     reduce to macro if it takes parentheses, otherwise to simple_macro.
In every other case the token is returned unchanged and no
CHANGE_REDUCTION call is made.

*/

static token id_macro(token t) {
  // Parameters take precedence; search from the last one down
  for (unsigned k = n_args; k-- > 0; ) {
    if (t.handle != params[k]) continue;
    CHANGE_REDUCTION(parameter_name);
    t.handle = k;
    return t;
  }

  if (args_only) return t;

  if (if_clause && t.handle == defined_value) {
    CHANGE_REDUCTION(defined);
    return t;
  }

  unsigned id = macro_id[t.handle];
  if (id == 0 || macro[id].busy_flag) return t;

  active_macros << id;
  if (macro[id].parens) {
    CHANGE_REDUCTION(macro);
  }
  else {
    CHANGE_REDUCTION(simple_macro);
  }
  return t;
}

/*

defined() evaluates the "defined" operator for the name whose dictionary
index is handle. The result is a DECconstant token whose handle is
one_value if macro_id[] has a nonzero entry for the name, and zero_value
otherwise. It is very nearly the same as defined() in TS.SYN, except
that it works on tokens where the other works on character strings.

*/

static token defined(unsigned handle) {
  token result;
  result.id = DECconstant;
  if (macro_id[handle]) result.handle = one_value;
  else result.handle = zero_value;
  return result;
}

/*

concatenate() implements the splicing together of two tokens by the
"##" operator in a macro definition. Because of the way the grammar
has been written, spaces have already been trimmed on both sides of the
## by the parser.

If there are actually two tokens to concatenate, the last token on
the left is popped off, its string value is obtained from the token
dictionary and pushed onto the string accumulator, ditto for the
first token on the right. The string is then identified and the token
is classified. If the new token is the name of a macro, a new scan
will be required to expand it.

If the left side is empty, or the right side consists of nothing but
the terminating token, no splice takes place and the right side is
simply appended to the left.

*/

static void concatenate(void) {
  array<token> right_arg(ta, size(ta) + 1);   // private copy of right side
  token t;
  token *tp = right_arg;

  --ta;                // discard right argument from stack

  if (size(ta) && tp->id != END_OF_FILE) {
    ta >> t;                               // pop left token
    ++sa << td[t.handle] << td[tp->handle];      // left string + right string
    t.handle = td << sa;                      // identify string
    t.id  = classify_token(sa);            // classify token
    --sa;                                  // discard string
    ++tp;                                  // discard old token on right
    if (macro_id[t.handle]) n_concats++;      // if macro, signal rescan
    ta << t;                               // output new token
  }
  ta << tp;                                // remainder of right side
}

/*

make_string() implements the '#' operator in macro expansions: it turns
argument number n into a string constant. The spellings of the
argument's tokens are taken from the token dictionary and strung
together between a pair of " marks, with a \ placed in front of every
" or \ character. The result is entered in the token dictionary and
returned as a STRINGliteral token.

NOTE(review): ISO C adds the \ only inside string literals and character
constants; here it is added in every token. Confirm that this is
intended.

*/

static token make_string(unsigned n) {
  ++sa << '"';
  for (token *arg = args[n]; arg->id != END_OF_FILE; arg++) {
    for (char *text = td[arg->handle]; *text != 0; text++) {
      char ch = *text;
      if (ch == '\\' || ch == '"') sa << '\\';
      sa << ch;
    }
  }
  sa << '"';

  token result;
  result.id = STRINGliteral;
  result.handle = td << sa;
  --sa;
  return result;
}

}                                      // End of Embedded C