view examples/mpp/mas.syn @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line source

{
/*
 * AnaGram, a System for Syntax Directed Programming
 * C Macro preprocessor
 * Macro argument substitution module
 *
 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

#include "mpp.h"

}


// Configuration Section

// Parser generator settings and token codes for the mas parser.
// NOTE(review): a leading '~' is understood to switch an AnaGram
// configuration setting off -- confirm against the AnaGram manual.
[
  //far tables                              // uncomment for 16 bit environment
 ~allow macros
  line numbers
  //escape backslashes                      // uncomment if using MSVC++
  // Input is a string of token structures: expand_text() points
  // pcb.pointer at a token string before each call to mas().
  pointer input
  pointer type = token *
  input values
 ~error frame
  error trace
 ~lines and columns
 ~backtrack
 ~test range
  default input type = token
  // The embedded C below defines PCB itself, as (*mas_pcb).
 ~declare pcb
  parser file name = "#.cpp"
  sticky { space }
  // Token identification codes, compared against the id field of a token
  // (see INPUT_CODE in the embedded C). The grammar refers to
  // single-character tokens by their character code, e.g. '#' and '('.
  enum {
    eof               =0,
    SPACE             =' ',
    // Multi-character operators, numbered consecutively from 'A'
    ANDAND            ='A', // "&&"
    ANDassign,              // "&="
    ARROW,                  // "->"
    CONCAT,                 // "##"
    DECR,                   // "--"
    DIVassign,              // "/="
    ELLIPSIS,               // "..."
    EQ,                     // "=="
    ERassign,               // "^="
    GE,                     // ">="
    ICR,                    // "++"
    LE,                     // "<="
    LS,                     // "<<"
    LSassign,               // "<<="
    MODassign,              // "%="
    MINUSassign,            // "-="
    MULTassign,             // "*="
    NE,                     // "!="
    ORassign,               // "|="
    OROR,                   // "||"
    PLUSassign,             // "+="
    RS,                     // ">>"
    RSassign,               // ">>="
    CHARACTERconstant,      // character constant
    STRINGliteral,          // character string
    // Constants, names and keywords, numbered consecutively from 129
    HEXconstant       =129,
    OCTconstant,
    DECconstant,
    FLOATconstant,          // real
    NAME,
    AUTO,                   // "auto"
    BREAK,                  // "break"
    CASE,                   // "case"
    CHAR,                   // "char"
    CONSTANT,               // "const"
    CONTINUE,               // "continue"
    DEFAULT,                // "default"
    DO,                     // "do"
    DOUBLE,                 // "double"
    ELSE,                   // "else"
    ENUM,                   // "enum"
    EXTERN,                 // "extern"
    FLOAT,                  // "float"
    FOR,                    // "for"
    GOTO,                   // "goto"
    IF,                     // "if"
    INT,                    // "int"
    LONG,                   // "long"
    REGISTER,               // "register"
    RETURN,                 // "return"
    SHORT,                  // "short"
    SIGNED,                 // "signed"
    SIZEOF,                 // "sizeof"
    STATIC,                 // "static"
    STRUCT,                 // "struct"
    SWITCH,                 // "switch"
    TYPEDEF,                // "typedef"
    UNION,                  // "union"
    UNSIGNED,               // "unsigned"
    VOIDkey,                // "void"
    VOLATILE,               // "volatile"
    WHILE,                  // "while"
    UNRECOGNIZED,
  }
]

// Top-level rule: leading space, then a run of parse units, ending at
// the eof token that terminates every input token string.
grammar
 ->space, parse unit?..., eof


// Accumulate optional space
//
// The empty alternative clears space_stack. Each following ' ' token is
// appended to space_stack, but only while args_only is set; otherwise
// the space is dropped. Rules that end in "space" copy the accumulated
// spaces to the output with ta << space_stack.

space
 ->                   =reset(space_stack);
 -> space, ' ':s      ={if (args_only) space_stack << s;}


// Basic parse units
//
// Every alternative appends its output to the token accumulator ta,
// followed by whatever spaces were collected after the unit.

parse unit
 -> parameter expansion
 -> simple parse unit, space                =ta << space_stack;
 -> concatenation, space                    =ta << space_stack;
 -> macro:t, space                          =ta << t << space_stack;

// A simple parse unit is, in order of the alternatives below:
//   - any token other than eof, NAME, CONCAT, '#' and space, copied as is
//   - '#' applied to a parameter, turned into a string by make_string()
//   - a NAME left unclassified by id_macro(), copied as is
//   - a macro without an argument list, expanded with zero arguments
//   - a macro with a parenthesized argument list, expanded with n arguments
//   - "defined" followed by a name, replaced by the result of defined()
// NOTE(review): concat(ta) is defined outside this file; it is presumed to
// merge the expansion string just produced into the string beneath it.
simple parse unit
 -> ~eof - NAME - CONCAT - '#'- ' ':t       =ta << t;
 -> '#', parameter name:n                   =ta << make_string(n.handle);
 -> variable:t                              =ta << t;
 -> simple macro:t                          =expand_macro(t,0), concat(ta);
 -> macro:t, space, '(', macro arg list:n, ')' =expand_macro(t,n), concat(ta);
 -> defined, macro name:n                   =ta << defined(n.handle);

// Operand of "defined": a NAME, with or without surrounding parentheses.
(token) macro name
 -> space, NAME:n                             =n;
 -> space, '(', space, NAME:n, space, ')'     =n;

// All five tokens share a single production. id_macro() inspects the NAME
// and uses CHANGE_REDUCTION to pick parameter name, defined, macro or
// simple macro. NOTE(review): when id_macro() makes no change the
// reduction is presumably to the first token listed, variable -- confirm.
(token) variable, parameter name, simple macro, macro, defined
 -> NAME:n                                  =id_macro(n);

// A parameter that is not an operand of '#' or "##" is replaced by the
// expansion of the corresponding argument (see expand_arg).
parameter expansion
 -> parameter name:name, space    =expand_arg(name.handle), ta << space_stack;


// Implementation of "##" operator
//
// The left operand is written to ta first; the "left side" rules then do
// ++ta so that the right operand is collected separately, and
// concatenate() splices the two together.
// NOTE(review): ++ta is presumed to start a new token string on the
// accumulator stack; the accumulator class is defined outside this file.

concatenation
 -> left side, space,
      parameter name:name             =ta << args[name.handle], concatenate();
 -> left side, space, right side      =concatenate();

// A parameter used as an operand contributes its stored argument string
// args[handle] directly; expand_arg() is not called for it.
left side
 -> parameter name:n, space, CONCAT         =ta << args[n.handle], ++ta;
 -> simple parse unit, space, CONCAT        =++ta;
 -> macro:t, space, CONCAT                  =ta << t, ++ta;
 -> concatenation, space, CONCAT            =++ta;

// Right operand other than a parameter. Names that id_macro() classifies
// as variable, simple macro, macro or defined are copied unexpanded.
right side
 -> ~eof - NAME - CONCAT - '#'- ' ':t       =ta << t;
 -> '#', parameter name:n                   =ta << make_string(n.handle);
 -> not parameter:t                         =ta << t;

(token) not parameter
 -> variable
 -> simple macro
 -> macro
 -> defined


// Gather Macro Arguments
//
// Each argument is collected as its own token string on ta. The value of
// "macro arg list" is the number of arguments gathered: 0 for an empty
// list, 1 for the first argument, and one more for every comma after it.

(unsigned) macro arg list
 -> space                                   = 0;
 -> space, arg elements                     = 1;
 -> macro arg list:n, ',', space, arg elements =n+1;

// First token of an argument. It may not be a space, so an argument never
// starts with one. ++ta precedes the first token; for a parenthesized
// group that was already done by the '(' rule of nested elements.
initial arg element
 -> ~eof - ',' - '(' - ')' - SPACE:t        =++ta << t;
 -> nested elements, ')':t                  =ta << t;

// Later tokens of an argument; spaces are allowed here. A parenthesized
// group was collected separately, so concat(ta) is applied before the
// closing ')' is appended.
arg element
 -> ~eof - ',' - '(' - ')':t                =ta << t;
 -> nested elements, ')':t                  =concat(ta) << t;

arg elements
 -> initial arg element
 -> arg elements, arg element

// Contents of a parenthesized group inside an argument. Commas inside
// the parentheses belong to the argument and do not separate arguments.
nested elements
 -> '(':t                                   =++ta << t;
 -> nested elements, arg element
 -> nested elements, ',':t                  =ta << t;


{                                      // Embedded C
#include "array.h"                     // AnaGram\CLASSLIB\INCLUDE\array.h
#include "stack.h"                     // AnaGram\CLASSLIB\INCLUDE\stack.h


// Macro Definitions

// Code of an input token: its id field.
#define INPUT_CODE(T) (T).id
// The parser control block in use is reached through mas_pcb, which
// expand_text() points at its own local control block.
#define PCB (*mas_pcb)
// Passes the parser's error message to syntax_error(), declared elsewhere.
// NOTE(review): the expansion already ends in a semicolon.
#define SYNTAX_ERROR syntax_error(PCB.error_message);


// Static variables

typedef stack<unsigned>   unsigned_stack;      // accommodate broken compilers

// Ids of macros recognized by id_macro(); expand_text() uses the entries
// to set and clear the busy_flag of those macros.
static unsigned_stack     active_macros(200,20);
// Argument token strings of the text being expanded, indexed by parameter
// number; allocated and released by expand_text().
static token            **args;
// Nonzero while expand_text() is pre-expanding macro arguments: id_macro()
// then recognizes parameters only, and the space rule keeps spaces.
static int                args_only = 0;
// Control block of the parse in progress (see PCB).
static mas_pcb_type      *mas_pcb;
// Incremented by concatenate() when "##" produces the name of a macro;
// nonzero makes expand_text() rescan its output.
static int                n_concats = 0;
// Number of entries in args and params.
static int                n_args;
// Dictionary indices of the parameter names, matched against token
// handles by id_macro().
static unsigned          *params;
// Spaces collected by the "space" grammar rule.
static token_accumulator  space_stack(100);


/*

expand_text() is a shell procedure which calls the mas parser a
number of times. It is used to expand arguments before substituting
them into a macro, and to expand the body of a macro. Notice that
expand_text() is recursive, since macros encountered during an
expansion process may themselves need to be expanded.

expand_text() takes three explicit arguments:
  token *text:
    points to a string of tokens, terminated by an eof token.

  int n:
    specifies the number of arguments. Defaults to 0. The arguments
    themselves are token strings on the token accumulator stack.
    expand_text() makes copies of them and stores pointers to them in
    the args array.

  unsigned *p:
    An array of n dictionary indices which gives the names of the
    parameters for which the arguments are to be substituted. p
    defaults to NULL.

global switches
  Two global switches affect the expansion of text: if_clause and
  args_only.  Setting if_clause affects the treatment of the token
  "defined". Setting args_only causes only macro parameters to be
  expanded.

result
  The n argument strings are removed from the token accumulator stack
  and the expansion of text is left there as a new string. All file
  static state, including n_concats, is put back the way it was found,
  so that a nested call cannot cancel a rescan its caller still needs.

*/

void expand_text(token *text, int n, unsigned *p) {
  mas_pcb_type pcb;

// Save old status
  mas_pcb_type *save_pcb = mas_pcb;
  int save_n_args = n_args;
  token **save_args = args;
  unsigned *save_params = params;
  int save_switch = args_only;
  int save_n_concats = n_concats;

// pop args from accumulator stack and expand them
  args_only = 0;
  token **new_args;
  int k = n;
  if (n) {
    new_args = new token*[n];
    args_only = 1;
    while (k--) {
      token t;
      // Trim space on right. Tokens are popped one at a time and the
      // first one that is not a space is put back. The previous code
      // tested a copy of a token that was never refreshed, so it could
      // not trim correctly and could not stop once started.
      while (size(ta)) {
        ta >> t;
        if (t.id != SPACE) {
          ta << t;
          break;
        }
      }
      array<token> arg_tokens(ta, size(ta) + 1);
      token *tp = arg_tokens;
      while (tp->id == SPACE) tp++;              //trim space on left
      --ta;                                      //discard raw argument
      mas_pcb = &pcb;
      pcb.pointer = tp;
      ++ta;                                      //new string for the result
      mas();
      new_args[k] = copy(ta);
      --ta;
    }
    args_only = 0;
  }
  else new_args = NULL;

// Expand text
  args = new_args;
  n_args = n;
  params = p;
  pcb.pointer = text;
  mas_pcb = &pcb;
  ++ta;
  ++active_macros;
  n_concats = 0;
  mas();

// If any new tokens were created by concatenation, rescan
  while (n_concats) {
    array<token> expansion(ta,size(ta) + 1);
    --ta;
    pcb.pointer = expansion;
    ++ta;
    n_concats = 0;
    n = size(active_macros);

    // Macros already recognized in this expansion are marked busy, and
    // id_macro() leaves busy macros unexpanded during the rescan.
#ifdef _MSC_VER                  //Cope with peculiarity of MSVC++
    while (n--) macro[*((unsigned *)active_macros + n)].busy_flag = 1;
#else
    while (n--) macro[active_macros[n]].busy_flag = 1;
#endif
    mas();
  }
  n = size(active_macros);
#ifdef _MSC_VER                  //Cope with peculiarity of MSVC++
  while (n--) macro[*((unsigned *)active_macros + n)].busy_flag = 0;
#else
  while (n--) macro[active_macros[n]].busy_flag = 0;
#endif
  --active_macros;

// Discard argument strings

  n = n_args;
  while (n--) delete [] args[n];
  if (n_args) delete [] args;

// Restore old status

  args_only = save_switch;
  args = save_args;
  n_args = save_n_args;
  params = save_params;
  mas_pcb = save_pcb;
  n_concats = save_n_concats;     // a nested call must not lose the caller's count
}

/*

expand_macro() looks up the macro named by token t and hands its body,
the argument count and its parameter names to expand_text().

The n_args argument strings are expected on the token accumulator
stack, and n_args must equal the count recorded for the macro (this is
checked with assert). A macro without a body gets no parse at all: its
argument strings are dropped and an empty string is pushed in place of
an expansion.

*/

void expand_macro(token t, unsigned n_args) {
  const unsigned id = macro_id[t.handle];
  assert(n_args == macro[id].n_args);

  token *body = macro[id].body;
  if (body != NULL) {
    expand_text(body, n_args, macro[id].arg_names);
    return;
  }

  // Nothing to expand: pop the arguments, leave an empty result.
  for (unsigned pending = n_args; pending > 0; pending--) --ta;
  ++ta;
}

/*

expand_arg() is another shell procedure for expand_text() which does
a complete expansion of a single macro argument.

*/

static void expand_arg(unsigned n) {
  expand_text(args[n]);
  concat(ta);
}

/*

id_macro() classifies a NAME token for the grammar. It is very nearly
the same as id_macro() in TS.SYN, except that it works on tokens
where the other works on character strings.

The checks are made in this order, and the first that applies wins:
  1. The name is a parameter of the text being expanded (searched from
     the last parameter to the first): reduce to parameter name and
     replace the handle by the parameter number.
  2. args_only is set: return the token unchanged.
  3. if_clause is set and the name is "defined": reduce to defined.
  4. There is no macro of that name, or its busy_flag is set: return
     the token unchanged.
  5. Otherwise note the macro id on active_macros and reduce to macro
     if its parens flag is set, to simple macro if not.

*/

static token id_macro(token t) {
  for (unsigned index = n_args; index-- > 0; ) {
    if (params[index] != t.handle) continue;
    CHANGE_REDUCTION(parameter_name);
    t.handle = index;
    return t;
  }

  if (args_only) return t;

  if (if_clause && t.handle == defined_value) {
    CHANGE_REDUCTION(defined);
    return t;
  }

  unsigned id = macro_id[t.handle];
  if (id == 0 || macro[id].busy_flag) return t;

  active_macros << id;
  if (macro[id].parens) {
    CHANGE_REDUCTION(macro);
  }
  else {
    CHANGE_REDUCTION(simple_macro);
  }
  return t;
}

/*

defined() builds the token that replaces "defined NAME". The result is
a DECconstant whose handle is one_value when macro_id has a nonzero
entry for the name, and zero_value when it does not. It is very nearly
the same as defined() in TS.SYN, which deals in character strings
where this one deals in tokens.

*/

static token defined(unsigned handle) {
  token result;
  result.id = DECconstant;
  if (macro_id[handle]) result.handle = one_value;
  else result.handle = zero_value;
  return result;
}

/*

concatenate() implements the splicing together of two tokens by the
"##" operator in a macro definition. Because of the way the grammar
has been written, spaces have already been trimmed on both sides of the
## by the parser.

If there are actually two tokens to concatenate, the last token on
the left is popped off, its string value is obtained from the token
dictionary and pushed onto the string accumulator, ditto for the
first token on the right. The string is then identified and the token
is classified. If the new token is the name of a macro, a new scan
will be required to expand it.

*/

static void concatenate(void) {
  array<token> right_arg(ta, size(ta) + 1);
  token t;
  token *tp = right_arg;

  --ta;                // discard right argument from stack

  if (size(ta) && tp->id != END_OF_FILE) {
    ta >> t;                               // pop left token
    ++sa << td[t.handle] << td[tp->handle];      // left string + right string
    t.handle = td << sa.top();             // identify string
    t.id  = classify_token(sa.top());      // classify token
    --sa;                                  // discard string
    ++tp;                                  // discard old token on right
    if (macro_id[t.handle]) n_concats++;      // if macro, signal rescan
    ta << t;                               // output new token
  }
  ta << tp;                                // remainder of right side
}

/*

make_string() carries out the '#' operator in macro expansions: the
tokens of argument number n are spelled out, one after another, into a
single string constant. The string is wrapped in "" marks, and every
" or \ character found in a token spelling is preceded by a \
character. The result is a STRINGliteral token whose handle is the
dictionary entry of the finished string.

*/

static token make_string(unsigned n) {
  ++sa << '"';
  for (token *tp = args[n]; tp->id != END_OF_FILE; tp++) {
    for (char *p = td[tp->handle]; *p != 0; p++) {
      char c = *p;
      if (c == '"' || c == '\\') sa << '\\';
      sa << c;
    }
  }
  sa << '"';

  token result;
  result.id = STRINGliteral;
  result.handle = td << sa.top();
  --sa;
  return result;
}

}                                      // End of Embedded C