view examples/mpp/krc.syn @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

{
/*
 * AnaGram, a System for Syntax Directed Programming
 * C Macro preprocessor
 * Sample C Grammar
 * Compatible with Kernighan and Ritchie, 2nd. Edition.
 *
 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

#include "mpp.h"

// Handles of the identifiers declared so far in the declaration being
// parsed: the "direct declarator" rule pushes onto it and mark_typedef()
// drains or resets it when the declaration is complete.
// NOTE(review): the constructor arguments (100, 20) are presumably sizing
// parameters of the project's stack template -- confirm in stack.h.
stack<unsigned> id_stack(100,20);

}
[
  // Parser-generator configuration.  A leading '~' switches a setting off.
  grammar token = program              // Define grammar token
 ~nest comments                        // Make it explicitly ANSI
  sticky { statement}                  // resolves if-then-else conflict
  event driven
  // far tables                        // uncomment for 16 bit environment
 ~allow macros
  input values                         // token input
  line numbers
  //escape backslashes                 // uncomment if using MSVC++
  error trace                          // for diagnosing errors
  parser name = cc                     // parser will be cc()
  parser file name = "krc.cpp"
  header file name = "krc.h"
  default input type = token
 ~test range                           // not necessary
  // Token codes delivered by the token scanner.  As in a C enum, an entry
  // without an explicit value takes the previous value plus one, so the
  // multi-character operators run upward from 'A', the suffix qualifiers
  // from 97 and the constants/keywords from 129.  Single-character tokens
  // (such as ';' or '{') are written as character literals in the rules.
  // NOTE(review): these values must stay in step with TOKEN.H, which is not
  // visible from this file.
  enum {                               // See TOKEN.H
    eof               =0,
    SPACE             =' ',
    ANDAND            ='A', // "&&"
    ANDassign,              // "&="
    ARROW,                  // "->"
    CONCAT,                 // "##"
    DECR,                   // "--"
    DIVassign,              // "/="
    ELLIPSIS,               // "..."
    EQ,                     // "=="
    ERassign,               // "^="
    GE,                     // ">="
    ICR,                    // "++"
    LE,                     // "<="
    LS,                     // "<<"
    LSassign,               // "<<="
    MODassign,              // "%="
    MINUSassign,            // "-="
    MULTassign,             // "*="
    NE,                     // "!="
    ORassign,               // "|="
    OROR,                   // "||"
    PLUSassign,             // "+="
    RS,                     // ">>"
    RSassign,               // ">>="
    CHARACTERconstant,      // character constant
    STRINGliteral,          // character string
    UNSIGNEDqualifier =97,
    LONGqualifier,
    FLOATqualifier,
    HEXconstant       =129,
    OCTconstant,
    DECconstant,
    FLOATconstant,          // real
    NAME,
    AUTO,                   // "auto"
    BREAK,                  // "break"
    CASE,                   // "case"
    CHAR,                   // "char"
    CONSTANT,               // "const"
    CONTINUE,               // "continue"
    DEFAULT,                // "default"
    DO,                     // "do"
    DOUBLE,                 // "double"
    ELSE,                   // "else"
    ENUM,                   // "enum"
    EXTERN,                 // "extern"
    FLOAT,                  // "float"
    FOR,                    // "for"
    GOTO,                   // "goto"
    IF,                     // "if"
    INT,                    // "int"
    LONG,                   // "long"
    REGISTER,               // "register"
    RETURN,                 // "return"
    SHORT,                  // "short"
    SIGNED,                 // "signed"
    SIZEOF,                 // "sizeof"
    STATIC,                 // "static"
    STRUCT,                 // "struct"
    SWITCH,                 // "switch"
    TYPEDEF,                // "typedef"
    UNION,                  // "union"
    UNSIGNED,               // "unsigned"
    VOIDkey,                // "void"
    VOLATILE,               // "volatile"
    WHILE,                  // "while"
    UNRECOGNIZED,
  }
]

// Grammar token (named by "grammar token = program" in the configuration
// section): a whole translation unit followed by the eof token.
program
 -> translation unit, eof

translation unit
 -> external declaration
 -> translation unit, external declaration

external declaration
 -> function definition
 -> declaration

// Four forms: the declaration specifiers are optional, and so is a
// declaration list between the declarator and the function body.
function definition
 -> declarator, compound statement
 -> declarator, declaration list, compound statement
 -> declaration specifiers, declarator, compound statement
 -> declaration specifiers, declarator, declaration list, compound statement

// m is the int value of "declaration specifiers"; the only non-zero value
// assigned anywhere in those rules is the 1 given to TYPEDEF.
// mark_typedef(m) runs when the closing ';' completes the rule and either
// records the collected identifiers as typedef names or resets id_stack.
declaration
 -> declaration specifiers:m, init declarator list?, ';'  =mark_typedef(m);

declaration list
 -> declaration
 -> declaration list, declaration

// Value is an int flag: 0 for a type specifier or qualifier, the storage
// class value otherwise, OR-ed in as further storage classes are seen.
// NOTE(review): the rules without a reduction procedure appear to rely on
// AnaGram passing through the value of the first rule element -- confirm
// against the AnaGram manual.
(int) declaration specifiers
 -> storage class specifier
 -> type specifier                                        =0;
 -> type qualifier                                        =0;
 -> declaration specifiers:m, storage class specifier:v   =m | v;
 -> declaration specifiers, type specifier
 -> declaration specifiers, type qualifier

// Value is 1 for TYPEDEF and 0 for every other storage class.
(int) storage class specifier
 -> AUTO              =0;
 -> REGISTER          =0;
 -> STATIC            =0;
 -> EXTERN            =0;
 -> TYPEDEF           =1;

type specifier
 -> VOIDkey | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED
 -> struct or union specifier
 -> enum specifier
 -> typedef name

type qualifier
 -> CONSTANT
 -> VOLATILE

// The embedded action "!++id_stack;" runs once the optional tag has been
// seen, before the '{'; "--id_stack" runs when the closing '}' completes
// the rule.
// NOTE(review): presumably ++/-- open and close a nested level of id_stack
// so that member declarators stay separate from the identifiers of the
// enclosing declaration -- confirm against the stack template in stack.h.
struct or union specifier
 -> struct or union, identifier?, !++id_stack;,
   '{', struct declaration list, '}' =--id_stack;
 -> struct or union, identifier


struct or union
 -> STRUCT
 -> UNION

struct declaration list
 -> struct declaration
 -> struct declaration list, struct declaration

init declarator list
 -> init declarator
 -> init declarator list, ',', init declarator

init declarator
 -> declarator
 -> declarator, '=', initializer

struct declaration
 -> specifier qualifier list, struct declarator list, ';'

specifier qualifier list
 -> type specifier
 -> type qualifier
 -> specifier qualifier list, type specifier
 -> specifier qualifier list, type qualifier

struct declarator list
 -> struct declarator
 -> struct declarator list, ',', struct declarator

// The second form is a bit-field; its declarator may be omitted.
struct declarator
 -> declarator
 -> declarator?, ':', constant expression

enum specifier
 -> ENUM, identifier?, '{', enumerator list, '}'
 -> ENUM, identifier

enumerator list
 -> enumerator
 -> enumerator list, ',', enumerator

// Enumerator names are not pushed onto id_stack (no reduction procedure).
enumerator
 -> identifier
 -> identifier, '=', constant expression

declarator
 -> direct declarator
 -> pointer, direct declarator

// The first form pushes the handle of the declared identifier onto
// id_stack; mark_typedef() consumes the stack when the enclosing
// declaration is reduced.
direct declarator
 -> identifier:n                          =id_stack << n.handle;
 -> '(', declarator, ')'
 -> direct declarator, '[', constant expression?, ']'
 -> direct declarator, '(', parameter type list, ')'
 -> direct declarator, '(', identifier list?, ')'

pointer
 -> '*', type qualifier list?
 -> '*', type qualifier list?, pointer

type qualifier list
 -> type qualifier
 -> type qualifier list, type qualifier

parameter type list
 -> parameter list
 -> parameter list, ',', ELLIPSIS

parameter list
 -> parameter declaration
 -> parameter list, ',', parameter declaration

parameter declaration
 -> declaration specifiers, declarator
 -> declaration specifiers, abstract declarator?

identifier list
 -> identifier
 -> identifier list, ',', identifier

initializer
 -> assignment expression
 -> '{', initializer list, '}'
 -> '{', initializer list, ',', '}'

initializer list
 -> initializer
 -> initializer list, ',', initializer

type name
 -> specifier qualifier list, abstract declarator?

abstract declarator
 -> pointer
 -> direct abstract declarator
 -> pointer, direct abstract declarator

direct abstract declarator
 -> '(', abstract declarator, ')'
 -> '[', constant expression?, ']'
 -> direct abstract declarator, '[', constant expression?, ']'
 -> '(', parameter type list?, ')'
 -> direct abstract declarator, '(', parameter type list?, ')'

// Both tokens are recognized from the same NAME input.  check_typedef()
// looks the name up in symbol_table and calls CHANGE_REDUCTION to select
// "typedef name" when the entry is typedef_name.
// NOTE(review): the reduction presumably stays "identifier" otherwise --
// confirm against the AnaGram manual.
(token) identifier, typedef name
 -> NAME:n                         =check_typedef(n);

// Statement rules.
statement
 -> labeled statement
 -> expression statement
 -> compound statement
 -> selection statement
 -> iteration statement
 -> jump statement

labeled statement
 -> identifier, ':', statement
 -> CASE, constant expression, ':', statement
 -> DEFAULT, ':', statement

expression statement
 -> expression?, ';'

// Declarations, when present, must precede the statements of the block.
compound statement
 -> '{', statement list?, '}'
 -> '{', declaration list, statement list?, '}'

statement list
 -> statement
 -> statement list, statement

// The two IF forms conflict on ELSE; the "sticky { statement}" setting in
// the configuration section is what resolves that conflict.
selection statement
 -> IF, '(', expression, ')', statement
 -> IF, '(', expression, ')', statement, ELSE, statement
 -> SWITCH, '(', expression, ')', statement

iteration statement
 -> WHILE, '(', expression, ')', statement
 -> DO, statement, WHILE, '(', expression, ')', ';'
 -> FOR, '(', expression?, ';', expression?, ';', expression?, ')',
      statement

jump statement
 -> GOTO, identifier, ';'
 -> CONTINUE, ';'
 -> BREAK, ';'
 -> RETURN, expression?, ';'

// Expression rules: one nonterminal per precedence level, starting with
// the comma operator and ending with primary expressions.  The binary
// operator levels are written left-recursively.
expression
 -> assignment expression
 -> expression, ',', assignment expression

assignment expression
 -> conditional expression
 -> unary expression, assignment operator, assignment expression

assignment operator
 -> '=' | MULTassign | DIVassign | MODassign | PLUSassign | MINUSassign
 -> LSassign | RSassign | ANDassign | ORassign | ERassign

conditional expression
 -> logical or expression
 -> logical or expression, '?', expression, ':', conditional expression

// Syntactically just a conditional expression; no constancy check is made
// by the grammar itself.
constant expression
 -> conditional expression

logical or expression
 -> logical and expression
 -> logical or expression, OROR, logical and expression

logical and expression
 -> inclusive or expression
 -> logical and expression, ANDAND, inclusive or expression

inclusive or expression
 -> exclusive or expression
 -> inclusive or expression, '|', exclusive or expression

exclusive or expression
 -> and expression
 -> exclusive or expression, '^', and expression

and expression
 -> equality expression
 -> and expression, '&', equality expression

equality expression
 -> relational expression
 -> equality expression, EQ, relational expression
 -> equality expression, NE, relational expression

relational expression
 -> shift expression
 -> relational expression, '<', shift expression
 -> relational expression, '>', shift expression
 -> relational expression, LE, shift expression
 -> relational expression, GE, shift expression

shift expression
 -> additive expression
 -> shift expression, LS, additive expression
 -> shift expression, RS, additive expression

additive expression
 -> multiplicative expression
 -> additive expression, '+', multiplicative expression
 -> additive expression, '-', multiplicative expression

multiplicative expression
 -> cast expression
 -> multiplicative expression, '*', cast expression
 -> multiplicative expression, '/', cast expression
 -> multiplicative expression, '%', cast expression

// "type name" begins with a type specifier or qualifier, so telling a cast
// from a parenthesized expression depends on the identifier / typedef name
// distinction made by check_typedef().
cast expression
 -> unary expression
 -> '(', type name, ')', cast expression

unary expression
 -> postfix expression
 -> ICR, unary expression
 -> DECR, unary expression
 -> unary operator, cast expression
 -> SIZEOF, unary expression
 -> SIZEOF, '(', type name, ')'

unary operator
 -> '&' | '*' | '+' | '-' | '~' | '!'

postfix expression
 -> primary expression
 -> postfix expression, '[', expression, ']'
 -> postfix expression, '(', argument expression list?, ')'
 -> postfix expression, '.', identifier
 -> postfix expression, ARROW, identifier
 -> postfix expression, ICR
 -> postfix expression, DECR

primary expression
 -> identifier
 -> constant
 -> STRINGliteral
 -> '(', expression, ')'

argument expression list
 -> assignment expression
 -> argument expression list, ',', assignment expression

constant
 -> HEXconstant
 -> OCTconstant
 -> DECconstant
 -> FLOATconstant
 -> CHARACTERconstant


{                                           // Embedded C
#include <stack.h>


// Macro Definitions

// INPUT_CODE yields the id field of an input value (the grammar's default
// input type is token).
// NOTE(review): presumably consumed by the generated parser because
// "input values" is set -- confirm against the AnaGram manual.
#define INPUT_CODE(T) (T).id
// SYNTAX_ERROR hands the parser's current error message to syntax_error().
#define SYNTAX_ERROR  syntax_error(PCB.error_message)


// Variable definitions

// Number of live c_parser objects; the constructor asserts that it is zero
// before incrementing it and the destructor decrements it.
static int use_count = 0;
// Classification of each symbol, indexed by token handle.  Zeroed whenever
// the parser is constructed or reset; mark_typedef() sets entries to
// typedef_name and check_typedef() tests for that value.
symbol_type_enum symbol_table[N_SYMBOLS];


/*
  mark_typedef() is the reduction procedure for a complete declaration.
  mask is the value of the declaration specifiers: non-zero when the
  declaration is a typedef, zero otherwise.

  For a typedef, identifier handles are popped from id_stack until
  size(id_stack) reports zero, and each popped handle's symbol_table entry
  is set to typedef_name. For any other declaration the stack is reset and
  symbol_table is left untouched.
*/

static void mark_typedef(int mask) {
  if (!mask) {
    // Ordinary declaration: the collected identifiers are not needed.
    reset(id_stack);
    return;
  }
  while (size(id_stack)) {
    unsigned handle;
    id_stack >> handle;
    symbol_table[handle] = typedef_name;
  }
}

/*
  check_typedef() resolves a semantically determined production: a NAME
  token reduces to either "identifier" or "typedef name". When the symbol
  table classifies the token's handle as typedef_name, the reduction token
  is changed accordingly. The token itself is returned unchanged in both
  cases.
*/

static token check_typedef(token t) {
  if (symbol_table[t.handle] != typedef_name) return t;
  CHANGE_REDUCTION(typedef_name);
  return t;
}


// Member Functions for Class c_parser

// Constructor

/*
  Only one c_parser may be active at a time: id_stack and symbol_table are
  file-level objects shared by every instance, so neither simultaneous nor
  recursive parses are supported. use_count tracks the active instance and
  the assert below catches an attempt to construct a second one.
*/


c_parser::c_parser() {
  assert(use_count == 0);
  ++use_count;
  memset(symbol_table, 0, sizeof symbol_table);   // clear all classifications
  reset(id_stack);                                // no pending identifiers
  init_cc();                                      // init parse
}


// Destructor

c_parser::~c_parser() {
  // Release the single-instance slot so another parser may be constructed.
  --use_count;
}


// Reset Parser

// Returns the parser to its freshly constructed state: the symbol table is
// zeroed, the identifier stack is reset and the parse is re-initialized.
// The argument is handed back so that calls can be chained.
c_parser &reset(c_parser &c) {
  memset(symbol_table, 0, sizeof symbol_table);
  reset(id_stack);
  init_cc();                                      // init parse
  return c;
}


// Transmit token to c_parser

/*
  Tokens are delivered to the parser one at a time through the overloaded
  operator "<<".

  Two kinds of input are dropped without calling the parser:
    - anything arriving after the parser has stopped running (for example
      after an error), since further input would be pointless;
    - newline tokens, which the token scanner passes along only for the
      benefit of preprocessor text output.

  Every other token is stored in the input_code and input_value fields of
  the pcb and cc() is called to process it.
*/

token_sink &c_parser::operator << (token c) {
  if (PCB.exit_flag != AG_RUNNING_CODE) return *this;
  if ((int) c.id == '\n') return *this;

  PCB.input_value = c;
  PCB.input_code = c.id;
  cc();
  return *this;
}

/*
  Transmit an array of tokens, terminated by an END_OF_FILE token, to the
  parser. Transmission stops early if the parser stops running. Newline
  tokens are skipped, as in the single-token overload. The terminating
  END_OF_FILE token itself is not passed to the parser.

  The pointer is advanced in the loop header so that skipping a newline
  also moves on to the next token; advancing only when a token was
  transmitted made the loop spin forever on the first newline.
*/
token_sink &c_parser::operator << (token *s) {
  for (; s->id != END_OF_FILE && PCB.exit_flag == AG_RUNNING_CODE; s++) {
    if ((int) s->id == '\n') continue;
    PCB.input_code = s->id;
    PCB.input_value = *s;
    cc();
  }
  return *this;
}

}                                           // End Embedded C