Mercurial > ~dholland > hg > ag > index.cgi

{
/*
 * AnaGram, a System for Syntax Directed Programming
 * C Macro preprocessor
 * Token Classifier Module
 *
 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

#include "mpp.h"

}

// Configuration section

[
  // far tables                       // uncomment for 16 bit environment
 ~allow macros                        // to simplify debugging
  line numbers                        // put #line directives in parser
  //escape backslashes                // uncomment if using MSVC++
 ~diagnose errors                     // no diagnostics
  pointer input                       // input is an array in memory
 ~lines and columns                   // not needed
 ~test range                          // not needed
  default token type = token_id       // saves a lot of explicit defs
  parser file name = "#.cpp"          // parser is emitted as a C++ source file
]

// Character set definitions
//
// Set expressions use + (union), - (difference) and ~ (complement).
// eof is character code 0; the grammar ends on eof, so the string handed
// to the parser must end with a 0 byte.
// "any text char" and "ascii" are not referenced by any production
// visible in this file.
// punctuation is every code in 1..126 that is not a letter, a digit or
// a quote mark, so it also takes in blanks and control codes.

any text char = ~eof - '\\'
ascii         = 1..126 - eof
digit         = '0-9'
eof           = 0
hex digit     = '0-9' + 'A-F' + 'a-f'
letter        = 'a-z' + 'A-Z' + '_'
punctuation   = 1..126 -(letter + digit + '\'' + '"')
simple char   = ~eof - ('\'' + '\\' + '\n')
string char   = ~eof - ('"' + '\\' + '\n')


// C token grammar
//
// The whole input must be exactly one token followed by eof; anything
// else is a syntax error.

grammar
 -> token, eof

// The value of "token" is a token_id.  Rules with a reduction procedure
// return the constant (or function result) written after the '='.  Rules
// without one (qualified real, integer constant, operator) hand on the
// value of their single element, which is AnaGram's default when no
// reduction procedure is given.  A lone punctuation character is returned
// as its own character code cast to token_id.

token
 -> name string               =id_name();
 -> qualified real
 -> integer constant
 -> string literal            =STRINGliteral;
 -> character constant        =CHARACTERconstant;
 -> operator
 -> punctuation:p             =(token_id) p;

// Multi-character operators.  Each rule returns the token_id constant
// named in its reduction procedure.  Single-character operators are not
// listed here; they are members of the punctuation set and are handled
// by the punctuation rule of "token".

operator
 -> '&', '&'                  =ANDAND;
 -> '&', '='                  =ANDassign;
 -> '-', '>'                  =ARROW;
 -> '#', '#'                  =CONCAT;
 -> '-', '-'                  =DECR;
 -> '/', '='                  =DIVassign;
 -> '.', '.', '.'             =ELLIPSIS;
 -> '=', '='                  =EQ;
 -> '^', '='                  =ERassign;
 -> '>', '='                  =GE;
 -> '+', '+'                  =ICR;
 -> '<', '='                  =LE;
 -> '<', '<'                  =LS;
 -> '<', '<', '='             =LSassign;
 -> '%', '='                  =MODassign;
 -> '-', '='                  =MINUSassign;
 -> '*', '='                  =MULTassign;
 -> '!', '='                  =NE;
 -> '|', '='                  =ORassign;
 -> '|', '|'                  =OROR;
 -> '+', '='                  =PLUSassign;
 -> '>', '>'                  =RS;
 -> '>', '>', '='             =RSassign;


// Floating point number syntax
//
// Anything that matches "qualified real" is classified as FLOATconstant.
// The floating qualifier is optional (its first rule is empty) and, when
// present, is a single f/F or l/L.

qualified real
 -> real constant, floating qualifier  =FLOATconstant;

real constant
 -> real

floating qualifier
 ->
 -> 'f' + 'F'
 -> 'l' + 'L'

// A real is either a simple real (a digit string containing a '.'),
// optionally followed by an exponent, or a plain digit string followed
// directly by an exponent.  The digit string may be a decimal integer,
// an octal-looking integer such as 0 or 017 (0e0 and 017e1 are valid C
// floating constants), or a "confusion" such as 09.
real
 -> simple real
 -> simple real, exponent
 -> confusion, exponent
 -> octal integer, exponent
 -> decimal integer, exponent

// simple real: a digit string with a decimal point.  It starts either as
// digits followed by '.', or as '.' followed by one digit, and then takes
// any number of further digits.  A lone '.' is not a simple real.
simple real
 -> confusion, '.'
 -> octal integer, '.'
 -> decimal integer, '.'
 -> '.', '0-9'
 -> simple real, '0-9'

// confusion: a digit string that begins like an octal integer (leading 0)
// but contains an 8 or 9.  It is not an integer constant; in this grammar
// it only appears as the leading part of a real.
confusion
  -> octal integer, '8-9'
  -> confusion, '0-9'

// exponent: e or E, an optional sign, then one or more digits.
exponent
  -> 'e' + 'E', '-', '0-9'
  -> 'e' + 'E', '+'?, '0-9'
  -> exponent, '0-9'


// Integer Constant Syntax
//
// The prefix decides the classification: a leading 0 gives OCTconstant
// (so a lone "0" is octal), 0x or 0X followed by at least one hex digit
// gives HEXconstant, and a leading 1-9 gives DECconstant.
// Any run of u/U/l/L characters is accepted after the digits; the grammar
// does not restrict their number or order.

integer constant
 -> octal constant                          =OCTconstant;
 -> decimal constant                        =DECconstant;
 -> hex constant                            =HEXconstant;

octal constant
 -> octal integer
 -> octal constant, integer qualifier

octal integer
  -> '0'
  -> octal integer, '0-7'

hex constant
 -> hex integer
 -> hex constant, integer qualifier

hex integer
  -> '0', 'x' + 'X', hex digit
  -> hex integer, hex digit

decimal constant
 -> decimal integer
 -> decimal constant, integer qualifier

decimal integer
  -> '1-9'
  -> decimal integer, '0-9'

integer qualifier
  -> 'u' + 'U'
  -> 'l' + 'L'


// String Literals
//
// "string chars" collects the opening quote and the body; "string literal"
// adds the closing quote.  A backslash always takes the following
// character with it, whatever it is (including '"' and newline), provided
// it is not eof.  An unescaped newline, or eof before the closing quote,
// matches no rule.

string literal
  -> string chars, '"'

string chars
 -> '"'
 -> string chars, string char
 -> string chars, '\\', ~eof&~'\n'
 -> string chars, '\\', '\n'


// Character constants
//
// Same shape as string literals, with ' as the delimiter.  The grammar
// does not count the characters between the quotes, so both an empty
// constant ('') and a multi-character constant are accepted.  A backslash
// takes the following non-eof character with it; an unescaped newline or
// eof before the closing quote matches no rule.

character constant
 -> simple chars, '\''

simple chars
 -> '\''
 -> simple chars, simple char
 -> simple chars, '\\', ~eof&~'\n'
 -> simple chars, '\\', '\n'

// Identifiers: a letter (the letter set includes '_') followed by any
// number of letters and digits.  The token is declared (void), so it
// carries no value of its own; the "token" production classifies it by
// calling id_name().
(void) name string
  -> letter
  -> name string, letter+digit


{                                      // Embedded C

#define SYNTAX_ERROR                   // no diagnostic needed

// String currently being classified: set by classify_token(), read by
// id_name().
static char *input_string;

/*

id_name() enters input_string (the string most recently handed to
classify_token()) into the token dictionary td and gets a handle back.
A handle no larger than n_reserved_words selects an entry of
reserved_words[], and that entry's id is returned. Any larger handle is
classified simply as a NAME.

NOTE(review): the bound is inclusive, which implies reserved_words[] has
at least n_reserved_words + 1 entries -- confirm against its definition.

*/

static token_id id_name(void) {
  const unsigned dict_index = td << input_string;
  if (dict_index > n_reserved_words) {
    return NAME;
  }
  return reserved_words[dict_index].id;
}

/*

classify_token() is the interface function for the grammar. It records
the string for id_name(), points the parser control block's input
pointer at it, and runs the parser. The grammar expects the token to be
followed by eof (character code 0). If the parser finishes with
AG_SUCCESS_CODE, the token_id it determined is returned; on any other
exit the result is "UNRECOGNIZED".

*/

token_id classify_token(char *string) {
  input_string = string;
  PCB.pointer = (unsigned char *) string;
  ct();

  if (PCB.exit_flag != AG_SUCCESS_CODE) {
    return UNRECOGNIZED;
  }
  return ct_value();
}

}                                      // End of Embedded C
author	David A. Holland
date	Tue, 31 May 2022 00:58:42 -0400
parents	13d2b8934445
children