Mercurial > ~dholland > hg > ag > index.cgi
view examples/mpp/ct.syn @ 24:a4899cdfc2d6 default tip
Obfuscate the regexps to strip off the IBM compiler's copyright banners.
I don't want bots scanning github to think they're real copyright
notices because that could cause real problems.
author | David A. Holland |
---|---|
date | Mon, 13 Jun 2022 00:40:23 -0400 |
parents | 13d2b8934445 |
children |
line wrap: on
line source
{
/*
 * AnaGram, a System for Syntax Directed Programming
 * C Macro preprocessor
 * Token Classifier Module
 *
 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 * claim that you wrote the original software. If you use this software
 * in a product, an acknowledgment in the product documentation would be
 * appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 * misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

/*
 * Overview: this syntax file classifies ONE already-isolated token held in
 * a memory string. The top-level rule is "grammar -> token, eof" with
 * eof = 0, so the whole string must form exactly one token. The entry
 * point for callers is classify_token(), defined in the embedded C section
 * at the end of this file.
 */

#include "mpp.h"
}

// Configuration section
[
  // far tables                       // uncomment for 16 bit environment
  ~allow macros                       // to simplify debugging
  line numbers                        // put #line directives in parser
  //escape backslashes                // uncomment if using MSVC++
  ~diagnose errors                    // no diagnostics
  pointer input                       // input is an array in memory
  ~lines and columns                  // not needed
  ~test range                         // not needed
  default token type = token_id       // saves a lot of explicit defs
  parser file name = "#.cpp"
]

// Character set definitions

any text char = ~eof - '\\'
ascii = 1..126
digit = '0-9'
eof = 0
hex digit = '0-9' + 'A-F' + 'a-f'
letter = 'a-z' + 'A-Z' + '_'
// punctuation: every code in 1..126 that is not a letter, digit or quote mark
punctuation = 1..126 -(letter + digit + '\'' + '"')
// simple char / string char: the characters that may appear unescaped inside
// a character constant / string literal respectively
simple char = ~eof - ('\'' + '\\' + '\n')
string char = ~eof - ('"' + '\\' + '\n')

// C token grammar

// The input must be exactly one token followed by the terminating 0 (eof).
grammar
 -> token, eof

// Each alternative yields the token_id that classifies the input.
token
 -> name string                           =id_name();
 -> qualified real
 -> integer constant
 -> string literal                        =STRINGliteral;
 -> character constant                    =CHARACTERconstant;
 -> operator
 -> punctuation:p                         =(token_id) p;   // single character: its own code is the id

// Multi-character operators
operator
 -> '&', '&'                              =ANDAND;
 -> '&', '='                              =ANDassign;
 -> '-', '>'                              =ARROW;
 -> '#', '#'                              =CONCAT;
 -> '-', '-'                              =DECR;
 -> '/', '='                              =DIVassign;
 -> '.', '.', '.'                         =ELLIPSIS;
 -> '=', '='                              =EQ;
 -> '^', '='                              =ERassign;
 -> '>', '='                              =GE;
 -> '+', '+'                              =ICR;
 -> '<', '='                              =LE;
 -> '<', '<'                              =LS;
 -> '<', '<', '='                         =LSassign;
 -> '%', '='                              =MODassign;
 -> '-', '='                              =MINUSassign;
 -> '*', '='                              =MULTassign;
 -> '!', '='                              =NE;
 -> '|', '='                              =ORassign;
 -> '|', '|'                              =OROR;
 -> '+', '='                              =PLUSassign;
 -> '>', '>'                              =RS;
 -> '>', '>', '='                         =RSassign;

// Floating point number syntax

qualified real
 -> real constant, floating qualifier     =FLOATconstant;

real constant
 -> real

// Optional suffix: empty, f/F, or l/L
floating qualifier
 ->
 -> 'f' + 'F'
 -> 'l' + 'L'

real
 -> simple real
 -> simple real, exponent
 -> confusion, exponent
 -> decimal integer, exponent

simple real
 -> confusion, '.'
 -> octal integer, '.'
 -> decimal integer, '.'
 -> '.', '0-9'
 -> simple real, '0-9'

// "confusion" is a digit string that begins like an octal integer but then
// contains an 8 or 9 (e.g. 089). It is referenced only from "real" and
// "simple real", i.e. it is accepted only when an exponent or '.' follows.
confusion
 -> octal integer, '8-9'
 -> confusion, '0-9'

exponent
 -> 'e' + 'E', '-', '0-9'
 -> 'e' + 'E', '+'?, '0-9'
 -> exponent, '0-9'

// Integer Constant Syntax

integer constant
 -> octal constant                        =OCTconstant;
 -> decimal constant                      =DECconstant;
 -> hex constant                          =HEXconstant;

octal constant
 -> octal integer
 -> octal constant, integer qualifier

// Note: a lone '0' is classified as an octal integer.
octal integer
 -> '0'
 -> octal integer, '0-7'

hex constant
 -> hex integer
 -> hex constant, integer qualifier

hex integer
 -> '0', 'x' + 'X', hex digit
 -> hex integer, hex digit

decimal constant
 -> decimal integer
 -> decimal constant, integer qualifier

decimal integer
 -> '1-9'
 -> decimal integer, '0-9'

// Qualifiers may repeat in any order (the *constant rules are left recursive).
integer qualifier
 -> 'u' + 'U'
 -> 'l' + 'L'

// String Literals

string literal
 -> string chars, '"'

// Opening quote, then ordinary characters or backslash escapes; a backslash
// may also be followed by a newline.
string chars
 -> '"'
 -> string chars, string char
 -> string chars, '\\', ~eof&~'\n'
 -> string chars, '\\', '\n'

// Character constants

character constant
 -> simple chars, '\''

// Same structure as "string chars", delimited by single quotes.
simple chars
 -> '\''
 -> simple chars, simple char
 -> simple chars, '\\', ~eof&~'\n'
 -> simple chars, '\\', '\n'

(void) name string
 -> letter
 -> name string, letter+digit

{ // Embedded C
#define SYNTAX_ERROR                      // no diagnostic needed

// The string currently being classified; set by classify_token() and read
// by id_name().
static char *input_string;

/*
 id_name() is the reduction procedure for "token -> name string".

 It hands input_string (the complete token text, not a separately
 accumulated string) to the token dictionary with "td << input_string" and
 uses the resulting handle to decide the classification:
   - handle <= n_reserved_words: the id stored in reserved_words[handle]
   - otherwise:                  NAME

 td, reserved_words and n_reserved_words are declared outside this file
 (presumably via mpp.h); "td << s" presumably enters s in the dictionary
 and returns its handle -- confirm against the dictionary class.

 NOTE(review): the comparison is "<=", so handle == n_reserved_words is
 treated as a reserved word. That requires reserved_words[] to have at
 least n_reserved_words + 1 entries -- confirm against its definition.
*/

static token_id id_name(void) {
  unsigned handle = td << input_string;
  token_id id;
  if (handle <= n_reserved_words) id = reserved_words[handle].id;
  else id = NAME;
  return id;
}

/*
 classify_token() is an interface function for the grammar.

 Parameter:
   string - the text of the token to classify. The grammar defines
            eof = 0, so the terminating zero byte ends the parse.

 It records string in the file-static input_string (for id_name()), sets
 the pointer in the parser control block to point to the input string,
 and calls the parser ct().

 Returns the token_id determined by the parser (ct_value()) if
 PCB.exit_flag is AG_SUCCESS_CODE, and UNRECOGNIZED otherwise.
*/

token_id classify_token(char *string) {
  input_string = string;
  PCB.pointer = (unsigned char *) string;
  ct();
  return PCB.exit_flag == AG_SUCCESS_CODE ? ct_value() : UNRECOGNIZED;
}

} // End of Embedded C