diff tests/agcl/parsifal/xmlp8.syn @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/agcl/parsifal/xmlp8.syn	Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,774 @@
+{/*
+ * XML Syntax
+ * Transcribed from
+ *    Extensible Markup Language (XML) 1.0
+ *    W3C Recommendation 10-February-1998
+ *    http://www.w3.org/TR/1998/REC-xml-19980210
+ *
+ * Transcription Copyright © Parsifal Software, 1999.
+ *
+ * Revision 1 fixed definition of [4] NameChar
+ *
+ * Revision 2, begun 1/24/00
+ *
+ * Fixed problem with mixed content declaration [51]
+ * Fixed missing S in [52]
+ * Removed unnecessary "sticky" statement
+*/
+
+#include "xmldef8.h"
+#include <stdio.h>
+
+}
+
+
+[  // Configuration section: parser options, members added to the parser control block, wrapped types
+  reserve keywords {"--", "?>", "]]>"}
+  test file mask = "*.xml"
+  parser file name = "#.cpp"
+  parser name = parse
+  reentrant parser
+  //line numbers
+  event driven
+  context type = Context
+  extend pcb {
+    AgStack<AgString> tagStack;    // names of the open elements: pushed by startTag, popped by endTag
+    AgStack<Symtab> mapStack;      // attribute maps of the open elements, pushed and popped with tagStack
+    AgString textStack;            // characters appended by textChar
+    AgString spaceStack;           // characters appended by spaceChar
+    enum TokenType {
+      none,
+      spaceType,
+      textType,
+      commentType,
+      startType,
+      endType,
+      emptyType,
+      entityRefType,
+      //charRefType,
+      errorType
+    } tokenType;                   // kind of item most recently recognized; set by the reduction procedures
+    AgString currentTagName;       // name from the latest start, empty-element or end tag
+    AgString currentEntityName;    // name from the latest EntityRef
+    //int currentCharRef;
+    AgString commentString;        // text of the latest Comment
+    Symtab currentMap;             // attribute map that goes with currentTagName
+    AgStack<Warning> warningList;  // warnings pushed by checkAttributes, warnAttval and registerAttributes
+    AgBaseMapString<Element> elementMap;  // per-element attribute declarations filled in by registerAttributes
+    int dtdPresent;                // set to 1 when a doctypedecl has been parsed; checkAttributes is a no-op until then
+
+    parse_pcb_struct();
+    parse(char *input, AgString *);
+    void spaceChar(int);           // record one white space character of content
+    void textChar(int);            // record one non-space character of content
+    void startTag(const Context &, const AgString &, AttvalList &);
+    void emptyTag(const Context &, const AgString &, AttvalList &);
+    void endTag(const AgString &);
+    void registerAttributes(const AgString &name, const AgBaseStack<Attribute> &AttributeList);
+    void checkAttributes(const Context &, const AgString &name, Symtab &map);
+    int warnAttval(const Context &context, const char *, const AttvalPair &p);  // returns 1 if a warning was issued, else 0
+    static void normalize(AgString &s);  // trims and collapses white space in place
+  }
+  wrapper {AttvalPair,
+      AgString,
+      AttvalList,
+      Attribute,
+      AttributeList,
+      Default}
+]
+
+// Document
+
+// [1] Grammar token: a prolog, one root element, any number of trailing Misc items, then eof
+document $
+  -> prolog, element, Misc?..., eof
+
+// For completeness: eof is defined as the input value -1
+eof = -1
+
+
+// Character Range
+
+// [2] Legal characters.  The full definition in the Recommendation is
+//Char = 0x9 + 0xA + 0xD + 0x20..0xd7ff + 0xe000..0xfffd + 0x10000..0x10ffff
+// Only 8 bit characters are handled pro tem.  Carriage return (0xD) is part of
+// the set, as the Recommendation requires, so that CR/LF line ends inside
+// comments, processing instructions and ignored sections are not syntax errors.
+Char = 0x9 + 0xA + 0xD + 0x20..0xff
+
+
+// White Space
+// [3] White space characters: blank, tab, carriage return, line feed.  S is a run of one or more.
+SpaceChar = 0x20 + 0x9 + 0xd + 0xa
+
+S
+ -> SpaceChar...
+
+
+// Names and Tokens
+
+// [4] Characters allowed in a Name after its first character
+NameChar = Letter + Digit + '.' + '-' + '_' + ':' + Extender  // + CombiningChar
+
+// [5] Name: a Letter, '_' or ':' followed by NameChars; the value is the accumulated string
+(AgString) Name
+ -> Letter + '_' + ':':c                         =AgString().concat(c);
+ -> Name:s, NameChar:c                           =s.concat(c);
+
+// [6] One or more Names separated by white space
+Names
+ -> Name
+ -> Names, S, Name
+
+// [7] Name token: one or more NameChars
+Nmtoken
+ -> NameChar...
+
+// [8] One or more Nmtokens separated by white space
+Nmtokens
+ -> Nmtoken, [S, Nmtoken]...
+
+
+// Literals
+
+// [9] Quoted entity value: ordinary characters, PEReferences and References up to the matching quote
+EntityValue
+ -> '"', [~(eof + '%' + '&' + '"') | PEReference | Reference]..., '"'
+ -> '\'', [~(eof + '%' + '&' + '\'') | PEReference | Reference]..., '\''
+
+// [10] Quoted attribute value; the value is the string built by the matching AttValString
+(AgString) AttValue
+ -> '"', dq AttValString:s, '"'                        =s;
+ -> '\'', sq AttValString:s, '\''                      =s;
+
+// Body of a double-quoted attribute value, accumulated as a string.
+// Production [10] of the Recommendation excludes '<', '&' and the closing
+// quote from the literal characters; '%' is ordinary data in an attribute
+// value (e.g. width="100%").  Each white space character is stored as a
+// single blank, a character reference contributes its character, and an
+// entity reference contributes nothing to the string.
+(AgString) dq AttValString
+ ->                                                    =AgString();
+ -> dq AttValString:s, ~(eof + '<' + '&' + '"' + SpaceChar):c      =s.concat(c);
+ -> dq AttValString:s, SpaceChar                       =s.concat(' ');
+ -> dq AttValString:s, CharRef:c                       =s.concat(c);
+ -> dq AttValString:s, EntityRef                       =s;
+
+// Body of a single-quoted attribute value; same rules with '\'' as the
+// closing quote.
+(AgString) sq AttValString
+ ->                                                    =AgString();
+ -> sq AttValString:s, ~(eof + '<' + '&' + '\'' + SpaceChar):c     =s.concat(c);
+ -> sq AttValString:s, SpaceChar                       =s.concat(' ');
+ -> sq AttValString:s, CharRef:c                       =s.concat(c);
+ -> sq AttValString:s, EntityRef                       =s;
+
+// [11] System literal: any characters other than eof up to the matching quote
+SystemLiteral
+ -> '"', ~(eof + '"')?..., '"'
+ -> '\'', ~(eof + '\'')?..., '\''
+
+// [12] Public identifier literal: PubidChars up to the matching quote
+PubidLiteral
+ -> '"', PubidChar?..., '"'
+ -> '\'', PubidChar-'\''?..., '\''
+
+// [13] Characters allowed in a public identifier.  The set follows the
+// Recommendation, including '=' in the punctuation group.
+PubidChar = 0x20 + 0xd + 0xa + 'a-z' + 'A-Z' + '0-9' +
+            '-' + '\'' + '(' + ')' + '+' + ',' + '.' +
+            '/' + ':' + '=' + '?' + ';' + '!' + '*' + '#' +
+            '@' + '$' + '_' + '%'
+
+
+// Character Data
+
+// [14] One character of character data: anything except eof, '<' and '&'
+CharData = ~(eof + '<' + '&')         // Note that the iteration is in the usage
+  // Note that use of keyword "]]>" will take care of CDATA problem
+
+
+// Comments
+
+// [15] Comment: stores the comment text in PCB.commentString and sets tokenType to commentType
+Comment
+ -> "<!--", comment text:t, "--", '>'  =PCB.tokenType = Pcb::commentType, PCB.commentString = t;
+
+(AgString) comment text
+ ->                        =AgString();
+ ->comment text:s, Char:c  =s.concat(c);
+
+
+// Processing Instructions
+
+// [16] Processing instruction: target, then optional content separated from it by white space
+PI
+ -> "<?", PITarget, [S, [Char-SpaceChar, Char?...]], "?>"
+
+// [17] Target of a processing instruction
+PITarget
+ -> Name                               // Name lookup mechanism should reject "xml"
+
+
+// CDATA Sections
+
+// [18] CDATA section: start marker, content, end marker
+CDSect
+ -> CDStart, CData, CDEnd
+
+// [19] Start marker of a CDATA section
+CDStart
+ -> "<![CDATA["
+
+// [20] CDATA content goes to textChar/spaceChar one character at a time; a CR LF pair is passed as a single '\n'
+CData
+ -> [Char - SpaceChar:c =PCB.textChar(c); |
+    "\r\n"       =PCB.spaceChar('\n'); |
+     SpaceChar:c =PCB.spaceChar(c);]...
+ // Keyword recognition logic overrides character recognition
+
+// [21] End marker of a CDATA section
+CDEnd
+ -> "]]>"
+
+
+// Prolog
+
+// [22] Prolog: optional XML declaration, Misc items, optional document type declaration, more Misc items
+prolog
+ -> XMLDecl?, Misc?..., [doctypedecl, Misc?...]
+
+// [23] XML declaration; VersionDecl spells out the optional encoding and standalone parts with their white space
+XMLDecl
+// -> "<?xml", VersionInfo, EncodingDecl?, SDDecl?, S?, "?>"
+ -> "<?xml", S, VersionInfo, VersionDecl, "?>"
+
+ VersionDecl
+  -> S?
+  -> S, EncodingDecl, S?
+  -> S, EncodingDecl, S, SDDecl, S?
+  -> S, SDDecl, S?
+
+
+// [24] version = quoted version number, in either quote style
+VersionInfo
+ -> "version", Eq, {'\'', VersionNum, '\'' | '"', VersionNum, '"'}
+
+// [25] Equals sign with optional white space on either side
+Eq
+ -> S?, '=', S?
+
+// [26] Version number: one or more letters, digits, '_', '.', ':' or '-'
+VersionNum
+ -> 'a-z' + 'A-Z' + '0-9' + '_' + '.' + ':' + '-'...
+
+// [27] Miscellaneous item; white space is taken one character at a time
+Misc
+// -> Comment | PI | S
+ -> Comment | PI | SpaceChar                    // Avoid double recursion
+
+
+// Document Type Definition
+
+// [28] Document type declaration; sets PCB.dtdPresent to 1 once the whole declaration has been parsed
+doctypedecl
+ -> "<!DOCTYPE", S, Name:n,
+    [S, [ExternalID, S?]],
+      [ '[', [markupdecl | PEReference | SpaceChar]..., ']', S?], '>' =PCB.dtdPresent=1;
+
+// [29] The declarations that may appear in a DTD subset
+markupdecl
+ -> elementdecl
+ -> AttlistDecl
+ -> EntityDecl
+ -> NotationDecl
+ -> PI
+ -> Comment
+
+
+// External Subset
+
+// [30] External subset: optional text declaration followed by the declarations
+
+extSubset
+ -> TextDecl?, extSubsetDecl
+
+// [31] Zero or more declarations, conditional sections, PEReferences or white space characters
+extSubsetDecl
+ -> [markupdecl | conditionalSect | PEReference | SpaceChar]...
+
+
+// Standalone Document Declaration
+
+// [32] Standalone declaration.  The braces group the four quoted values so
+// that every alternative is preceded by "standalone" and Eq; a bare '|' at
+// rule level would instead begin a new rule consisting of the quoted value
+// alone (compare the brace grouping in VersionInfo and EncodingDecl).
+SDDecl
+ -> "standalone", Eq, {"'yes'" | "\"yes\"" | "'no'" | "\"no\""}
+
+
+// Language Identification
+
+// [33] Language identifier: a language code followed by any number of '-' subcodes
+LanguageId
+ -> Langcode, ['-', Subcode]...
+
+// [34] The three forms of language code
+Langcode
+ -> ISO639Code | IanaCode | UserCode
+
+// [35] Two letters; the first may not be i, I, x or X, which introduce the other two forms
+ISO639Code
+ -> 'a-z' + 'A-Z' -('i' + 'I' + 'x' + 'X'), 'a-z' + 'A-Z'
+
+// [36] 'i' or 'I', a hyphen, then one or more letters
+IanaCode
+ -> 'i' + 'I', '-', 'a-z' + 'A-Z'...
+
+// [37] 'x' or 'X', a hyphen, then one or more letters
+UserCode
+ -> 'x' + 'X', '-', 'a-z' + 'A-Z'...
+
+// [38] One or more letters
+Subcode
+ -> 'a-z' + 'A-Z'...
+
+
+// Element
+
+// [39] Element: either an empty-element tag or start tag, content, end tag
+element
+ -> EmptyElementTag
+ -> STag, content, ETag
+
+
+// Start-Tags, End-Tags, and Empty-Element Tags
+
+// Start-tag
+
+// [40] Start tag: hands the name and the collected attribute list to PCB.startTag
+STag
+ -> '<', Name:n, AttributeList:l, S?, '>'  =PCB.startTag(CONTEXT, n, l);
+
+(AttvalList) AttributeList
+ ->                                        =AttvalList();
+ -> AttributeList:list, S, Attribute:a     =list.push(a);
+
+// [41] One name = value pair, built together with CONTEXT
+(AttvalPair) Attribute
+ -> Name:n, Eq, AttValue:s                =AttvalPair(n, s, CONTEXT);
+
+// End-tag
+
+// [42] End tag: hands the name to PCB.endTag
+ETag
+ -> "</", Name:n, S?, '>'   =PCB.endTag(n);
+
+// Content of Elements
+
+// [43] Content of an element: zero or more content units.  The Recommendation
+// allows empty content, so that <a></a> is a well-formed element.
+content
+ -> content unit?...
+
+content unit
+ -> element
+ -> CharData - SpaceChar:c =PCB.textChar(c);
+ -> CharRef:c   =PCB.textChar(c);
+ -> EntityRef
+ -> SpaceChar:c =PCB.spaceChar(c);
+ -> "\r\n"      =PCB.spaceChar('\n');
+ -> CDSect
+ -> PI
+ -> Comment
+
+// Tags for Empty Elements
+
+// [44] Empty-element tag: hands the name and the collected attribute list to PCB.emptyTag
+EmptyElementTag
+ -> '<', Name:n, AttributeList:l, S?, "/>" =PCB.emptyTag(CONTEXT, n, l);
+
+// Element Type Declaration
+
+// [45] Element type declaration; recognized only, there is no reduction procedure
+elementdecl
+ -> "<!ELEMENT", S, Name, S, contentspec, S?, '>'
+
+// [46] The four kinds of content specification
+contentspec
+ -> "EMPTY" | "ANY" | Mixed | Children
+
+
+// Element-content Models
+
+// [47] Element content: a choice or sequence with an optional occurrence indicator
+Children
+ -> {choice | seq}, ['?' | '*' | '+']
+
+// [48] Content particle: a name, choice or sequence with an optional occurrence indicator
+cp
+ -> {Name | choice | seq}, ['?' | '*' | '+']
+
+// [49] Parenthesized list of content particles separated by '|'; also matches a single particle
+choice
+ -> '(', S?, cp, S?, ['|', S?, cp, S?]..., ')'
+
+// [50] Parenthesized list of content particles separated by ','; needs at least one ','
+seq
+ -> '(', S?, cp, S?, {',', S?, cp, S?}..., ')'
+
+
+// Mixed-content Declaration
+
+// [51] Mixed content: #PCDATA alone, or #PCDATA with '|'-separated element names closed by ")*"
+Mixed
+ -> '(', S?, "#PCDATA", ['|', S?, Name | S]/..., ")*"
+ -> '(', S?, "#PCDATA", S?, ')'
+
+
+// Attribute-list Declaration
+
+// [52] Attribute list declaration: hands the element name and the definitions to PCB.registerAttributes
+AttlistDecl
+ -> "<!ATTLIST", S, Name:n, AttDefs:l, S?, '>'   =PCB.registerAttributes(n, l);
+
+(AttributeList) AttDefs
+ -> AttDef:a                                     =AttributeList().push(a);
+ -> AttDefs:l, AttDef:a                          =l.push(a);
+
+// [53] One attribute definition, built from RULE_CONTEXT[1], the name, the type and the default
+(Attribute) AttDef
+ -> S, Name:n, S, AttType:t, S, DefaultDecl:d    =Attribute(RULE_CONTEXT[1], n, t, d);
+
+
+// Attribute Types
+
+// [54] Attribute type; the value is passed up from whichever kind matched
+(Attribute::Type) AttType
+ -> StringType
+ -> TokenizedType
+ -> EnumeratedType
+
+// [55] String type
+(Attribute::Type) StringType
+ -> "CDATA"             =Attribute::CDATA;
+
+// [56] Tokenized types, each mapped to the Attribute::Type constant of the same name
+(Attribute::Type) TokenizedType
+ -> "ID"                =Attribute::ID;
+ -> "IDREF"             =Attribute::IDREF;
+ -> "IDREFS"            =Attribute::IDREFS;
+ -> "ENTITY"            =Attribute::ENTITY;
+ -> "ENTITIES"          =Attribute::ENTITIES;
+ -> "NMTOKEN"           =Attribute::NMTOKEN;
+ -> "NMTOKENS"          =Attribute::NMTOKENS;
+
+// [57] Enumerated types; the listed names or tokens themselves are not recorded
+(Attribute::Type) EnumeratedType
+ -> NotationType   =Attribute::NOTATION;
+ -> Enumeration    =Attribute::ENUMERATION;
+
+// [58] Notation type: "NOTATION" and a parenthesized list of names separated
+// by '|'.  White space is optional after the opening parenthesis as well as
+// around each '|' and before the closing parenthesis.
+NotationType
+ -> "NOTATION", S, '(', S?, Name, S?, ['|', S?, Name, S?]..., ')'
+
+// [59] Enumeration: a parenthesized list of name tokens separated by '|',
+// with the same optional white space as NotationType.
+Enumeration
+ -> '(', S?, Nmtoken, S?, ['|', S?, Nmtoken, S?]..., ')'
+
+
+// Attribute Defaults
+
+// [60] Default declaration, turned into a Default object; the second argument 1 marks a #FIXED value
+(Default) DefaultDecl
+ -> "#REQUIRED"                =Default(Default::REQUIRED);
+ -> "#IMPLIED"                 =Default();
+ -> AttValue:v                 =Default(v);
+ -> "#FIXED", S, AttValue:v    =Default(v,1);
+
+
+// Conditional Section
+
+// [61] Conditional section: include or ignore
+conditionalSect
+ -> includeSect | ignoreSect
+
+// [62] INCLUDE section wrapping further external subset declarations
+includeSect
+ -> "<![", S?, "INCLUDE", S?, '[', extSubsetDecl, "]]>"
+
+// [63] IGNORE section
+ignoreSect
+ -> "<![", S?, "IGNORE", S?, '[', ignoreSectContents?..., "]]>"
+
+// [64] Contents of an IGNORE section, allowing nested "<![" ... "]]>" pairs
+ignoreSectContents
+ -> Ignore, ["<![", ignoreSectContents, "]]>", Ignore]...
+
+// [65] Ignored text: zero or more Chars
+Ignore
+ -> Char?...
+
+
+//Character Reference
+
+// [66] Character reference; neither rule has a reduction procedure, so the value comes from the decimal or hex token
+(int) CharRef
+ //-> "&#", '0-9'..., ';'
+ //-> "&#x", '0-9' + 'a-f' + 'A-F'..., ';'
+ //-> decimal CharRef:c, ';'      =PCB.currentCharRef = c, PCB.tokenType = Pcb::charRefType;
+ -> decimal CharRef, ';'
+ //-> hex CharRef:c, ';'          =PCB.currentCharRef = c, PCB.tokenType = Pcb::charRefType;
+ -> hex CharRef, ';'
+
+(int) decimal CharRef
+ -> "&#", '0-9':d          =d-'0';
+ -> decimal CharRef:n, '0-9':d          =10*n + d-'0';
+
+// Hexadecimal character reference "&#x...": accumulates the code value.
+// A decimal digit contributes d-'0'.  For a letter digit, (d&7) + 9 maps
+// 'a'/'A' .. 'f'/'F' onto 10 .. 15.  The first digit may be a letter too,
+// so that references such as &#xA; are recognized.
+(int) hex CharRef
+ -> "&#x", '0-9':d                      =d-'0';
+ -> "&#x", 'a-f' + 'A-F':d              =(d&7) + 9;
+ -> hex CharRef:n, '0-9':d              =16*n + d-'0';
+ -> hex CharRef:n, 'a-f' + 'A-F':d      =16*n + (d&7) + 9;
+
+//Entity Reference
+
+// [67] Reference: an entity reference or a character reference
+(int) Reference
+ -> EntityRef | CharRef
+
+// [68] Entity reference: stores the name in PCB.currentEntityName and sets tokenType to entityRefType
+(int) EntityRef
+ -> '&', Name:s, ';'  =PCB.currentEntityName = s, PCB.tokenType = Pcb::entityRefType;
+
+// [69] Parameter entity reference; recognized only
+PEReference
+ -> '%', Name, ';'
+
+
+// Entity Declaration
+
+// [70] Entity declaration: general or parameter entity
+EntityDecl
+ -> GEDecl | PEDecl
+
+// [71] General entity declaration; trailing white space before '>' is handled inside EntityDef
+GEDecl
+ -> "<!ENTITY", S, Name, S, EntityDef, '>'
+
+// [72] Parameter entity declaration
+PEDecl
+ -> "<!ENTITY", S, '%', S, Name, S, PEDef, S?, '>'
+
+// [73] Entity definition, including the optional white space that the
+// Recommendation allows before the closing '>' of a general entity
+// declaration.  White space is accepted after an NDATA declaration as well
+// as after a bare ExternalID.
+EntityDef
+ -> EntityValue, S? | ExternalID, [NDataDecl, S? | S]
+
+// [74] Parameter entity definition
+PEDef
+ -> EntityValue | ExternalID
+
+
+// External Entity Declaration
+
+// [75] External identifier: SYSTEM with a system literal, or PUBLIC with a public and a system literal
+ExternalID
+ -> "SYSTEM", S, SystemLiteral
+ -> "PUBLIC", S, PubidLiteral, S, SystemLiteral
+
+// [76] Notation data declaration
+NDataDecl
+ -> S, "NDATA", S, Name
+
+
+// Text Declaration
+
+// [77] Text declaration: optional version info followed by a required encoding declaration
+TextDecl
+ -> "<?xml", S, [VersionInfo, S], EncodingDecl, S?, "?>"
+
+
+// Well-Formed External Parsed Entity
+
+// [78] External parsed entity: optional text declaration, then content
+extParsedEnt
+ -> TextDecl?, content
+
+// [79] External parameter entity: optional text declaration, then external subset declarations
+extPE
+ -> TextDecl?, extSubsetDecl
+
+
+// Encoding Declaration
+
+// [80] encoding = quoted encoding name, in either quote style
+EncodingDecl
+ -> "encoding", Eq, {'"', EncName, '"' | '\'', EncName, '\''}
+
+// [81] Encoding name: a letter followed by any number of letters, digits,
+// '.', '_' or '-', as in the Recommendation.
+EncName
+ -> 'a-z' + 'A-Z', ['a-z'+'A-Z'+'0-9'+'_'+'.' | '-']...
+
+
+// Notation Declarations
+
+// [82] Notation declaration, identified by an ExternalID or a PublicID
+NotationDecl
+ -> "<!NOTATION", S, Name, S, {ExternalID, S? | PublicID}, '>'
+
+// [83] Public identifier without a system literal
+PublicID
+ -> "PUBLIC", S, PubidLiteral, S?
+
+
+// Characters
+
+// [84] Letters; currently the same set as BaseChar
+Letter = BaseChar           // | Ideographic       // No ideographs for now
+
+// [85] Base characters: A-Z, a-z and three ranges in 0xc0..0xff (0xd7 and 0xf7 are left out)
+BaseChar =                             // only 8 bit characters for now
+  0x41..0x5a +
+  0x61..0x7a +
+  0xc0..0xd6 +
+  0xd8..0xf6 +
+  0xf8..0xff
+
+// [86] Not defined in this 8 bit version
+// Ideographic =
+
+
+// [87] Not defined in this 8 bit version
+// CombiningChar =
+
+// [88] Decimal digits '0'..'9'
+Digit = 0x30..0x39
+
+// [89] The one extender character in the 8 bit range
+Extender = 0xb7
+
+{
+  #define GET_CONTEXT CONTEXT = Context(PCB.line, PCB.column)  // a context is built from the PCB's current line and column
+
+  typedef parse_pcb_struct Pcb;  // short alias, used as Pcb::commentType etc. in the reduction procedures
+
+  // Construct a parser control block: no token recognized yet, no DTD seen.
+  parse_pcb_struct::parse_pcb_struct()
+    : tokenType(none)
+    , dtdPresent(0)
+  {}
+
+  // Record one white space character of content.  If text has been
+  // collected, flag a text token first.  A carriage return is stored as a
+  // newline.
+  void parse_pcb_struct::spaceChar(int c) {
+    if (textStack.size()) {
+      tokenType = textType;
+    }
+    spaceStack.concat(c == '\r' ? '\n' : c);
+  }
+
+  // Record one non-space character of content.  If white space has been
+  // collected, flag a space token first.
+  void parse_pcb_struct::textChar(int c) {
+    if (spaceStack.size()) {
+      tokenType = spaceType;
+    }
+    char ch = (char)c;
+    textStack.concat(ch);
+  }
+
+  // Normalize white space in place: leading and trailing white space is
+  // removed and every interior run of tab, CR, LF or blank becomes a single
+  // blank.  A string whose character pointer is NULL is left alone.
+  void parse_pcb_struct::normalize(AgString &s) {
+    char *src = s;
+    char *dst = src;
+    if (src == NULL) return;
+
+    // Skip leading white space
+    for (; *src && strchr("\t\r\n ", *src); src++) {}
+
+    while (*src) {
+      // Copy one word down
+      for (; *src && strchr("\t\r\n ", *src) == NULL; src++) {
+        *dst++ = *src;
+      }
+      // Skip the white space after it
+      for (; *src && strchr("\t\r\n ", *src); src++) {}
+      // Another word follows: separate it with a single blank
+      if (*src) {
+        *dst++ = ' ';
+      }
+    }
+    *dst = 0;
+  }
+
+  // Check the attributes found on a tag against the declarations registered
+  // for element `name`, then normalize the values of all non-CDATA attributes.
+  //   context - position attached to any warning that is issued
+  //   name    - element name used to look up the declared attributes
+  //   map     - attribute values found on the tag; entries may be added or rewritten
+  // Does nothing unless a document type declaration has been seen.
+  void parse_pcb_struct::checkAttributes(const Context &context, const AgString &name, Symtab &map) {
+    if (!dtdPresent) return;
+    Element &element = elementMap[name];
+    AgBaseStack<Attribute> &attributeList = element.attributeList;
+    char buf[200];
+    int i;
+    for (i = 0; i < attributeList.size(); i++) {
+      Attribute &attribute = attributeList[i];
+      switch (attribute.def.presence) {
+        case Default::REQUIRED: {
+          const char *value = map.findValue(attribute.name);
+          if (value) continue;
+          // Record a placeholder value for the missing attribute
+          map[attribute.name] = "VALUE NOT SPECIFIED";
+          // Bounded formatting: the name comes from the document and may be
+          // longer than the buffer
+          snprintf(buf, sizeof buf, "Required attribute '%s' is missing", (const char *)attribute.name);
+          warningList.push(Warning(context, buf));
+          break;
+        }
+        case Default::DEFAULT: {
+          const char *value = map[attribute.name];
+          if (value == 0) value = attribute.def.value;
+          // Only a #FIXED attribute is required to carry its default value;
+          // an ordinary default may be overridden without comment.
+          // NOTE(review): the default is only used for this comparison and
+          // is not stored in the map -- confirm whether it should be.
+          if (!attribute.def.fixed || attribute.def.value == value) continue;
+          snprintf(buf, sizeof buf, "Value of fixed attribute '%s' does not match default value ",
+              (const char *) attribute.name);
+          warningList.push(Warning(context, buf));
+          break;
+        }
+      }
+    }
+    for (i = 0; i < attributeList.size(); i++) {
+      Attribute &attribute = attributeList[i];
+      if (attribute.type != Attribute::CDATA) normalize(map[attribute.name]);
+    }
+  }
+
+  // Warn about an attribute that is specified more than once on a tag.
+  //   context - position attached to the warning
+  //   value   - value already recorded for the attribute, or NULL if none
+  //   p       - the attribute/value pair just encountered
+  // Returns 0 if there was no previous value, otherwise pushes a warning and
+  // returns 1.
+  int parse_pcb_struct::warnAttval(const Context &context, const char *value, const AttvalPair &p) {
+    if (value == NULL) return 0;
+    char buf[200];
+    // Bounded formatting: both strings come from the document and may be
+    // longer than the buffer; the message is truncated rather than overrun.
+    snprintf(buf, sizeof buf, "Value of '%s' ignored. Previously specified as '%s'",
+            (const char *) p.name, value);
+    warningList.push(Warning(context, buf));
+    return 1;
+  }
+
+  // Handle a start tag: build the attribute map (the first value of a
+  // repeated attribute wins, later ones are warned about), check it against
+  // the declarations, push the element onto the tag and map stacks and flag
+  // a start token.
+  void parse_pcb_struct::startTag(const Context &context, const AgString &name, AttvalList &list) {
+    Symtab attributes;
+    int k = 0;
+    while (k < list.size()) {
+      AgString &slot = attributes[list[k].name];
+      if (!warnAttval(context, slot, list[k])) {
+        slot = list[k].value;
+      }
+      k++;
+    }
+
+    currentTagName = name;
+    checkAttributes(context, name, attributes);
+    currentMap = attributes;
+
+    tagStack.push(name);
+    mapStack.push(attributes);
+
+    tokenType = startType;
+  }
+
+  // Handle an empty-element tag: build and check the attribute map as for a
+  // start tag, but leave the tag and map stacks alone, and flag an empty
+  // token.
+  void parse_pcb_struct::emptyTag(const Context &context, const AgString &name, AttvalList &list) {
+    Symtab attributes;
+    int k = 0;
+    while (k < list.size()) {
+      AgString &slot = attributes[list[k].name];
+      if (!warnAttval(context, slot, list[k])) {
+        attributes[list[k].name] = list[k].value;
+      }
+      k++;
+    }
+
+    checkAttributes(context, name, attributes);
+
+    currentTagName = name;
+    currentMap = attributes;
+    tokenType = emptyType;
+  }
+
+  // Handle an end tag: pop the map and tag stacks and check that the name
+  // matches the element being closed.  On a match an end token is flagged;
+  // otherwise the parse is stopped with a semantic error.
+  void parse_pcb_struct::endTag(const AgString &name) {
+    currentTagName = name;
+    currentMap = mapStack.pop();
+
+    if (name != tagStack.pop()) {
+      error_message = "Element nesting error";
+      exit_flag = AG_SEMANTIC_ERROR_CODE;
+      tokenType = errorType;
+    }
+    else {
+      tokenType = endType;
+    }
+  }
+  // Record the attribute definitions of one ATTLIST declaration.
+  //   name          - element the attributes belong to
+  //   attributeList - definitions in the order they were declared
+  // The first definition of an attribute is kept.  A later definition of the
+  // same attribute is ignored, with a warning that points at the earlier one.
+  void parse_pcb_struct::registerAttributes(const AgString &name, const AgBaseStack<Attribute> &attributeList) {
+    Element &element = elementMap[name];
+    if (element.name.size() == 0) element.name = name;
+    int i;
+    for (i = 0; i < attributeList.size(); i++) {
+      AgString attributeName = attributeList[i].name;
+      // The code relies on -1 meaning "no definition recorded yet"
+      int &index = element.attributeIndex[attributeName];
+      if (index != -1) {
+        Attribute &attribute = element.attributeList[index];
+        Context previous = attribute.context;
+        char buf[200];
+        // Bounded formatting: both names come from the DTD and may be longer
+        // than the buffer; the message is truncated rather than overrun.
+        snprintf(buf, sizeof buf, "Redefinition of '%s::%s' ignored. Defined at line %d, column %d",
+                (const char *) name,
+                (const char *) attributeName, previous.line, previous.column);
+        warningList.push(Warning(attributeList[i].context, buf));
+        continue;
+      }
+      index = element.attributeList.size();
+      element.attributeList.push(attributeList[i]);
+    }
+  }
+}