Mercurial > ~dholland > hg > ag > index.cgi
diff tests/agcl/parsifal/xmlp8.syn @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
{/*
 * XML Syntax
 * Transcribed from
 *   Extensible Markup Language (XML) 1.0
 *   W3C Recommendation 10-February-1998
 *   http://www.w3.org/TR/1998/REC-xml-19980210
 *
 * Transcription Copyright © Parsifal Software, 1999.
 *
 * Revision 1 fixed definition of [4] NameChar
 *
 * Revision 2, begun 1/24/00
 *
 *   Fixed problem with mixed content declaration [51]
 *   Fixed missing S in [52]
 *   Removed unnecessary "sticky" statement
*/

#include "xmldef8.h"
#include <stdio.h>
#include <string.h>   // strchr(), used by normalize() in the epilogue

}


// Configuration section: parser options and the members added to the
// parser control block (PCB).
[
  reserve keywords {"--", "?>", "]]>"}
  test file mask = "*.xml"
  parser file name = "#.cpp"
  parser name = parse
  reentrant parser
  //line numbers
  event driven
  context type = Context
  extend pcb {
    // State filled in by the reduction procedures below.
    AgStack<AgString> tagStack;
    AgStack<Symtab> mapStack;
    AgString textStack;
    AgString spaceStack;
    enum TokenType {
      none,
      spaceType,
      textType,
      commentType,
      startType,
      endType,
      emptyType,
      entityRefType,
      //charRefType,
      errorType
    } tokenType;
    AgString currentTagName;
    AgString currentEntityName;
    //int currentCharRef;
    AgString commentString;
    Symtab currentMap;
    AgStack<Warning> warningList;
    AgBaseMapString<Element> elementMap;
    int dtdPresent;

    parse_pcb_struct();
    // NOTE(review): declared without a return type and not defined in this
    // file; confirm the intended signature against its definition.
    parse(char *input, AgString *);
    void spaceChar(int);
    void textChar(int);
    void startTag(const Context &, const AgString &, AttvalList &);
    void emptyTag(const Context &, const AgString &, AttvalList &);
    void endTag(const AgString &);
    void registerAttributes(const AgString &name, const AgBaseStack<Attribute> &AttributeList);
    void checkAttributes(const Context &, const AgString &name, Symtab &map);
    int warnAttval(const Context &context, const char *, const AttvalPair &p);
    static void normalize(AgString &s);
  }
  wrapper {AttvalPair,
           AgString,
           AttvalList,
           Attribute,
           AttributeList,
           Default}
]

// Document

// [1]
document $
  -> prolog, element, Misc?..., eof

// For completeness
eof = -1


// Character Range

// [2]
//Char = 0x9 + 0xA + 0xD + 0x20..0xd7ff + 0xe000..0xfffd + 0x0000-0x10ffff
// Production [2] of the Recommendation includes #xD; without it a carriage
// return inside a comment, PI or ignored section is a syntax error.
Char = 0x9 + 0xA + 0xD + 0x20..0xff      // 8 bit characters only pro tem


// White Space
// [3]
SpaceChar = 0x20 + 0x9 + 0xd + 0xa

S
  -> SpaceChar...


// Names and Tokens

// [4]
NameChar = Letter + Digit + '.' + '-' + '_' + ':' + Extender // + CombiningChar

// [5]
(AgString) Name
  -> Letter + '_' + ':':c                =AgString().concat(c);
  -> Name:s, NameChar:c                  =s.concat(c);

// [6]
Names
  -> Name
  -> Names, S, Name

// [7]
Nmtoken
  -> NameChar...

// [8]
Nmtokens
  -> Nmtoken, [S, Nmtoken]...


// Literals

// [9]
EntityValue
  -> '"', [~(eof + '%' + '&' + '"') | PEReference | Reference]..., '"'
  -> '\'', [~(eof + '%' + '&' + '\'') | PEReference | Reference]..., '\''

// [10]
// Production [10] excludes '<' and '&' (not '%') from attribute values, so
// values such as "100%" are accepted and a raw '<' is rejected.
// Every white space character is stored as a single blank.
(AgString) AttValue
  -> '"', dq AttValString:s, '"'         =s;
  -> '\'', sq AttValString:s, '\''       =s;

(AgString) dq AttValString
  ->                                     =AgString();
  -> dq AttValString:s, ~(eof + '<' + '&' + '"' + SpaceChar):c =s.concat(c);
  -> dq AttValString:s, SpaceChar        =s.concat(' ');
  //-> dq AttValString:s, CharRef        =s;
  -> dq AttValString:s, CharRef:c        =s.concat(c);
  -> dq AttValString:s, EntityRef        =s;

(AgString) sq AttValString
  ->                                     =AgString();
  -> sq AttValString:s, ~(eof + '<' + '&' + '\'' + SpaceChar):c =s.concat(c);
  -> sq AttValString:s, SpaceChar        =s.concat(' ');
  -> sq AttValString:s, CharRef:c        =s.concat(c);
  -> sq AttValString:s, EntityRef        =s;

// [11]
SystemLiteral
  -> '"', ~(eof + '"')?..., '"'
  -> '\'', ~(eof + '\'')?..., '\''

// [12]
PubidLiteral
  -> '"', PubidChar?..., '"'
  -> '\'', PubidChar-'\''?..., '\''

// [13]
// '=' is part of the PubidChar set in production [13].
PubidChar = 0x20 + 0xd + 0xa + 'a-z' + 'A-Z' + '0-9' +
            '-' + '\'' + '(' + ')' + '+' + ',' + '.' +
            '/' + ':' + '=' + '?' + ';' + '!' + '*' + '#' +
            '@' + '$' + '_' + '%'


// Character Data

// [14]
CharData = ~(eof + '<' + '&')  // Note that the iteration is in the usage
  // Note that use of keyword "]]>" will take care of CDATA problem


// Comments

// [15]
Comment
  -> "<!--", comment text:t, "--", '>'   =PCB.tokenType = Pcb::commentType, PCB.commentString = t;

(AgString) comment text
  ->                                     =AgString();
  -> comment text:s, Char:c              =s.concat(c);


// Processing Instructions

// [16]
PI
  -> "<?", PITarget, [S, [Char-SpaceChar, Char?...]], "?>"

// [17]
PITarget
  -> Name        // Name lookup mechanism should reject "xml"


// CDATA Sections

// [18]
CDSect
  -> CDStart, CData, CDEnd

// [19]
CDStart
  -> "<![CDATA["

// [20]
CData
  -> [Char - SpaceChar:c                 =PCB.textChar(c); |
      "\r\n"                             =PCB.spaceChar('\n'); |
      SpaceChar:c                        =PCB.spaceChar(c);]...
  // Keyword recognition logic overrides character recognition

// [21]
CDEnd
  -> "]]>"


// Prolog

// [22]
prolog
  -> XMLDecl?, Misc?..., [doctypedecl, Misc?...]

// [23]
XMLDecl
//  -> "<?xml", VersionInfo, EncodingDecl?, SDDecl?, S?, "?>"
  -> "<?xml", S, VersionInfo, VersionDecl, "?>"

  VersionDecl
  -> S?
  -> S, EncodingDecl, S?
  -> S, EncodingDecl, S, SDDecl, S?
  -> S, SDDecl, S?


// [24]
VersionInfo
  -> "version", Eq, {'\'', VersionNum, '\'' | '"', VersionNum, '"'}

// [25]
Eq
  -> S?, '=', S?

// [26]
VersionNum
  -> 'a-z' + 'A-Z' + '0-9' + '_' + '.' + ':' + '-'...
// [27]
Misc
//  -> Comment | PI | S
  -> Comment | PI | SpaceChar  // Avoid double recursion


// Document Type Definition

// [28]
// Recognizing a document type declaration sets PCB.dtdPresent.
doctypedecl
  -> "<!DOCTYPE", S, Name:n,
       [S, [ExternalID, S?]],
       [ '[', [markupdecl | PEReference | SpaceChar]..., ']', S?], '>' =PCB.dtdPresent=1;

// [29]
markupdecl
  -> elementdecl
  -> AttlistDecl
  -> EntityDecl
  -> NotationDecl
  -> PI
  -> Comment


// External Subset

// [30]

extSubset
  -> TextDecl?, extSubsetDecl

// [31]
extSubsetDecl
  -> [markupdecl | conditionalSect | PEReference | SpaceChar]...


// Standalone Document Declaration

// [32]
// The alternatives are grouped in braces: an unbracketed '|' starts a new
// rule for SDDecl, which would make the quoted literals stand alone as
// complete declarations and tie "standalone" to 'yes' only.
SDDecl
  -> "standalone", Eq, {"'yes'" | "\"yes\"" | "'no'" | "\"no\""}


// Language Identification

// [33]
LanguageId
  -> Langcode, ['-', Subcode]...

// [34]
Langcode
  -> ISO639Code | IanaCode | UserCode

// [35]
ISO639Code
  -> 'a-z' + 'A-Z' -('i' + 'I' + 'x' + 'X'), 'a-z' + 'A-Z'

// [36]
IanaCode
  -> 'i' + 'I', '-', 'a-z' + 'A-Z'...

// [37]
UserCode
  -> 'x' + 'X', '-', 'a-z' + 'A-Z'...

// [38]
Subcode
  -> 'a-z' + 'A-Z'...


// Element

// [39]
element
  -> EmptyElementTag
  -> STag, content, ETag


// Start-Tags, End-Tags, and Empty-Element Tags

// Start-tag

// [40]
STag
  -> '<', Name:n, AttributeList:l, S?, '>'   =PCB.startTag(CONTEXT, n, l);

(AttvalList) AttributeList
  ->                                         =AttvalList();
  -> AttributeList:list, S, Attribute:a      =list.push(a);

// [41]
(AttvalPair) Attribute
  -> Name:n, Eq, AttValue:s                  =AttvalPair(n, s, CONTEXT);

// End-tag

// [42]
ETag
  -> "</", Name:n, S?, '>'                   =PCB.endTag(n);

// Content of Elements

// [43]
// Production [43] allows zero or more units, so that an element such as
// <a></a> with nothing between its tags is accepted.
content
  -> content unit?...
// One item of element content.  Text and white space characters are handed
// to the PCB one at a time; a CR LF pair is reported as a single '\n'.
content unit
  -> element
  -> CharData - SpaceChar:c                  =PCB.textChar(c);
  -> CharRef:c                               =PCB.textChar(c);
  -> EntityRef
  -> SpaceChar:c                             =PCB.spaceChar(c);
  -> "\r\n"                                  =PCB.spaceChar('\n');
  -> CDSect
  -> PI
  -> Comment

// Tags for Empty Elements

// [44]
EmptyElementTag
  -> '<', Name:n, AttributeList:l, S?, "/>"  =PCB.emptyTag(CONTEXT, n, l);

// Element Type Declaration

// [45]
elementdecl
  -> "<!ELEMENT", S, Name, S, contentspec, S?, '>'

// [46]
contentspec
  -> "EMPTY" | "ANY" | Mixed | Children


// Element-content Models

// [47]
Children
  -> {choice | seq}, ['?' | '*' | '+']

// [48]
cp
  -> {Name | choice | seq}, ['?' | '*' | '+']

// [49]
choice
  -> '(', S?, cp, S?, ['|', S?, cp, S?]..., ')'

// [50]
seq
  -> '(', S?, cp, S?, {',', S?, cp, S?}..., ')'


// Mixed-content Declaration

// [51]
Mixed
  -> '(', S?, "#PCDATA", ['|', S?, Name | S]/..., ")*"
  -> '(', S?, "#PCDATA", S?, ')'


// Attribute-list Declaration

// [52]
AttlistDecl
  -> "<!ATTLIST", S, Name:n, AttDefs:l, S?, '>'  =PCB.registerAttributes(n, l);

(AttributeList) AttDefs
  -> AttDef:a                                =AttributeList().push(a);
  -> AttDefs:l, AttDef:a                     =l.push(a);

// [53]
(Attribute) AttDef
  -> S, Name:n, S, AttType:t, S, DefaultDecl:d   =Attribute(RULE_CONTEXT[1], n, t, d);


// Attribute Types

// [54]
(Attribute::Type) AttType
  -> StringType
  -> TokenizedType
  -> EnumeratedType

// [55]
(Attribute::Type) StringType
  -> "CDATA"                                 =Attribute::CDATA;

// [56]
(Attribute::Type) TokenizedType
  -> "ID"                                    =Attribute::ID;
  -> "IDREF"                                 =Attribute::IDREF;
  -> "IDREFS"                                =Attribute::IDREFS;
  -> "ENTITY"                                =Attribute::ENTITY;
  -> "ENTITIES"                              =Attribute::ENTITIES;
  -> "NMTOKEN"                               =Attribute::NMTOKEN;
  -> "NMTOKENS"                              =Attribute::NMTOKENS;

// [57]
(Attribute::Type) EnumeratedType
  -> NotationType                            =Attribute::NOTATION;
  -> Enumeration                             =Attribute::ENUMERATION;

// [58]
// Production [58] permits white space between '(' and the first Name.
NotationType
  -> "NOTATION", S, '(', S?, Name, S?, ['|', S?, Name, S?]..., ')'

+// [59] +Enumeration + -> '(', Nmtoken, S?, ['|', S?, Nmtoken, S?]..., ')' + + +// Attribute Defaults + +// [60] +(Default) DefaultDecl + -> "#REQUIRED" =Default(Default::REQUIRED); + -> "#IMPLIED" =Default(); + -> AttValue:v =Default(v); + -> "#FIXED", S, AttValue:v =Default(v,1); + + +// Conditional Section + +// [61] +conditionalSect + -> includeSect | ignoreSect + +// [62] +includeSect + -> "<![", S?, "INCLUDE", S?, '[', extSubsetDecl, "]]>" + +// [63] +ignoreSect + -> "<![", S?, "IGNORE", S?, '[', ignoreSectContents?..., "]]>" + +// [64] +ignoreSectContents + -> Ignore, ["<![", ignoreSectContents, "]]>", Ignore]... + +// [65] +Ignore + -> Char?... + + +//Character Reference + +// [66] +(int) CharRef + //-> "&#", '0-9'..., ';' + //-> "&#x", '0-9' + 'a-f' + 'A-F'..., ';' + //-> decimal CharRef:c, ';' =PCB.currentCharRef = c, PCB.tokenType = Pcb::charRefType; + -> decimal CharRef, ';' + //-> hex CharRef:c, ';' =PCB.currentCharRef = c, PCB.tokenType = Pcb::charRefType; + -> hex CharRef, ';' + +(int) decimal CharRef + -> "&#", '0-9':d =d-'0'; + -> decimal CharRef:n, '0-9':d =10*n + d-'0'; + +(int) hex CharRef + -> "&#x", '0-9':d =d-'0'; + -> hex CharRef:n, '0-9':d =16*n + d; + -> hex CharRef:n, 'a-f' + 'A-F':d =16*n + (d&7) + 9; + +//Entity Reference + +// [67] +(int) Reference + -> EntityRef | CharRef + +// [68] +(int) EntityRef + -> '&', Name:s, ';' =PCB.currentEntityName = s, PCB.tokenType = Pcb::entityRefType; + +// [69] +PEReference + -> '%', Name, ';' + + +// Entity Declaration + +// [70] +EntityDecl + -> GEDecl | PEDecl + +// [71] +GEDecl + -> "<!ENTITY", S, Name, S, EntityDef, '>' + +// [72] +PEDecl + -> "<!ENTITY", S, '%', S, Name, S, PEDef, S?, '>' + +// [73] +EntityDef + -> EntityValue, S? 
| ExternalID, [NDataDecl | S] + +// [74] +PEDef + -> EntityValue | ExternalID + + +// External Entity Declaration + +// [75] +ExternalID + -> "SYSTEM", S, SystemLiteral + -> "PUBLIC", S, PubidLiteral, S, SystemLiteral + +// [76] +NDataDecl + -> S, "NDATA", S, Name + + +// Text Declaration + +// [77] +TextDecl + -> "<?xml", S, [VersionInfo, S], EncodingDecl, S?, "?>" + + +// Well-Formed External Parsed Entity + +// [78] +extParsedEnt + -> TextDecl?, content + +// [79] +extPE + -> TextDecl?, extSubsetDecl + + +// Encoding Declaration + +// [80] +EncodingDecl + -> "encoding", Eq, {'"', EncName, '"' | '\'', EncName, '\''} + +// [81] +EncName + -> 'a-z' + 'A-Z', ['a-z'+'A-Z'+'0-9'+'_' | '-']... + + +// Notation Declarations + +// [82] +NotationDecl + -> "<!NOTATION", S, Name, S, {ExternalID, S? | PublicID}, '>' + +// [83] +PublicID + -> "PUBLIC", S, PubidLiteral, S? + + +// Characters + +// [84] +Letter = BaseChar // | Ideographic // No ideographs for now + +// [85] +BaseChar = // only 8 bit characters for now + 0x41..0x5a + + 0x61..0x7a + + 0xc0..0xd6 + + 0xd8..0xf6 + + 0xf8..0xff + +// [86] +// Ideographic = + + +// [87] +// CombiningChar = + +// [88] +Digit = 0x30..0x39 + +// [89] +Extender = 0xb7 + +{ + #define GET_CONTEXT CONTEXT = Context(PCB.line, PCB.column) + + typedef parse_pcb_struct Pcb; + + parse_pcb_struct::parse_pcb_struct() + : tokenType(Pcb::none), dtdPresent(0) + { + // Nothing to do + } + + void parse_pcb_struct::spaceChar(int c) { + if (c == '\r') c = '\n'; + if (textStack.size()) tokenType = textType; + spaceStack.concat(c); + } + + void parse_pcb_struct::textChar(int c) { + if (spaceStack.size()) tokenType=spaceType; + textStack.concat((char)c); + } + + void parse_pcb_struct::normalize(AgString &s) { + char *readPointer = s; + char *writePointer = readPointer; + if (readPointer == NULL) return; + while (*readPointer && strchr("\t\r\n ",*readPointer)) readPointer++; + while (*readPointer) { + while (*readPointer && strchr("\t\r\n ",*readPointer) == 
NULL) *writePointer++ = *readPointer++; + while (*readPointer && strchr("\t\r\n ",*readPointer)) readPointer++; + if (*readPointer) *writePointer++ = ' '; + } + *writePointer = 0; + } + + void parse_pcb_struct::checkAttributes(const Context &context, const AgString &name, Symtab &map) { + if (!dtdPresent) return; + Element &element = elementMap[name]; + AgBaseStack<Attribute> &attributeList = element.attributeList; + int i; + for (i = 0; i < attributeList.size(); i++) { + Attribute &attribute = attributeList[i]; + switch (attribute.def.presence) { + case Default::REQUIRED: { + const char *value = map.findValue(attribute.name); + if (value) continue; + map[attribute.name] = "VALUE NOT SPECIFIED"; + char buf[200]; + sprintf(buf, "Required attribute '%s' is missing", (const char *)attribute.name); + warningList.push(Warning(context, buf)); + break; + } + case Default::DEFAULT: { + const char *value = map[attribute.name]; + if (value == 0) value = attribute.def.value; + if (attribute.def.fixed && attribute.def.value == value) continue; + char buf[200]; + sprintf(buf, "Value of fixed attribute '%s' does not match default value ", + (const char *) attribute.name); + warningList.push(Warning(context, buf)); + break; + } + } + } + for (i = 0; i < attributeList.size(); i++) { + Attribute &attribute = attributeList[i]; + if (attribute.type != Attribute::CDATA) normalize(map[attribute.name]); + } + } + + int parse_pcb_struct::warnAttval(const Context &context, const char *value, const AttvalPair &p) { + if (value == NULL) return 0; + char buf[200]; + sprintf(buf, "Value of '%s' ignored. 
Previously specified as '%s'", + (const char *) p.name, value); + warningList.push(Warning(context, buf)); + return 1; + } + + void parse_pcb_struct::startTag(const Context &context, const AgString &name, AttvalList &list) { + Symtab map; + for (int i = 0; i < list.size(); i++) { + AgString &value = map[list[i].name]; + if (warnAttval(context, value, list[i])) continue; + value = list[i].value; + } + currentTagName = name; + checkAttributes(context, name, map); + currentMap = map; + tagStack.push(name); + mapStack.push(map); + tokenType = startType; + } + + void parse_pcb_struct::emptyTag(const Context &context, const AgString &name, AttvalList &list) { + Symtab map; + for (int i = 0; i < list.size(); i++) { + AgString &value = map[list[i].name]; + if (warnAttval(context, value, list[i])) continue; + map[list[i].name] = list[i].value; + } + checkAttributes(context, name, map); + currentTagName = name; + currentMap = map; + tokenType = emptyType; + } + + void parse_pcb_struct::endTag(const AgString &name) { + currentTagName = name; + currentMap = mapStack.pop(); + if (name != tagStack.pop()) { + error_message = "Element nesting error"; + exit_flag = AG_SEMANTIC_ERROR_CODE; + tokenType = errorType; + return; + } + tokenType = endType; + } + void parse_pcb_struct::registerAttributes(const AgString &name, const AgBaseStack<Attribute> &attributeList) { + Element &element = elementMap[name]; + if (element.name.size() == 0) element.name = name; + int i; + for (i = 0; i < attributeList.size(); i++) { + AgString attributeName = attributeList[i].name; + int &index = element.attributeIndex[attributeName]; + if (index != -1) { + Attribute &attribute = element.attributeList[index]; + Context previous = attribute.context; + char buf[200]; + sprintf(buf, "Redefinition of '%s::%s' ignored. 
Defined at line %d, column %d", + (const char *) name, + (const char *) attributeName, previous.line, previous.column); + warningList.push(Warning(attributeList[i].context, buf)); + continue; + } + index = element.attributeList.size(); + element.attributeList.push(attributeList[i]); + } + } +}