view tests/agcl/parsifal/xml2.syn @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line source

// Document

// [1]
// Grammar (start) token, marked by the trailing '$': a whole XML document is
// a prolog, exactly one root element, then any number of trailing Misc items.
document $
  -> prolog, element, Misc?...


// Character Range

// [2]
// Any legal XML character.  The full Unicode definition would be:
//Char = 0x9 + 0xA + 0xD + 0x20..0xd7ff + 0xe000..0xfffd + 0x10000..0x10ffff
// Carriage return (0xD) is a legal character and has to be part of the set;
// without it CR/LF line ends inside comments, processing instructions and
// CDATA sections (all built from Char) would be rejected.
Char = 0x9 + 0xA + 0xD + 0x20..0xff    // 8 bit characters only pro tem


// White Space

// [3]
// SpaceChar is a single white space character: tab, line feed, carriage
// return or blank.  S is a run of one or more of them.
SpaceChar
 -> 0x9 + 0xa + 0xd + 0x20
S
 -> SpaceChar...


// Names and Tokens

// [4]
// Characters that may appear anywhere in a name.  The underscore belongs to
// this set as well as to the set of name start characters used by Name;
// leaving it out would reject names such as "foo_bar".
NameChar = Letter + Digit + '.' + '-' + '_' + ':' + CombiningChar + Extender

// [5]
// A name starts with a letter, underscore or colon and continues with any
// number (possibly zero) of NameChar characters.
Name
 -> Letter + '_' + ':', NameChar?...

// [6]
// A list of names: one Name followed by any number of (S, Name) pairs.
Names
 -> Name, [S, Name]...

// [7]
// A name token: one or more NameChar characters.  Unlike Name, there is no
// restriction on the first character.
Nmtoken
 -> NameChar...

// [8]
// A list of name tokens: one Nmtoken followed by any number of
// (S, Nmtoken) pairs.
Nmtokens
 -> Nmtoken, [S, Nmtoken]...


// Literals

// [9]
// Quoted entity value, in a double-quoted and a single-quoted form.  Between
// the quotes any character other than eof, '%', '&' and the quote character
// in use is taken literally.  '%' and '&' are kept out of the literal set so
// that they can only be consumed through PEReference or Reference.
EntityValue
 -> '"', [~(eof + '%' + '&' + '"') | PEReference | Reference]..., '"'
 -> '\'', [~(eof + '%' + '&' + '\'') | PEReference | Reference]..., '\''

// [10]
// Quoted attribute value, in a double-quoted and a single-quoted form.
// Between the quotes any character other than eof, '<', '&' and the quote
// character in use is taken literally; '&' can only be consumed through
// Reference.  '%' is an ordinary character here (there is no PEReference
// alternative), so it must remain in the literal set -- e.g. width="100%".
AttValue
 -> '"', [~(eof + '<' + '&' + '"') | Reference]..., '"'
 -> '\'', [~(eof + '<' + '&' + '\'') | Reference]..., '\''

// [11]
// Quoted system identifier.  Between the quotes any number of characters is
// accepted, excluding only eof and the quote character in use.
SystemLiteral
 -> '"', ~(eof + '"')?..., '"'
 -> '\'', ~(eof + '\'')?..., '\''

// [12]
// Quoted public identifier, in a double-quoted and a single-quoted form.
// PubidChar contains the apostrophe, so in the single-quoted form it is
// subtracted from the allowed set; otherwise the closing quote could not be
// distinguished from a literal character.  The repetition applies to the
// whole set expression (PubidChar - '\'').
PubidLiteral
 -> '"', PubidChar?..., '"'
 -> '\'', PubidChar - '\''?..., '\''

// [13]
// Characters allowed in a public identifier: blank, CR, LF, ASCII letters and
// digits, and the punctuation  - ' ( ) + , . / : = ? ; ! * # @ $ _ %
PubidChar = 0x20 + 0xd + 0xa + 'a-z' + 'A-Z' + '0-9' +
            '-' + '\'' + '(' + ')' + '+' + ',' + '.' +
            '/' + ':' + '=' + '?' + ';' + '!' + '*' +
            '#' + '@' + '$' + '_' + '%'


// Character Data

// [14]
// Character data: any number (possibly zero) of characters other than eof,
// '<' and '&'.
CharData
 -> ~(eof + '<' + '&')?...
  // Note that use of keyword "]]>" will take care of CDATA problem


// Comments

// [15]
// A comment: the "<!--" opener, any number of Char characters, then the
// keyword "--" followed by '>'.
// NOTE(review): nothing in this rule removes "--" from the body; presumably
// keyword recognition of "--" takes priority over Char -- confirm.
Comment
 -> "<!--", Char?..., "--", '>'


// Processing Instructions

// [16]
// Processing instruction: "<?", the target name, optionally white space
// followed by any number of characters, then "?>".
// The body is spelled exactly like the Char set defined under [2], so the
// rule does not depend on how token names differing only in case are treated.
PI
 -> "<?", PITarget, [S, Char?...], "?>"

// [17]
// Target of a processing instruction: syntactically just a Name.  This rule
// itself does not exclude the reserved target "xml".
PITarget
 -> Name                    // Name lookup mechanism should reject "xml"


// CDATA Sections

// [18]
// A CDATA section: start marker, raw character data, end marker.
CDSect
 -> CDStart, CData, CDEnd

// [19]
// Opening marker of a CDATA section, matched as a single keyword.
CDStart
 -> "<![CDATA["

// [20]
// Body of a CDATA section: any number (possibly zero) of Char characters.
// The rule itself does not exclude the "]]>" terminator; see the note below.
CData
 -> Char?...
 // Keyword recognition logic overrides character recognition

// [21]
// Closing marker of a CDATA section.  The token name line is required:
// without it this rule would attach to the token defined just before it,
// and CDEnd (used by CDSect) would be left undefined.
CDEnd
 -> "]]>"


// Prolog

// [22]
// Document prolog: an optional XML declaration, any number of Misc items,
// then optionally a document type declaration followed by more Misc items.
prolog
 -> XMLDecl?, Misc?..., [doctypedecl, Misc?...]

// [23]
// XML declaration.  The commented-out rule is the direct transcription; the
// active rule folds "SDDecl?, S?" into one optional group that is either
// SDDecl with optional trailing space, or space alone.
// NOTE(review): presumably restructured because SDDecl itself begins with S,
// which makes the direct form ambiguous on white space -- confirm.
XMLDecl
// -> "<?xml", VersionInfo, EncodingDecl?, SDDecl?, S?, "?>"
 -> "<?xml", VersionInfo, EncodingDecl?, [SDDecl, S? | S], "?>"

// [24]
// Version pseudo-attribute: mandatory leading white space, the keyword
// "version", an equals sign, and the version number in matching single or
// double quotes.
VersionInfo
 -> S, "version", Eq, {'\'', VersionNum, '\'' | '"', VersionNum, '"'}

// [25]
// An equals sign with optional white space on either side.
Eq
 -> S?, '=', S?

// [26]
// Version number: one or more characters drawn from ASCII digits and
// letters plus '-', '.', ':' and '_'.  The repetition applies to the whole
// set expression.
VersionNum
 -> '0-9' + 'A-Z' + 'a-z' + '-' + '.' + ':' + '_'...

// [27]
// Miscellaneous item allowed between the main parts of a document: a
// comment, a processing instruction, or a single white space character.
// The direct transcription would use S here:
// -> Comment | PI | S
// but Misc is always used in a repetition, so a single SpaceChar is used
// instead to avoid double recursion.
Misc
 -> Comment
 -> PI
 -> SpaceChar


// Document Type Definition

// [28]
// Document type declaration: "<!DOCTYPE", white space, the document type
// name, an optional external identifier, an optional internal subset in
// square brackets, and the closing '>'.
// The two commented-out rules are earlier forms kept for reference.  The
// active rule uses SpaceChar rather than S inside the repeated internal
// subset, and nests the optional white space with ExternalID instead of
// having two independent optional items in a row.
doctypedecl
// -> "<!DOCTYPE", S, Name, [S, ExternalID], S?, [ '[', [markupdecl | PEReference | S]..., ']', S?], '>'
// -> "<!DOCTYPE", S, Name, [S, ExternalID], S?, [ '[', [markupdecl | PEReference | SpaceChar]..., ']', S?], '>'
 -> "<!DOCTYPE", S, Name, [S, [ExternalID, S?]], [ '[', [markupdecl | PEReference | SpaceChar]..., ']', S?], '>'

// [29]
// A markup declaration is any one of: element type declaration,
// attribute-list declaration, entity declaration, notation declaration,
// processing instruction or comment.
markupdecl
 -> elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment


// External Subset

// [30]
// External DTD subset: an optional text declaration followed by the
// subset declarations.

extSubset
 -> TextDecl?, extSubsetDecl

// [31]
// Body of the external subset: any number (possibly zero) of markup
// declarations, conditional sections, parameter entity references and
// single white space characters.
extSubsetDecl
 -> [markupdecl | conditionalSect | PEReference | SpaceChar]...


// Standalone Document Declaration

// [32]
// Standalone declaration: white space, the keyword "standalone", an equals
// sign, and the value yes or no in single or double quotes (each quoted
// value is matched as one keyword).
// The four values are grouped in braces because '|' at rule level binds
// looser than ','; ungrouped, only the first value would carry the
// S, "standalone", Eq prefix.
SDDecl
 -> S, "standalone", Eq, {"'yes'" | "\"yes\"" | "'no'" | "\"no\""}


// Language Identification

// [33]
// Language identifier: a language code followed by any number of
// hyphen-separated subcodes.
// NOTE(review): the token name is written with an embedded blank; confirm
// that this spelling is intended and matches any use elsewhere.
Language Id
 -> Langcode, ['-', Subcode]...

// [34]
// A language code is one of three kinds: a two-letter ISO 639 code, an
// "i-" prefixed code, or an "x-" prefixed code.
Langcode
 -> ISO639Code
 -> IanaCode
 -> UserCode

// [35]
// Two ASCII letters.  The first letter excludes i, I, x and X, which are the
// leading letters of IanaCode and UserCode, so the three kinds of Langcode
// can be told apart by their first character.
ISO639Code
 -> 'a-z' + 'A-Z' -('i' + 'I' + 'x' + 'X'), 'a-z' + 'A-Z'

// [36]
// The letter i (either case), a hyphen, then one or more ASCII letters.
IanaCode
 -> 'i' + 'I', '-', 'a-z' + 'A-Z'...

// [37]
// The letter x (either case), a hyphen, then one or more ASCII letters.
UserCode
 -> 'x' + 'X', '-', 'a-z' + 'A-Z'...

// [38]
// A subcode: one or more ASCII letters of either case.
Subcode
 -> 'A-Z' + 'a-z'...


// Element

// [39]
// An element is either a single empty-element tag, or a start tag, content
// and an end tag.  (',' binds tighter than '|', so these are exactly two
// alternatives.)
element
 -> EmptyElemTag | STag, content, ETag


// Start-Tags, End-Tags, and Empty-Element Tags

// Start-tag

// [40]
// Start tag: '<', the element name, then optionally either
//   - one or more (S, Attribute) pairs, with optional trailing white space, or
//   - white space alone,
// and the closing '>'.  The trailing S? after the attribute list allows
// input such as <a b="c" >.  The optional part is kept as one group with two
// alternatives rather than two consecutive optional items.
STag
 -> '<', Name, [{S, Attribute}..., S? | S], '>'

// [41]
// An attribute specification: name, equals sign (with optional surrounding
// white space, see Eq), and a quoted value.
Attribute
 -> Name, Eq, AttValue

// End-tag

// [42]
// End tag: "</", the element name, optional white space, and '>'.
// This rule does not check that the name matches the start tag.
ETag
 -> "</", Name, S?, '>'

// Content of Elements

// [43]
// Element content: any number (possibly zero) of nested elements, character
// data runs, references, CDATA sections, processing instructions and
// comments, in any order.
// NOTE(review): CharData can itself match zero characters and is repeated
// here, so the split between adjacent CharData items is not unique; confirm
// how the parser generator is expected to handle this.

content
 -> [element | CharData | Reference | CDSect | PI | Comment]...