view tests/agcl/parsifal/xml1.syn @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

// [1] document: the grammar token (marked with '$').
// A document is a prolog, followed by exactly one element, followed by
// zero or more Misc items.
document $
  -> prolog, element, Misc?...

// [2] Char: the set of characters legal in an XML document.
// XML 1.0 production [2] is
//   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
// Carriage return (0xD) must be a member, and the last term is the
// supplementary range 0x10000..0x10ffff. Written with '-', it would be a
// set difference rather than a range.
// NOTE(review): confirm the parser generator's character-set size limit
// accommodates code points above 0xFFFF.
Char = 0x9 + 0xA + 0xD + 0x20..0xd7ff + 0xe000..0xfffd + 0x10000..0x10ffff

// [3] S: white space.
// One or more characters drawn from the set {space, tab, CR, LF}.
S
 -> 0x20 + 0x9 + 0xd + 0xa...

// [4] NameChar: the set of characters allowed after the first character of
// a Name, and anywhere in an Nmtoken.
// XML 1.0 production [4] lists
//   Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
// so '_' must be a member; Name already accepts it as a first character.
NameChar = Letter + Digit + '.' + '-' + '_' + ':' + CombiningChar + Extender

// [5] Name: an XML name.
// The first character is a Letter, '_' or ':'; it is followed by zero or
// more NameChar characters.
Name
 -> Letter + '_' + ':', NameChar?...

// [6] Names: a Name followed by zero or more further Names, each preceded
// by white space (S).
Names
 -> Name, [S, Name]...

// [7] Nmtoken: a name token, i.e. one or more NameChar characters.
// Unlike Name, there is no restriction on the first character.
Nmtoken
 -> NameChar...

// [8] Nmtokens: an Nmtoken followed by zero or more further Nmtokens, each
// preceded by white space (S).
Nmtokens
 -> Nmtoken, [S, Nmtoken]...

// [9] EntityValue: a quoted entity value, in double or single quotes.
// Between the quotes, each item is either a PEReference, a Reference, or
// any single character other than eof, '%', '&' and the enclosing quote
// character. The item list may be empty.
// NOTE(review): eof is not defined in this part of the file; presumably it
// is declared elsewhere as the end-of-input character.
EntityValue
 -> '"', [~(eof + '%' + '&' + '"') | PEReference | Reference]..., '"'
 -> '\'', [~(eof + '%' + '&' + '\'') | PEReference | Reference]..., '\''

// [10] AttValue: a quoted attribute value, in double or single quotes.
// XML 1.0 production [10] is
//   '"' ([^<&"] | Reference)* '"'  |  "'" ([^<&'] | Reference)* "'"
// The excluded characters are '<' and '&' (plus the enclosing quote), not
// '%': parameter-entity references are not recognized inside attribute
// values, so '%' is ordinary data there, while a literal '<' is forbidden.
AttValue
 -> '"', [~(eof + '<' + '&' + '"') | Reference]..., '"'
 -> '\'', [~(eof + '<' + '&' + '\'') | Reference]..., '\''

// [11] SystemLiteral: a quoted system identifier.
// Between the quotes, any characters except eof and the enclosing quote
// character are accepted, zero or more times.
SystemLiteral
 -> '"', ~(eof + '"')?..., '"'
 -> '\'', ~(eof + '\'')?..., '\''

// [12] PubidLiteral: a quoted public identifier.
// XML 1.0 production [12] is
//   '"' PubidChar* '"'  |  "'" (PubidChar - "'")* "'"
// PubidChar contains the apostrophe, so the single-quoted form must remove
// it from the body set; otherwise the closing quote is ambiguous with a
// body character.
PubidLiteral
 -> '"', PubidChar?..., '"'
 -> '\'', (PubidChar - '\'')?..., '\''

// [13] PubidChar: the set of characters allowed in a public identifier.
// XML 1.0 production [13] is
//   #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
// The punctuation list includes '=', which must therefore be a member.
PubidChar = 0x20 + 0xd + 0xa + 'a-z' + 'A-Z' + '0-9' +
            '-' + '\'' + '(' + ')' + '+' + ',' + '.' +
            '/' + ':' + '=' + '?' + ';' + '!' + '*' + '#' +
            '@' + '$' + '_' + '%'

// [14] CharData: character data between markup.
// Zero or more characters, each anything other than eof, '<' and '&'.
CharData
 -> ~(eof + '<' + '&')?...
  // The rule itself does not exclude the sequence "]]>" from character
  // data. Because "]]>" is used as a keyword elsewhere in the grammar, it
  // is expected to be recognized as a keyword instead of as three CharData
  // characters.

// [15] Comment: "<!--", then zero or more Char characters, then "--"
// followed by '>'.
// NOTE(review): the rule does not itself forbid "--" inside the comment
// body; presumably keyword recognition of "--" takes precedence over
// matching '-' as a Char -- confirm.
Comment
 -> "<!--", Char?..., "--", '>'

// [16] PI: a processing instruction.
// "<?", a PITarget, optionally white space followed by zero or more Char
// characters, then "?>".
// The body set is Char, as defined by rule [2] and spelled with a capital
// C like every other use in this grammar; no rule defines a lowercase
// variant.
PI
 -> "<?", PITarget, [S, Char?...], "?>"

// [17] PITarget: the target name of a processing instruction; syntactically
// just a Name.
PITarget
 -> Name
 // The grammar does not exclude the reserved target "xml"; rejecting it is
 // left to the name lookup mechanism.

// [18] CDSect: a CDATA section, made of a start marker (CDStart), the
// content (CData) and an end marker (CDEnd).
CDSect
 -> CDStart, CData, CDEnd

// [19] CDStart: the keyword that opens a CDATA section.
CDStart
 -> "<![CDATA["

// [20] CData: the content of a CDATA section; zero or more Char characters.
CData
 -> Char?...
 // The rule does not exclude the CDATA terminator from the content;
 // keyword recognition is relied on to take precedence over matching the
 // individual characters.

// [21] CDEnd: the keyword that closes a CDATA section.
// The rule needs its own left-hand side: without the name, this production
// attaches to the preceding rule (CData) as an extra alternative and CDEnd,
// which CDSect references, is left undefined.
CDEnd
 -> "]]>"

// [22] prolog: an optional XMLDecl, zero or more Misc items, then
// optionally a doctypedecl followed by zero or more Misc items.
prolog
 -> XMLDecl?, Misc?..., [doctypedecl, Misc?...]

// [23] XMLDecl: the XML declaration.
// "<?xml", a mandatory VersionInfo, an optional EncodingDecl, an optional
// SDDecl, optional white space, then "?>".
XMLDecl
 -> "<?xml", VersionInfo, EncodingDecl?, SDDecl?, S?, "?>"

// [24] VersionInfo: white space, the keyword "version", Eq, then a
// VersionNum enclosed in a matching pair of single or double quotes.
VersionInfo
 -> S, "version", Eq, {'\'', VersionNum, '\'' | '"', VersionNum, '"'}

// [25] Eq: an equals sign with optional white space on either side.
Eq
 -> S?, '=', S?

// [26] VersionNum: one or more characters drawn from the set of ASCII
// letters, digits, '_', '.', ':' and '-'.
VersionNum
 -> 'a-z' + 'A-Z' + '0-9' + '_' + '.' + ':' + '-'...

// [27] Misc: a single miscellaneous item, which is one of a comment, a
// processing instruction, or a run of white space.
Misc
 -> Comment
 -> PI
 -> S

// [28] doctypedecl: the document type declaration.
// "<!DOCTYPE", white space, the Name, optionally white space plus an
// ExternalID, optional white space, then optionally a bracketed internal
// subset, and finally '>'.
// The internal subset is '[', zero or more items that are each a
// markupdecl, a PEReference or white space, then ']' and optional white
// space.
doctypedecl
 -> "<!DOCTYPE", S, Name, [S, ExternalID], S?,
    [ '[', [markupdecl | PEReference | S]..., ']', S?], '>'

// [29] markupdecl: a single markup declaration. The alternatives shown
// here are an element declaration, an attribute-list declaration, an
// entity declaration, a notation declaration, a processing instruction,
// and a comment.
markupdecl
 -> elementdecl
 -> AttlistDecl
 -> EntityDecl
 -> NotationDecl
 -> PI
 -> Comment