view tests/agcl/parsifal/rfc822a.syn @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 Transcription of RFC822 grammar to AnaGram Syntax
 Transcription copyright (c) Parsifal Software, 1997. All Rights Reserved.
 See the file COPYING for license and usage terms.

 Grammar taken from RFC 822

 The rules as given in RFC 822 have been retained as comments.
 In a few cases obvious errors or questionable syntax were observed.
 These are noted in // comments
*/

/* Embedded C: GET_CONTEXT expands to an assignment of PCB.pointer to CONTEXT. */
{
#define GET_CONTEXT CONTEXT = PCB.pointer
}
/*
 Configuration section: pointer input, case sensitivity switched off (~),
 message as the grammar token, unsigned char * as the context type, CRLF as
 the eof token, and a sticky declaration for atom, word, number and the
 {linear_space}... repetition used by linear_white_space.
*/
[
  pointer input
  ~case sensitive
  grammar token = message
  context type = unsigned char *
  eof token = CRLF
  sticky {atom, word, {linear_space}..., number}
]



{
	/**************************************************************
	* Input function to the parsing engine.
	*
	* It takes a pointer to a string and a pointer to a Request Object
	* that will be populated
	***************************************************************/
/*INT rfc822_parser(char *buffer, CRfc822Req  *Request)
	{
		PCB.pointer = (unsigned char *)buffer;
		pRequest = Request;
		rfc822parse();
		return PCB.exit_flag;

	}*/
}




/***************************************************************************
*     address     =  mailbox                      ; one addressee
*                 /  group                        ; named list
****************************************************************************/
 // Either a single mailbox or a named group.
 address
  -> mailbox | group


/***************************************************************************
*     addr-spec   =  local-part "@" domain        ; global address
****************************************************************************/
// local_part and domain joined by a literal '@'.
addr_spec
 -> local_part,'@',domain


/***************************************************************************
*     ALPHA       =  <any ASCII alphabetic character>
*                                                 ; (101-132, 65.- 90.)
*                                                 ; (141-172, 97.-122.)
****************************************************************************/
ALPHA = 'A-Z' + 'a-z'


/***************************************************************************
*    atom        =  1*<any CHAR except specials, SPACE and CTLs>
****************************************************************************/
// One or more ATOM_CHARs, accumulated left-recursively.
atom
 -> ATOM_CHAR | atom, ATOM_CHAR

// Any CHAR that is not in atom_specials.
ATOM_CHAR = CHAR - atom_specials

// Characters that may not appear in an atom: specials, controls and SPACE.
atom_specials = specials + CTL + SPACE

/***************************************************************************
*     authentic   =   "From"       ":"   mailbox  ; Single author
*                 / ( "Sender"     ":"   mailbox  ; Actual submittor
*                     "From"       ":" 1#mailbox) ; Multiple authors
*                                                 ;  or not sender
****************************************************************************/
// The parentheses in the RFC 822 rule are BNF grouping, not literal
// characters, so the second alternative must not require '(' and ')'.
// NOTE(review): the SPACE between the "Sender" and "From" fields is kept
// from the original transcription; RFC 822 does not name a separator here.
authentic
 -> "From",':',mailbox
 -> "Sender",':',mailbox, SPACE,"From",':',multiple_mailboxes

// One or more mailboxes; an optional SPACE may separate neighbours.
multiple_mailboxes
 -> mailbox | multiple_mailboxes, SPACE?, mailbox


/***************************************************************************
*     CHAR        =  <any ASCII character>        ; (  0-177,  0.-127.)
****************************************************************************/
CHAR = 0..127


/***************************************************************************
*     comment     =  "(" *(ctext / quoted-pair / comment) ")"
****************************************************************************/
// RFC 822 allows zero or more items between the parentheses, so the
// comment_choice element is repeated with ?... (the same zero-or-more
// form quoted_string uses); the empty comment "()" is accepted as well.
comment
 -> '(', comment_choice?..., ')'

// One item inside a comment; comments may nest.
comment_choice
 -> ctext | quoted_pair | comment


/***************************************************************************
*     CR          =  <ASCII CR, carriage return>  ; (     15,      13.)
****************************************************************************/
CR = '\r'


/***************************************************************************
*     CRLF        =  CR LF
****************************************************************************/
// Written as the two-character string "\r\n" rather than as the token
// sequence CR, LF shown in the trailing comment.
CRLF
 -> "\r\n"          //CR,LF


/***************************************************************************
*     ctext       =  <any CHAR excluding "(",     ; => may be folded
*                     ")", "\" & CR, & including
*                     linear-white-space>
****************************************************************************/
// A comment character: any CHAR outside ctext_specials, or linear white space.
ctext
 -> CHAR - ctext_specials
 -> linear_white_space

// SPACE and HTAB are excluded here, exactly as in dtext_specials and
// qtext_specials: they also appear in the definition of linear_white_space,
// so leaving them in would let a blank match both alternatives of ctext.
ctext_specials = '(' + ')' + '\\' + CR + SPACE + HTAB


/***************************************************************************
*     CTL         =  <any ASCII control           ; (  0- 37,  0.- 31.)
*                     character and DEL>          ; (    177,     127.)
****************************************************************************/
CTL = 0x7F + (0x00..0x1F)



/***************************************************************************
*     date        =  1*2DIGIT month 2DIGIT        ; day month year
*                                                 ;  e.g. 20 Jun 82
****************************************************************************/
// Day number, month name and year, each pair separated by one SPACE.
date
 -> day_numb, SPACE, month, SPACE, year

// Day and year are both plain digit strings.
day_numb
 -> number

year
 -> number

// One or more DIGITs, accumulated left-recursively.
number
 -> DIGIT | number, DIGIT

/***************************************************************************
*     dates       =   orig-date                   ; Original
*                   [ resent-date ]               ; Forwarded
****************************************************************************/
// The original date, optionally followed by SPACE and a resent date.
dates
 -> orig_date | orig_date, SPACE, resent_date


/***************************************************************************
*     date-time   =  [ day "," ] date time        ; dd mm yy
*                                                 ;  hh:mm:ss zzz
****************************************************************************/
// Optional day name followed by ',', then the date and the time.
date_time
 -> [ day, ','] , date, time


/***************************************************************************
*     day         =  "Mon"  / "Tue" /  "Wed"  / "Thu"
*                 /  "Fri"  / "Sat" /  "Sun"
****************************************************************************/
// Three-letter day names, one alternative per line.
day
 -> "Mon"
 -> "Tue"
 -> "Wed"
 -> "Thu"
 -> "Fri"
 -> "Sat"
 -> "Sun"



/***************************************************************************
*     delimiters  =  specials / linear-white-space / comment
****************************************************************************/
// NOTE(review): the token name is spelled "delimeters" (RFC 822 says
// "delimiters"); the spelling is kept so any existing references still match.
delimeters
 -> specials | linear_white_space | comment

/***************************************************************************
*     destination =  "To"          ":" 1#address  ; Primary
*                 /  "Resent-To"   ":" 1#address
*                 /  "cc"          ":" 1#address  ; Secondary
*                 /  "Resent-cc"   ":" 1#address
*                 /  "bcc"         ":"  #address  ; Blind carbon
*                 /  "Resent-bcc"  ":"  #address
****************************************************************************/
// Recipient header fields. The fourth field name is "Resent-cc", as in
// RFC 822 (it was previously misspelled "Recent-cc").
// NOTE(review): RFC 822 allows a list (#address) after "bcc" and
// "Resent-bcc"; these rules accept at most one address.
destination
 -> "To",':',addresses
 -> "Resent-To",':',addresses
 -> "cc",':',addresses
 -> "Resent-cc",':',addresses
 -> "bcc",':', address?
 -> "Resent-bcc",':', address?

// One or more addresses separated by a single SPACE.
addresses
 -> address | addresses, SPACE, address

/***************************************************************************
*     DIGIT       =  <any ASCII decimal digit>    ; ( 60- 71, 48.- 57.)
****************************************************************************/
DIGIT = 0x30..0x39

/***************************************************************************
*     domain      =  sub-domain *("." sub-domain)
****************************************************************************/
// A domain is a dot-separated list of sub-domains.
domain
 -> multiple_sub_domains

// One or more sub_domains joined by '.'.
multiple_sub_domains
 -> sub_domain | multiple_sub_domains, '.', sub_domain

/***************************************************************************
*     domain-literal =  "[" *(dtext / quoted-pair) "]"
****************************************************************************/
// RFC 822 allows zero or more items between the brackets, so literal_fill
// is repeated with ?... (zero or more) and "[]" is accepted.
domain_literal
 -> '[', literal_fill?..., ']'

// One item inside a domain literal.
literal_fill
 -> dtext
 -> quoted_pair



/***************************************************************************
*     domain-ref  =  atom                         ; symbolic reference
****************************************************************************/
// A symbolic domain reference is a single atom.
domain_ref
 -> atom


/***************************************************************************
*     dtext       =  <any CHAR excluding "[",     ; => may be folded
*                     "]", "\" & CR, & including
*                     linear-white-space>
****************************************************************************/
// A domain-literal character: any CHAR outside dtext_specials, or linear
// white space.
dtext
 -> CHAR - dtext_specials | linear_white_space

// SPACE and HTAB were added to the excluded set; otherwise the grammar would
// be ambiguous, since they also appear in the definition of
// linear_white_space.

dtext_specials = SPACE + HTAB + CR + '[' + ']' + '\\'

/***************************************************************************
*     extension-field =
*                   <Any field which is defined in a document
*                    published as a formal extension to this
*                    specification; none will have names beginning
*                    with the string "X-">
****************************************************************************/




/***************************************************************************
*     field       =  field-name ":" [ field-body ] CRLF
****************************************************************************/
// A field name, ':', an optional body, and the terminating CRLF.
field
 -> field_name, ':',[field_body],CRLF


/***************************************************************************
*     fields      =    dates                      ; Creation time,
*                      source                     ;  author id & one
*                    1*destination                ;  address required
*                     *optional-field             ;  others optional
****************************************************************************/
// One or more header items, accepted in any order.
fields
 -> fields_makeup | fields, fields_makeup

// The kinds of item that may appear in the header.
fields_makeup
 -> dates | source | destination | optional_field


/***************************************************************************
*     field-body  =  field-body-contents
*                    [CRLF LWSP-char field-body]
****************************************************************************/
// Body contents, optionally continued after CRLF plus one LWSP_char
// (the rule recurses for each continuation line).
field_body
 -> field_body_contents, [CRLF,LWSP_char,field_body]


/***************************************************************************
*     field-body-contents =
*                   <the ASCII characters making up the field-body, as
*                    defined in the following sections, and consisting
*                    of combinations of atom, quoted-string, and
*                    specials tokens, or else consisting of texts>
****************************************************************************/
// One or more CHARs; no further structure is imposed on the body here.
field_body_contents
 -> CHAR...

/***************************************************************************
*     field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":">
****************************************************************************/
// RFC 822 requires one or more characters (1*), so the character class is
// repeated; a single-character rule would reject every real field name.
field_name
 -> FIELD_NAME_CHAR...

// 0x21..0x7E (no CTLs, no SPACE) with ':' (0x3A) removed.
FIELD_NAME_CHAR = (0x21..0x7E) - 0x3A


/***************************************************************************
*     group       =  phrase ":" [#mailbox] ";"
****************************************************************************/
// NOTE(review): "phrase" here is a quoted keyword string, not the phrase
// token defined elsewhere in this grammar, so the rule matches only the
// literal text "phrase" -- confirm whether that is intended.
// NOTE(review): RFC 822 makes the mailbox list optional ([#mailbox]); this
// rule requires at least one mailbox.
group
 -> "phrase", ':', multiple_mailboxes,';'



/***************************************************************************
*     hour        =  2DIGIT ":" 2DIGIT [":" 2DIGIT]
*                                                 ; 00:00:00 - 23:59:59
****************************************************************************/
// Two digits, ':', two digits, and an optional ':' plus two more digits.
// Only the digit count is checked, not the numeric range.
hour
 -> DIGIT,DIGIT,':',DIGIT,DIGIT,[':',DIGIT,DIGIT]

/***************************************************************************
*     HTAB        =  <ASCII HT, horizontal-tab>   ; (     11,       9.)
****************************************************************************/
HTAB = '\t'

/***************************************************************************
*     LF          =  <ASCII LF, linefeed>         ; (     12,      10.)
****************************************************************************/
LF = '\n'

/***************************************************************************
*     linear-white-space =  1*([CRLF] LWSP-char)  ; semantics = SPACE
*                                                 ; CRLF => folding
****************************************************************************/
// One or more linear_space items. The {linear_space}... construct is also
// named in the sticky declaration of the configuration section, so its
// spelling here should stay in step with that declaration.
linear_white_space
 -> {linear_space}...

// A single LWSP_char, optionally preceded by CRLF.
linear_space
 -> CRLF,LWSP_char
 -> LWSP_char

/***************************************************************************
*     local-part  =  word *("." word)             ; uninterpreted
*                                                 ; case-preserved
****************************************************************************/
// RFC 822: word *("." word). The earlier rule demanded a SPACE after the
// first word and at least one ".word", so a plain "user" local part could
// not match. Written left-recursively, like multiple_sub_domains.
local_part
 -> word
 -> local_part, '.', word


/***************************************************************************
*     LWSP-char   =  SPACE / HTAB                 ; semantics = SPACE
****************************************************************************/
LWSP_char = HTAB + SPACE


/***************************************************************************
*     mailbox     =  addr-spec                    ; simple address
*                 /  phrase route-addr            ; name & addr-spec
****************************************************************************/
// Either a bare addr_spec or a name followed by a route_addr.
// NOTE(review): "phrase" in the second alternative is a quoted keyword
// string, not the phrase token defined elsewhere in this grammar, so only
// the literal text "phrase" is accepted there -- confirm intent.
mailbox
 -> addr_spec
 -> "phrase",SPACE,route_addr


/***************************************************************************
*     message     =  fields *( CRLF *text )       ; Everything after
*                                                 ;  first null line
*                                                 ;  is message body
****************************************************************************/
// Header fields followed by body lines. RFC 822 gives each body line as
// CRLF *text, so text is repeated zero or more times (?...); a bare "text?"
// would allow at most one character per line.
// NOTE(review): the outer {...}... still requires at least one CRLF group,
// whereas RFC 822 allows none; left unchanged because CRLF is also the
// configured eof token.
message
 -> fields, {CRLF, text?...}...


/***************************************************************************
*     month       =  "Jan"  /  "Feb" /  "Mar"  /  "Apr"
*                 /  "May"  /  "Jun" /  "Jul"  /  "Aug"
*                 /  "Sep"  /  "Oct" /  "Nov"  /  "Dec"
****************************************************************************/
// Three-letter month names, grouped four to a line as in RFC 822.
month
 -> "Jan" | "Feb" | "Mar" | "Apr"
 -> "May" | "Jun" | "Jul" | "Aug"
 -> "Sep" | "Oct" | "Nov" | "Dec"




/***************************************************************************
*     msg-id      =  "<" addr-spec ">"            ; Unique message id
****************************************************************************/
// An addr_spec enclosed in angle brackets.
msg_id
 -> '<',addr_spec,'>'



/***************************************************************************
*     optional-field =
*                 /  "Message-ID"        ":"   msg-id
*                 /  "Resent-Message-ID" ":"   msg-id
*                 /  "In-Reply-To"       ":"  *(phrase / msg-id)
*                 /  "References"        ":"  *(phrase / msg-id)
*                 /  "Keywords"          ":"  #phrase
*                 /  "Subject"           ":"  *text
*                 /  "Comments"          ":"  *text
*                 /  "Encrypted"         ":" 1#2word
*                 /  extension-field              ; To be defined
*                 /  user-defined-field           ; May be pre-empted
****************************************************************************/
// NOTE(review): of the optional fields listed in RFC 822, only
// "Message-ID" is implemented here.
optional_field
 -> optional_fields

optional_fields
 -> "Message-ID" , ':',  msg_id


/***************************************************************************
*     orig-date   =  "Date"        ":"   date-time
****************************************************************************/
// The "Date" field: keyword, ':', then a date_time.
orig_date
 -> "Date", ':', date_time


/***************************************************************************
*     originator  =   authentic                   ; authenticated addr
*                   [ "Reply-To"   ":" 1#address] )
****************************************************************************/
// The authentic part, optionally followed by a "Reply-To" address list.
originator
 -> authentic, ["Reply-To", ':', addresses]

/***************************************************************************
*     phrase      =  1*word                       ; Sequence of words
****************************************************************************/
// One or more words.
// NOTE(review): no other rule in this file refers to this token; group and
// mailbox use the quoted string "phrase" instead.
phrase
 -> word...



/***************************************************************************
*     qtext       =  <any CHAR excepting <">,     ; => may be folded
*                     "\" & CR, and including
*                     linear-white-space>
****************************************************************************/
// A quoted-string character: any CHAR outside qtext_specials, or linear
// white space.
qtext
 -> CHAR - qtext_specials | linear_white_space

// SPACE and HTAB were added to the excluded set to remove an ambiguity in
// the grammar (both also match linear_white_space).

qtext_specials = SPACE + HTAB + CR + '"' + '\\'


/***************************************************************************
*     quoted-pair =  "\" CHAR                     ; may quote any char
****************************************************************************/
// A backslash followed by any single CHAR.
quoted_pair
 -> '\\',CHAR

/***************************************************************************
*     quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or
*                                                 ;   quoted chars.
****************************************************************************/
// Zero or more QUOTED items between double quotes.
quoted_string
 -> '"', QUOTED?..., '"'

// One item inside a quoted string.
QUOTED
 -> qtext | quoted_pair



/***************************************************************************
*     received    =  "Received"    ":"            ; one per relay
*                       ["from" domain]           ; sending host
*                       ["by"   domain]           ; receiving host
*                       ["via"  atom]             ; physical path
*                      *("with" atom)             ; link/mail protocol
*                       ["id"   msg-id]           ; receiver msg id
*                       ["for"  addr-spec]        ; initial form
*                        ";"    date-time         ; time received
****************************************************************************/
// "Received" ':' followed by one or more sub-fields, then ';' and the
// time of receipt. The sub-fields may appear in any order and may repeat.
received
 -> "Received", ':', received_fields..., ';', rec_date_time

// One keyword-introduced sub-field of a Received line.
received_fields
 -> from, from_domain
 -> "by", by_domain
 -> "via", via_atom
 -> "with", with_atom
 -> "id", rec_msg_id
 -> "for", for_addr_spec

// The tokens below wrap a single keyword or component each, giving every
// part of a Received line its own name.
from
 -> "from"

from_domain
 -> domain

by_domain
 -> domain

via_atom
 -> atom

with_atom
 -> atom

rec_msg_id
 -> msg_id

for_addr_spec
 -> addr_spec

rec_date_time
 -> date_time

/***************************************************************************
*     resent      =   resent-authentic
*                   [ "Resent-Reply-To"  ":" 1#address] )
****************************************************************************/
// resent_authentic, optionally followed by a "Resent-Reply-To" address list.
resent
 -> resent_authentic,["Resent-Reply-To",  ':', addresses]


/***************************************************************************
*     resent-authentic =
*                 =   "Resent-From"      ":"   mailbox
*                 / ( "Resent-Sender"    ":"   mailbox
*                     "Resent-From"      ":" 1#mailbox  )
****************************************************************************/
// NOTE(review): this differs from the RFC 822 rule, which is either
// "Resent-From" with one mailbox, or "Resent-Sender" with one mailbox
// followed by "Resent-From" with a mailbox list. Here the two fields are
// independent alternatives -- confirm whether that is intended.
resent_authentic
 -> "Resent-Sender",':',mailbox
 -> "Resent-From",':',multiple_mailboxes

/***************************************************************************
*     resent-date =  "Resent-Date" ":"   date-time
****************************************************************************/
// The "Resent-Date" field: keyword, ':', then a date_time.
resent_date
 -> "Resent-Date",':',date_time


/***************************************************************************
*     return      =  "Return-path" ":" route-addr ; return address
****************************************************************************/
// The "Return-path" field: keyword, ':', then a route_addr.
return
 -> "Return-path",':',route_addr

/***************************************************************************
*     route       =  1#("@" domain) ":"           ; path-relative
****************************************************************************/
// A list of "@domain" hops terminated by ':'.
route
 -> route_list, ':'

// One hop of a route.
sub_route
 -> '@', domain

// One or more hops, written back to back.
route_list
 -> sub_route | route_list, sub_route



/***************************************************************************
*     route-addr  =  "<" [route] addr-spec ">"
****************************************************************************/
// An addr_spec in angle brackets, optionally preceded by a route.
route_addr
 -> '<',[route],addr_spec, '>'


/***************************************************************************
*     source      = [  trace ]                    ; net traversals
*                      originator                 ; original mail
*                   [  resent ]                   ; forwarded
****************************************************************************/
// Optional trace, the originator, then an optional resent part.
source
 -> trace?,originator,resent?


/***************************************************************************
*     SPACE       =  <ASCII SP, space>            ; (     40,      32.)
****************************************************************************/
SPACE = ' '


/***************************************************************************
*     specials    =  "(" / ")" / "<" / ">" / "@"  ; Must be in quoted-
*                 /  "," / ";" / ":" / "\" / <">  ;  string, to use
*                 /  "." / "[" / "]"              ;  within a word.
****************************************************************************/
specials = '(' + ')' + '[' + ']' + '<' + '>' + '@' + ',' + ';' + ':' + '.' + '\\' + '"'


/***************************************************************************
*     sub-domain  =  domain-ref / domain-literal
****************************************************************************/
// Either a symbolic reference or a bracketed literal.
sub_domain
 -> domain_ref | domain_literal

/***************************************************************************
*     text        =  <any CHAR, including bare    ; => atoms, specials,
*                     CR & bare LF, but NOT       ;  comments and
*                     including CRLF>             ;  quoted-strings are
*                                                 ;  NOT recognized.
****************************************************************************/
// text matches exactly one text_char.
text
 -> text_char

// Any character in the range 0x00..0x7F.
text_char
 -> 0x00..0x7F


/***************************************************************************
*     time        =  hour zone                    ; ANSI and Military
****************************************************************************/
// hour and zone separated by a single SPACE.
time
 -> hour, SPACE, zone

/***************************************************************************
*     trace       =    return                     ; path to sender
*                    1*received                   ; receipt tags
****************************************************************************/
// The return path, a SPACE, then one or more received entries.
trace
 -> return,SPACE,received...

/***************************************************************************
     user-defined-field =
                   <Any field which has not been defined
                    in this specification or published as an
                    extension to this specification; names for
                    such fields must be unique and may be
                    pre-empted by published extensions>
****************************************************************************/


/***************************************************************************
*     word        =  atom / quoted-string
****************************************************************************/
// A word is an atom or a quoted string.
word
 -> atom | quoted_string


/***************************************************************************
*     zone        =  "UT"  / "GMT"                ; Universal Time
*                                                 ; North American : UT
*                 /  "EST" / "EDT"                ;  Eastern:  - 5/ - 4
*                 /  "CST" / "CDT"                ;  Central:  - 6/ - 5
*                 /  "MST" / "MDT"                ;  Mountain: - 7/ - 6
*                 /  "PST" / "PDT"                ;  Pacific:  - 8/ - 7
*                 /  1ALPHA                       ; Military: Z = UT;
*     <">         =  <ASCII quote mark>           ; (     42,      34.)
****************************************************************************/
// Named time zones plus the single-letter military zones.
// The <"> line that follows the zone rule in RFC 822 is a separate
// definition of the double-quote character, not a zone alternative, so a
// bare 0x22 is not accepted as a zone.
// NOTE(review): RFC 822 also allows a numeric offset, ("+" / "-") 4DIGIT,
// which is not covered here.
zone
 -> "UT"
 -> "GMT"
 -> "EST"
 -> "EDT"
 -> "CST"
 -> "CDT"
 -> "MST"
 -> "MDT"
 -> "PST"
 -> "PDT"
 -> ALPHA