/* * RCS file input */ #ifndef lint static char rcsid[]= "$Id: rcslex.c,v 1.1.1.1 1993/03/21 09:45:37 cgd Exp $ Purdue CS"; #endif /********************************************************************************* * Lexical Analysis. * Character mapping table, * hashtable, Lexinit, nextlex, getlex, getkey, * getid, getnum, readstring, printstring, savestring, * checkid, serror, fatserror, error, faterror, warn, diagnose * fflsbuf, puts, fprintf * Testprogram: define LEXDB ********************************************************************************* */ /* Copyright (C) 1982, 1988, 1989 Walter Tichy * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by Walter Tichy. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Report all problems and direct all questions to: * rcs-bugs@cs.purdue.edu * */ /* $Log: rcslex.c,v $ /* Revision 1.1.1.1 1993/03/21 09:45:37 cgd /* initial import of 386bsd-0.1 sources /* * Revision 4.6 89/05/01 15:13:07 narten * changed copyright header to reflect current distribution rules * * Revision 4.5 88/11/08 12:00:54 narten * changes from eggert@sm.unisys.com (Paul Eggert) * * Revision 4.5 88/08/28 15:01:12 eggert * Don't loop when writing error messages to a full filesystem. * Flush stderr/stdout when mixing output. * Yield exit status compatible with diff(1). * Shrink stdio code size; allow cc -R; remove lint. * * Revision 4.4 87/12/18 11:44:47 narten * fixed to use "varargs" in "fprintf"; this is required if it is to * work on a SPARC machine such as a Sun-4 * * Revision 4.3 87/10/18 10:37:18 narten * Updating version numbers. Changes relative to 1.1 actually relative * to version 4.1 * * Revision 1.3 87/09/24 14:00:17 narten * Sources now pass through lint (if you ignore printf/sprintf/fprintf * warnings) * * Revision 1.2 87/03/27 14:22:33 jenkins * Port to suns * * Revision 1.1 84/01/23 14:50:33 kcs * Initial revision * * Revision 4.1 83/03/25 18:12:51 wft * Only changed $Header to $Id. * * Revision 3.3 82/12/10 16:22:37 wft * Improved error messages, changed exit status on error to 1. * * Revision 3.2 82/11/28 21:27:10 wft * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h. * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations * properly in case there is an IO-error (e.g., file system full). * * Revision 3.1 82/10/11 19:43:56 wft * removed unused label out:; * made sure all calls to getc() return into an integer, not a char. */ /* #define LEXDB /* version LEXDB is for testing the lexical analyzer. The testprogram * reads a stream of lexemes, enters the revision numbers into the * hashtable, and prints the recognized tokens. Keywords are recognized * as identifiers. */ #include "rcsbase.h" #if __STDC__ #include #else #include #endif /* character mapping table */ enum tokens map[] = { EOFILE, /* this will end up at ctab[-1] */ UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE, LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST, AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER, ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN }; struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/ enum tokens nexttok; /*next token, set by nextlex */ int hshenter; /*if true, next suitable lexeme will be entered */ /*into the symbol table. Handle with care. */ int nextc; /*next input character, initialized by Lexinit */ int eof; /*end-of-file indicator, set to >0 on end of file*/ int line; /*current line-number of input */ int nerror; /*counter for errors */ int nwarn; /*counter for warnings */ char * cmdid; /*command identification for error messages */ int quietflag; /*indicates quiet mode */ FILE * finptr; /*input file descriptor */ FILE * frewrite; /*file descriptor for echoing input */ int rewriteflag;/*indicates whether to echo to frewrite */ char StringTab[strtsize]; /* string table and heap */ char * NextString; /*pointer to next identifier in StringTab*/ char * Topchar; /*pointer to next free byte in StringTab*/ /*set by nextlex, lookup */ struct hshentry hshtab[hshsize]; /*hashtable */ lookup() { /* Function: Looks up the character string pointed to by NextString in the * hashtable. If the string is not present, a new entry for it is created. * If the string is present, TopChar is moved back to save the space for * the string, and NextString is set to point to the original string. * In any case, the address of the corresponding hashtable entry is placed * into nexthsh. * Algorithm: Quadratic hash, covering all entries. * Assumptions: NextString points at the first character of the string. * Topchar points at the first empty byte after the string. */ register int ihash; /* index into hashtable */ register char * sp, * np; int c, delta, final, FirstScan; /*loop control*/ /* calculate hash code */ sp = NextString; ihash = 0; while (*sp) ihash += *sp++; /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */ c=0;delta=1;final=(hshsize-1)/2; FirstScan=true; /*first loop */ for (;;) { ihash = (ihash+c)%hshsize; /*next index*/ if (hshtab[ihash].num == nil) { /*empty slot found*/ hshtab[ihash].num = NextString; nexthsh= &hshtab[ihash];/*save hashtable address*/ # ifdef LEXDB VOID printf("\nEntered: %s at %d ",nexthsh->num, ihash); # endif return; } /* compare strings */ sp=NextString;np=hshtab[ihash].num; while (*sp == *np++) { if (*sp == 0) { /* match found */ nexthsh= &hshtab[ihash]; Topchar = NextString; NextString = nexthsh->num; return; } else sp++; } /* neither empty slot nor string found */ /* calculate next index and repeat */ if (c != final) c += delta; else { if (FirstScan) { /*set up second sweep*/ delta = -1; final = 1; FirstScan= false; } else { fatserror("Hashtable overflow"); } } } }; Lexinit() /* Function: Initialization of lexical analyzer: * initializes the hastable, * initializes nextc, nexttok if finptr != NULL */ { register int c; for (c=hshsize-1; c>=0; c--) { hshtab[c].num = nil; } hshenter=true; eof=0; line=1; nerror=0; nwarn=0; NextString=nil; Topchar = &StringTab[0]; if (finptr) { nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/ nextlex(); /*initial token*/ } else { nextc = '\0'; nexttok=EOFILE; } } nextlex() /* Function: Reads the next token and sets nexttok to the next token code. * Only if the hshenter==true, a revision number is entered into the * hashtable and a pointer to it is placed into nexthsh. * This is useful for avoiding that dates are placed into the hashtable. * For ID's and NUM's, NextString is set to the character string in the * string table. Assumption: nextc contains the next character. */ { register c; register FILE * fin, * frew; register char * sp; register enum tokens d; if (eof) { nexttok=EOFILE; return; } fin=finptr; frew=frewrite; loop: switch(nexttok=ctab[nextc]) { case UNKN: case IDCHAR: case PERIOD: serror("unknown Character: %c",nextc); nextc=GETC(fin,frew,rewriteflag); goto loop; case NEWLN: line++; # ifdef LEXDB VOID putchar('\n'); # endif /* Note: falls into next case */ case SPACE: nextc=GETC(fin,frew,rewriteflag); goto loop; case EOFILE: eof++; nexttok=EOFILE; return; case DIGIT: NextString = sp = Topchar; *sp++ = nextc; while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==DIGIT || d==PERIOD) { *sp++ = c; /* 1.2. and 1.2 are different */ } *sp++ = '\0'; if (sp >= StringTab+strtsize) { /*may have written outside stringtable already*/ fatserror("Stringtable overflow"); } Topchar = sp; nextc = c; if (hshenter == true) lookup(); /* lookup updates NextString, Topchar*/ nexttok = NUM; return; case LETTER: NextString = sp = Topchar; *sp++ = nextc; while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==LETTER || d==DIGIT || d==IDCHAR) { *sp++ = c; } *sp++ = '\0'; if (sp >= StringTab+strtsize) { /*may have written outside stringtable already*/ fatserror("Stringtable overflow"); } Topchar = sp; nextc = c; nexttok = ID; /* may be ID or keyword */ return; case SBEGIN: /* long string */ nexttok = STRING; /* note: only the initial SBEGIN has been read*/ /* read the string, and reset nextc afterwards*/ return; default: nextc=GETC(fin,frew,rewriteflag); return; } } int getlex(token) enum tokens token; /* Function: Checks if nexttok is the same as token. If so, * advances the input by calling nextlex and returns true. * otherwise returns false. * Doesn't work for strings and keywords; loses the character string for ids. */ { if (nexttok==token) { nextlex(); return(true); } else return(false); } int getkey (key) char * key; /* Function: If the current token is a keyword identical to key, * getkey advances the input by calling nextlex and returns true; * otherwise returns false. */ { register char *s1,*s2; if (nexttok==ID) { s1=key; s2=NextString; while(*s1 == *s2++) if (*s1++ == '\0') { /* match found */ Topchar = NextString; /*reset Topchar */ nextlex(); return(true); } } return(false); } char * getid() /* Function: Checks if nexttok is an identifier. If so, * advances the input by calling nextlex and returns a pointer * to the identifier; otherwise returns nil. * Treats keywords as identifiers. */ { register char * name; if (nexttok==ID) { name = NextString; nextlex(); return name; } else return nil; } struct hshentry * getnum() /* Function: Checks if nexttok is a number. If so, * advances the input by calling nextlex and returns a pointer * to the hashtable entry. Otherwise returns nil. * Doesn't work if hshenter is false. */ { register struct hshentry * num; if (nexttok==NUM) { num=nexthsh; nextlex(); return num; } else return nil; } readstring() /* skip over characters until terminating single SDELIM */ /* if rewriteflag==true, copy every character read to frewrite.*/ /* Does not advance nextlex at the end. */ { register c; register FILE * fin, * frew; fin=finptr; frew=frewrite; if (rewriteflag) { /* copy string verbatim to frewrite */ while ((c=getc(fin)) != EOF) { VOID putc(c,frew); if (c==SDELIM) { if ((c=getc(fin)) == EOF || putc(c,frew) != SDELIM) { /* end of string */ nextc=c; return; } } } } else { /* skip string */ while ((c=getc(fin)) != EOF) { if (c==SDELIM) { if ((c=getc(fin)) != SDELIM) { /* end of string */ nextc=c; return; } } } } nextc = c; error("Unterminated string"); } printstring() /* Function: copy a string to stdout, until terminated with a single SDELIM. * Does not advance nextlex at the end. */ { register c; register FILE * fin; fin=finptr; while ((c=getc(fin)) != EOF) { if (c==SDELIM) { if ((c=getc(fin)) != SDELIM) { /* end of string */ nextc=c; return; } } VOID putchar(c); } nextc = c; error("Unterminated string"); } savestring(target,length) char * target; int length; /* copies a string terminated with SDELIM from file finptr to buffer target, * but not more than length bytes. If the string is longer than length, * the extra characters are skipped. The string may be empty, in which * case a '\0' is placed into target. * Double SDELIM is replaced with SDELIM. * If rewriteflag==true, the string is also copied unchanged to frewrite. * Returns the length of the saved string. * Does not advance nextlex at the end. */ { register c; register FILE * fin, * frew; register char * tp, * max; fin=finptr; frew=frewrite; tp=target; max= target+length; /*max is one too large*/ while ((c=GETC(fin,frew,rewriteflag))!=EOF) { *tp++ =c; if (c== SDELIM) { if ((c=GETC(fin,frew,rewriteflag))!=SDELIM) { /* end of string */ *(tp-1)='\0'; nextc=c; return; } } if (tp >= max) { /* overflow */ error("string buffer overflow -- truncating string"); target[length-1]='\0'; /* skip rest of string */ while ((c=GETC(fin,frew,rewriteflag))!=EOF) { if ((c==SDELIM) && ((c=GETC(fin,frew,rewriteflag))!=SDELIM)) { /* end of string */ nextc=c; return; } } nextc = c; error("Can't find %c to terminate string before end of file",SDELIM); return; } } nextc = c; error("Can't find %c to terminate string before end of file",SDELIM); } char *checkid(id, delim) char *id, delim; /* Function: check whether the string starting at id is an */ /* identifier and return a pointer to the last char*/ /* of the identifer. White space, delim and '\0' */ /* are legal delimeters. Aborts the program if not */ /* a legal identifier. Useful for checking commands*/ { register enum tokens d; register char *temp; register char c,tc; temp = id; if ( ctab[*id] == LETTER ) { while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ; if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) { /* append \0 to end of id before error message */ tc = c; while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ; *id = '\0'; faterror("Invalid character %c in identifier %s",tc,temp); return nil ; } else return id; } else { /* append \0 to end of id before error message */ while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ; *id = '\0'; faterror("Identifier %s does not start with letter",temp); return nil; } } writeerror() { static looping; if (looping) exit(2); looping = 1; faterror("write error"); } nlflush(iop) register FILE * iop; { if (putc('\n',iop)==EOF || fflush(iop)==EOF) writeerror(); } /*VARARGS1*/ serror(e,e1,e2,e3,e4,e5) char * e, * e1, * e2, * e3, * e4, * e5; /* non-fatal syntax error */ { nerror++; VOID fprintf(stderr,"%s error, line %d: ", cmdid, line); VOID fprintf(stderr,e, e1, e2, e3, e4, e5); nlflush(stderr); } /*VARARGS1*/ error(e,e1,e2,e3,e4,e5) char * e, * e1, * e2, * e3, * e4, * e5; /* non-fatal error */ { nerror++; VOID fprintf(stderr,"%s error: ",cmdid); VOID fprintf(stderr,e, e1, e2, e3, e4, e5); nlflush(stderr); } /*VARARGS1*/ fatserror(e,e1,e2,e3,e4,e5) char * e, * e1, * e2, * e3, * e4, * e5; /* fatal syntax error */ { nerror++; VOID fprintf(stderr,"%s error, line %d: ", cmdid,line); VOID fprintf(stderr,e, e1, e2, e3, e4, e5); VOID fprintf(stderr,"\n%s aborted\n",cmdid); VOID cleanup(); exit(2); } /*VARARGS1*/ faterror(e,e1,e2,e3,e4,e5) char * e, * e1, * e2, * e3, * e4, * e5; /* fatal error, terminates program after cleanup */ { nerror++; VOID fprintf(stderr,"%s error: ",cmdid); VOID fprintf(stderr,e, e1, e2, e3, e4, e5); VOID fprintf(stderr,"\n%s aborted\n",cmdid); VOID cleanup(); exit(2); } /*VARARGS1*/ warn(e,e1,e2,e3,e4,e5) char * e, * e1, * e2, * e3, * e4, * e5; /* prints a warning message */ { nwarn++; VOID fprintf(stderr,"%s warning: ",cmdid); VOID fprintf(stderr,e, e1, e2, e3, e4, e5); nlflush(stderr); } /*VARARGS1*/ diagnose(e,e1,e2,e3,e4,e5) char * e, * e1, * e2, * e3, * e4, * e5; /* prints a diagnostic message */ { if (!quietflag) { VOID fprintf(stderr,e, e1, e2, e3, e4, e5); nlflush(stderr); } } #ifdef _FSTDIO wbuf(c, fp) unsigned c; register FILE *fp; { register int result; if ((result = __sputc(c, fp)) == EOF) writeerror(); return result; } #else fflsbuf(c, iop) unsigned c; register FILE * iop; /* Function: Flush iop. * Same routine as _flsbuf in stdio, but aborts program on error. */ { register result; if ((result=_flsbuf(c,iop))==EOF) writeerror(); return result; } #endif #ifdef _FSTDIO fputs(s, iop) const char *s; FILE *iop; { VOID fprintf(iop, "%s", s); return 0; } #else fputs(s, iop) register char *s; register FILE *iop; /* Function: Put string s on file iop, abort on error. * Same as puts in stdio, but with different putc macro. */ { register r; register c; while (c = *s++) r = putc(c, iop); return(r); } #endif #if __STDC__ fprintf(FILE *iop, const char *fmt, ...) #else fprintf(iop, fmt, va_alist) FILE *iop; const char *fmt; va_dcl #endif /* Function: formatted output. Same as fprintf in stdio, * but aborts program on error */ { register int value; va_list ap; #ifdef __STDC__ va_start(ap, fmt); #else va_start(ap); #endif #ifdef VFPRINTF VOID vfprintf(iop, fmt, ap); #else _doprnt(fmt, ap, iop); #endif if (ferror(iop)) { writeerror(); value = EOF; } else value = 0; va_end(ap); return value; } #ifdef LEXDB /* test program reading a stream of lexems and printing the tokens. */ main(argc,argv) int argc; char * argv[]; { cmdid="lextest"; if (argc<2) { VOID fputs("No input file\n",stderr); exit(1); } if ((finptr=fopen(argv[1], "r")) == NULL) { faterror("Can't open input file %s\n",argv[1]); } Lexinit(); rewriteflag=false; while (nexttok != EOFILE) { switch (nexttok) { case ID: VOID printf("ID: %s",NextString); break; case NUM: if (hshenter==true) VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab); else VOID printf("NUM, unentered: %s",NextString); hshenter = !hshenter; /*alternate between dates and numbers*/ break; case COLON: VOID printf("COLON"); break; case SEMI: VOID printf("SEMI"); break; case STRING: readstring(); VOID printf("STRING"); break; case UNKN: VOID printf("UNKN"); break; default: VOID printf("DEFAULT"); break; } VOID printf(" | "); nextlex(); } VOID printf("\nEnd of lexical analyzer test\n"); } cleanup() /* dummy */ {} #endif