/*
 *                     RCS file input
 */
#ifndef lint
static char rcsid[]= "$Id: rcslex.c,v 1.1.1.1 1993/03/21 09:45:37 cgd Exp $ Purdue CS";
#endif
/*********************************************************************************
 *                     Lexical Analysis.
 *                     Character mapping table,
 *                     hashtable, Lexinit, nextlex, getlex, getkey,
 *                     getid, getnum, readstring, printstring, savestring,
 *                     checkid, serror, fatserror, error, faterror, warn, diagnose
 *                     fflsbuf, puts, fprintf
 *                     Testprogram: define LEXDB
 *********************************************************************************
 */

/* Copyright (C) 1982, 1988, 1989 Walter Tichy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by Walter Tichy.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Report all problems and direct all questions to:
 *   rcs-bugs@cs.purdue.edu
 * 


*/


/* $Log: rcslex.c,v $
/* Revision 1.1.1.1  1993/03/21 09:45:37  cgd
/* initial import of 386bsd-0.1 sources
/*
 * Revision 4.6  89/05/01  15:13:07  narten
 * changed copyright header to reflect current distribution rules
 * 
 * Revision 4.5  88/11/08  12:00:54  narten
 * changes from  eggert@sm.unisys.com (Paul Eggert)
 * 
 * Revision 4.5  88/08/28  15:01:12  eggert
 * Don't loop when writing error messages to a full filesystem.
 * Flush stderr/stdout when mixing output.
 * Yield exit status compatible with diff(1).
 * Shrink stdio code size; allow cc -R; remove lint.
 * 
 * Revision 4.4  87/12/18  11:44:47  narten
 * fixed to use "varargs" in "fprintf"; this is required if it is to
 * work on a SPARC machine such as a Sun-4
 * 
 * Revision 4.3  87/10/18  10:37:18  narten
 * Updating version numbers. Changes relative to 1.1 actually relative
 * to version 4.1
 * 
 * Revision 1.3  87/09/24  14:00:17  narten
 * Sources now pass through lint (if you ignore printf/sprintf/fprintf 
 * warnings)
 * 
 * Revision 1.2  87/03/27  14:22:33  jenkins
 * Port to suns
 * 
 * Revision 1.1  84/01/23  14:50:33  kcs
 * Initial revision
 * 
 * Revision 4.1  83/03/25  18:12:51  wft
 * Only changed $Header to $Id.
 * 
 * Revision 3.3  82/12/10  16:22:37  wft
 * Improved error messages, changed exit status on error to 1.
 *
 * Revision 3.2  82/11/28  21:27:10  wft
 * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
 * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
 * properly in case there is an IO-error (e.g., file system full).
 *
 * Revision 3.1  82/10/11  19:43:56  wft
 * removed unused label out:;
 * made sure all calls to getc() return into an integer, not a char.
 */


/*
#define LEXDB
/* version LEXDB is for testing the lexical analyzer. The testprogram
 * reads a stream of lexemes, enters the revision numbers into the
 * hashtable, and prints the recognized tokens. Keywords are recognized
 * as identifiers.
 */


#include "rcsbase.h"
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif


/* character mapping table */
enum tokens map[] = {
        EOFILE,         /* this will end up at ctab[-1] */
        UNKN,   INSERT, UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,
        UNKN,   SPACE,  NEWLN,  UNKN,   SPACE,  UNKN,   UNKN,   UNKN,
        UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,
        UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,
        SPACE,  EXCLA,  DQUOTE, HASH,   DOLLAR, PERCNT, AMPER,  SQUOTE,
        LPARN,  RPARN,  TIMES,  PLUS,   COMMA,  MINUS,  PERIOD, DIVIDE,
        DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,
        DIGIT,  DIGIT,  COLON,  SEMI,   LESS,   EQUAL,  GREAT,  QUEST,
        AT,     LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
        LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR,  UNDER,
        ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
        LETTER, LETTER, LETTER, LBRACE, BAR,    RBRACE, TILDE,  UNKN
};


struct hshentry * nexthsh;  /*pointer to next hashtable-entry, set by lookup*/

enum tokens     nexttok;    /*next token, set by nextlex                    */

int             hshenter;   /*if true, next suitable lexeme will be entered */
                            /*into the symbol table. Handle with care.      */
int             nextc;      /*next input character, initialized by Lexinit  */

int             eof;        /*end-of-file indicator, set to >0 on end of file*/
int             line;       /*current line-number of input                  */
int             nerror;     /*counter for errors                            */
int             nwarn;      /*counter for warnings                          */
char *          cmdid;      /*command identification for error messages     */
int             quietflag;  /*indicates quiet mode                          */
FILE *          finptr;     /*input file descriptor                         */

FILE *          frewrite;   /*file descriptor for echoing input             */

int             rewriteflag;/*indicates whether to echo to frewrite         */

char            StringTab[strtsize]; /* string table and heap               */

char *          NextString;         /*pointer to next identifier in StringTab*/
char *          Topchar;            /*pointer to next free byte in StringTab*/
                                    /*set by nextlex, lookup                */
struct hshentry hshtab[hshsize];    /*hashtable                             */


lookup() {

/* Function: Looks up the character string pointed to by NextString in the
 * hashtable. If the string is not present, a new entry for it is created.
 * If the string is present, TopChar is moved back to save the space for
 * the string, and NextString is set to point to the original string.
 * In any case, the address of the corresponding hashtable entry is placed
 * into nexthsh.
 * Algorithm: Quadratic hash, covering all entries.
 * Assumptions: NextString points at the first character of the string.
 * Topchar points at the first empty byte after the string.
 */

        register int     ihash;      /* index into hashtable */
        register char    * sp, * np;
        int              c, delta, final, FirstScan; /*loop control*/

        /* calculate hash code */
        sp = NextString;
        ihash = 0;
        while (*sp) ihash += *sp++;

        /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */
        c=0;delta=1;final=(hshsize-1)/2;
        FirstScan=true;   /*first loop */

        for (;;) {
                ihash = (ihash+c)%hshsize;   /*next index*/

                if (hshtab[ihash].num == nil) {
                        /*empty slot found*/
                        hshtab[ihash].num = NextString;
                        nexthsh= &hshtab[ihash];/*save hashtable address*/
#                       ifdef LEXDB
                        VOID printf("\nEntered: %s at %d ",nexthsh->num, ihash);
#                       endif
                        return;
                }
                /* compare strings */
                sp=NextString;np=hshtab[ihash].num;
                while (*sp == *np++) {
                        if (*sp == 0) {
                                /* match found */
                                nexthsh= &hshtab[ihash];
                                Topchar = NextString;
                                NextString = nexthsh->num;
                                return;
                        } else sp++;
                }

                /* neither empty slot nor string found */
                /* calculate next index and repeat */
                if (c != final)
                        c += delta;
                else {
                        if (FirstScan) {
                                /*set up second sweep*/
                                delta = -1; final = 1; FirstScan= false;
                        } else {
                                fatserror("Hashtable overflow");
                        }
                }
        }
};


Lexinit()
/* Function: Initialization of lexical analyzer:
 * initializes the hastable,
 * initializes nextc, nexttok if finptr != NULL
 */
{       register int            c;

        for (c=hshsize-1; c>=0; c--) {
                hshtab[c].num = nil;
        }

        hshenter=true; eof=0; line=1; nerror=0; nwarn=0;
        NextString=nil; Topchar = &StringTab[0];
        if (finptr) {
                nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/
                nextlex();            /*initial token*/
        } else {
                nextc = '\0';
                nexttok=EOFILE;
        }
}


nextlex()

/* Function: Reads the next token and sets nexttok to the next token code.
 * Only if the hshenter==true, a revision number is entered into the
 * hashtable and a pointer to it is placed into nexthsh.
 * This is useful for avoiding that dates are placed into the hashtable.
 * For ID's and NUM's, NextString is set to the character string in the
 * string table. Assumption: nextc contains the next character.
 */
{       register c;
	register FILE * fin, * frew;
        register char * sp;
        register enum tokens d;

        if (eof) {
                nexttok=EOFILE;
                return;
        }
	fin=finptr; frew=frewrite;
loop:
        switch(nexttok=ctab[nextc]) {

        case UNKN:
        case IDCHAR:
        case PERIOD:
                serror("unknown Character: %c",nextc);
                nextc=GETC(fin,frew,rewriteflag);
                goto loop;

        case NEWLN:
                line++;
#               ifdef LEXDB
                VOID putchar('\n');
#               endif
                /* Note: falls into next case */

        case SPACE:
                nextc=GETC(fin,frew,rewriteflag);
                goto loop;

        case EOFILE:
                eof++;
                nexttok=EOFILE;
                return;

        case DIGIT:
                NextString = sp = Topchar;
                *sp++ = nextc;
                while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==DIGIT ||
                        d==PERIOD) {
                        *sp++ = c;         /* 1.2. and 1.2 are different */
                }
                *sp++ = '\0';
                if (sp >= StringTab+strtsize) {
                        /*may have written outside stringtable already*/
                        fatserror("Stringtable overflow");
                }
                Topchar = sp;
                nextc = c;
                if (hshenter == true)
                        lookup();      /* lookup updates NextString, Topchar*/
                nexttok = NUM;
                return;


        case LETTER:
                NextString = sp = Topchar;
                *sp++ = nextc;
                while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==LETTER ||
                        d==DIGIT || d==IDCHAR) {
                        *sp++ = c;
                }
                *sp++ = '\0';
                if (sp >= StringTab+strtsize) {
                        /*may have written outside stringtable already*/
                        fatserror("Stringtable overflow");
                }
                Topchar = sp;
                nextc = c;
                nexttok = ID;  /* may be ID or keyword */
                return;

        case SBEGIN: /* long string */
                nexttok = STRING;
                /* note: only the initial SBEGIN has been read*/
                /* read the string, and reset nextc afterwards*/
                return;

        default:
                nextc=GETC(fin,frew,rewriteflag);
                return;
        }
}


int getlex(token)
enum tokens token;
/* Function: Checks if nexttok is the same as token. If so,
 * advances the input by calling nextlex and returns true.
 * otherwise returns false.
 * Doesn't work for strings and keywords; loses the character string for ids.
 */
{
        if (nexttok==token) {
                nextlex();
                return(true);
        } else  return(false);
}

int getkey (key)
char * key;
/* Function: If the current token is a keyword identical to key,
 * getkey advances the input by calling nextlex and returns true;
 * otherwise returns false.
 */
{
        register char *s1,*s2;

        if (nexttok==ID) {
                s1=key; s2=NextString;
                while(*s1 == *s2++)
                     if (*s1++ == '\0') {
                         /* match found */
                         Topchar = NextString; /*reset Topchar */
                         nextlex();
                         return(true);
                     }
        }
        return(false);
}


char * getid()
/* Function: Checks if nexttok is an identifier. If so,
 * advances the input by calling nextlex and returns a pointer
 * to the identifier; otherwise returns nil.
 * Treats keywords as identifiers.
 */
{
        register char * name;
        if (nexttok==ID) {
                name = NextString;
                nextlex();
                return name;
        } else  return nil;
}


struct hshentry * getnum()
/* Function: Checks if nexttok is a number. If so,
 * advances the input by calling nextlex and returns a pointer
 * to the hashtable entry. Otherwise returns nil.
 * Doesn't work if hshenter is false.
 */
{
        register struct hshentry * num;
        if (nexttok==NUM) {
                num=nexthsh;
                nextlex();
                return num;
        } else  return nil;
}


readstring()
/* skip over characters until terminating single SDELIM        */
/* if rewriteflag==true, copy every character read to frewrite.*/
/* Does not advance nextlex at the end.                        */
{       register c;
	register FILE * fin,  * frew;
	fin=finptr; frew=frewrite;
        if (rewriteflag) {
                /* copy string verbatim to frewrite */
                while ((c=getc(fin)) != EOF) {
			VOID putc(c,frew);
                        if (c==SDELIM) {
                                if ((c=getc(fin)) == EOF || putc(c,frew) != SDELIM) {
                                        /* end of string */
                                        nextc=c;
                                        return;
                                }
                        }
                }
        } else {
                /* skip string */
                while ((c=getc(fin)) != EOF) {
                        if (c==SDELIM) {
                                if ((c=getc(fin)) != SDELIM) {
                                        /* end of string */
                                        nextc=c;
                                        return;
                                }
                        }
                }
        }
        nextc = c;
        error("Unterminated string");
}


printstring()
/* Function: copy a string to stdout, until terminated with a single SDELIM.
 * Does not advance nextlex at the end.
 */
{
        register c;
	register FILE * fin;
	fin=finptr;
	while ((c=getc(fin)) != EOF) {
                if (c==SDELIM) {
			if ((c=getc(fin)) != SDELIM) {
                                /* end of string */
                                nextc=c;
                                return;
                        }
                }
                VOID putchar(c);
        }
        nextc = c;
        error("Unterminated string");
}


savestring(target,length)
char * target; int length;
/* copies a string terminated with SDELIM from file finptr to buffer target,
 * but not more than length bytes. If the string is longer than length,
 * the extra characters are skipped. The string may be empty, in which
 * case a '\0' is placed into target.
 * Double SDELIM is replaced with SDELIM.
 * If rewriteflag==true, the string is also copied unchanged to frewrite.
 * Returns the length of the saved string.
 * Does not advance nextlex at the end.
 */
{
        register c;
	register FILE * fin, * frew;
        register char * tp, * max;

	fin=finptr; frew=frewrite;
        tp=target; max= target+length; /*max is one too large*/
        while ((c=GETC(fin,frew,rewriteflag))!=EOF) {
		*tp++ =c;
                if (c== SDELIM) {
                        if ((c=GETC(fin,frew,rewriteflag))!=SDELIM) {
                                /* end of string */
                                *(tp-1)='\0';
                                nextc=c;
                                return;
                        }
                }
                if (tp >= max) {
                        /* overflow */
                        error("string buffer overflow -- truncating string");
                        target[length-1]='\0';
                        /* skip rest of string */
                        while ((c=GETC(fin,frew,rewriteflag))!=EOF) {
                                if ((c==SDELIM) && ((c=GETC(fin,frew,rewriteflag))!=SDELIM)) {
                                        /* end of string */
                                        nextc=c;
                                        return;
                                }
                        }
                        nextc = c;
                        error("Can't find %c to terminate string before end of file",SDELIM);
                        return;
                }
        }
        nextc = c;
        error("Can't find %c to terminate string before end of file",SDELIM);
}


char  *checkid(id, delim)
char    *id, delim;
/*   Function:  check whether the string starting at id is an   */
/*              identifier and return a pointer to the last char*/
/*              of the identifer. White space, delim and '\0'   */
/*              are legal delimeters. Aborts the program if not */
/*              a legal identifier. Useful for checking commands*/
{
        register enum  tokens  d;
        register char    *temp;
        register char    c,tc;

        temp = id;
        if ( ctab[*id] == LETTER ) {
            while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ;
            if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) {
                /* append \0 to end of id before error message */
                tc = c;
                while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
                *id = '\0';
                faterror("Invalid character %c in identifier %s",tc,temp);
                return nil ;
            } else
                return id;
        } else {
            /* append \0 to end of id before error message */
            while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
            *id = '\0';
            faterror("Identifier %s does not start with letter",temp);
            return nil;
        }
}

writeerror()
{
	static looping;
	if (looping)
		exit(2);
	looping = 1;
	faterror("write error");
}

nlflush(iop)
register FILE * iop;
{
	if (putc('\n',iop)==EOF || fflush(iop)==EOF)
                writeerror();
}


/*VARARGS1*/
serror(e,e1,e2,e3,e4,e5)
char * e, * e1, * e2, * e3, * e4, * e5;
/* non-fatal syntax error */
{       nerror++;
        VOID fprintf(stderr,"%s error, line %d: ", cmdid, line);
        VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
        nlflush(stderr);
}

/*VARARGS1*/
error(e,e1,e2,e3,e4,e5)
char * e, * e1, * e2, * e3, * e4, * e5;
/* non-fatal error */
{       nerror++;
        VOID fprintf(stderr,"%s error: ",cmdid);
        VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
        nlflush(stderr);
}

/*VARARGS1*/
fatserror(e,e1,e2,e3,e4,e5)
char * e, * e1, * e2, * e3, * e4, * e5;
/* fatal syntax error */
{       nerror++;
        VOID fprintf(stderr,"%s error, line %d: ", cmdid,line);
        VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
        VOID fprintf(stderr,"\n%s aborted\n",cmdid);
        VOID cleanup();
        exit(2);
}

/*VARARGS1*/
faterror(e,e1,e2,e3,e4,e5)
char * e, * e1, * e2, * e3, * e4, * e5;
/* fatal error, terminates program after cleanup */
{       nerror++;
        VOID fprintf(stderr,"%s error: ",cmdid);
        VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
        VOID fprintf(stderr,"\n%s aborted\n",cmdid);
        VOID cleanup();
        exit(2);
}

/*VARARGS1*/
warn(e,e1,e2,e3,e4,e5)
char * e, * e1, * e2, * e3, * e4, * e5;
/* prints a warning message */
{       nwarn++;
        VOID fprintf(stderr,"%s warning: ",cmdid);
        VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
        nlflush(stderr);
}


/*VARARGS1*/
diagnose(e,e1,e2,e3,e4,e5)
char * e, * e1, * e2, * e3, * e4, * e5;
/* prints a diagnostic message */
{
        if (!quietflag) {
                VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
                nlflush(stderr);
        }
}


#ifdef _FSTDIO
wbuf(c, fp)
	unsigned c;
	register FILE *fp;
{
	register int result;

	if ((result = __sputc(c, fp)) == EOF)
		writeerror();
	return result;
}
#else
fflsbuf(c, iop)
unsigned c; register FILE * iop;
/* Function: Flush iop.
 * Same routine as _flsbuf in stdio, but aborts program on error.
 */
{       register result;
        if ((result=_flsbuf(c,iop))==EOF)
                writeerror();
        return result;
}
#endif


#ifdef _FSTDIO
fputs(s, iop)
	const char *s;
	FILE *iop;
{

	VOID fprintf(iop, "%s", s);
	return 0;
}
#else
fputs(s, iop)
register char *s;
register FILE *iop;
/* Function: Put string s on file iop, abort on error.
 * Same as puts in stdio, but with different putc macro.
 */
{
	register r;
	register c;

	while (c = *s++)
		r = putc(c, iop);
	return(r);
}
#endif


#if __STDC__
fprintf(FILE *iop, const char *fmt, ...)
#else
fprintf(iop, fmt, va_alist)
FILE *iop;
const char *fmt;
va_dcl
#endif
/* Function: formatted output. Same as fprintf in stdio,
 * but aborts program on error
 */
{
	register int value;
	va_list ap;

#ifdef __STDC__
	va_start(ap, fmt);
#else
	va_start(ap);
#endif
#ifdef VFPRINTF
	VOID vfprintf(iop, fmt, ap);
#else
	_doprnt(fmt, ap, iop);
#endif
        if (ferror(iop)) {
		writeerror();
                value = EOF;
        } else value = 0;
	va_end(ap);
	return value;
}


#ifdef LEXDB
/* test program reading a stream of lexems and printing the tokens.
 */


main(argc,argv)
int argc; char * argv[];
{
        cmdid="lextest";
        if (argc<2) {
                VOID fputs("No input file\n",stderr);
                exit(1);
        }
        if ((finptr=fopen(argv[1], "r")) == NULL) {
                faterror("Can't open input file %s\n",argv[1]);
        }
        Lexinit();
        rewriteflag=false;
        while (nexttok != EOFILE) {
        switch (nexttok) {

        case ID:
                VOID printf("ID: %s",NextString);
                break;

        case NUM:
                if (hshenter==true)
                   VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab);
                else
                   VOID printf("NUM, unentered: %s",NextString);
                hshenter = !hshenter; /*alternate between dates and numbers*/
                break;

        case COLON:
                VOID printf("COLON"); break;

        case SEMI:
                VOID printf("SEMI"); break;

        case STRING:
                readstring();
                VOID printf("STRING"); break;

        case UNKN:
                VOID printf("UNKN"); break;

        default:
                VOID printf("DEFAULT"); break;
        }
        VOID printf(" | ");
        nextlex();
        }
        VOID printf("\nEnd of lexical analyzer test\n");
}

cleanup()
/* dummy */
{}


#endif