Mercurial > ~dholland > hg > tradcpp > index.cgi
changeset 16:9dda765ee85c
expression evaluator
author | David A. Holland |
---|---|
date | Mon, 20 Dec 2010 00:32:20 -0500 |
parents | f6177d3ed5c2 |
children | 76da41da923f |
files | Makefile directive.c eval.c eval.h utils.c utils.h |
diffstat | 6 files changed, 691 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Sun Dec 19 21:42:01 2010 -0500 +++ b/Makefile Mon Dec 20 00:32:20 2010 -0500 @@ -1,7 +1,7 @@ # $NetBSD$ PROG= tradcpp -SRCS= main.c files.c directive.c place.c array.c utils.c +SRCS= main.c files.c directive.c eval.c place.c array.c utils.c WARNS= 5 .include <bsd.prog.mk>
--- a/directive.c Sun Dec 19 21:42:01 2010 -0500 +++ b/directive.c Mon Dec 20 00:32:20 2010 -0500 @@ -11,8 +11,6 @@ #include "macro.h" #include "eval.h" -static const char ws[] = " \t\f\v"; - struct ifstate { struct ifstate *prev; struct place startplace; @@ -94,9 +92,10 @@ { char *expr; bool val; + struct place p3 = *p2; expr = macroexpand(p2, line, len, true); - val = eval(expr); + val = eval(&p3, expr); ifstate_push(p, val); free(expr); } @@ -122,6 +121,7 @@ d_elif(struct place *p, struct place *p2, char *line, size_t len) { char *expr; + struct place p3 = *p2; if (ifstate->seenelse) { complain(p, "#elif after #else"); @@ -132,7 +132,7 @@ ifstate->curtrue = false; } else { expr = macroexpand(p2, line, len, true); - ifstate->curtrue = eval(expr); + ifstate->curtrue = eval(&p3, expr); ifstate->evertrue = ifstate->curtrue; free(expr); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eval.c Mon Dec 20 00:32:20 2010 -0500 @@ -0,0 +1,669 @@ +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <errno.h> + +#include "utils.h" +#include "array.h" +#include "mode.h" +#include "place.h" +#include "eval.h" + +/* + * e ::= + * e1 ? e2 : e3 + * e1 || e2 + * e1 && e2 + * e1 | e2 + * e1 ^ e2 + * e1 & e2 + * e1 == e2 | e1 != e2 + * e1 < e2 | e1 <= e2 | e1 > e2 | e1 >= e2 + * e1 << e2 | e1 >> e2 + * e1 + e2 | e1 - e2 + * e1 * e2 | e1 / e2 | e1 % e2 + * !e | ~e | -e | +e + * ( e ) | ident + */ + +enum tokens { + T_EOF, /* end of input */ + T_VAL, /* value */ + T_LPAREN, /* parens */ + T_RPAREN, + T_PIPEPIPE, /* operators */ + T_AMPAMP, + T_EQEQ, + T_BANGEQ, + T_LTEQ, + T_GTEQ, + T_LTLT, + T_GTGT, + T_QUES, + T_COLON, + T_PIPE, + T_CARET, + T_AMP, + T_LT, + T_GT, + T_PLUS, + T_MINUS, + T_STAR, + T_SLASH, + T_PCT, + T_BANG, + T_TILDE, +}; + +static const struct { + char c1, c2; + enum tokens tok; +} tokens_2[] = { + { '|', '|', T_PIPEPIPE }, + { '&', '&', T_AMPAMP }, + { '=', '=', T_EQEQ }, + { '!', '=', T_BANGEQ }, + { '<', '=', T_LTEQ }, + { '>', '=', T_GTEQ }, + { '<', '<', T_LTLT }, + { '>', '>', T_GTGT }, +}; +static const unsigned num_tokens_2 = HOWMANY(tokens_2); + +static const struct { + char c1; + enum tokens tok; +} tokens_1[] = { + { '?', T_QUES }, + { ':', T_COLON }, + { '|', T_PIPE }, + { '^', T_CARET }, + { '&', T_AMP }, + { '<', T_LT }, + { '>', T_GT }, + { '+', T_PLUS }, + { '-', T_MINUS }, + { '*', T_STAR }, + { '/', T_SLASH }, + { '%', T_PCT }, + { '!', T_BANG }, + { '~', T_TILDE }, + { '(', T_LPAREN }, + { ')', T_RPAREN }, +}; +static const unsigned num_tokens_1 = HOWMANY(tokens_1); + +struct token { + struct place place; + enum tokens tok; + int val; +}; +DECLARRAY(token); +DEFARRAY(token, ); + +static struct tokenarray tokens; + +static +struct token * +token_create(const struct place *p, enum tokens tok, int val) +{ + struct token *t; + + t = domalloc(sizeof(*t)); + t->place = *p; + t->tok = tok; + t->val = val; + return t; +} + +static +void +token_destroy(struct token *t) +{ + free(t); +} + +DESTROYALL_ARRAY(token, ); + +static +bool +isuop(enum tokens tok) +{ + switch (tok) { + case T_BANG: + case T_TILDE: + case T_MINUS: + case T_PLUS: + return true; + default: + break; + } + return false; +} + +static +bool +isbop(enum tokens tok) +{ + switch (tok) { + case T_EOF: + case T_VAL: + case T_LPAREN: + case T_RPAREN: + case T_COLON: + case T_QUES: + case T_BANG: + case T_TILDE: + return false; + default: + break; + } + return true; +} + +static +bool +isop(enum tokens tok) +{ + switch (tok) { + case T_EOF: + case T_VAL: + case T_LPAREN: + case T_RPAREN: + return false; + default: + break; + } + return true; +} + +static +int +getprec(enum tokens tok) +{ + switch (tok) { + case T_STAR: case T_SLASH: case T_PCT: return 0; + case T_PLUS: case T_MINUS: return 1; + case T_LTLT: case T_GTGT: return 2; + case T_LT: case T_LTEQ: case T_GT: case T_GTEQ: return 3; + case T_EQEQ: case T_BANGEQ: return 4; + case T_AMP: return 5; + case T_CARET: return 6; + case T_PIPE: return 7; + case T_AMPAMP: return 8; + case T_PIPEPIPE: return 9; + default: break; + } + return 10; +} + +static +bool +looser(enum tokens t1, enum tokens t2) +{ + return getprec(t1) > getprec(t2); +} + +static +int +eval_uop(enum tokens op, int val) +{ + switch (op) { + case T_BANG: val = !val; break; + case T_TILDE: val = (int)~(unsigned)val; break; + case T_MINUS: val = -val; break; + case T_PLUS: break; + default: assert(0); break; + } + return val; +} + +static +int +eval_bop(struct place *p, enum tokens op, int lv, int rv) +{ + unsigned mask; + + switch (op) { + case T_PIPEPIPE: return lv || rv; + case T_AMPAMP: return lv && rv; + case T_PIPE: return (int)((unsigned)lv | (unsigned)rv); + case T_CARET: return (int)((unsigned)lv ^ (unsigned)rv); + case T_AMP: return (int)((unsigned)lv & (unsigned)rv); + case T_EQEQ: return lv == rv; + case T_BANGEQ: return lv != rv; + case T_LT: return lv < rv; + case T_GT: return lv > rv; + case T_LTEQ: return lv <= rv; + case T_GTEQ: return lv >= rv; + + case T_LTLT: + case T_GTGT: + if (rv < 0) { + complain(p, "Negative bit-shift"); + complain_fail(); + rv = 0; + } + if ((unsigned)rv >= CHAR_BIT * sizeof(unsigned)) { + complain(p, "Bit-shift farther than type width"); + complain_fail(); + rv = 0; + } + if (op == T_LTLT) { + return (int)((unsigned)lv << (unsigned)rv); + } + mask = ((unsigned)-1) << (CHAR_BIT * sizeof(unsigned) - rv); + lv = (int)(((unsigned)lv >> (unsigned)rv) | mask); + return lv; + + case T_MINUS: + if (rv == INT_MIN) { + if (lv == INT_MIN) { + return 0; + } + lv--; + rv++; + } + rv = -rv; + /* FALLTHROUGH */ + case T_PLUS: + if (rv > 0 && lv > (INT_MAX - rv)) { + complain(p, "Integer overflow"); + complain_fail(); + return INT_MAX; + } + if (rv < 0 && lv < (INT_MIN - rv)) { + complain(p, "Integer underflow"); + complain_fail(); + return INT_MIN; + } + return lv + rv; + + case T_STAR: + if (rv == 0) { + return 0; + } + if (rv == 1) { + return lv; + } + if (rv == -1 && lv == INT_MIN) { + lv++; + lv = -lv; + if (lv == INT_MAX) { + complain(p, "Integer overflow"); + complain_fail(); + return INT_MAX; + } + lv++; + return lv; + } + if (lv == INT_MIN && rv < 0) { + complain(p, "Integer overflow"); + complain_fail(); + return INT_MAX; + } + if (lv == INT_MIN && rv > 0) { + complain(p, "Integer underflow"); + complain_fail(); + return INT_MIN; + } + if (rv < 0) { + rv = -rv; + lv = -lv; + } + if (lv > 0 && lv > INT_MAX / rv) { + complain(p, "Integer overflow"); + complain_fail(); + return INT_MAX; + } + if (lv < 0 && lv < INT_MIN / rv) { + complain(p, "Integer underflow"); + complain_fail(); + return INT_MIN; + } + return lv * rv; + + case T_SLASH: + if (rv == 0) { + complain(p, "Division by zero"); + complain_fail(); + return 0; + } + return lv / rv; + + case T_PCT: + if (rv == 0) { + complain(p, "Modulus by zero"); + complain_fail(); + return 0; + } + return lv % rv; + + default: assert(0); break; + } + return 0; +} + +static +void +tryreduce(void) +{ + unsigned num; + struct token *t1, *t2, *t3, *t4, *t5, *t6; + + while (1) { + num = tokenarray_num(&tokens); + t1 = (num >= 1) ? tokenarray_get(&tokens, num-1) : NULL; + t2 = (num >= 2) ? tokenarray_get(&tokens, num-2) : NULL; + t3 = (num >= 3) ? tokenarray_get(&tokens, num-3) : NULL; + + if (num >= 3 && + t3->tok == T_LPAREN && + t2->tok == T_VAL && + t1->tok == T_RPAREN) { + /* (x) -> x */ + t2->place = t3->place; + token_destroy(t1); + token_destroy(t3); + tokenarray_remove(&tokens, num-1); + tokenarray_remove(&tokens, num-3); + continue; + } + + if (num >= 2 && + (num == 2 || isop(t3->tok) || t3->tok == T_LPAREN) && + isuop(t2->tok) && + t1->tok == T_VAL) { + /* unary operator */ + t1->val = eval_uop(t2->tok, t1->val); + t1->place = t2->place; + token_destroy(t2); + tokenarray_remove(&tokens, num-2); + continue; + } + if (num >= 2 && + (num == 2 || isop(t3->tok) || t3->tok == T_RPAREN) && + t2->tok != T_LPAREN && t2->tok != T_VAL && + t1->tok == T_VAL) { + complain(&t2->place, "Invalid unary operator"); + complain_fail(); + token_destroy(t2); + tokenarray_remove(&tokens, num-2); + continue; + } + + + t4 = (num >= 4) ? tokenarray_get(&tokens, num-4) : NULL; + + if (num >= 4 && + t4->tok == T_VAL && + isbop(t3->tok) && + t2->tok == T_VAL && + (isbop(t1->tok) || !isop(t1->tok))) { + /* binary operator */ + if (looser(t1->tok, t3->tok)) { + t4->val = eval_bop(&t3->place, + t4->val, t3->tok, t2->val); + token_destroy(t2); + token_destroy(t3); + tokenarray_remove(&tokens, num-2); + tokenarray_remove(&tokens, num-3); + continue; + } + break; + } + + t5 = (num >= 5) ? tokenarray_get(&tokens, num-5) : NULL; + t6 = (num >= 6) ? tokenarray_get(&tokens, num-6) : NULL; + + if (num >=6 && + t6->tok == T_VAL && + t5->tok == T_QUES && + t4->tok == T_VAL && + t3->tok == T_COLON && + t2->tok == T_VAL && + !isop(t1->tok)) { + /* conditional expression */ + t6->val = t6->val ? t4->val : t2->val; + token_destroy(t2); + token_destroy(t3); + token_destroy(t4); + token_destroy(t5); + tokenarray_remove(&tokens, num-2); + tokenarray_remove(&tokens, num-3); + tokenarray_remove(&tokens, num-4); + tokenarray_remove(&tokens, num-5); + continue; + } + + if (num >= 2 && + t2->tok == T_LPAREN && + t1->tok == T_RPAREN) { + complain(&t1->place, "Value expected within ()"); + complain_fail(); + t1->tok = T_VAL; + t1->val = 0; + token_destroy(t1); + tokenarray_remove(&tokens, num-1); + continue; + } + + if (num >= 2 && + t2->tok == T_VAL && + t1->tok == T_VAL) { + complain(&t1->place, "Operator expected"); + complain_fail(); + token_destroy(t1); + tokenarray_remove(&tokens, num-1); + continue; + } + + if (num >= 2 && + isop(t2->tok) && + t1->tok == T_EOF) { + complain(&t1->place, "Value expected after operator"); + complain_fail(); + token_destroy(t2); + tokenarray_remove(&tokens, num-2); + continue; + } + + if (num == 2 && + t2->tok == T_VAL && + t1->tok == T_RPAREN) { + complain(&t1->place, "Excess right parenthesis"); + complain_fail(); + token_destroy(t1); + tokenarray_remove(&tokens, num-1); + continue; + } + + if (num == 3 && + t3->tok == T_LPAREN && + t2->tok == T_VAL && + t1->tok == T_EOF) { + complain(&t1->place, "Unclosed left parenthesis"); + complain_fail(); + token_destroy(t3); + tokenarray_remove(&tokens, num-3); + continue; + } + + if (num == 2 && + t2->tok == T_VAL && + t1->tok == T_EOF) { + /* accepting state */ + break; + } + + if (num >= 1 && + t1->tok == T_EOF) { + /* any other configuration at eof is an error */ + complain(&t1->place, "Parse error"); + complain_fail(); + break; + } + + /* otherwise, wait for more input */ + break; + } +} + +static +void +token(struct place *p, enum tokens tok, int val) +{ + struct token *t; + + t = token_create(p, tok, val); + + tokenarray_add(&tokens, t, NULL); + tryreduce(); +} + +static +int +wordval(struct place *p, char *word) +{ + unsigned long val; + char *t; + + if (word[0] >= '0' && word[0] <= '9') { + errno = 0; + val = strtoul(word, &t, 0); + if (errno || *t != '\0') { + complain(p, "Invalid integer constant"); + complain_fail(); + return 0; + } + if (val > INT_MAX) { + complain(p, "Integer constant too large"); + complain_fail(); + return INT_MAX; + } + return val; + } + + /* if it's a symbol, warn and substitute 0. */ + if (warns.undef) { + complain(p, "Warning: value of undefined symbol %s is 0", + word); + if (mode.werror) { + complain_fail(); + } + } + return 0; +} + +static +bool +check_word(struct place *p, char *expr, size_t pos, size_t *len_ret) +{ + size_t len; + int val; + char tmp; + + if (!strchr(alnum, expr[pos])) { + return false; + } + len = strspn(expr + pos, alnum); + tmp = expr[pos + len]; + expr[pos + len] = '\0'; + val = wordval(p, expr + pos); + expr[pos + len] = tmp; + token(p, T_VAL, val); + *len_ret = len; + return true; +} + +static +bool +check_tokens_2(struct place *p, char *expr, size_t pos) +{ + unsigned i; + + for (i=0; i<num_tokens_2; i++) { + if (expr[pos] == tokens_2[i].c1 && + expr[pos+1] == tokens_2[i].c2) { + token(p, tokens_2[i].tok, 0); + return true; + } + } + return false; +} + +static +bool +check_tokens_1(struct place *p, char *expr, size_t pos) +{ + unsigned i; + + for (i=0; i<num_tokens_1; i++) { + if (expr[pos] == tokens_1[i].c1) { + token(p, tokens_1[i].tok, 0); + return true; + } + } + return false; +} + +static +void +tokenize(struct place *p, char *expr) +{ + size_t pos, len; + + pos = 0; + while (expr[pos] != '\0') { + len = strspn(expr+pos, ws); + pos += len; + p->column += len; + if (check_word(p, expr, pos, &len)) { + pos += len; + p->column += len; + continue; + } + if (check_tokens_2(p, expr, pos)) { + pos += 2; + p->column += 2; + continue; + } + if (check_tokens_1(p, expr, pos)) { + pos++; + p->column++; + continue; + } + complain(p, "Invalid character %u in #if-expression", + (unsigned char)expr[pos]); + complain_fail(); + pos++; + p->column++; + } + token(p, T_EOF, 0); +} + +bool +eval(struct place *p, char *expr) +{ + struct token *t1, *t2; + unsigned num; + bool result; + + tokenarray_init(&tokens); + tokenize(p, expr); + + result = false; + num = tokenarray_num(&tokens); + if (num == 2) { + t1 = tokenarray_get(&tokens, num-1); + t2 = tokenarray_get(&tokens, num-2); + if (t1->tok == T_VAL && + t2->tok == T_EOF) { + result = t1->val != 0; + } + } + + tokenarray_destroyall(&tokens); + tokenarray_cleanup(&tokens); + return result; +}
--- a/eval.h Sun Dec 19 21:42:01 2010 -0500 +++ b/eval.h Mon Dec 20 00:32:20 2010 -0500 @@ -1,1 +1,3 @@ -bool eval(char *expr); +#include <stdbool.h> + +bool eval(struct place *p, char *expr);
--- a/utils.c Sun Dec 19 21:42:01 2010 -0500 +++ b/utils.c Mon Dec 20 00:32:20 2010 -0500 @@ -33,6 +33,16 @@ #include "utils.h" +const char ws[] = + " \t\f\v" +; +const char alnum[] = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "_" +; + void * domalloc(size_t len) {