Mercurial > ~dholland > hg > tradcpp > index.cgi
comparison eval.c @ 16:9dda765ee85c
expression evaluator
author | David A. Holland |
---|---|
date | Mon, 20 Dec 2010 00:32:20 -0500 |
parents | |
children | 8a955e3dda2c |
comparison
equal
deleted
inserted
replaced
15:f6177d3ed5c2 | 16:9dda765ee85c |
---|---|
1 #include <stdlib.h> | |
2 #include <string.h> | |
3 #include <limits.h> | |
4 #include <errno.h> | |
5 | |
6 #include "utils.h" | |
7 #include "array.h" | |
8 #include "mode.h" | |
9 #include "place.h" | |
10 #include "eval.h" | |
11 | |
12 /* | |
13 * e ::= | |
14 * e1 ? e2 : e3 | |
15 * e1 || e2 | |
16 * e1 && e2 | |
17 * e1 | e2 | |
18 * e1 ^ e2 | |
19 * e1 & e2 | |
20 * e1 == e2 | e1 != e2 | |
21 * e1 < e2 | e1 <= e2 | e1 > e2 | e1 >= e2 | |
22 * e1 << e2 | e1 >> e2 | |
23 * e1 + e2 | e1 - e2 | |
24 * e1 * e2 | e1 / e2 | e1 % e2 | |
25 * !e | ~e | -e | +e | |
26 * ( e ) | ident | |
27 */ | |
28 | |
29 enum tokens { | |
30 T_EOF, /* end of input */ | |
31 T_VAL, /* value */ | |
32 T_LPAREN, /* parens */ | |
33 T_RPAREN, | |
34 T_PIPEPIPE, /* operators */ | |
35 T_AMPAMP, | |
36 T_EQEQ, | |
37 T_BANGEQ, | |
38 T_LTEQ, | |
39 T_GTEQ, | |
40 T_LTLT, | |
41 T_GTGT, | |
42 T_QUES, | |
43 T_COLON, | |
44 T_PIPE, | |
45 T_CARET, | |
46 T_AMP, | |
47 T_LT, | |
48 T_GT, | |
49 T_PLUS, | |
50 T_MINUS, | |
51 T_STAR, | |
52 T_SLASH, | |
53 T_PCT, | |
54 T_BANG, | |
55 T_TILDE, | |
56 }; | |
57 | |
58 static const struct { | |
59 char c1, c2; | |
60 enum tokens tok; | |
61 } tokens_2[] = { | |
62 { '|', '|', T_PIPEPIPE }, | |
63 { '&', '&', T_AMPAMP }, | |
64 { '=', '=', T_EQEQ }, | |
65 { '!', '=', T_BANGEQ }, | |
66 { '<', '=', T_LTEQ }, | |
67 { '>', '=', T_GTEQ }, | |
68 { '<', '<', T_LTLT }, | |
69 { '>', '>', T_GTGT }, | |
70 }; | |
71 static const unsigned num_tokens_2 = HOWMANY(tokens_2); | |
72 | |
73 static const struct { | |
74 char c1; | |
75 enum tokens tok; | |
76 } tokens_1[] = { | |
77 { '?', T_QUES }, | |
78 { ':', T_COLON }, | |
79 { '|', T_PIPE }, | |
80 { '^', T_CARET }, | |
81 { '&', T_AMP }, | |
82 { '<', T_LT }, | |
83 { '>', T_GT }, | |
84 { '+', T_PLUS }, | |
85 { '-', T_MINUS }, | |
86 { '*', T_STAR }, | |
87 { '/', T_SLASH }, | |
88 { '%', T_PCT }, | |
89 { '!', T_BANG }, | |
90 { '~', T_TILDE }, | |
91 { '(', T_LPAREN }, | |
92 { ')', T_RPAREN }, | |
93 }; | |
94 static const unsigned num_tokens_1 = HOWMANY(tokens_1); | |
95 | |
96 struct token { | |
97 struct place place; | |
98 enum tokens tok; | |
99 int val; | |
100 }; | |
101 DECLARRAY(token); | |
102 DEFARRAY(token, ); | |
103 | |
104 static struct tokenarray tokens; | |
105 | |
106 static | |
107 struct token * | |
108 token_create(const struct place *p, enum tokens tok, int val) | |
109 { | |
110 struct token *t; | |
111 | |
112 t = domalloc(sizeof(*t)); | |
113 t->place = *p; | |
114 t->tok = tok; | |
115 t->val = val; | |
116 return t; | |
117 } | |
118 | |
119 static | |
120 void | |
121 token_destroy(struct token *t) | |
122 { | |
123 free(t); | |
124 } | |
125 | |
126 DESTROYALL_ARRAY(token, ); | |
127 | |
128 static | |
129 bool | |
130 isuop(enum tokens tok) | |
131 { | |
132 switch (tok) { | |
133 case T_BANG: | |
134 case T_TILDE: | |
135 case T_MINUS: | |
136 case T_PLUS: | |
137 return true; | |
138 default: | |
139 break; | |
140 } | |
141 return false; | |
142 } | |
143 | |
144 static | |
145 bool | |
146 isbop(enum tokens tok) | |
147 { | |
148 switch (tok) { | |
149 case T_EOF: | |
150 case T_VAL: | |
151 case T_LPAREN: | |
152 case T_RPAREN: | |
153 case T_COLON: | |
154 case T_QUES: | |
155 case T_BANG: | |
156 case T_TILDE: | |
157 return false; | |
158 default: | |
159 break; | |
160 } | |
161 return true; | |
162 } | |
163 | |
164 static | |
165 bool | |
166 isop(enum tokens tok) | |
167 { | |
168 switch (tok) { | |
169 case T_EOF: | |
170 case T_VAL: | |
171 case T_LPAREN: | |
172 case T_RPAREN: | |
173 return false; | |
174 default: | |
175 break; | |
176 } | |
177 return true; | |
178 } | |
179 | |
180 static | |
181 int | |
182 getprec(enum tokens tok) | |
183 { | |
184 switch (tok) { | |
185 case T_STAR: case T_SLASH: case T_PCT: return 0; | |
186 case T_PLUS: case T_MINUS: return 1; | |
187 case T_LTLT: case T_GTGT: return 2; | |
188 case T_LT: case T_LTEQ: case T_GT: case T_GTEQ: return 3; | |
189 case T_EQEQ: case T_BANGEQ: return 4; | |
190 case T_AMP: return 5; | |
191 case T_CARET: return 6; | |
192 case T_PIPE: return 7; | |
193 case T_AMPAMP: return 8; | |
194 case T_PIPEPIPE: return 9; | |
195 default: break; | |
196 } | |
197 return 10; | |
198 } | |
199 | |
200 static | |
201 bool | |
202 looser(enum tokens t1, enum tokens t2) | |
203 { | |
204 return getprec(t1) > getprec(t2); | |
205 } | |
206 | |
207 static | |
208 int | |
209 eval_uop(enum tokens op, int val) | |
210 { | |
211 switch (op) { | |
212 case T_BANG: val = !val; break; | |
213 case T_TILDE: val = (int)~(unsigned)val; break; | |
214 case T_MINUS: val = -val; break; | |
215 case T_PLUS: break; | |
216 default: assert(0); break; | |
217 } | |
218 return val; | |
219 } | |
220 | |
221 static | |
222 int | |
223 eval_bop(struct place *p, enum tokens op, int lv, int rv) | |
224 { | |
225 unsigned mask; | |
226 | |
227 switch (op) { | |
228 case T_PIPEPIPE: return lv || rv; | |
229 case T_AMPAMP: return lv && rv; | |
230 case T_PIPE: return (int)((unsigned)lv | (unsigned)rv); | |
231 case T_CARET: return (int)((unsigned)lv ^ (unsigned)rv); | |
232 case T_AMP: return (int)((unsigned)lv & (unsigned)rv); | |
233 case T_EQEQ: return lv == rv; | |
234 case T_BANGEQ: return lv != rv; | |
235 case T_LT: return lv < rv; | |
236 case T_GT: return lv > rv; | |
237 case T_LTEQ: return lv <= rv; | |
238 case T_GTEQ: return lv >= rv; | |
239 | |
240 case T_LTLT: | |
241 case T_GTGT: | |
242 if (rv < 0) { | |
243 complain(p, "Negative bit-shift"); | |
244 complain_fail(); | |
245 rv = 0; | |
246 } | |
247 if ((unsigned)rv >= CHAR_BIT * sizeof(unsigned)) { | |
248 complain(p, "Bit-shift farther than type width"); | |
249 complain_fail(); | |
250 rv = 0; | |
251 } | |
252 if (op == T_LTLT) { | |
253 return (int)((unsigned)lv << (unsigned)rv); | |
254 } | |
255 mask = ((unsigned)-1) << (CHAR_BIT * sizeof(unsigned) - rv); | |
256 lv = (int)(((unsigned)lv >> (unsigned)rv) | mask); | |
257 return lv; | |
258 | |
259 case T_MINUS: | |
260 if (rv == INT_MIN) { | |
261 if (lv == INT_MIN) { | |
262 return 0; | |
263 } | |
264 lv--; | |
265 rv++; | |
266 } | |
267 rv = -rv; | |
268 /* FALLTHROUGH */ | |
269 case T_PLUS: | |
270 if (rv > 0 && lv > (INT_MAX - rv)) { | |
271 complain(p, "Integer overflow"); | |
272 complain_fail(); | |
273 return INT_MAX; | |
274 } | |
275 if (rv < 0 && lv < (INT_MIN - rv)) { | |
276 complain(p, "Integer underflow"); | |
277 complain_fail(); | |
278 return INT_MIN; | |
279 } | |
280 return lv + rv; | |
281 | |
282 case T_STAR: | |
283 if (rv == 0) { | |
284 return 0; | |
285 } | |
286 if (rv == 1) { | |
287 return lv; | |
288 } | |
289 if (rv == -1 && lv == INT_MIN) { | |
290 lv++; | |
291 lv = -lv; | |
292 if (lv == INT_MAX) { | |
293 complain(p, "Integer overflow"); | |
294 complain_fail(); | |
295 return INT_MAX; | |
296 } | |
297 lv++; | |
298 return lv; | |
299 } | |
300 if (lv == INT_MIN && rv < 0) { | |
301 complain(p, "Integer overflow"); | |
302 complain_fail(); | |
303 return INT_MAX; | |
304 } | |
305 if (lv == INT_MIN && rv > 0) { | |
306 complain(p, "Integer underflow"); | |
307 complain_fail(); | |
308 return INT_MIN; | |
309 } | |
310 if (rv < 0) { | |
311 rv = -rv; | |
312 lv = -lv; | |
313 } | |
314 if (lv > 0 && lv > INT_MAX / rv) { | |
315 complain(p, "Integer overflow"); | |
316 complain_fail(); | |
317 return INT_MAX; | |
318 } | |
319 if (lv < 0 && lv < INT_MIN / rv) { | |
320 complain(p, "Integer underflow"); | |
321 complain_fail(); | |
322 return INT_MIN; | |
323 } | |
324 return lv * rv; | |
325 | |
326 case T_SLASH: | |
327 if (rv == 0) { | |
328 complain(p, "Division by zero"); | |
329 complain_fail(); | |
330 return 0; | |
331 } | |
332 return lv / rv; | |
333 | |
334 case T_PCT: | |
335 if (rv == 0) { | |
336 complain(p, "Modulus by zero"); | |
337 complain_fail(); | |
338 return 0; | |
339 } | |
340 return lv % rv; | |
341 | |
342 default: assert(0); break; | |
343 } | |
344 return 0; | |
345 } | |
346 | |
347 static | |
348 void | |
349 tryreduce(void) | |
350 { | |
351 unsigned num; | |
352 struct token *t1, *t2, *t3, *t4, *t5, *t6; | |
353 | |
354 while (1) { | |
355 num = tokenarray_num(&tokens); | |
356 t1 = (num >= 1) ? tokenarray_get(&tokens, num-1) : NULL; | |
357 t2 = (num >= 2) ? tokenarray_get(&tokens, num-2) : NULL; | |
358 t3 = (num >= 3) ? tokenarray_get(&tokens, num-3) : NULL; | |
359 | |
360 if (num >= 3 && | |
361 t3->tok == T_LPAREN && | |
362 t2->tok == T_VAL && | |
363 t1->tok == T_RPAREN) { | |
364 /* (x) -> x */ | |
365 t2->place = t3->place; | |
366 token_destroy(t1); | |
367 token_destroy(t3); | |
368 tokenarray_remove(&tokens, num-1); | |
369 tokenarray_remove(&tokens, num-3); | |
370 continue; | |
371 } | |
372 | |
373 if (num >= 2 && | |
374 (num == 2 || isop(t3->tok) || t3->tok == T_LPAREN) && | |
375 isuop(t2->tok) && | |
376 t1->tok == T_VAL) { | |
377 /* unary operator */ | |
378 t1->val = eval_uop(t2->tok, t1->val); | |
379 t1->place = t2->place; | |
380 token_destroy(t2); | |
381 tokenarray_remove(&tokens, num-2); | |
382 continue; | |
383 } | |
384 if (num >= 2 && | |
385 (num == 2 || isop(t3->tok) || t3->tok == T_RPAREN) && | |
386 t2->tok != T_LPAREN && t2->tok != T_VAL && | |
387 t1->tok == T_VAL) { | |
388 complain(&t2->place, "Invalid unary operator"); | |
389 complain_fail(); | |
390 token_destroy(t2); | |
391 tokenarray_remove(&tokens, num-2); | |
392 continue; | |
393 } | |
394 | |
395 | |
396 t4 = (num >= 4) ? tokenarray_get(&tokens, num-4) : NULL; | |
397 | |
398 if (num >= 4 && | |
399 t4->tok == T_VAL && | |
400 isbop(t3->tok) && | |
401 t2->tok == T_VAL && | |
402 (isbop(t1->tok) || !isop(t1->tok))) { | |
403 /* binary operator */ | |
404 if (looser(t1->tok, t3->tok)) { | |
405 t4->val = eval_bop(&t3->place, | |
406 t4->val, t3->tok, t2->val); | |
407 token_destroy(t2); | |
408 token_destroy(t3); | |
409 tokenarray_remove(&tokens, num-2); | |
410 tokenarray_remove(&tokens, num-3); | |
411 continue; | |
412 } | |
413 break; | |
414 } | |
415 | |
416 t5 = (num >= 5) ? tokenarray_get(&tokens, num-5) : NULL; | |
417 t6 = (num >= 6) ? tokenarray_get(&tokens, num-6) : NULL; | |
418 | |
419 if (num >=6 && | |
420 t6->tok == T_VAL && | |
421 t5->tok == T_QUES && | |
422 t4->tok == T_VAL && | |
423 t3->tok == T_COLON && | |
424 t2->tok == T_VAL && | |
425 !isop(t1->tok)) { | |
426 /* conditional expression */ | |
427 t6->val = t6->val ? t4->val : t2->val; | |
428 token_destroy(t2); | |
429 token_destroy(t3); | |
430 token_destroy(t4); | |
431 token_destroy(t5); | |
432 tokenarray_remove(&tokens, num-2); | |
433 tokenarray_remove(&tokens, num-3); | |
434 tokenarray_remove(&tokens, num-4); | |
435 tokenarray_remove(&tokens, num-5); | |
436 continue; | |
437 } | |
438 | |
439 if (num >= 2 && | |
440 t2->tok == T_LPAREN && | |
441 t1->tok == T_RPAREN) { | |
442 complain(&t1->place, "Value expected within ()"); | |
443 complain_fail(); | |
444 t1->tok = T_VAL; | |
445 t1->val = 0; | |
446 token_destroy(t1); | |
447 tokenarray_remove(&tokens, num-1); | |
448 continue; | |
449 } | |
450 | |
451 if (num >= 2 && | |
452 t2->tok == T_VAL && | |
453 t1->tok == T_VAL) { | |
454 complain(&t1->place, "Operator expected"); | |
455 complain_fail(); | |
456 token_destroy(t1); | |
457 tokenarray_remove(&tokens, num-1); | |
458 continue; | |
459 } | |
460 | |
461 if (num >= 2 && | |
462 isop(t2->tok) && | |
463 t1->tok == T_EOF) { | |
464 complain(&t1->place, "Value expected after operator"); | |
465 complain_fail(); | |
466 token_destroy(t2); | |
467 tokenarray_remove(&tokens, num-2); | |
468 continue; | |
469 } | |
470 | |
471 if (num == 2 && | |
472 t2->tok == T_VAL && | |
473 t1->tok == T_RPAREN) { | |
474 complain(&t1->place, "Excess right parenthesis"); | |
475 complain_fail(); | |
476 token_destroy(t1); | |
477 tokenarray_remove(&tokens, num-1); | |
478 continue; | |
479 } | |
480 | |
481 if (num == 3 && | |
482 t3->tok == T_LPAREN && | |
483 t2->tok == T_VAL && | |
484 t1->tok == T_EOF) { | |
485 complain(&t1->place, "Unclosed left parenthesis"); | |
486 complain_fail(); | |
487 token_destroy(t3); | |
488 tokenarray_remove(&tokens, num-3); | |
489 continue; | |
490 } | |
491 | |
492 if (num == 2 && | |
493 t2->tok == T_VAL && | |
494 t1->tok == T_EOF) { | |
495 /* accepting state */ | |
496 break; | |
497 } | |
498 | |
499 if (num >= 1 && | |
500 t1->tok == T_EOF) { | |
501 /* any other configuration at eof is an error */ | |
502 complain(&t1->place, "Parse error"); | |
503 complain_fail(); | |
504 break; | |
505 } | |
506 | |
507 /* otherwise, wait for more input */ | |
508 break; | |
509 } | |
510 } | |
511 | |
512 static | |
513 void | |
514 token(struct place *p, enum tokens tok, int val) | |
515 { | |
516 struct token *t; | |
517 | |
518 t = token_create(p, tok, val); | |
519 | |
520 tokenarray_add(&tokens, t, NULL); | |
521 tryreduce(); | |
522 } | |
523 | |
524 static | |
525 int | |
526 wordval(struct place *p, char *word) | |
527 { | |
528 unsigned long val; | |
529 char *t; | |
530 | |
531 if (word[0] >= '0' && word[0] <= '9') { | |
532 errno = 0; | |
533 val = strtoul(word, &t, 0); | |
534 if (errno || *t != '\0') { | |
535 complain(p, "Invalid integer constant"); | |
536 complain_fail(); | |
537 return 0; | |
538 } | |
539 if (val > INT_MAX) { | |
540 complain(p, "Integer constant too large"); | |
541 complain_fail(); | |
542 return INT_MAX; | |
543 } | |
544 return val; | |
545 } | |
546 | |
547 /* if it's a symbol, warn and substitute 0. */ | |
548 if (warns.undef) { | |
549 complain(p, "Warning: value of undefined symbol %s is 0", | |
550 word); | |
551 if (mode.werror) { | |
552 complain_fail(); | |
553 } | |
554 } | |
555 return 0; | |
556 } | |
557 | |
558 static | |
559 bool | |
560 check_word(struct place *p, char *expr, size_t pos, size_t *len_ret) | |
561 { | |
562 size_t len; | |
563 int val; | |
564 char tmp; | |
565 | |
566 if (!strchr(alnum, expr[pos])) { | |
567 return false; | |
568 } | |
569 len = strspn(expr + pos, alnum); | |
570 tmp = expr[pos + len]; | |
571 expr[pos + len] = '\0'; | |
572 val = wordval(p, expr + pos); | |
573 expr[pos + len] = tmp; | |
574 token(p, T_VAL, val); | |
575 *len_ret = len; | |
576 return true; | |
577 } | |
578 | |
579 static | |
580 bool | |
581 check_tokens_2(struct place *p, char *expr, size_t pos) | |
582 { | |
583 unsigned i; | |
584 | |
585 for (i=0; i<num_tokens_2; i++) { | |
586 if (expr[pos] == tokens_2[i].c1 && | |
587 expr[pos+1] == tokens_2[i].c2) { | |
588 token(p, tokens_2[i].tok, 0); | |
589 return true; | |
590 } | |
591 } | |
592 return false; | |
593 } | |
594 | |
595 static | |
596 bool | |
597 check_tokens_1(struct place *p, char *expr, size_t pos) | |
598 { | |
599 unsigned i; | |
600 | |
601 for (i=0; i<num_tokens_1; i++) { | |
602 if (expr[pos] == tokens_1[i].c1) { | |
603 token(p, tokens_1[i].tok, 0); | |
604 return true; | |
605 } | |
606 } | |
607 return false; | |
608 } | |
609 | |
610 static | |
611 void | |
612 tokenize(struct place *p, char *expr) | |
613 { | |
614 size_t pos, len; | |
615 | |
616 pos = 0; | |
617 while (expr[pos] != '\0') { | |
618 len = strspn(expr+pos, ws); | |
619 pos += len; | |
620 p->column += len; | |
621 if (check_word(p, expr, pos, &len)) { | |
622 pos += len; | |
623 p->column += len; | |
624 continue; | |
625 } | |
626 if (check_tokens_2(p, expr, pos)) { | |
627 pos += 2; | |
628 p->column += 2; | |
629 continue; | |
630 } | |
631 if (check_tokens_1(p, expr, pos)) { | |
632 pos++; | |
633 p->column++; | |
634 continue; | |
635 } | |
636 complain(p, "Invalid character %u in #if-expression", | |
637 (unsigned char)expr[pos]); | |
638 complain_fail(); | |
639 pos++; | |
640 p->column++; | |
641 } | |
642 token(p, T_EOF, 0); | |
643 } | |
644 | |
645 bool | |
646 eval(struct place *p, char *expr) | |
647 { | |
648 struct token *t1, *t2; | |
649 unsigned num; | |
650 bool result; | |
651 | |
652 tokenarray_init(&tokens); | |
653 tokenize(p, expr); | |
654 | |
655 result = false; | |
656 num = tokenarray_num(&tokens); | |
657 if (num == 2) { | |
658 t1 = tokenarray_get(&tokens, num-1); | |
659 t2 = tokenarray_get(&tokens, num-2); | |
660 if (t1->tok == T_VAL && | |
661 t2->tok == T_EOF) { | |
662 result = t1->val != 0; | |
663 } | |
664 } | |
665 | |
666 tokenarray_destroyall(&tokens); | |
667 tokenarray_cleanup(&tokens); | |
668 return result; | |
669 } |