comparison eval.c @ 16:9dda765ee85c

expression evaluator
author David A. Holland
date Mon, 20 Dec 2010 00:32:20 -0500
parents
children 8a955e3dda2c
comparison
equal deleted inserted replaced
15:f6177d3ed5c2 16:9dda765ee85c
1 #include <stdlib.h>
2 #include <string.h>
3 #include <limits.h>
4 #include <errno.h>
5
6 #include "utils.h"
7 #include "array.h"
8 #include "mode.h"
9 #include "place.h"
10 #include "eval.h"
11
12 /*
13 * e ::=
14 * e1 ? e2 : e3
15 * e1 || e2
16 * e1 && e2
17 * e1 | e2
18 * e1 ^ e2
19 * e1 & e2
20 * e1 == e2 | e1 != e2
21 * e1 < e2 | e1 <= e2 | e1 > e2 | e1 >= e2
22 * e1 << e2 | e1 >> e2
23 * e1 + e2 | e1 - e2
24 * e1 * e2 | e1 / e2 | e1 % e2
25 * !e | ~e | -e | +e
26 * ( e ) | ident
27 */
28
29 enum tokens {
30 T_EOF, /* end of input */
31 T_VAL, /* value */
32 T_LPAREN, /* parens */
33 T_RPAREN,
34 T_PIPEPIPE, /* operators */
35 T_AMPAMP,
36 T_EQEQ,
37 T_BANGEQ,
38 T_LTEQ,
39 T_GTEQ,
40 T_LTLT,
41 T_GTGT,
42 T_QUES,
43 T_COLON,
44 T_PIPE,
45 T_CARET,
46 T_AMP,
47 T_LT,
48 T_GT,
49 T_PLUS,
50 T_MINUS,
51 T_STAR,
52 T_SLASH,
53 T_PCT,
54 T_BANG,
55 T_TILDE,
56 };
57
58 static const struct {
59 char c1, c2;
60 enum tokens tok;
61 } tokens_2[] = {
62 { '|', '|', T_PIPEPIPE },
63 { '&', '&', T_AMPAMP },
64 { '=', '=', T_EQEQ },
65 { '!', '=', T_BANGEQ },
66 { '<', '=', T_LTEQ },
67 { '>', '=', T_GTEQ },
68 { '<', '<', T_LTLT },
69 { '>', '>', T_GTGT },
70 };
71 static const unsigned num_tokens_2 = HOWMANY(tokens_2);
72
73 static const struct {
74 char c1;
75 enum tokens tok;
76 } tokens_1[] = {
77 { '?', T_QUES },
78 { ':', T_COLON },
79 { '|', T_PIPE },
80 { '^', T_CARET },
81 { '&', T_AMP },
82 { '<', T_LT },
83 { '>', T_GT },
84 { '+', T_PLUS },
85 { '-', T_MINUS },
86 { '*', T_STAR },
87 { '/', T_SLASH },
88 { '%', T_PCT },
89 { '!', T_BANG },
90 { '~', T_TILDE },
91 { '(', T_LPAREN },
92 { ')', T_RPAREN },
93 };
94 static const unsigned num_tokens_1 = HOWMANY(tokens_1);
95
96 struct token {
97 struct place place;
98 enum tokens tok;
99 int val;
100 };
101 DECLARRAY(token);
102 DEFARRAY(token, );
103
104 static struct tokenarray tokens;
105
106 static
107 struct token *
108 token_create(const struct place *p, enum tokens tok, int val)
109 {
110 struct token *t;
111
112 t = domalloc(sizeof(*t));
113 t->place = *p;
114 t->tok = tok;
115 t->val = val;
116 return t;
117 }
118
119 static
120 void
121 token_destroy(struct token *t)
122 {
123 free(t);
124 }
125
126 DESTROYALL_ARRAY(token, );
127
128 static
129 bool
130 isuop(enum tokens tok)
131 {
132 switch (tok) {
133 case T_BANG:
134 case T_TILDE:
135 case T_MINUS:
136 case T_PLUS:
137 return true;
138 default:
139 break;
140 }
141 return false;
142 }
143
144 static
145 bool
146 isbop(enum tokens tok)
147 {
148 switch (tok) {
149 case T_EOF:
150 case T_VAL:
151 case T_LPAREN:
152 case T_RPAREN:
153 case T_COLON:
154 case T_QUES:
155 case T_BANG:
156 case T_TILDE:
157 return false;
158 default:
159 break;
160 }
161 return true;
162 }
163
164 static
165 bool
166 isop(enum tokens tok)
167 {
168 switch (tok) {
169 case T_EOF:
170 case T_VAL:
171 case T_LPAREN:
172 case T_RPAREN:
173 return false;
174 default:
175 break;
176 }
177 return true;
178 }
179
180 static
181 int
182 getprec(enum tokens tok)
183 {
184 switch (tok) {
185 case T_STAR: case T_SLASH: case T_PCT: return 0;
186 case T_PLUS: case T_MINUS: return 1;
187 case T_LTLT: case T_GTGT: return 2;
188 case T_LT: case T_LTEQ: case T_GT: case T_GTEQ: return 3;
189 case T_EQEQ: case T_BANGEQ: return 4;
190 case T_AMP: return 5;
191 case T_CARET: return 6;
192 case T_PIPE: return 7;
193 case T_AMPAMP: return 8;
194 case T_PIPEPIPE: return 9;
195 default: break;
196 }
197 return 10;
198 }
199
200 static
201 bool
202 looser(enum tokens t1, enum tokens t2)
203 {
204 return getprec(t1) > getprec(t2);
205 }
206
207 static
208 int
209 eval_uop(enum tokens op, int val)
210 {
211 switch (op) {
212 case T_BANG: val = !val; break;
213 case T_TILDE: val = (int)~(unsigned)val; break;
214 case T_MINUS: val = -val; break;
215 case T_PLUS: break;
216 default: assert(0); break;
217 }
218 return val;
219 }
220
221 static
222 int
223 eval_bop(struct place *p, enum tokens op, int lv, int rv)
224 {
225 unsigned mask;
226
227 switch (op) {
228 case T_PIPEPIPE: return lv || rv;
229 case T_AMPAMP: return lv && rv;
230 case T_PIPE: return (int)((unsigned)lv | (unsigned)rv);
231 case T_CARET: return (int)((unsigned)lv ^ (unsigned)rv);
232 case T_AMP: return (int)((unsigned)lv & (unsigned)rv);
233 case T_EQEQ: return lv == rv;
234 case T_BANGEQ: return lv != rv;
235 case T_LT: return lv < rv;
236 case T_GT: return lv > rv;
237 case T_LTEQ: return lv <= rv;
238 case T_GTEQ: return lv >= rv;
239
240 case T_LTLT:
241 case T_GTGT:
242 if (rv < 0) {
243 complain(p, "Negative bit-shift");
244 complain_fail();
245 rv = 0;
246 }
247 if ((unsigned)rv >= CHAR_BIT * sizeof(unsigned)) {
248 complain(p, "Bit-shift farther than type width");
249 complain_fail();
250 rv = 0;
251 }
252 if (op == T_LTLT) {
253 return (int)((unsigned)lv << (unsigned)rv);
254 }
255 mask = ((unsigned)-1) << (CHAR_BIT * sizeof(unsigned) - rv);
256 lv = (int)(((unsigned)lv >> (unsigned)rv) | mask);
257 return lv;
258
259 case T_MINUS:
260 if (rv == INT_MIN) {
261 if (lv == INT_MIN) {
262 return 0;
263 }
264 lv--;
265 rv++;
266 }
267 rv = -rv;
268 /* FALLTHROUGH */
269 case T_PLUS:
270 if (rv > 0 && lv > (INT_MAX - rv)) {
271 complain(p, "Integer overflow");
272 complain_fail();
273 return INT_MAX;
274 }
275 if (rv < 0 && lv < (INT_MIN - rv)) {
276 complain(p, "Integer underflow");
277 complain_fail();
278 return INT_MIN;
279 }
280 return lv + rv;
281
282 case T_STAR:
283 if (rv == 0) {
284 return 0;
285 }
286 if (rv == 1) {
287 return lv;
288 }
289 if (rv == -1 && lv == INT_MIN) {
290 lv++;
291 lv = -lv;
292 if (lv == INT_MAX) {
293 complain(p, "Integer overflow");
294 complain_fail();
295 return INT_MAX;
296 }
297 lv++;
298 return lv;
299 }
300 if (lv == INT_MIN && rv < 0) {
301 complain(p, "Integer overflow");
302 complain_fail();
303 return INT_MAX;
304 }
305 if (lv == INT_MIN && rv > 0) {
306 complain(p, "Integer underflow");
307 complain_fail();
308 return INT_MIN;
309 }
310 if (rv < 0) {
311 rv = -rv;
312 lv = -lv;
313 }
314 if (lv > 0 && lv > INT_MAX / rv) {
315 complain(p, "Integer overflow");
316 complain_fail();
317 return INT_MAX;
318 }
319 if (lv < 0 && lv < INT_MIN / rv) {
320 complain(p, "Integer underflow");
321 complain_fail();
322 return INT_MIN;
323 }
324 return lv * rv;
325
326 case T_SLASH:
327 if (rv == 0) {
328 complain(p, "Division by zero");
329 complain_fail();
330 return 0;
331 }
332 return lv / rv;
333
334 case T_PCT:
335 if (rv == 0) {
336 complain(p, "Modulus by zero");
337 complain_fail();
338 return 0;
339 }
340 return lv % rv;
341
342 default: assert(0); break;
343 }
344 return 0;
345 }
346
347 static
348 void
349 tryreduce(void)
350 {
351 unsigned num;
352 struct token *t1, *t2, *t3, *t4, *t5, *t6;
353
354 while (1) {
355 num = tokenarray_num(&tokens);
356 t1 = (num >= 1) ? tokenarray_get(&tokens, num-1) : NULL;
357 t2 = (num >= 2) ? tokenarray_get(&tokens, num-2) : NULL;
358 t3 = (num >= 3) ? tokenarray_get(&tokens, num-3) : NULL;
359
360 if (num >= 3 &&
361 t3->tok == T_LPAREN &&
362 t2->tok == T_VAL &&
363 t1->tok == T_RPAREN) {
364 /* (x) -> x */
365 t2->place = t3->place;
366 token_destroy(t1);
367 token_destroy(t3);
368 tokenarray_remove(&tokens, num-1);
369 tokenarray_remove(&tokens, num-3);
370 continue;
371 }
372
373 if (num >= 2 &&
374 (num == 2 || isop(t3->tok) || t3->tok == T_LPAREN) &&
375 isuop(t2->tok) &&
376 t1->tok == T_VAL) {
377 /* unary operator */
378 t1->val = eval_uop(t2->tok, t1->val);
379 t1->place = t2->place;
380 token_destroy(t2);
381 tokenarray_remove(&tokens, num-2);
382 continue;
383 }
384 if (num >= 2 &&
385 (num == 2 || isop(t3->tok) || t3->tok == T_RPAREN) &&
386 t2->tok != T_LPAREN && t2->tok != T_VAL &&
387 t1->tok == T_VAL) {
388 complain(&t2->place, "Invalid unary operator");
389 complain_fail();
390 token_destroy(t2);
391 tokenarray_remove(&tokens, num-2);
392 continue;
393 }
394
395
396 t4 = (num >= 4) ? tokenarray_get(&tokens, num-4) : NULL;
397
398 if (num >= 4 &&
399 t4->tok == T_VAL &&
400 isbop(t3->tok) &&
401 t2->tok == T_VAL &&
402 (isbop(t1->tok) || !isop(t1->tok))) {
403 /* binary operator */
404 if (looser(t1->tok, t3->tok)) {
405 t4->val = eval_bop(&t3->place,
406 t4->val, t3->tok, t2->val);
407 token_destroy(t2);
408 token_destroy(t3);
409 tokenarray_remove(&tokens, num-2);
410 tokenarray_remove(&tokens, num-3);
411 continue;
412 }
413 break;
414 }
415
416 t5 = (num >= 5) ? tokenarray_get(&tokens, num-5) : NULL;
417 t6 = (num >= 6) ? tokenarray_get(&tokens, num-6) : NULL;
418
419 if (num >=6 &&
420 t6->tok == T_VAL &&
421 t5->tok == T_QUES &&
422 t4->tok == T_VAL &&
423 t3->tok == T_COLON &&
424 t2->tok == T_VAL &&
425 !isop(t1->tok)) {
426 /* conditional expression */
427 t6->val = t6->val ? t4->val : t2->val;
428 token_destroy(t2);
429 token_destroy(t3);
430 token_destroy(t4);
431 token_destroy(t5);
432 tokenarray_remove(&tokens, num-2);
433 tokenarray_remove(&tokens, num-3);
434 tokenarray_remove(&tokens, num-4);
435 tokenarray_remove(&tokens, num-5);
436 continue;
437 }
438
439 if (num >= 2 &&
440 t2->tok == T_LPAREN &&
441 t1->tok == T_RPAREN) {
442 complain(&t1->place, "Value expected within ()");
443 complain_fail();
444 t1->tok = T_VAL;
445 t1->val = 0;
446 token_destroy(t1);
447 tokenarray_remove(&tokens, num-1);
448 continue;
449 }
450
451 if (num >= 2 &&
452 t2->tok == T_VAL &&
453 t1->tok == T_VAL) {
454 complain(&t1->place, "Operator expected");
455 complain_fail();
456 token_destroy(t1);
457 tokenarray_remove(&tokens, num-1);
458 continue;
459 }
460
461 if (num >= 2 &&
462 isop(t2->tok) &&
463 t1->tok == T_EOF) {
464 complain(&t1->place, "Value expected after operator");
465 complain_fail();
466 token_destroy(t2);
467 tokenarray_remove(&tokens, num-2);
468 continue;
469 }
470
471 if (num == 2 &&
472 t2->tok == T_VAL &&
473 t1->tok == T_RPAREN) {
474 complain(&t1->place, "Excess right parenthesis");
475 complain_fail();
476 token_destroy(t1);
477 tokenarray_remove(&tokens, num-1);
478 continue;
479 }
480
481 if (num == 3 &&
482 t3->tok == T_LPAREN &&
483 t2->tok == T_VAL &&
484 t1->tok == T_EOF) {
485 complain(&t1->place, "Unclosed left parenthesis");
486 complain_fail();
487 token_destroy(t3);
488 tokenarray_remove(&tokens, num-3);
489 continue;
490 }
491
492 if (num == 2 &&
493 t2->tok == T_VAL &&
494 t1->tok == T_EOF) {
495 /* accepting state */
496 break;
497 }
498
499 if (num >= 1 &&
500 t1->tok == T_EOF) {
501 /* any other configuration at eof is an error */
502 complain(&t1->place, "Parse error");
503 complain_fail();
504 break;
505 }
506
507 /* otherwise, wait for more input */
508 break;
509 }
510 }
511
512 static
513 void
514 token(struct place *p, enum tokens tok, int val)
515 {
516 struct token *t;
517
518 t = token_create(p, tok, val);
519
520 tokenarray_add(&tokens, t, NULL);
521 tryreduce();
522 }
523
524 static
525 int
526 wordval(struct place *p, char *word)
527 {
528 unsigned long val;
529 char *t;
530
531 if (word[0] >= '0' && word[0] <= '9') {
532 errno = 0;
533 val = strtoul(word, &t, 0);
534 if (errno || *t != '\0') {
535 complain(p, "Invalid integer constant");
536 complain_fail();
537 return 0;
538 }
539 if (val > INT_MAX) {
540 complain(p, "Integer constant too large");
541 complain_fail();
542 return INT_MAX;
543 }
544 return val;
545 }
546
547 /* if it's a symbol, warn and substitute 0. */
548 if (warns.undef) {
549 complain(p, "Warning: value of undefined symbol %s is 0",
550 word);
551 if (mode.werror) {
552 complain_fail();
553 }
554 }
555 return 0;
556 }
557
558 static
559 bool
560 check_word(struct place *p, char *expr, size_t pos, size_t *len_ret)
561 {
562 size_t len;
563 int val;
564 char tmp;
565
566 if (!strchr(alnum, expr[pos])) {
567 return false;
568 }
569 len = strspn(expr + pos, alnum);
570 tmp = expr[pos + len];
571 expr[pos + len] = '\0';
572 val = wordval(p, expr + pos);
573 expr[pos + len] = tmp;
574 token(p, T_VAL, val);
575 *len_ret = len;
576 return true;
577 }
578
579 static
580 bool
581 check_tokens_2(struct place *p, char *expr, size_t pos)
582 {
583 unsigned i;
584
585 for (i=0; i<num_tokens_2; i++) {
586 if (expr[pos] == tokens_2[i].c1 &&
587 expr[pos+1] == tokens_2[i].c2) {
588 token(p, tokens_2[i].tok, 0);
589 return true;
590 }
591 }
592 return false;
593 }
594
595 static
596 bool
597 check_tokens_1(struct place *p, char *expr, size_t pos)
598 {
599 unsigned i;
600
601 for (i=0; i<num_tokens_1; i++) {
602 if (expr[pos] == tokens_1[i].c1) {
603 token(p, tokens_1[i].tok, 0);
604 return true;
605 }
606 }
607 return false;
608 }
609
610 static
611 void
612 tokenize(struct place *p, char *expr)
613 {
614 size_t pos, len;
615
616 pos = 0;
617 while (expr[pos] != '\0') {
618 len = strspn(expr+pos, ws);
619 pos += len;
620 p->column += len;
621 if (check_word(p, expr, pos, &len)) {
622 pos += len;
623 p->column += len;
624 continue;
625 }
626 if (check_tokens_2(p, expr, pos)) {
627 pos += 2;
628 p->column += 2;
629 continue;
630 }
631 if (check_tokens_1(p, expr, pos)) {
632 pos++;
633 p->column++;
634 continue;
635 }
636 complain(p, "Invalid character %u in #if-expression",
637 (unsigned char)expr[pos]);
638 complain_fail();
639 pos++;
640 p->column++;
641 }
642 token(p, T_EOF, 0);
643 }
644
645 bool
646 eval(struct place *p, char *expr)
647 {
648 struct token *t1, *t2;
649 unsigned num;
650 bool result;
651
652 tokenarray_init(&tokens);
653 tokenize(p, expr);
654
655 result = false;
656 num = tokenarray_num(&tokens);
657 if (num == 2) {
658 t1 = tokenarray_get(&tokens, num-1);
659 t2 = tokenarray_get(&tokens, num-2);
660 if (t1->tok == T_VAL &&
661 t2->tok == T_EOF) {
662 result = t1->val != 0;
663 }
664 }
665
666 tokenarray_destroyall(&tokens);
667 tokenarray_cleanup(&tokens);
668 return result;
669 }