comparison anagram/agcore/cs.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 /*
2 * AnaGram, A System for Syntax Directed Programming
3 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
4 * See the file COPYING for license and usage terms.
5 *
6 * cs.cpp
7 */
8
9 #include "arrays.h"
10 #include "assert.h"
11 #include "bpu.h"
12 #include "config.h"
13 #include "cs.h"
14 #include "csexp.h"
15 #include "dict.h"
16 #include "error.h"
17 #include "myalloc.h"
18 #include "q1glbl.h"
19 #include "rpk.h"
20 #include "rule.h"
21 #include "stacks.h"
22 #include "symbol.h"
23 #include "token.h"
24 #include "tree.h"
25 #include "tsd.h"
26 #include "ut.h"
27
28 //#define INCLUDE_LOGGING
29 #include "log.h"
30
31
// Error code set by build_char_set()/its callees and examined by
// build_sets() when reporting token definition problems (values 1 and 5
// are handled in the switch there; 5 = undefined token).
32 static int char_set_error;
33
// Forward declarations for the character-partition machinery below.
34 static void build_partition_sets(list_dict *d);
35 static void partition_set(int ts, list_dict *d, int *ps, char *fa);
36
37
// set_universe: establish the character universe for the grammar.
// Scans every parse tree to determine min_char_number/max_char_number
// (via CharSetExpression::checkRange side effects), wires tokens whose
// name refers to a named character set to the referenced token with a
// shell production, resolves the distinguished "grammar"/"eof" tokens,
// and finally sizes map_char_number to cover the character range.
38 void set_universe(void) {
39 //unsigned tn;
40
41 LOGSECTION("set_universe");
42 LOGV(ntkns);
43 LOGV(nforms);
44 //min_char_number = 0;
45 max_char_number = -1;
46 /*
47 for (int ptIndex = 0; ptIndex++ < ntrees;) {
48 map_parse_tree[ptIndex].expression->checkRange();
49 }
50 */
// Pass 1: let every expression widen the observed character range.
51 for (ParseTree pt; pt.next(); ) {
52 pt->expression->checkRange();
53 }
54 CharSetExpression::checkMinimum(min_char_number);
// Clamp the top of the range (0x10000 == one past the 16-bit code space).
55 if (max_char_number > 0x10000) {
56 max_char_number = 0x10000;
57 }
58 LOGV(min_char_number) LCV(max_char_number);
59
60 //for (tn = 0; tn++ < ntkns; )
// Pass 2: for each token with no parse tree of its own, follow its name's
// parse tree; if that tree is a named-set reference to another token,
// link the two with a shell production and stamp the tree with the token.
61 for (Each<Token> token; token.loopNotFinished(); token.getNext()) {
62 //token_number_map *tp = &map_token_number[tn];
63 //Token token(tn);
64 //int pt = tp->parse_tree;
65 ParseTree tree = token->parse_tree;
66 //parse_tree_map *ptp;
67 //int alias, name;
68
69 //if (pt) continue;
// A token that already owns a tree needs no aliasing work.
70 if (tree.isNotNull()) {
71 continue;
72 }
73 Symbol alias = token->token_name;
74 if (alias.isNull()) {
75 continue;
76 }
77 //pt = map_token_name[alias].parse_tree;
78 tree = alias->parse_tree;
79 if (tree.isNull()) {
80 continue;
81 }
82 //ptp = &map_parse_tree[pt];
83
84 CharSetExpression *exp = tree->expression;
85 if (exp && exp->nameNode()) {
86 Symbol name = ((NamedCharSet *)exp)->name;
87 Token referencedToken = name->token_number;
88 if (referencedToken.isNull()) {
89 continue;
90 }
// token -> referencedToken shell rule; tree now owned by `token`.
91 shell_production(token, referencedToken);
92 tree->token_number = token;
93 }
94 }
95
96 // if (error_token == 0) {
97 // error_token = find_token_number("error");
98 // }
// Resolve the reserved "grammar" and "eof" tokens if not yet known.
99 if (grammar_token == 0) {
100 grammar_token = find_token_number("grammar");
101 }
102 if (eof_token == 0) {
103 eof_token = find_token_number("eof");
104 if (eof_token == 0) {
105 //int id = identify_string("eof", tkn_dict);
106 Symbol id("eof");
// Only synthesize an eof token when resynchronization or an error
// token makes one necessary.
107 if (id && (auto_resynch || error_token)) {
108 eof_token = id_token(id);
109 }
110 }
111 }
112 nforms_base = nforms;
// No characters seen at all: nothing further to size.
113 if (min_char_number > max_char_number) {
114 return;
115 }
// Always cover at least the 8-bit range.
116 if (max_char_number < 255) {
117 max_char_number = 255;
118 }
119 n_chars = max_char_number - min_char_number + 1;
120 check_size(map_char_number, n_chars, n_chars);
121 LOGV(ntkns);
122 LOGV(nforms);
123 }
124
125 static unsigned ntc_size;
126
// build_sets: resolve the token <-> parse-tree correspondence for all
// terminal tokens, creating shell productions where several named tokens
// share one tree, then build a character set for every remaining
// terminal (reporting errors via char_set_error) and finally hand the
// full set dictionary to build_partition_sets().
127 void build_sets(void) {
128 int k;
129 unsigned i;
130 int id;
// Scratch buffer for the " = <name>" suffix used in error messages.
131 char ts[100];
132 int *ntc;
133 unsigned ntk = ntkns;
134
// Nothing to do when diagnostics suppress set building or no characters
// were seen by set_universe().
135 if (charRangeDiagnostic || min_char_number > max_char_number) {
136 return;
137 }
138
139 LOGSECTION("build_sets");
140 LOGV(ntkns) LCV(nforms) LCV(error_token);
141 /*
142 * count the number of named tokens that point to a given parse tree.
143 * If there is only one such named token, it can be assigned to the
144 * parse tree. Otherwise, the parse tree needs its own token, and
145 * the named tokens have to have shells created.
146 */
147 //ntc_size = ntrees+1;
148 ntc_size = ParseTree::count();
149 ZALLOCATE_ST(ntc, ntc_size);
150 LOGV(ntc_size);
151 ParseTree tree;
152 LOGS("Scanning trees");
// Record, for single-character trees, which name owns each character.
153 for (tree = ParseTree(); tree.next(); ) {
154 int name = tree->token_name;
155 if (name && tree->expression->char_node()) {
156 int value = ((IndividualCode *)tree->expression)->value;
157 assert(value >= min_char_number);
158 LOGV(value) LCV(min_char_number) LCV(name);
159 map_char_number[value - min_char_number].token_name = name;
160 }
161 }
162 LOGS("finished tree loop");
// Count named terminals per parse tree (trees already owning a token
// are skipped).
163 for (i = 0; i++ < ntkns; ) {
164 token_number_map *tp = &map_token_number[i];
165 int name = tp->token_name;
166 unsigned pt;
167
168 if (tp->non_terminal_flag) {
169 continue;
170 }
171 if (name == 0) {
172 continue;
173 }
174 pt = map_token_name[name].parse_tree;
175 LOGV(pt) LCV(ParseTree(pt)->token_number);
176 LOGV((int) map_parse_tree[pt].expression);
177 if (map_parse_tree[pt].token_number) {
178 continue;
179 }
180 assert(pt < ntc_size);
181 ntc[pt]++;
182 }
183 LOGS("finished token loop");
184
185 /*
186 * Now check those trees that have more than one named token. Create
187 * a specific token for the tree.
188 */
189
190 for (tree = ParseTree(); tree.next();) {
191 int tn;
192
// Zero or one named token: no dedicated token needed; reuse ntc[] as
// the "token for this tree" slot (0 = none).
193 if (ntc[tree] <= 1) {
194 ntc[tree] = 0;
195 continue;
196 }
197 tn = tree->token_number;
198 if (tn == 0) {
199 if (tree->expression->nameNode()) {
200 int name = ((NamedCharSet *)tree->expression)->name;
201 //LOGV(netAllocations());
202 tn = id_token(name);
203 Token(tn)->value_type = default_input_type;
204 }
205 else {
206 tn = form_element_1(tree->expression);
207 }
208 }
209 ntc[tree] = tn;
210 }
211 LOGS("trees scanned again");
212
213 /*
214 * Now check the named terminals again and either make up the
215 * tree-token links or build shell productions
216 */
217
218 for (i = 0; i++ < ntk;) {
219 token_number_map *tp = &map_token_number[i];
220 int name = tp->token_name;
221 int pt;
222 int tn;
223 parse_tree_map *ptp;
224
225 if (tp->non_terminal_flag) {
226 continue;
227 }
228 if (name == 0) {
229 continue;
230 }
231 pt = map_token_name[name].parse_tree;
232 if (pt == 0) {
233 continue;
234 }
235 ptp = &map_parse_tree[pt];
236 tn = ptp->token_number;
237 if (tn == 0) {
238 tn = ntc[pt];
239 }
// Sole named token for this tree: adopt the tree directly.
240 if (tn == 0) {
241 ptp->token_number = i;
242 continue;
243 }
// Tree already has a (possibly synthesized) token: make i a shell of it.
244 shell_production(i, tn);
245 }
246 DEALLOCATE(ntc);
247 LOGS("tokens scanned again");
248 nforms_base = nforms;
249
250 LOGV(ntkns) LCV(nforms);
251 LOGV(error_token);
// Build a character set for every plain terminal that does not yet have
// one; report failures through char_set_error.
252 for (i = 0; i++ < ntkns;) {
253 token_number_map *tp = &map_token_number[i];
254 int nn;
255 if (tp->non_terminal_flag ||
256 (int) i == error_token ||
257 tp->key ||
258 tp->token_set_id) {
259 continue;
260 }
261 nn = tp->parse_tree;
262 LOGV(i) LCV(nn);
263 if (nn == 0 && (k = tp->token_name) != 0) {
264 nn = map_token_name[k].parse_tree;
265 }
266 LOGV(nn);
267 if (nn == 0) {
268 char_set_error = 5;
269 }
270 else {
271 tp->token_set_id = build_char_set(nn);
272 }
273 if (char_set_error == 0) {
274 continue;
275 }
276
// Compose and log the diagnostic for token i.
277 id = tp->token_name;
278 if (id) {
279 sprintf(ts," = %s", Symbol(id)->string.pointer());
280 }
281 else {
282 ts[0] = 0;
283 }
284 switch (char_set_error) {
285 case 1:
286 ssprintf("Error defining T%03d: ",i);
287 break;
288 case 5:
289 ssprintf("Undefined token T%03d: ",i);
290 break;
291 }
292 atkn(i);
293 log_error();
294 }
295 build_partition_sets(char_set_dict);
296 LOGV(ntkns);
297 LOGV(nforms);
298 }
299
// build_partition_sets: partition the character range into equivalence
// classes. Two characters land in the same partition exactly when they
// belong to the same collection of character sets in dictionary d. Then:
//  - each partition gets the list of its characters (map_part_number),
//  - each character set gets the list of partitions it covers
//    (map_char_set), and
//  - tokens are created/reused so every partition has a token, with BNF
//    rules added so multi-partition set tokens become nonterminals
//    producing their partition tokens.
300 static void build_partition_sets(list_dict *d) {
301 LOGSECTION("build_partition_sets");
302 int i;
303 unsigned j, n;
304 int *db;
305 unsigned *ndx;
306 int *lb;
307 int m;
308 //int *ps;
309 int ts;
310 unsigned ntk;
311 //char *fa;
312 unsigned *fl;
313 int nf;
314 unsigned cn;
315 int *list;
316 unsigned pn,nc;
317 unsigned tn;
318
// Empty universe: nothing was scanned, so there is nothing to partition.
319 if (max_char_number == -1 && min_char_number == 0) {
320 return;
321 }
322 //ps = local_array(n_chars, int);
// ps[ix] will hold the partition number of character ix (offset from
// min_char_number).
323 LocalArray<int> ps(n_chars);
324 n = d->nsx;
325 db = d->text;
326 ndx = d->ndx;
327 //set = local_array(n, int *);
328 LocalArray<int *> set(n);
329 //k = local_array(n, int);
330 LocalArray<int> k(n);
331 //l = local_array(n, int);
332 LocalArray<int> l(n);
// set[i]: sorted member list of char set i; l[i]: its length;
// k[i]: scan cursor, advanced monotonically as characters increase.
333 for (i = 1; (unsigned) i < n; i++) {
334 set[i] = (db + ndx[i]);
335 l[i] = *(set[i]++) - 1;
336 k[i] = 0;
337 }
338
339 /*
340 * For each character in the character range, scan all the character
341 * sets. Make a list of the sets to which this particular character
342 * belongs. This list represents a unique partition of the character
343 * range. Use a dictionary to give each unique partition an
344 * identifying number. Map each character to the partition to which
345 * it belongs.
346 */
347
348 LOGS("Scanning characters");
349 for (i = min_char_number; i <= max_char_number; i++) {
350 int ix = i - min_char_number;
// iws()/aws()/rws(): open, append to, and close a workspace list.
351 iws();
352 for (j = 1; j < n; j++) {
353 while (k[j] < l[j] && set[j][k[j]] < i) {
354 k[j]++;
355 }
356 if (k[j] == l[j] || set[j][k[j]] > i) {
357 continue;
358 }
359 k[j]++;
360 aws(j);
361 }
362 lb = list_base;
363 m = rws();
// The set-membership list identifies the partition; the dictionary
// dedupes identical lists into one partition number.
364 map_char_number[ix].part_number = ps[ix] = add_list_dict(lb,m,part_dict);
365 LOGV(ix) LCV(ps[ix]);
366 }
367
368 /*
369 * Scan each partition. For each partition, make a list of all the
370 * characters that belong to this partition.
371 */
372
373 LOGS("Scanning partitions");
374 m = part_dict->nsx;
375 check_size(map_part_number, m, m);
376 for (i = 0; (unsigned) i < part_dict->nsx; i++) {
377 iws();
378 for (j = 0; j < (unsigned) n_chars; j++) {
379 if ((unsigned) i == map_char_number[j].part_number) {
380 aws(j + min_char_number);
381 }
382 }
383 map_part_number[i].chars_index = store_list(chars_list);
384 fl = lstptr(map_part_number[i],chars);
385 m = map_part_number[i].size = rws();
386 #ifdef INCLUDE_LOGGING
387 if ((unsigned)(*fl - min_char_number) >
388 (max_char_number - min_char_number)) {
389 LOGV(*fl) LCV(min_char_number) LCV(max_char_number);
390 }
391 #endif
392 assert((unsigned)(*fl - min_char_number) <=
393 (unsigned) (max_char_number - min_char_number));
// A one-character partition inherits any token already mapped to that
// character.
394 if (m == 1
395 && (j = map_char_number[*fl - min_char_number].token_number) != 0) {
396 map_part_number[i].token_number = j;
397 }
398 LOGV(*fl) LCV(min_char_number) LCV(i) LCV(j);
399 }
400
401 m = part_dict->nsx;
402 //fa = local_array(m, char);
// fa: scratch flag array over partitions, reused by partition_set().
403 LocalArray<char> fa(m);
404 m = char_set_dict->nsx;
405 check_size(map_char_set,m,m);
// For every character set, record the list of partitions it covers.
406 for (i = 1; i<m; i++) {
407 char_set_map *csp = &map_char_set[i];
408 partition_set(i, char_set_dict, ps, fa);
409 csp->part_index = store_list(part_list);
410 csp->nparts = rws();
411 }
412
413
414 ntk = ntkns+1;
415 //assert(ok_index(map_token_number, ntkns));
416 m = part_dict->nsx;
417
// Tokens whose set covers exactly one partition: either claim the
// partition outright or, if the partition's token is taken, synthesize a
// fresh token plus shell rules linking the two.
418 LOGS("Scanning tokens");
419 for (i = 1; (unsigned) i < ntk; i++) {
420 part_number_map *ppn;
421 unsigned tn;
422 //int ctn = i;
423 Token ctn(i);
424 unsigned fn;
425 ts = map_token_number[i].token_set_id;
426 if (ts == 0) {
427 continue;
428 }
429 nf = map_char_set[ts].nparts;
430 if (nf != 1) {
431 continue;
432 }
433 fl = lstptr(map_char_set[ts], part);
434 ppn = &map_part_number[pn = *fl];
435 assert(ok_index(map_part_number, pn));
436 list = (int *) lstptr(*ppn, chars);
437 //assert (pn > 0 && pn < m);
438 assert(pn < (unsigned) m);
439 nc = ppn->size;
440 tn = ppn->token_number;
// Partition unowned: token i takes it and stamps all its characters.
441 if (tn == 0) {
442 while (nc--) {
443 cn = *list++ - min_char_number;
444 assert(cn < n_chars);
445 map_char_number[cn].token_number = i;
446 LOGV(cn) LCV(i);
447 }
448 assert(pn < part_dict->nsx);
449 ppn->token_number = i;
450 map_token_number[i].part_number = pn;
451 continue;
452 }
// Partition owned by a pre-existing token: create a new token to own
// it, rewire characters and maps, and demote tn to a nonterminal.
453 if (tn < ntk) {
454 //ntkns++;
455 //check_size(map_token_number, ntkns, ntkns);
456 //ctn = ntkns;
457 ctn = Token::create();
458 ctn->value_type = default_input_type;
459 //pf1x(ctn);
460 //fn = form2();
461 fn = makeRule(ctn);
462 at(bnf_table, tn, fn);
463 Rule(fn)->prim_tkn = tn;
464 ppn->token_number = ctn;
465 Token(tn)->operatorCandidate = ctn->operatorCandidate = 1;
466 while (nc--) {
467 cn = *list++ - min_char_number;
468 assert(cn < n_chars);
469 map_char_number[cn].token_number = ctn;
470 LOGV(cn) LCV(ctn);
471 }
472 map_token_number[ctn].part_number = pn;
473 map_token_number[ctn].value_type = default_input_type;
474 map_token_number[tn].value_type = default_input_type;
475 map_token_number[tn].part_number = 0;
476 map_token_number[tn].non_terminal_flag = 1;
477 tn = ctn;
478 }
// Token i becomes a nonterminal producing the partition's token.
479 //pf1x(tn);
480 //fn = form2();
481 fn = makeRule(tn);
482 LOGV(i) LCV(fn) LCV(tn);
483 at(bnf_table, i, fn);
484 Rule(fn)->prim_tkn = i;
485 map_token_number[i].non_terminal_flag = 1;
486 ctn = tn;
487 }
// Tokens whose set spans several partitions: guarantee each partition a
// token (creating one where missing), then make token i a nonterminal
// with one rule per partition token.
488 for (i = 1; (unsigned) i < ntk; i++) {
489 token_number_map *mti = &map_token_number[i];
490 ts = mti->token_set_id;
491 if (ts == 0) {
492 continue;
493 }
494 nf = map_char_set[ts].nparts;
495 if (nf <= 1) {
496 continue;
497 }
498 fl = lstptr(map_char_set[ts], part);
499 iws();
500 j = 0;
501 while (j < (unsigned) nf) {
502
503 pn = fl[j];
504 assert (pn > 0 && pn < (unsigned) m);
505 tn = map_part_number[pn].token_number;
506 if (tn == 0) {
507 //ntkns++;
508 //check_size(map_token_number,ntkns,ntkns);
509 list = (int *) lstptr(map_part_number[pn], chars);
510 nc = map_part_number[pn].size;
511 tn = Token::create();
512 while (nc--) {
513 cn = *list++ - min_char_number;
514 assert(cn < n_chars);
515 map_char_number[cn].token_number = tn;
516 LOGV(cn) LCV(tn);
517 }
518 //tn = ntkns;
519 assert(pn < part_dict->nsx);
520 map_part_number[pn].token_number = tn;
521 map_token_number[tn].part_number = pn;
522 map_token_number[tn].value_type = default_input_type;
523 map_token_number[tn].operatorCandidate = 1;
524 }
525 //pf1x(tn);
526 //aws(form2());
527 aws(makeRule(tn));
528 j++;
529 }
530 fl = (unsigned *) list_base;
531 nf = rws();
532 map_token_number[i].non_terminal_flag = 1;
533 for (j = 0; j < (unsigned) nf; j++) {
534 LOGV(i) LCV(fl[j]);
535 at(bnf_table, i, fl[j]);
536 //assert(ok_index(map_form_number, fl[j]));
537 Rule rule(fl[j]);
538 rule->prim_tkn = i;
539 }
540 }
541 }
542
543 static void partition_set(int ts, list_dict *d, int *ps, char *fa) {
544 int *list;
545 int n, np, i;
546
547 np = part_dict->nsx;
548 memset(fa, 0, np);
549 list = d->text + d->ndx[ts];
550 n = *(list++) - 1;
551 for (i = 0; i<n; i++) {
552 fa[ps[list[i] - min_char_number]] = 1;
553 }
554 iws();
555 for (i = 0; i < np; i++) {
556 if (fa[i]) {
557 aws(i);
558 }
559 }
560 }
561
// build_char_set: produce the character-set dictionary id for parse tree
// nn, memoized in map_parse_tree[nn].char_set. Expands the tree's
// expression to a bitmap (case-blurred when the grammar is not case
// sensitive), converts it to a sorted character list, and registers the
// list in char_set_dict. Clears char_set_error; a nonzero value after
// return indicates a definition problem (reported by the caller —
// presumably set inside bitmap(); not visible here).
562 int build_char_set(int nn) {
563 LOGSECTION("build_char_set");
564 //char *v;
565 //int i, k;
566 int cs = map_parse_tree[nn].char_set;
567 LOGV(cs) LCV(nn);
568
// Already computed for this tree.
569 if (cs) {
570 return cs;
571 }
572 char_set_error = 0;
573 LOGV(n_chars);
574
575 //int *list = local_array(n_chars, int);
576 LocalArray<int> list(n_chars);
577 CharBitmap bitmap = map_parse_tree[nn].expression->bitmap();
578 if (!case_sensitive) {
579 bitmap.blurCase();
580 }
581 int count = bitmap.list(list);
582 LOGV(count);
583 cs = add_list_dict(list, count, char_set_dict);
584 LOGV(cs);
585 check_size(map_char_set, cs, cs);
// Cross-link set id and parse tree for future lookups.
586 map_char_set[cs].parse_tree = nn;
587 map_parse_tree[nn].char_set = cs;
588 return cs;
589 }
590
591 static void log_undef_sym(int tn) {
592 ssprintf("Undefined symbol: %s", Symbol(tn)->string.pointer());
593 log_error();
594 }
595
596 int getParseTree(int name) {
597 LOGSECTION("getParseTree");
598 int pt = map_token_name[name].parse_tree;
599 LOGV(pt);
600 if (pt) {
601 return pt;
602 }
603
604 int tn = map_token_name[name].token_number;
605 if (tn) {
606 pt = map_token_number[tn].parse_tree;
607 }
608 LOGV(tn) LCV(pt);
609 if (pt) {
610 return pt;
611 }
612
613 log_undef_sym(name);
614 return 0;
615 }