Mercurial repository: ~dholland / hg / ag — comparison view of anagram/agcore/cs.cpp at changeset 0:13d2b8934445.
Changeset description: "Import AnaGram (near-)release tree into Mercurial."
Author: David A. Holland
Date: Sat, 22 Dec 2007 17:52:45 -0500
Parents: (none)
Children: (none)
Diff: -1:000000000000 (empty) → 0:13d2b8934445 (initial import; the entire file is an insertion)
1 /* | |
2 * AnaGram, A System for Syntax Directed Programming | |
3 * Copyright 1993-2002 Parsifal Software. All Rights Reserved. | |
4 * See the file COPYING for license and usage terms. | |
5 * | |
6 * cs.cpp | |
7 */ | |
8 | |
9 #include "arrays.h" | |
10 #include "assert.h" | |
11 #include "bpu.h" | |
12 #include "config.h" | |
13 #include "cs.h" | |
14 #include "csexp.h" | |
15 #include "dict.h" | |
16 #include "error.h" | |
17 #include "myalloc.h" | |
18 #include "q1glbl.h" | |
19 #include "rpk.h" | |
20 #include "rule.h" | |
21 #include "stacks.h" | |
22 #include "symbol.h" | |
23 #include "token.h" | |
24 #include "tree.h" | |
25 #include "tsd.h" | |
26 #include "ut.h" | |
27 | |
28 //#define INCLUDE_LOGGING | |
29 #include "log.h" | |
30 | |
31 | |
/*
 * Error code for character-set construction.  Cleared by build_char_set()
 * and tested/reported in the diagnostic loop at the end of build_sets()
 * (5 = undefined token; other values presumably set elsewhere — the
 * assignment for case 1 is not visible in this file).
 */
static int char_set_error;

static void build_partition_sets(list_dict *d);
static void partition_set(int ts, list_dict *d, int *ps, char *fa);
36 | |
37 | |
/*
 * Establish the character universe and patch up token/parse-tree links.
 *
 * Scans every parse tree so checkRange() can accumulate the global
 * min_char_number/max_char_number bounds, clamps the top of the range,
 * then walks all terminal tokens: a token whose name aliases a named
 * character set that already belongs to another token gets a shell
 * production linking the two.  Finally resolves the distinguished
 * "grammar" and "eof" tokens and sizes map_char_number to span the
 * character range.  Reads/writes module globals throughout.
 */
void set_universe(void) {
  //unsigned tn;

  LOGSECTION("set_universe");
  LOGV(ntkns);
  LOGV(nforms);
  //min_char_number = 0;
  max_char_number = -1;
  /*
  for (int ptIndex = 0; ptIndex++ < ntrees;) {
    map_parse_tree[ptIndex].expression->checkRange();
  }
  */
  // Let every parse-tree expression widen min/max_char_number as needed.
  for (ParseTree pt; pt.next(); ) {
    pt->expression->checkRange();
  }
  CharSetExpression::checkMinimum(min_char_number);
  // NOTE(review): 0x10000 looks like a hard ceiling on the character
  // range (one past the 16-bit code points) — confirm intent elsewhere.
  if (max_char_number > 0x10000) {
    max_char_number = 0x10000;
  }
  LOGV(min_char_number) LCV(max_char_number);

  //for (tn = 0; tn++ < ntkns; )
  for (Each<Token> token; token.loopNotFinished(); token.getNext()) {
    //token_number_map *tp = &map_token_number[tn];
    //Token token(tn);
    //int pt = tp->parse_tree;
    ParseTree tree = token->parse_tree;
    //parse_tree_map *ptp;
    //int alias, name;

    //if (pt) continue;
    // Only tokens without their own parse tree can be aliases.
    if (tree.isNotNull()) {
      continue;
    }
    Symbol alias = token->token_name;
    if (alias.isNull()) {
      continue;
    }
    //pt = map_token_name[alias].parse_tree;
    tree = alias->parse_tree;
    if (tree.isNull()) {
      continue;
    }
    //ptp = &map_parse_tree[pt];

    // If the aliased tree is a named character set whose name maps to a
    // token, link this token to that one via a shell production and
    // record the token on the tree.
    CharSetExpression *exp = tree->expression;
    if (exp && exp->nameNode()) {
      Symbol name = ((NamedCharSet *)exp)->name;
      Token referencedToken = name->token_number;
      if (referencedToken.isNull()) {
        continue;
      }
      shell_production(token, referencedToken);
      tree->token_number = token;
    }
  }

  // if (error_token == 0) {
  //   error_token = find_token_number("error");
  // }
  if (grammar_token == 0) {
    grammar_token = find_token_number("grammar");
  }
  if (eof_token == 0) {
    eof_token = find_token_number("eof");
    if (eof_token == 0) {
      //int id = identify_string("eof", tkn_dict);
      // No "eof" token exists; create one only when error recovery
      // (auto_resynch or an explicit error token) will need it.
      Symbol id("eof");
      if (id && (auto_resynch || error_token)) {
        eof_token = id_token(id);
      }
    }
  }
  nforms_base = nforms;
  // Empty character range: no character-set machinery required.
  if (min_char_number > max_char_number) {
    return;
  }
  // Always cover at least the 8-bit range.
  if (max_char_number < 255) {
    max_char_number = 255;
  }
  n_chars = max_char_number - min_char_number + 1;
  check_size(map_char_number, n_chars, n_chars);
  LOGV(ntkns);
  LOGV(nforms);
}
124 | |
125 static unsigned ntc_size; | |
126 | |
/*
 * Build character sets for all terminal tokens.
 *
 * Pass 1: record the token name owning each single-character parse tree.
 * Pass 2: count named tokens per parse tree (ntc[]).
 * Pass 3: trees claimed by more than one named token get a token of
 *         their own (via id_token or form_element_1).
 * Pass 4: link each named terminal to its tree's token, or emit a shell
 *         production when the tree already has a different token.
 * Pass 5: build the character set for every remaining plain terminal and
 *         report failures; finally hand off to build_partition_sets().
 */
void build_sets(void) {
  int k;
  unsigned i;
  int id;
  char ts[100];
  int *ntc;
  unsigned ntk = ntkns;

  if (charRangeDiagnostic || min_char_number > max_char_number) {
    return;
  }

  LOGSECTION("build_sets");
  LOGV(ntkns) LCV(nforms) LCV(error_token);
  /*
   * count the number of named tokens that point to a given parse tree.
   * If there is only one such named token, it can be assigned to the
   * parse tree. Otherwise, the parse tree needs its own token, and
   * the named tokens have to have shells created.
   */
  //ntc_size = ntrees+1;
  ntc_size = ParseTree::count();
  ZALLOCATE_ST(ntc, ntc_size);
  LOGV(ntc_size);
  ParseTree tree;
  LOGS("Scanning trees");
  // Pass 1: single-character trees stamp their name on map_char_number.
  for (tree = ParseTree(); tree.next(); ) {
    int name = tree->token_name;
    if (name && tree->expression->char_node()) {
      int value = ((IndividualCode *)tree->expression)->value;
      assert(value >= min_char_number);
      LOGV(value) LCV(min_char_number) LCV(name);
      map_char_number[value - min_char_number].token_name = name;
    }
  }
  LOGS("finished tree loop");
  // Pass 2: count named terminal tokens referencing each parse tree.
  for (i = 0; i++ < ntkns; ) {
    token_number_map *tp = &map_token_number[i];
    int name = tp->token_name;
    unsigned pt;

    if (tp->non_terminal_flag) {
      continue;
    }
    if (name == 0) {
      continue;
    }
    pt = map_token_name[name].parse_tree;
    LOGV(pt) LCV(ParseTree(pt)->token_number);
    LOGV((int) map_parse_tree[pt].expression);
    // Trees that already own a token need no count.
    if (map_parse_tree[pt].token_number) {
      continue;
    }
    assert(pt < ntc_size);
    ntc[pt]++;
  }
  LOGS("finished token loop");

  /*
   * Now check those trees that have more than one named token. Create
   * a specific token for the tree.
   */

  for (tree = ParseTree(); tree.next();) {
    int tn;

    if (ntc[tree] <= 1) {
      ntc[tree] = 0;
      continue;
    }
    tn = tree->token_number;
    if (tn == 0) {
      if (tree->expression->nameNode()) {
        int name = ((NamedCharSet *)tree->expression)->name;
        //LOGV(netAllocations());
        tn = id_token(name);
        Token(tn)->value_type = default_input_type;
      }
      else {
        tn = form_element_1(tree->expression);
      }
    }
    // ntc[] now holds the token assigned to each contested tree.
    ntc[tree] = tn;
  }
  LOGS("trees scanned again");

  /*
   * Now check the named terminals again and either make up the
   * tree-token links or build shell productions
   */

  for (i = 0; i++ < ntk;) {
    token_number_map *tp = &map_token_number[i];
    int name = tp->token_name;
    int pt;
    int tn;
    parse_tree_map *ptp;

    if (tp->non_terminal_flag) {
      continue;
    }
    if (name == 0) {
      continue;
    }
    pt = map_token_name[name].parse_tree;
    if (pt == 0) {
      continue;
    }
    ptp = &map_parse_tree[pt];
    tn = ptp->token_number;
    if (tn == 0) {
      tn = ntc[pt];
    }
    if (tn == 0) {
      // Sole named token for this tree: adopt the tree directly.
      ptp->token_number = i;
      continue;
    }
    // Tree belongs to another token: make this one a shell of it.
    shell_production(i, tn);
  }
  DEALLOCATE(ntc);
  LOGS("tokens scanned again");
  nforms_base = nforms;

  LOGV(ntkns) LCV(nforms);
  LOGV(error_token);
  // Pass 5: build the character set for each plain terminal; report
  // tokens with no usable parse tree or a failed set construction.
  for (i = 0; i++ < ntkns;) {
    token_number_map *tp = &map_token_number[i];
    int nn;
    if (tp->non_terminal_flag ||
        (int) i == error_token ||
        tp->key ||
        tp->token_set_id) {
      continue;
    }
    nn = tp->parse_tree;
    LOGV(i) LCV(nn);
    if (nn == 0 && (k = tp->token_name) != 0) {
      nn = map_token_name[k].parse_tree;
    }
    LOGV(nn);
    if (nn == 0) {
      char_set_error = 5;
    }
    else {
      tp->token_set_id = build_char_set(nn);
    }
    if (char_set_error == 0) {
      continue;
    }

    // NOTE(review): ts is filled here but never consumed below, and the
    // unbounded sprintf could overflow ts[100] on a very long symbol
    // name — verify whether ts is vestigial; snprintf would be safer.
    id = tp->token_name;
    if (id) {
      sprintf(ts," = %s", Symbol(id)->string.pointer());
    }
    else {
      ts[0] = 0;
    }
    switch (char_set_error) {
    case 1:
      ssprintf("Error defining T%03d: ",i);
      break;
    case 5:
      ssprintf("Undefined token T%03d: ",i);
      break;
    }
    atkn(i);
    log_error();
  }
  build_partition_sets(char_set_dict);
  LOGV(ntkns);
  LOGV(nforms);
}
299 | |
/*
 * Partition the character range into equivalence classes and synthesize
 * the tokens and grammar rules needed to recognize them.
 *
 * d is the character-set dictionary.  Each character is mapped to the
 * unique partition (set of character sets containing it); partitions are
 * then given tokens, and each character-set token is given rules deriving
 * its partitions.  Heavily stateful: mutates map_char_number,
 * map_part_number, map_token_number, part_dict, bnf_table, and creates
 * tokens/rules.
 */
static void build_partition_sets(list_dict *d) {
  LOGSECTION("build_partition_sets");
  int i;
  unsigned j, n;
  int *db;
  unsigned *ndx;
  int *lb;
  int m;
  //int *ps;
  int ts;
  unsigned ntk;
  //char *fa;
  unsigned *fl;
  int nf;
  unsigned cn;
  int *list;
  unsigned pn,nc;
  unsigned tn;

  // No characters at all: nothing to partition.
  if (max_char_number == -1 && min_char_number == 0) {
    return;
  }
  //ps = local_array(n_chars, int);
  LocalArray<int> ps(n_chars);
  n = d->nsx;
  db = d->text;
  ndx = d->ndx;
  //set = local_array(n, int *);
  LocalArray<int *> set(n);
  //k = local_array(n, int);
  LocalArray<int> k(n);
  //l = local_array(n, int);
  LocalArray<int> l(n);
  // set[i] walks the i'th character set's sorted element list;
  // l[i] is its length and k[i] the current scan position.
  for (i = 1; (unsigned) i < n; i++) {
    set[i] = (db + ndx[i]);
    l[i] = *(set[i]++) - 1;
    k[i] = 0;
  }

  /*
   * For each character in the character range, scan all the character
   * sets. Make a list of the sets to which this particular character
   * belongs. This list represents a unique partition of the character
   * range. Use a dictionary to give each unique partition an
   * identifying number. Map each character to the partition to which
   * it belongs.
   */

  LOGS("Scanning characters");
  for (i = min_char_number; i <= max_char_number; i++) {
    int ix = i - min_char_number;
    iws();
    for (j = 1; j < n; j++) {
      // Advance set j's cursor past characters below i (merge scan).
      while (k[j] < l[j] && set[j][k[j]] < i) {
        k[j]++;
      }
      if (k[j] == l[j] || set[j][k[j]] > i) {
        continue;
      }
      k[j]++;
      aws(j);
    }
    lb = list_base;
    m = rws();
    map_char_number[ix].part_number = ps[ix] = add_list_dict(lb,m,part_dict);
    LOGV(ix) LCV(ps[ix]);
  }

  /*
   * Scan each partition. For each partition, make a list of all the
   * characters that belong to this partition.
   */

  LOGS("Scanning partitions");
  m = part_dict->nsx;
  check_size(map_part_number, m, m);
  for (i = 0; (unsigned) i < part_dict->nsx; i++) {
    iws();
    for (j = 0; j < (unsigned) n_chars; j++) {
      if ((unsigned) i == map_char_number[j].part_number) {
        aws(j + min_char_number);
      }
    }
    map_part_number[i].chars_index = store_list(chars_list);
    fl = lstptr(map_part_number[i],chars);
    m = map_part_number[i].size = rws();
#ifdef INCLUDE_LOGGING
    if ((unsigned)(*fl - min_char_number) >
        (max_char_number - min_char_number)) {
      LOGV(*fl) LCV(min_char_number) LCV(max_char_number);
    }
#endif
    assert((unsigned)(*fl - min_char_number) <=
           (unsigned) (max_char_number - min_char_number));
    // A one-character partition inherits that character's token, if any.
    if (m == 1
        && (j = map_char_number[*fl - min_char_number].token_number) != 0) {
      map_part_number[i].token_number = j;
    }
    // NOTE(review): j may hold a stale value here when m != 1 — log only.
    LOGV(*fl) LCV(min_char_number) LCV(i) LCV(j);
  }

  // For each character set, record the list of partitions it covers.
  m = part_dict->nsx;
  //fa = local_array(m, char);
  LocalArray<char> fa(m);
  m = char_set_dict->nsx;
  check_size(map_char_set,m,m);
  for (i = 1; i<m; i++) {
    char_set_map *csp = &map_char_set[i];
    partition_set(i, char_set_dict, ps, fa);
    csp->part_index = store_list(part_list);
    csp->nparts = rws();
  }


  ntk = ntkns+1;
  //assert(ok_index(map_token_number, ntkns));
  m = part_dict->nsx;

  // Tokens whose set is exactly one partition: claim the partition, or
  // split off a fresh token when the partition already has an owner.
  LOGS("Scanning tokens");
  for (i = 1; (unsigned) i < ntk; i++) {
    part_number_map *ppn;
    unsigned tn;
    //int ctn = i;
    Token ctn(i);
    unsigned fn;
    ts = map_token_number[i].token_set_id;
    if (ts == 0) {
      continue;
    }
    nf = map_char_set[ts].nparts;
    if (nf != 1) {
      continue;
    }
    fl = lstptr(map_char_set[ts], part);
    ppn = &map_part_number[pn = *fl];
    assert(ok_index(map_part_number, pn));
    list = (int *) lstptr(*ppn, chars);
    //assert (pn > 0 && pn < m);
    assert(pn < (unsigned) m);
    nc = ppn->size;
    tn = ppn->token_number;
    if (tn == 0) {
      // Unowned partition: token i takes it over directly.
      while (nc--) {
        cn = *list++ - min_char_number;
        assert(cn < n_chars);
        map_char_number[cn].token_number = i;
        LOGV(cn) LCV(i);
      }
      assert(pn < part_dict->nsx);
      ppn->token_number = i;
      map_token_number[i].part_number = pn;
      continue;
    }
    if (tn < ntk) {
      // Partition owned by a pre-existing token: create a fresh token
      // for the partition and turn the old owner into a non-terminal
      // deriving it.
      //ntkns++;
      //check_size(map_token_number, ntkns, ntkns);
      //ctn = ntkns;
      ctn = Token::create();
      ctn->value_type = default_input_type;
      //pf1x(ctn);
      //fn = form2();
      fn = makeRule(ctn);
      at(bnf_table, tn, fn);
      Rule(fn)->prim_tkn = tn;
      ppn->token_number = ctn;
      Token(tn)->operatorCandidate = ctn->operatorCandidate = 1;
      while (nc--) {
        cn = *list++ - min_char_number;
        assert(cn < n_chars);
        map_char_number[cn].token_number = ctn;
        LOGV(cn) LCV(ctn);
      }
      map_token_number[ctn].part_number = pn;
      map_token_number[ctn].value_type = default_input_type;
      map_token_number[tn].value_type = default_input_type;
      map_token_number[tn].part_number = 0;
      map_token_number[tn].non_terminal_flag = 1;
      tn = ctn;
    }
    // Token i becomes a non-terminal deriving the partition's token.
    //pf1x(tn);
    //fn = form2();
    fn = makeRule(tn);
    LOGV(i) LCV(fn) LCV(tn);
    at(bnf_table, i, fn);
    Rule(fn)->prim_tkn = i;
    map_token_number[i].non_terminal_flag = 1;
    ctn = tn;
  }
  // Tokens whose set spans several partitions: give every partition a
  // token (creating them as needed) and add one rule per partition.
  for (i = 1; (unsigned) i < ntk; i++) {
    token_number_map *mti = &map_token_number[i];
    ts = mti->token_set_id;
    if (ts == 0) {
      continue;
    }
    nf = map_char_set[ts].nparts;
    if (nf <= 1) {
      continue;
    }
    fl = lstptr(map_char_set[ts], part);
    iws();
    j = 0;
    while (j < (unsigned) nf) {

      pn = fl[j];
      assert (pn > 0 && pn < (unsigned) m);
      tn = map_part_number[pn].token_number;
      if (tn == 0) {
        // Unowned partition: mint a token for it now.
        //ntkns++;
        //check_size(map_token_number,ntkns,ntkns);
        list = (int *) lstptr(map_part_number[pn], chars);
        nc = map_part_number[pn].size;
        tn = Token::create();
        while (nc--) {
          cn = *list++ - min_char_number;
          assert(cn < n_chars);
          map_char_number[cn].token_number = tn;
          LOGV(cn) LCV(tn);
        }
        //tn = ntkns;
        assert(pn < part_dict->nsx);
        map_part_number[pn].token_number = tn;
        map_token_number[tn].part_number = pn;
        map_token_number[tn].value_type = default_input_type;
        map_token_number[tn].operatorCandidate = 1;
      }
      //pf1x(tn);
      //aws(form2());
      aws(makeRule(tn));
      j++;
    }
    // Attach the accumulated rules to token i.
    fl = (unsigned *) list_base;
    nf = rws();
    map_token_number[i].non_terminal_flag = 1;
    for (j = 0; j < (unsigned) nf; j++) {
      LOGV(i) LCV(fl[j]);
      at(bnf_table, i, fl[j]);
      //assert(ok_index(map_form_number, fl[j]));
      Rule rule(fl[j]);
      rule->prim_tkn = i;
    }
  }
}
542 | |
543 static void partition_set(int ts, list_dict *d, int *ps, char *fa) { | |
544 int *list; | |
545 int n, np, i; | |
546 | |
547 np = part_dict->nsx; | |
548 memset(fa, 0, np); | |
549 list = d->text + d->ndx[ts]; | |
550 n = *(list++) - 1; | |
551 for (i = 0; i<n; i++) { | |
552 fa[ps[list[i] - min_char_number]] = 1; | |
553 } | |
554 iws(); | |
555 for (i = 0; i < np; i++) { | |
556 if (fa[i]) { | |
557 aws(i); | |
558 } | |
559 } | |
560 } | |
561 | |
562 int build_char_set(int nn) { | |
563 LOGSECTION("build_char_set"); | |
564 //char *v; | |
565 //int i, k; | |
566 int cs = map_parse_tree[nn].char_set; | |
567 LOGV(cs) LCV(nn); | |
568 | |
569 if (cs) { | |
570 return cs; | |
571 } | |
572 char_set_error = 0; | |
573 LOGV(n_chars); | |
574 | |
575 //int *list = local_array(n_chars, int); | |
576 LocalArray<int> list(n_chars); | |
577 CharBitmap bitmap = map_parse_tree[nn].expression->bitmap(); | |
578 if (!case_sensitive) { | |
579 bitmap.blurCase(); | |
580 } | |
581 int count = bitmap.list(list); | |
582 LOGV(count); | |
583 cs = add_list_dict(list, count, char_set_dict); | |
584 LOGV(cs); | |
585 check_size(map_char_set, cs, cs); | |
586 map_char_set[cs].parse_tree = nn; | |
587 map_parse_tree[nn].char_set = cs; | |
588 return cs; | |
589 } | |
590 | |
591 static void log_undef_sym(int tn) { | |
592 ssprintf("Undefined symbol: %s", Symbol(tn)->string.pointer()); | |
593 log_error(); | |
594 } | |
595 | |
596 int getParseTree(int name) { | |
597 LOGSECTION("getParseTree"); | |
598 int pt = map_token_name[name].parse_tree; | |
599 LOGV(pt); | |
600 if (pt) { | |
601 return pt; | |
602 } | |
603 | |
604 int tn = map_token_name[name].token_number; | |
605 if (tn) { | |
606 pt = map_token_number[tn].parse_tree; | |
607 } | |
608 LOGV(tn) LCV(pt); | |
609 if (pt) { | |
610 return pt; | |
611 } | |
612 | |
613 log_undef_sym(name); | |
614 return 0; | |
615 } |