Mercurial > ~dholland > hg > ag > index.cgi
comparison anagram/agcore/nckwtr.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:13d2b8934445 |
---|---|
1 /* | |
2 * AnaGram, A System for Syntax Directed Programming | |
3 * Copyright 1993-2002 Parsifal Software. All Rights Reserved. | |
4 * See the file COPYING for license and usage terms. | |
5 * | |
6 * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility | |
7 */ | |
8 | |
9 | |
10 #include "agbaltree.h" | |
11 #include "arrays.h" | |
12 #include "assert.h" | |
13 #include "cd.h" | |
14 #include "data.h" | |
15 #include "dict.h" | |
16 #include "lexeme.h" | |
17 #include "myalloc.h" | |
18 #include "nckwtr.h" | |
19 #include "q1glbl.h" | |
20 #include "rule.h" | |
21 #include "stacks.h" | |
22 #include "token.h" | |
23 #include "tsd.h" | |
24 | |
25 //#define INCLUDE_LOGGING | |
26 #include "log.h" | |
27 | |
28 static int conflict_rule; | |
29 static int conflict_state; | |
30 | |
31 | |
32 /* | |
33 * Strategy | |
34 * | |
35 * The data given are two state numbers: the conflict state and the | |
36 * reduction state. In addition, the conflict rule is given. | |
37 * | |
38 * The approach is to work backward from the reduction state to a | |
39 * state that leads to the conflict state. | |
40 * | |
41 * If the characteristic token for the reduction state includes the | |
42 * conflict rule as an expansion rule, then we don't have to worry | |
43 * about null productions. Otherwise, there has been a shift of some | |
44 * zero-length production to get us to the reduction state. | |
45 * | |
46 * The first routine to build is a function which will call itself | |
47 * recursively to find the fork state. It will fail if it cannot find | |
48 * a fork state from its present state. | |
49 */ | |
50 | |
51 static int keyword_switch = 0; | |
52 | |
53 static AgBalancedTree<Triple<int> > triples; | |
54 | |
55 AgArray<int> new_reduction_stack; | |
56 int new_reduction_stack_depth; | |
57 AgArray<int> new_conflict_stack; | |
58 int new_conflict_stack_depth; | |
59 tsd *new_rule_derivation; | |
60 | |
61 | |
62 static int simple_path(int from, int to) { | |
63 unsigned *p = lstptr(map_state_number[to],previous_states); | |
64 int nt = map_state_number[to].n_previous_states; | |
65 int i; | |
66 | |
67 if (from == to) return 1; | |
68 for (i = 0; i < nt; i++) { | |
69 if (xws(p[i])) { | |
70 continue; | |
71 } | |
72 if (simple_path(from, p[i])) { | |
73 return 1; | |
74 } | |
75 fws(); | |
76 } | |
77 return 0; | |
78 } | |
79 | |
80 void clear_conflict_expansion(void) { | |
81 if (conflict_token == 0) { | |
82 return; | |
83 } | |
84 new_rule_derivation = delete_tsd(new_rule_derivation); | |
85 token_derivation = delete_tsd(token_derivation); | |
86 new_reduction_stack = AgArray<int>(); | |
87 new_conflict_stack = AgArray<int>(); | |
88 conflict_token = 0; | |
89 } | |
90 | |
91 static int new_analysis(int sn, int rn); | |
92 | |
93 void analyze_conflict(int csn, int tn, int cfn, int kws) { | |
94 int flag; | |
95 | |
96 LOGSECTION("analyze_conflict"); | |
97 kws = kws!=0; | |
98 if (conflict_state == csn | |
99 && conflict_token == tn | |
100 && keyword_switch == kws | |
101 && conflict_rule == cfn) { | |
102 return; | |
103 } | |
104 clear_conflict_expansion(); | |
105 conflict_state = csn; | |
106 conflict_token = tn; | |
107 conflict_rule = cfn; | |
108 keyword_switch = kws; | |
109 | |
110 flag = new_analysis(conflict_state, conflict_rule); | |
111 LOGV(flag); | |
112 assert(flag); | |
113 triples.reset(); | |
114 } | |
115 | |
116 static int equiv_token(Token charToken, Token ruleToken) { | |
117 if (charToken == ruleToken) { | |
118 return 1; | |
119 } | |
120 AgArray<Rule> expansion = charToken->expansionRuleList; | |
121 int n = expansion.size(); | |
122 while (n--) { | |
123 if (expansion[n]->length() == 0 ) { | |
124 continue; | |
125 } | |
126 if (expansion[n]->length() > 1 | |
127 && (unsigned) expansion[n].token(1) != disregard_token) { | |
128 continue; | |
129 } | |
130 if (expansion[n].token(0) == ruleToken) { | |
131 return 1; | |
132 } | |
133 } | |
134 return 0; | |
135 } | |
136 | |
137 static void validate_conflict_stack(void) { | |
138 int *ctp = list_base; | |
139 int cfsd = tis(); | |
140 int sx = 0; | |
141 int nsn = ctp[sx++]; | |
142 int kr = 0; | |
143 int kr_lim = new_rule_derivation->nt; | |
144 int rn, rx; | |
145 | |
146 LOGSECTION("validate_conflict_stack"); | |
147 LOGV(sx) LCV(cfsd); | |
148 if (sx >= cfsd) return; | |
149 #ifdef INCLUDE_LOGGING | |
150 { | |
151 int i; | |
152 for (i = 0; i < cfsd; i++) { | |
153 LOGV(i) LCV(ctp[i]); | |
154 } | |
155 int *sb = new_rule_derivation->sb; | |
156 for (i = 0; i < kr_lim; i += 2) { | |
157 LOGV(sb[i]) LCV(sb[i+1]); | |
158 } | |
159 } | |
160 #endif | |
161 check_tsd(new_rule_derivation); | |
162 xtxf(new_rule_derivation,kr, &rn, &rx); | |
163 LOGV(rn) LCV(rx); | |
164 while (rx == 0) { | |
165 xtxf(new_rule_derivation, ++kr, &rn, &rx); | |
166 rx--; | |
167 LOGV(rn) LCV(rx); | |
168 } | |
169 if (sx < cfsd) { | |
170 while (1) { | |
171 Token charToken = map_state_number[nsn].char_token; | |
172 int sn = ctp[sx++]; | |
173 LOGV(kr) LCV(kr_lim); | |
174 assert(kr < kr_lim); | |
175 //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx])); | |
176 Token ruleToken = Rule(rn).token(--rx); | |
177 LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken); | |
178 //assert(equiv_token(charToken, Rule(rn).token(--rx))); | |
179 assert(equiv_token(charToken, ruleToken)); | |
180 LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken); | |
181 //int nextState = new_next_state(sn, ruleToken); | |
182 //LOGV(nextState); | |
183 //assert(nsn == nextState); | |
184 assert(equiv_token(charToken, transitionToken(sn,nsn))); | |
185 LOGV(sx) LCV(cfsd); | |
186 if (sx >= cfsd) { | |
187 break; | |
188 } | |
189 nsn = sn; | |
190 while (rx == 0) { | |
191 xtxf(new_rule_derivation, ++kr, &rn, &rx); | |
192 rx--; | |
193 } | |
194 } | |
195 } | |
196 LOGS("Exiting validate_conflict_stack"); | |
197 } | |
198 | |
199 /* | |
200 static int check_leading_token(unsigned hlt, unsigned llt) { | |
201 unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens); | |
202 int nlt = map_token_number[hlt].n_leading_tokens; | |
203 if (hlt == llt) { | |
204 return 1; | |
205 } | |
206 while (nlt--) { | |
207 if (ltp[nlt] == llt) { | |
208 return 1; | |
209 } | |
210 } | |
211 return 0; | |
212 } | |
213 */ | |
214 | |
215 static int reduction_state_path(int sn) { | |
216 LOGSECTION("reduction_state_path"); | |
217 LOGV(sn); | |
218 int *is = dict_str(isht_dict, sn); | |
219 int nis = (*is++ - 1)/2; | |
220 while (nis --) { | |
221 int stateNumber = sn; | |
222 Rule rn = *is++; | |
223 unsigned rx = *is++; | |
224 LOGV(stateNumber) LCV(rn) LCV(rx); | |
225 iws(); | |
226 while (rx < rn->length()) { | |
227 Token tn = rn.token(rx); | |
228 //int flag = check_leading_token(tn,conflict_token); | |
229 int flag = tn.isLeadingToken(conflict_token); | |
230 aws(stateNumber); | |
231 LOGV(stateNumber) LCV(tis()); | |
232 if (flag) { | |
233 at(new_rule_derivation, (int) rn, rx); | |
234 return 1; | |
235 } | |
236 //if (!map_token_number[tn].zero_length_flag) { | |
237 // break; | |
238 //} | |
239 if (!tn->zero_length_flag) { | |
240 break; | |
241 } | |
242 LOGV(rn) LCV(rx) LCV(sn) LCV(tn); | |
243 stateNumber = new_next_state(stateNumber,tn); | |
244 if (stateNumber == 0) { | |
245 break; | |
246 } | |
247 rx++; | |
248 } | |
249 rws(); | |
250 } | |
251 return 0; | |
252 } | |
253 | |
254 static int ct_accessible(int sn) { | |
255 LOGSECTION("ct_accessible"); | |
256 LOGV(sn); | |
257 int *is = dict_str(isht_dict, sn); | |
258 int nis = (*is++ - 1)/2; | |
259 while (nis--) { | |
260 Rule rn = *is++; | |
261 unsigned rx = *is++; | |
262 while (rx < rn->length()) { | |
263 //int tn = lstptr(map_form_number[rn], tokens)[rx]; | |
264 Token tn = rn.token(rx); | |
265 //int flag = check_leading_token(tn,conflict_token); | |
266 int flag = tn.isLeadingToken(conflict_token); | |
267 if (flag) { | |
268 return 1; | |
269 } | |
270 //if (!map_token_number[tn].zero_length_flag) { | |
271 // break; | |
272 //} | |
273 if (!tn->zero_length_flag) { | |
274 break; | |
275 } | |
276 LOGV(rn) LCV(rx) LCV(sn) LCV(tn); | |
277 sn = new_next_state(sn,tn); | |
278 if (sn == 0) { | |
279 break; | |
280 } | |
281 rx++; | |
282 } | |
283 } | |
284 return 0; | |
285 } | |
286 | |
287 static int anomalous_keyword_path(int sn) { | |
288 int *is = dict_str(isht_dict, sn); | |
289 int nis = (*is++ - 1)/2; | |
290 while (nis--) { | |
291 Rule rn = *is++; | |
292 unsigned rx = *is++; | |
293 if (rx >= rn->non_vanishing_length) { | |
294 return 0; | |
295 } | |
296 } | |
297 is = dict_str(isht_dict, sn); | |
298 nis = (*is++ - 1)/2; | |
299 while (nis--) { | |
300 Rule rn = *is++; | |
301 unsigned rx = *is++; | |
302 if (rx < rn->length()) { | |
303 sws(sn); | |
304 at(new_rule_derivation, (int) rn, rx); | |
305 return 1; | |
306 } | |
307 } | |
308 return 0; | |
309 } | |
310 | |
311 static int hw_reduce(int sn, int rn, int rx); | |
312 | |
313 | |
314 static int nulls_remaining(int sn, int nsn) { | |
315 LOGSECTION("nulls_remaining"); | |
316 LOGV(sn) LCV(nsn); | |
317 int *is = dict_str(isht_dict, nsn); | |
318 int nis = (*is++ - 1)/2; | |
319 while (nis --) { | |
320 Rule rn = *is++; | |
321 unsigned rx = *is++; | |
322 if (rx < rn->non_vanishing_length) { | |
323 continue; | |
324 } | |
325 at(new_rule_derivation, (int) rn, rx); | |
326 if (hw_reduce(sn, rn, rx - 1)) { | |
327 return 1; | |
328 } | |
329 new_rule_derivation->nt--; | |
330 } | |
331 return 0; | |
332 } | |
333 | |
334 static int hw_reduce(int sn, int rn, int rx) { | |
335 LOGSECTION("hw_reduce"); | |
336 //if (triples.insert(IntegerTriple(sn, rn, rx))) { | |
337 // return 0; | |
338 //} | |
339 if (triples.insert(Triple<int>(sn, rn, rx))) { | |
340 return 0; | |
341 } | |
342 LOGV(sn) LCV(rn) LCV(rx); | |
343 if (rx) { | |
344 int i, j; | |
345 unsigned *p = lstptr(map_state_number[sn],previous_states); | |
346 int nt = map_state_number[sn].n_previous_states; | |
347 //int *sorted = local_array(nt, int); | |
348 LocalArray<int> sorted(nt); | |
349 for (i = 0; i < nt; i++) { | |
350 sorted[i] = p[i]; | |
351 LOGV(p[i]); | |
352 } | |
353 for (i = 0; i < nt; i++) { | |
354 //aws(p[i]); | |
355 //if (xws(sorted[i])) continue; | |
356 for (j = i+1; j < nt; j++) { | |
357 if (sorted[i] > sorted[j]) { | |
358 int temp = sorted[i]; | |
359 sorted[i] = sorted[j]; | |
360 sorted[j] = temp; | |
361 } | |
362 } | |
363 LOGV(sorted[i]); | |
364 //if (xws(sorted[i])) continue; | |
365 aws(sorted[i]); | |
366 | |
367 #ifdef INCLUDE_LOGGING | |
368 { | |
369 for (int index = 0; index < tis(); index++) { | |
370 LOGV(index) LCV(list_base[index]); | |
371 } | |
372 } | |
373 LOGV(sorted[i]) LCV(tis()); | |
374 #endif | |
375 | |
376 if (hw_reduce(sorted[i], rn, rx-1)) { | |
377 return 1; | |
378 } | |
379 fws(); | |
380 } | |
381 return 0; | |
382 } | |
383 | |
384 | |
385 { | |
386 int *sp = ibnfs + ibnfb[rn]; | |
387 int m = ibnfn[rn]; | |
388 int i; | |
389 | |
390 for (i = 0; i < m; i++) { | |
391 unsigned *gtp = lstptr(map_state_number[sn],gotos); | |
392 int ngt = map_state_number[sn].n_gotos; | |
393 unsigned tn = sp[i]; | |
394 int nsn, crn, crx; | |
395 int j, k; | |
396 | |
397 for (j = k = 0; j < ngt; j++, k+=2) { | |
398 if (gtp[k] == tn) break; | |
399 } | |
400 | |
401 if (j < ngt) { | |
402 nsn = gtp[k+1]; | |
403 if (keyword_switch) { | |
404 if (ct_accessible(nsn)) { | |
405 continue; | |
406 } | |
407 if (nulls_remaining(sn, nsn)) { | |
408 return 1; | |
409 } | |
410 if (anomalous_keyword_path(nsn)) { | |
411 return 1; | |
412 } | |
413 } | |
414 else { | |
415 if (reduction_state_path(nsn)) { | |
416 return 1; | |
417 } | |
418 if (nulls_remaining(sn, nsn)) { | |
419 return 1; | |
420 } | |
421 } | |
422 continue; | |
423 } | |
424 gtp = lstptr(map_state_number[sn], completions); | |
425 ngt = map_state_number[sn].n_completions; | |
426 for (j = k = 0; j < ngt; j++, k += 2) { | |
427 if (gtp[k] == tn) { | |
428 break; | |
429 } | |
430 } | |
431 if (j == ngt) { | |
432 continue; | |
433 } | |
434 crn = gtp[k+1]; | |
435 crx = Rule(crn)->length(); | |
436 int tx = new_rule_derivation->nt; | |
437 while (tx--) { | |
438 int trn, trx; | |
439 xtx(new_rule_derivation, tx, &trn, &trx); | |
440 if (trn == crn && trx == crx) { | |
441 break; | |
442 } | |
443 } | |
444 if (tx >= 0) { | |
445 continue; | |
446 } | |
447 at(new_rule_derivation, crn, crx); | |
448 if (hw_reduce(sn, crn, crx - 1)) { | |
449 return 1; | |
450 } | |
451 new_rule_derivation->nt--; | |
452 } | |
453 } | |
454 return 0; | |
455 } | |
456 | |
457 /* | |
458 static void trim_ct(void) { | |
459 int nt = new_rule_derivation->nt; | |
460 int k = nt - 1; | |
461 int i, j; | |
462 LOGSECTION("trim_ct"); | |
463 AgArray<int> list(buildStaticList()); | |
464 LOGV(list.size()); | |
465 int *cs = list.pointer(); | |
466 int ncs = list.size(); | |
467 // cs is list of states in reverse order | |
468 | |
469 int rn,rx, tn; | |
470 unsigned *rl; | |
471 int nrl; | |
472 | |
473 check_tsd(new_rule_derivation); | |
474 xtxf(new_rule_derivation, k, &rn, &rx); | |
475 tn = lstptr(map_form_number[rn],tokens)[rx-1]; | |
476 rl = lstptr(map_token_number[tn],forms); | |
477 nrl = map_token_number[tn].n_forms; | |
478 | |
479 iws(); | |
480 while (--k >= 0) { | |
481 | |
482 for (i = 0; i < k; i++) { | |
483 unsigned srn, srx; | |
484 int psx; | |
485 | |
486 xtxf(new_rule_derivation, i, &srn, &srx); | |
487 for (j = 0; j < nrl; j++) if (srn == rl[j]) break; | |
488 if (j == nrl) continue; | |
489 psx = ncs; | |
490 for (j = i+1; j < k + 1; j++) { | |
491 int jrn, jrx; | |
492 xtxf(new_rule_derivation, j, &jrn, &jrx); | |
493 psx -= jrx-1; | |
494 } | |
495 if (psx <= 1) continue; | |
496 if (cs[ncs-1] != cs[psx-1]) continue; | |
497 ncs = psx; | |
498 break; | |
499 } | |
500 if (i < k) { | |
501 memmove(&new_rule_derivation->sb[2*(i+1)], | |
502 &new_rule_derivation->sb[2*(k+1)], | |
503 (nt - k - 1)*2*sizeof(int)); | |
504 nt -= k-i; | |
505 new_rule_derivation->nt = nt; | |
506 k = i; | |
507 } | |
508 if (k <= 0) break; | |
509 xtxf(new_rule_derivation, k, &rn, &rx); | |
510 tn = lstptr(map_form_number[rn],tokens)[--rx]; | |
511 rl = lstptr(map_token_number[tn],forms); | |
512 nrl = map_token_number[tn].n_forms; | |
513 while (rx--) aws(cs[--ncs]); | |
514 } | |
515 while (ncs--) aws(cs[ncs]); | |
516 list = buildStaticList(); | |
517 cs = list.pointer(); | |
518 ncs = list.size(); | |
519 iws(); | |
520 while (ncs--) aws(cs[ncs]); | |
521 } | |
522 */ | |
523 | |
524 static int new_analysis(int sn, int rn) { | |
525 int length = Rule(rn)->length(); | |
526 | |
527 LOGSECTION("new_analysis"); | |
528 LOGV(sn) LCV(rn); | |
529 new_rule_derivation = init_tsd(2); | |
530 at(new_rule_derivation, rn, length); | |
531 sws(sn); | |
532 if (hw_reduce(sn,rn,length)) { | |
533 int fs; | |
534 | |
535 new_reduction_stack = buildStaticList(); | |
536 new_reduction_stack_depth = new_reduction_stack.size(); | |
537 fs = fws(); | |
538 //if (tis()) trim_ct(); | |
539 validate_conflict_stack(); | |
540 sws(fs); | |
541 simple_path(0,fs); | |
542 concat_list(); | |
543 new_conflict_stack = buildStaticList(); | |
544 new_conflict_stack_depth = new_conflict_stack.size(); | |
545 iws(); | |
546 while(new_reduction_stack_depth--) { | |
547 aws(new_reduction_stack[new_reduction_stack_depth]); | |
548 } | |
549 sws(fs); | |
550 simple_path(0, fs); | |
551 concat_list(); | |
552 new_reduction_stack = buildStaticList(); | |
553 new_reduction_stack_depth = new_reduction_stack.size(); | |
554 | |
555 if (!keyword_switch) { | |
556 int fn, fx, k; | |
557 k = new_rule_derivation->nt - 1; | |
558 xtxf(new_rule_derivation, k, &fn, &fx); | |
559 token_derivation = init_tsd(2); | |
560 tried = ZALLOCATE(ntkns+1, char); | |
561 //memset(tried,0,ntkns+1); | |
562 //x9x(lstptr(map_form_number[fn], tokens)[fx]); | |
563 x9x(Rule(fn).token(fx)); | |
564 at(token_derivation, fn, fx); | |
565 DEALLOCATE(tried); | |
566 } | |
567 return 1; | |
568 } | |
569 rws(); | |
570 return 0; | |
571 } | |
572 |