comparison anagram/agcore/nckwtr.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 /*
2 * AnaGram, A System for Syntax Directed Programming
3 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
4 * See the file COPYING for license and usage terms.
5 *
6 * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility
7 */
8
9
10 #include "agbaltree.h"
11 #include "arrays.h"
12 #include "assert.h"
13 #include "cd.h"
14 #include "data.h"
15 #include "dict.h"
16 #include "lexeme.h"
17 #include "myalloc.h"
18 #include "nckwtr.h"
19 #include "q1glbl.h"
20 #include "rule.h"
21 #include "stacks.h"
22 #include "token.h"
23 #include "tsd.h"
24
25 //#define INCLUDE_LOGGING
26 #include "log.h"
27
28 static int conflict_rule;
29 static int conflict_state;
30
31
32 /*
33 * Strategy
34 *
35 * The data given are two state numbers: the conflict state and the
36 * reduction state. In addition, the conflict rule is given.
37 *
38 * The approach is to work backward from the reduction state to a
39 * state that leads to the conflict state.
40 *
41 * If the characteristic token for the reduction state includes the
42 * conflict rule as an expansion rule, then we don't have to worry
43 * about null productions. Otherwise, there has been a shift of some
44 * zero-length production to get us to the reduction state.
45 *
46 * The first routine to build is a function which will call itself
47 * recursively to find the fork state. It will fail if it cannot find
48 * a fork state from its present state.
49 */
50
51 static int keyword_switch = 0;
52
53 static AgBalancedTree<Triple<int> > triples;
54
55 AgArray<int> new_reduction_stack;
56 int new_reduction_stack_depth;
57 AgArray<int> new_conflict_stack;
58 int new_conflict_stack_depth;
59 tsd *new_rule_derivation;
60
61
62 static int simple_path(int from, int to) {
63 unsigned *p = lstptr(map_state_number[to],previous_states);
64 int nt = map_state_number[to].n_previous_states;
65 int i;
66
67 if (from == to) return 1;
68 for (i = 0; i < nt; i++) {
69 if (xws(p[i])) {
70 continue;
71 }
72 if (simple_path(from, p[i])) {
73 return 1;
74 }
75 fws();
76 }
77 return 0;
78 }
79
80 void clear_conflict_expansion(void) {
81 if (conflict_token == 0) {
82 return;
83 }
84 new_rule_derivation = delete_tsd(new_rule_derivation);
85 token_derivation = delete_tsd(token_derivation);
86 new_reduction_stack = AgArray<int>();
87 new_conflict_stack = AgArray<int>();
88 conflict_token = 0;
89 }
90
91 static int new_analysis(int sn, int rn);
92
93 void analyze_conflict(int csn, int tn, int cfn, int kws) {
94 int flag;
95
96 LOGSECTION("analyze_conflict");
97 kws = kws!=0;
98 if (conflict_state == csn
99 && conflict_token == tn
100 && keyword_switch == kws
101 && conflict_rule == cfn) {
102 return;
103 }
104 clear_conflict_expansion();
105 conflict_state = csn;
106 conflict_token = tn;
107 conflict_rule = cfn;
108 keyword_switch = kws;
109
110 flag = new_analysis(conflict_state, conflict_rule);
111 LOGV(flag);
112 assert(flag);
113 triples.reset();
114 }
115
116 static int equiv_token(Token charToken, Token ruleToken) {
117 if (charToken == ruleToken) {
118 return 1;
119 }
120 AgArray<Rule> expansion = charToken->expansionRuleList;
121 int n = expansion.size();
122 while (n--) {
123 if (expansion[n]->length() == 0 ) {
124 continue;
125 }
126 if (expansion[n]->length() > 1
127 && (unsigned) expansion[n].token(1) != disregard_token) {
128 continue;
129 }
130 if (expansion[n].token(0) == ruleToken) {
131 return 1;
132 }
133 }
134 return 0;
135 }
136
137 static void validate_conflict_stack(void) {
138 int *ctp = list_base;
139 int cfsd = tis();
140 int sx = 0;
141 int nsn = ctp[sx++];
142 int kr = 0;
143 int kr_lim = new_rule_derivation->nt;
144 int rn, rx;
145
146 LOGSECTION("validate_conflict_stack");
147 LOGV(sx) LCV(cfsd);
148 if (sx >= cfsd) return;
149 #ifdef INCLUDE_LOGGING
150 {
151 int i;
152 for (i = 0; i < cfsd; i++) {
153 LOGV(i) LCV(ctp[i]);
154 }
155 int *sb = new_rule_derivation->sb;
156 for (i = 0; i < kr_lim; i += 2) {
157 LOGV(sb[i]) LCV(sb[i+1]);
158 }
159 }
160 #endif
161 check_tsd(new_rule_derivation);
162 xtxf(new_rule_derivation,kr, &rn, &rx);
163 LOGV(rn) LCV(rx);
164 while (rx == 0) {
165 xtxf(new_rule_derivation, ++kr, &rn, &rx);
166 rx--;
167 LOGV(rn) LCV(rx);
168 }
169 if (sx < cfsd) {
170 while (1) {
171 Token charToken = map_state_number[nsn].char_token;
172 int sn = ctp[sx++];
173 LOGV(kr) LCV(kr_lim);
174 assert(kr < kr_lim);
175 //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx]));
176 Token ruleToken = Rule(rn).token(--rx);
177 LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken);
178 //assert(equiv_token(charToken, Rule(rn).token(--rx)));
179 assert(equiv_token(charToken, ruleToken));
180 LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken);
181 //int nextState = new_next_state(sn, ruleToken);
182 //LOGV(nextState);
183 //assert(nsn == nextState);
184 assert(equiv_token(charToken, transitionToken(sn,nsn)));
185 LOGV(sx) LCV(cfsd);
186 if (sx >= cfsd) {
187 break;
188 }
189 nsn = sn;
190 while (rx == 0) {
191 xtxf(new_rule_derivation, ++kr, &rn, &rx);
192 rx--;
193 }
194 }
195 }
196 LOGS("Exiting validate_conflict_stack");
197 }
198
199 /*
200 static int check_leading_token(unsigned hlt, unsigned llt) {
201 unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens);
202 int nlt = map_token_number[hlt].n_leading_tokens;
203 if (hlt == llt) {
204 return 1;
205 }
206 while (nlt--) {
207 if (ltp[nlt] == llt) {
208 return 1;
209 }
210 }
211 return 0;
212 }
213 */
214
215 static int reduction_state_path(int sn) {
216 LOGSECTION("reduction_state_path");
217 LOGV(sn);
218 int *is = dict_str(isht_dict, sn);
219 int nis = (*is++ - 1)/2;
220 while (nis --) {
221 int stateNumber = sn;
222 Rule rn = *is++;
223 unsigned rx = *is++;
224 LOGV(stateNumber) LCV(rn) LCV(rx);
225 iws();
226 while (rx < rn->length()) {
227 Token tn = rn.token(rx);
228 //int flag = check_leading_token(tn,conflict_token);
229 int flag = tn.isLeadingToken(conflict_token);
230 aws(stateNumber);
231 LOGV(stateNumber) LCV(tis());
232 if (flag) {
233 at(new_rule_derivation, (int) rn, rx);
234 return 1;
235 }
236 //if (!map_token_number[tn].zero_length_flag) {
237 // break;
238 //}
239 if (!tn->zero_length_flag) {
240 break;
241 }
242 LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
243 stateNumber = new_next_state(stateNumber,tn);
244 if (stateNumber == 0) {
245 break;
246 }
247 rx++;
248 }
249 rws();
250 }
251 return 0;
252 }
253
254 static int ct_accessible(int sn) {
255 LOGSECTION("ct_accessible");
256 LOGV(sn);
257 int *is = dict_str(isht_dict, sn);
258 int nis = (*is++ - 1)/2;
259 while (nis--) {
260 Rule rn = *is++;
261 unsigned rx = *is++;
262 while (rx < rn->length()) {
263 //int tn = lstptr(map_form_number[rn], tokens)[rx];
264 Token tn = rn.token(rx);
265 //int flag = check_leading_token(tn,conflict_token);
266 int flag = tn.isLeadingToken(conflict_token);
267 if (flag) {
268 return 1;
269 }
270 //if (!map_token_number[tn].zero_length_flag) {
271 // break;
272 //}
273 if (!tn->zero_length_flag) {
274 break;
275 }
276 LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
277 sn = new_next_state(sn,tn);
278 if (sn == 0) {
279 break;
280 }
281 rx++;
282 }
283 }
284 return 0;
285 }
286
287 static int anomalous_keyword_path(int sn) {
288 int *is = dict_str(isht_dict, sn);
289 int nis = (*is++ - 1)/2;
290 while (nis--) {
291 Rule rn = *is++;
292 unsigned rx = *is++;
293 if (rx >= rn->non_vanishing_length) {
294 return 0;
295 }
296 }
297 is = dict_str(isht_dict, sn);
298 nis = (*is++ - 1)/2;
299 while (nis--) {
300 Rule rn = *is++;
301 unsigned rx = *is++;
302 if (rx < rn->length()) {
303 sws(sn);
304 at(new_rule_derivation, (int) rn, rx);
305 return 1;
306 }
307 }
308 return 0;
309 }
310
311 static int hw_reduce(int sn, int rn, int rx);
312
313
314 static int nulls_remaining(int sn, int nsn) {
315 LOGSECTION("nulls_remaining");
316 LOGV(sn) LCV(nsn);
317 int *is = dict_str(isht_dict, nsn);
318 int nis = (*is++ - 1)/2;
319 while (nis --) {
320 Rule rn = *is++;
321 unsigned rx = *is++;
322 if (rx < rn->non_vanishing_length) {
323 continue;
324 }
325 at(new_rule_derivation, (int) rn, rx);
326 if (hw_reduce(sn, rn, rx - 1)) {
327 return 1;
328 }
329 new_rule_derivation->nt--;
330 }
331 return 0;
332 }
333
334 static int hw_reduce(int sn, int rn, int rx) {
335 LOGSECTION("hw_reduce");
336 //if (triples.insert(IntegerTriple(sn, rn, rx))) {
337 // return 0;
338 //}
339 if (triples.insert(Triple<int>(sn, rn, rx))) {
340 return 0;
341 }
342 LOGV(sn) LCV(rn) LCV(rx);
343 if (rx) {
344 int i, j;
345 unsigned *p = lstptr(map_state_number[sn],previous_states);
346 int nt = map_state_number[sn].n_previous_states;
347 //int *sorted = local_array(nt, int);
348 LocalArray<int> sorted(nt);
349 for (i = 0; i < nt; i++) {
350 sorted[i] = p[i];
351 LOGV(p[i]);
352 }
353 for (i = 0; i < nt; i++) {
354 //aws(p[i]);
355 //if (xws(sorted[i])) continue;
356 for (j = i+1; j < nt; j++) {
357 if (sorted[i] > sorted[j]) {
358 int temp = sorted[i];
359 sorted[i] = sorted[j];
360 sorted[j] = temp;
361 }
362 }
363 LOGV(sorted[i]);
364 //if (xws(sorted[i])) continue;
365 aws(sorted[i]);
366
367 #ifdef INCLUDE_LOGGING
368 {
369 for (int index = 0; index < tis(); index++) {
370 LOGV(index) LCV(list_base[index]);
371 }
372 }
373 LOGV(sorted[i]) LCV(tis());
374 #endif
375
376 if (hw_reduce(sorted[i], rn, rx-1)) {
377 return 1;
378 }
379 fws();
380 }
381 return 0;
382 }
383
384
385 {
386 int *sp = ibnfs + ibnfb[rn];
387 int m = ibnfn[rn];
388 int i;
389
390 for (i = 0; i < m; i++) {
391 unsigned *gtp = lstptr(map_state_number[sn],gotos);
392 int ngt = map_state_number[sn].n_gotos;
393 unsigned tn = sp[i];
394 int nsn, crn, crx;
395 int j, k;
396
397 for (j = k = 0; j < ngt; j++, k+=2) {
398 if (gtp[k] == tn) break;
399 }
400
401 if (j < ngt) {
402 nsn = gtp[k+1];
403 if (keyword_switch) {
404 if (ct_accessible(nsn)) {
405 continue;
406 }
407 if (nulls_remaining(sn, nsn)) {
408 return 1;
409 }
410 if (anomalous_keyword_path(nsn)) {
411 return 1;
412 }
413 }
414 else {
415 if (reduction_state_path(nsn)) {
416 return 1;
417 }
418 if (nulls_remaining(sn, nsn)) {
419 return 1;
420 }
421 }
422 continue;
423 }
424 gtp = lstptr(map_state_number[sn], completions);
425 ngt = map_state_number[sn].n_completions;
426 for (j = k = 0; j < ngt; j++, k += 2) {
427 if (gtp[k] == tn) {
428 break;
429 }
430 }
431 if (j == ngt) {
432 continue;
433 }
434 crn = gtp[k+1];
435 crx = Rule(crn)->length();
436 int tx = new_rule_derivation->nt;
437 while (tx--) {
438 int trn, trx;
439 xtx(new_rule_derivation, tx, &trn, &trx);
440 if (trn == crn && trx == crx) {
441 break;
442 }
443 }
444 if (tx >= 0) {
445 continue;
446 }
447 at(new_rule_derivation, crn, crx);
448 if (hw_reduce(sn, crn, crx - 1)) {
449 return 1;
450 }
451 new_rule_derivation->nt--;
452 }
453 }
454 return 0;
455 }
456
457 /*
458 static void trim_ct(void) {
459 int nt = new_rule_derivation->nt;
460 int k = nt - 1;
461 int i, j;
462 LOGSECTION("trim_ct");
463 AgArray<int> list(buildStaticList());
464 LOGV(list.size());
465 int *cs = list.pointer();
466 int ncs = list.size();
467 // cs is list of states in reverse order
468
469 int rn,rx, tn;
470 unsigned *rl;
471 int nrl;
472
473 check_tsd(new_rule_derivation);
474 xtxf(new_rule_derivation, k, &rn, &rx);
475 tn = lstptr(map_form_number[rn],tokens)[rx-1];
476 rl = lstptr(map_token_number[tn],forms);
477 nrl = map_token_number[tn].n_forms;
478
479 iws();
480 while (--k >= 0) {
481
482 for (i = 0; i < k; i++) {
483 unsigned srn, srx;
484 int psx;
485
486 xtxf(new_rule_derivation, i, &srn, &srx);
487 for (j = 0; j < nrl; j++) if (srn == rl[j]) break;
488 if (j == nrl) continue;
489 psx = ncs;
490 for (j = i+1; j < k + 1; j++) {
491 int jrn, jrx;
492 xtxf(new_rule_derivation, j, &jrn, &jrx);
493 psx -= jrx-1;
494 }
495 if (psx <= 1) continue;
496 if (cs[ncs-1] != cs[psx-1]) continue;
497 ncs = psx;
498 break;
499 }
500 if (i < k) {
501 memmove(&new_rule_derivation->sb[2*(i+1)],
502 &new_rule_derivation->sb[2*(k+1)],
503 (nt - k - 1)*2*sizeof(int));
504 nt -= k-i;
505 new_rule_derivation->nt = nt;
506 k = i;
507 }
508 if (k <= 0) break;
509 xtxf(new_rule_derivation, k, &rn, &rx);
510 tn = lstptr(map_form_number[rn],tokens)[--rx];
511 rl = lstptr(map_token_number[tn],forms);
512 nrl = map_token_number[tn].n_forms;
513 while (rx--) aws(cs[--ncs]);
514 }
515 while (ncs--) aws(cs[ncs]);
516 list = buildStaticList();
517 cs = list.pointer();
518 ncs = list.size();
519 iws();
520 while (ncs--) aws(cs[ncs]);
521 }
522 */
523
524 static int new_analysis(int sn, int rn) {
525 int length = Rule(rn)->length();
526
527 LOGSECTION("new_analysis");
528 LOGV(sn) LCV(rn);
529 new_rule_derivation = init_tsd(2);
530 at(new_rule_derivation, rn, length);
531 sws(sn);
532 if (hw_reduce(sn,rn,length)) {
533 int fs;
534
535 new_reduction_stack = buildStaticList();
536 new_reduction_stack_depth = new_reduction_stack.size();
537 fs = fws();
538 //if (tis()) trim_ct();
539 validate_conflict_stack();
540 sws(fs);
541 simple_path(0,fs);
542 concat_list();
543 new_conflict_stack = buildStaticList();
544 new_conflict_stack_depth = new_conflict_stack.size();
545 iws();
546 while(new_reduction_stack_depth--) {
547 aws(new_reduction_stack[new_reduction_stack_depth]);
548 }
549 sws(fs);
550 simple_path(0, fs);
551 concat_list();
552 new_reduction_stack = buildStaticList();
553 new_reduction_stack_depth = new_reduction_stack.size();
554
555 if (!keyword_switch) {
556 int fn, fx, k;
557 k = new_rule_derivation->nt - 1;
558 xtxf(new_rule_derivation, k, &fn, &fx);
559 token_derivation = init_tsd(2);
560 tried = ZALLOCATE(ntkns+1, char);
561 //memset(tried,0,ntkns+1);
562 //x9x(lstptr(map_form_number[fn], tokens)[fx]);
563 x9x(Rule(fn).token(fx));
564 at(token_derivation, fn, fx);
565 DEALLOCATE(tried);
566 }
567 return 1;
568 }
569 rws();
570 return 0;
571 }
572