Mercurial > ~dholland > hg > ag > index.cgi
diff anagram/agcore/nckwtr.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anagram/agcore/nckwtr.cpp Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,572 @@ +/* + * AnaGram, A System for Syntax Directed Programming + * Copyright 1993-2002 Parsifal Software. All Rights Reserved. + * See the file COPYING for license and usage terms. + * + * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility + */ + + +#include "agbaltree.h" +#include "arrays.h" +#include "assert.h" +#include "cd.h" +#include "data.h" +#include "dict.h" +#include "lexeme.h" +#include "myalloc.h" +#include "nckwtr.h" +#include "q1glbl.h" +#include "rule.h" +#include "stacks.h" +#include "token.h" +#include "tsd.h" + +//#define INCLUDE_LOGGING +#include "log.h" + +static int conflict_rule; +static int conflict_state; + + +/* + * Strategy + * + * The data given are two state numbers: the conflict state and the + * reduction state. In addition, the conflict rule is given. + * + * The approach is to work backward from the reduction state to a + * state that leads to the conflict state. + * + * If the characteristic token for the reduction state includes the + * conflict rule as an expansion rule, then we don't have to worry + * about null productions. Otherwise, there has been a shift of some + * zero-length production to get us to the reduction state. + * + * The first routine to build is a function which will call itself + * recursively to find the fork state. It will fail if it cannot find + * a fork state from its present state. + */ + +static int keyword_switch = 0; + +static AgBalancedTree<Triple<int> > triples; + +AgArray<int> new_reduction_stack; +int new_reduction_stack_depth; +AgArray<int> new_conflict_stack; +int new_conflict_stack_depth; +tsd *new_rule_derivation; + + +static int simple_path(int from, int to) { + unsigned *p = lstptr(map_state_number[to],previous_states); + int nt = map_state_number[to].n_previous_states; + int i; + + if (from == to) return 1; + for (i = 0; i < nt; i++) { + if (xws(p[i])) { + continue; + } + if (simple_path(from, p[i])) { + return 1; + } + fws(); + } + return 0; +} + +void clear_conflict_expansion(void) { + if (conflict_token == 0) { + return; + } + new_rule_derivation = delete_tsd(new_rule_derivation); + token_derivation = delete_tsd(token_derivation); + new_reduction_stack = AgArray<int>(); + new_conflict_stack = AgArray<int>(); + conflict_token = 0; +} + +static int new_analysis(int sn, int rn); + +void analyze_conflict(int csn, int tn, int cfn, int kws) { + int flag; + + LOGSECTION("analyze_conflict"); + kws = kws!=0; + if (conflict_state == csn + && conflict_token == tn + && keyword_switch == kws + && conflict_rule == cfn) { + return; + } + clear_conflict_expansion(); + conflict_state = csn; + conflict_token = tn; + conflict_rule = cfn; + keyword_switch = kws; + + flag = new_analysis(conflict_state, conflict_rule); + LOGV(flag); + assert(flag); + triples.reset(); +} + +static int equiv_token(Token charToken, Token ruleToken) { + if (charToken == ruleToken) { + return 1; + } + AgArray<Rule> expansion = charToken->expansionRuleList; + int n = expansion.size(); + while (n--) { + if (expansion[n]->length() == 0 ) { + continue; + } + if (expansion[n]->length() > 1 + && (unsigned) expansion[n].token(1) != disregard_token) { + continue; + } + if (expansion[n].token(0) == ruleToken) { + return 1; + } + } + return 0; +} + +static void validate_conflict_stack(void) { + int *ctp = list_base; + int cfsd = tis(); + int sx = 0; + int nsn = ctp[sx++]; + int kr = 0; + int kr_lim = new_rule_derivation->nt; + int rn, rx; + + LOGSECTION("validate_conflict_stack"); + LOGV(sx) LCV(cfsd); + if (sx >= cfsd) return; +#ifdef INCLUDE_LOGGING + { + int i; + for (i = 0; i < cfsd; i++) { + LOGV(i) LCV(ctp[i]); + } + int *sb = new_rule_derivation->sb; + for (i = 0; i < kr_lim; i += 2) { + LOGV(sb[i]) LCV(sb[i+1]); + } + } +#endif + check_tsd(new_rule_derivation); + xtxf(new_rule_derivation,kr, &rn, &rx); + LOGV(rn) LCV(rx); + while (rx == 0) { + xtxf(new_rule_derivation, ++kr, &rn, &rx); + rx--; + LOGV(rn) LCV(rx); + } + if (sx < cfsd) { + while (1) { + Token charToken = map_state_number[nsn].char_token; + int sn = ctp[sx++]; + LOGV(kr) LCV(kr_lim); + assert(kr < kr_lim); + //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx])); + Token ruleToken = Rule(rn).token(--rx); + LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken); + //assert(equiv_token(charToken, Rule(rn).token(--rx))); + assert(equiv_token(charToken, ruleToken)); + LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken); + //int nextState = new_next_state(sn, ruleToken); + //LOGV(nextState); + //assert(nsn == nextState); + assert(equiv_token(charToken, transitionToken(sn,nsn))); + LOGV(sx) LCV(cfsd); + if (sx >= cfsd) { + break; + } + nsn = sn; + while (rx == 0) { + xtxf(new_rule_derivation, ++kr, &rn, &rx); + rx--; + } + } + } + LOGS("Exiting validate_conflict_stack"); +} + +/* +static int check_leading_token(unsigned hlt, unsigned llt) { + unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens); + int nlt = map_token_number[hlt].n_leading_tokens; + if (hlt == llt) { + return 1; + } + while (nlt--) { + if (ltp[nlt] == llt) { + return 1; + } + } + return 0; +} +*/ + +static int reduction_state_path(int sn) { + LOGSECTION("reduction_state_path"); + LOGV(sn); + int *is = dict_str(isht_dict, sn); + int nis = (*is++ - 1)/2; + while (nis --) { + int stateNumber = sn; + Rule rn = *is++; + unsigned rx = *is++; + LOGV(stateNumber) LCV(rn) LCV(rx); + iws(); + while (rx < rn->length()) { + Token tn = rn.token(rx); + //int flag = check_leading_token(tn,conflict_token); + int flag = tn.isLeadingToken(conflict_token); + aws(stateNumber); + LOGV(stateNumber) LCV(tis()); + if (flag) { + at(new_rule_derivation, (int) rn, rx); + return 1; + } + //if (!map_token_number[tn].zero_length_flag) { + // break; + //} + if (!tn->zero_length_flag) { + break; + } + LOGV(rn) LCV(rx) LCV(sn) LCV(tn); + stateNumber = new_next_state(stateNumber,tn); + if (stateNumber == 0) { + break; + } + rx++; + } + rws(); + } + return 0; +} + +static int ct_accessible(int sn) { + LOGSECTION("ct_accessible"); + LOGV(sn); + int *is = dict_str(isht_dict, sn); + int nis = (*is++ - 1)/2; + while (nis--) { + Rule rn = *is++; + unsigned rx = *is++; + while (rx < rn->length()) { + //int tn = lstptr(map_form_number[rn], tokens)[rx]; + Token tn = rn.token(rx); + //int flag = check_leading_token(tn,conflict_token); + int flag = tn.isLeadingToken(conflict_token); + if (flag) { + return 1; + } + //if (!map_token_number[tn].zero_length_flag) { + // break; + //} + if (!tn->zero_length_flag) { + break; + } + LOGV(rn) LCV(rx) LCV(sn) LCV(tn); + sn = new_next_state(sn,tn); + if (sn == 0) { + break; + } + rx++; + } + } + return 0; +} + +static int anomalous_keyword_path(int sn) { + int *is = dict_str(isht_dict, sn); + int nis = (*is++ - 1)/2; + while (nis--) { + Rule rn = *is++; + unsigned rx = *is++; + if (rx >= rn->non_vanishing_length) { + return 0; + } + } + is = dict_str(isht_dict, sn); + nis = (*is++ - 1)/2; + while (nis--) { + Rule rn = *is++; + unsigned rx = *is++; + if (rx < rn->length()) { + sws(sn); + at(new_rule_derivation, (int) rn, rx); + return 1; + } + } + return 0; +} + +static int hw_reduce(int sn, int rn, int rx); + + +static int nulls_remaining(int sn, int nsn) { + LOGSECTION("nulls_remaining"); + LOGV(sn) LCV(nsn); + int *is = dict_str(isht_dict, nsn); + int nis = (*is++ - 1)/2; + while (nis --) { + Rule rn = *is++; + unsigned rx = *is++; + if (rx < rn->non_vanishing_length) { + continue; + } + at(new_rule_derivation, (int) rn, rx); + if (hw_reduce(sn, rn, rx - 1)) { + return 1; + } + new_rule_derivation->nt--; + } + return 0; +} + +static int hw_reduce(int sn, int rn, int rx) { + LOGSECTION("hw_reduce"); + //if (triples.insert(IntegerTriple(sn, rn, rx))) { + // return 0; + //} + if (triples.insert(Triple<int>(sn, rn, rx))) { + return 0; + } + LOGV(sn) LCV(rn) LCV(rx); + if (rx) { + int i, j; + unsigned *p = lstptr(map_state_number[sn],previous_states); + int nt = map_state_number[sn].n_previous_states; + //int *sorted = local_array(nt, int); + LocalArray<int> sorted(nt); + for (i = 0; i < nt; i++) { + sorted[i] = p[i]; + LOGV(p[i]); + } + for (i = 0; i < nt; i++) { + //aws(p[i]); + //if (xws(sorted[i])) continue; + for (j = i+1; j < nt; j++) { + if (sorted[i] > sorted[j]) { + int temp = sorted[i]; + sorted[i] = sorted[j]; + sorted[j] = temp; + } + } + LOGV(sorted[i]); + //if (xws(sorted[i])) continue; + aws(sorted[i]); + +#ifdef INCLUDE_LOGGING + { + for (int index = 0; index < tis(); index++) { + LOGV(index) LCV(list_base[index]); + } + } + LOGV(sorted[i]) LCV(tis()); +#endif + + if (hw_reduce(sorted[i], rn, rx-1)) { + return 1; + } + fws(); + } + return 0; + } + + + { + int *sp = ibnfs + ibnfb[rn]; + int m = ibnfn[rn]; + int i; + + for (i = 0; i < m; i++) { + unsigned *gtp = lstptr(map_state_number[sn],gotos); + int ngt = map_state_number[sn].n_gotos; + unsigned tn = sp[i]; + int nsn, crn, crx; + int j, k; + + for (j = k = 0; j < ngt; j++, k+=2) { + if (gtp[k] == tn) break; + } + + if (j < ngt) { + nsn = gtp[k+1]; + if (keyword_switch) { + if (ct_accessible(nsn)) { + continue; + } + if (nulls_remaining(sn, nsn)) { + return 1; + } + if (anomalous_keyword_path(nsn)) { + return 1; + } + } + else { + if (reduction_state_path(nsn)) { + return 1; + } + if (nulls_remaining(sn, nsn)) { + return 1; + } + } + continue; + } + gtp = lstptr(map_state_number[sn], completions); + ngt = map_state_number[sn].n_completions; + for (j = k = 0; j < ngt; j++, k += 2) { + if (gtp[k] == tn) { + break; + } + } + if (j == ngt) { + continue; + } + crn = gtp[k+1]; + crx = Rule(crn)->length(); + int tx = new_rule_derivation->nt; + while (tx--) { + int trn, trx; + xtx(new_rule_derivation, tx, &trn, &trx); + if (trn == crn && trx == crx) { + break; + } + } + if (tx >= 0) { + continue; + } + at(new_rule_derivation, crn, crx); + if (hw_reduce(sn, crn, crx - 1)) { + return 1; + } + new_rule_derivation->nt--; + } + } + return 0; +} + +/* +static void trim_ct(void) { + int nt = new_rule_derivation->nt; + int k = nt - 1; + int i, j; + LOGSECTION("trim_ct"); + AgArray<int> list(buildStaticList()); + LOGV(list.size()); + int *cs = list.pointer(); + int ncs = list.size(); +// cs is list of states in reverse order + + int rn,rx, tn; + unsigned *rl; + int nrl; + + check_tsd(new_rule_derivation); + xtxf(new_rule_derivation, k, &rn, &rx); + tn = lstptr(map_form_number[rn],tokens)[rx-1]; + rl = lstptr(map_token_number[tn],forms); + nrl = map_token_number[tn].n_forms; + + iws(); + while (--k >= 0) { + + for (i = 0; i < k; i++) { + unsigned srn, srx; + int psx; + + xtxf(new_rule_derivation, i, &srn, &srx); + for (j = 0; j < nrl; j++) if (srn == rl[j]) break; + if (j == nrl) continue; + psx = ncs; + for (j = i+1; j < k + 1; j++) { + int jrn, jrx; + xtxf(new_rule_derivation, j, &jrn, &jrx); + psx -= jrx-1; + } + if (psx <= 1) continue; + if (cs[ncs-1] != cs[psx-1]) continue; + ncs = psx; + break; + } + if (i < k) { + memmove(&new_rule_derivation->sb[2*(i+1)], + &new_rule_derivation->sb[2*(k+1)], + (nt - k - 1)*2*sizeof(int)); + nt -= k-i; + new_rule_derivation->nt = nt; + k = i; + } + if (k <= 0) break; + xtxf(new_rule_derivation, k, &rn, &rx); + tn = lstptr(map_form_number[rn],tokens)[--rx]; + rl = lstptr(map_token_number[tn],forms); + nrl = map_token_number[tn].n_forms; + while (rx--) aws(cs[--ncs]); + } + while (ncs--) aws(cs[ncs]); + list = buildStaticList(); + cs = list.pointer(); + ncs = list.size(); + iws(); + while (ncs--) aws(cs[ncs]); +} +*/ + +static int new_analysis(int sn, int rn) { + int length = Rule(rn)->length(); + + LOGSECTION("new_analysis"); + LOGV(sn) LCV(rn); + new_rule_derivation = init_tsd(2); + at(new_rule_derivation, rn, length); + sws(sn); + if (hw_reduce(sn,rn,length)) { + int fs; + + new_reduction_stack = buildStaticList(); + new_reduction_stack_depth = new_reduction_stack.size(); + fs = fws(); + //if (tis()) trim_ct(); + validate_conflict_stack(); + sws(fs); + simple_path(0,fs); + concat_list(); + new_conflict_stack = buildStaticList(); + new_conflict_stack_depth = new_conflict_stack.size(); + iws(); + while(new_reduction_stack_depth--) { + aws(new_reduction_stack[new_reduction_stack_depth]); + } + sws(fs); + simple_path(0, fs); + concat_list(); + new_reduction_stack = buildStaticList(); + new_reduction_stack_depth = new_reduction_stack.size(); + + if (!keyword_switch) { + int fn, fx, k; + k = new_rule_derivation->nt - 1; + xtxf(new_rule_derivation, k, &fn, &fx); + token_derivation = init_tsd(2); + tried = ZALLOCATE(ntkns+1, char); + //memset(tried,0,ntkns+1); + //x9x(lstptr(map_form_number[fn], tokens)[fx]); + x9x(Rule(fn).token(fx)); + at(token_derivation, fn, fx); + DEALLOCATE(tried); + } + return 1; + } + rws(); + return 0; +} +