AnaGram interim repo (temporary): anagram/agcore/nckwtr.cpp comparison

comparison anagram/agcore/nckwtr.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.

author	David A. Holland
date	Sat, 22 Dec 2007 17:52:45 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:13d2b8934445
+/*
+* AnaGram, A System for Syntax Directed Programming
+* Copyright 1993-2002 Parsifal Software. All Rights Reserved.
+* See the file COPYING for license and usage terms.
+*
+* nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility
+*/
+#include "agbaltree.h"
+#include "arrays.h"
+#include "assert.h"
+#include "cd.h"
+#include "data.h"
+#include "dict.h"
+#include "lexeme.h"
+#include "myalloc.h"
+#include "nckwtr.h"
+#include "q1glbl.h"
+#include "rule.h"
+#include "stacks.h"
+#include "token.h"
+#include "tsd.h"
+//#define INCLUDE_LOGGING
+#include "log.h"
+static int conflict_rule;
+static int conflict_state;
+/*
+* Strategy
+*
+* The data given are two state numbers: the conflict state and the
+* reduction state. In addition, the conflict rule is given.
+*
+* The approach is to work backward from the reduction state to a
+* state that leads to the conflict state.
+*
+* If the characteristic token for the reduction state includes the
+* conflict rule as an expansion rule, then we don't have to worry
+* about null productions. Otherwise, there has been a shift of some
+* zero-length production to get us to the reduction state.
+*
+* The first routine to build is a function which will call itself
+* recursively to find the fork state. It will fail if it cannot find
+* a fork state from its present state.
+*/
+static int keyword_switch = 0;
+static AgBalancedTree<Triple<int> > triples;
+AgArray<int> new_reduction_stack;
+int new_reduction_stack_depth;
+AgArray<int> new_conflict_stack;
+int new_conflict_stack_depth;
+tsd *new_rule_derivation;
+static int simple_path(int from, int to) {
+unsigned *p = lstptr(map_state_number[to],previous_states);
+int nt = map_state_number[to].n_previous_states;
+int i;
+if (from == to) return 1;
+for (i = 0; i < nt; i++) {
+if (xws(p[i])) {
+continue;
+}
+if (simple_path(from, p[i])) {
+return 1;
+}
+fws();
+}
+return 0;
+}
+void clear_conflict_expansion(void) {
+if (conflict_token == 0) {
+return;
+}
+new_rule_derivation = delete_tsd(new_rule_derivation);
+token_derivation = delete_tsd(token_derivation);
+new_reduction_stack = AgArray<int>();
+new_conflict_stack = AgArray<int>();
+conflict_token = 0;
+}
+static int new_analysis(int sn, int rn);
+void analyze_conflict(int csn, int tn, int cfn, int kws) {
+int flag;
+LOGSECTION("analyze_conflict");
+kws = kws!=0;
+if (conflict_state == csn
+&& conflict_token == tn
+&& keyword_switch == kws
+&& conflict_rule == cfn) {
+return;
+}
+clear_conflict_expansion();
+conflict_state = csn;
+conflict_token = tn;
+conflict_rule = cfn;
+keyword_switch = kws;
+flag = new_analysis(conflict_state, conflict_rule);
+LOGV(flag);
+assert(flag);
+triples.reset();
+}
+static int equiv_token(Token charToken, Token ruleToken) {
+if (charToken == ruleToken) {
+return 1;
+}
+AgArray<Rule> expansion = charToken->expansionRuleList;
+int n = expansion.size();
+while (n--) {
+if (expansion[n]->length() == 0 ) {
+continue;
+}
+if (expansion[n]->length() > 1
+	&& (unsigned) expansion[n].token(1) != disregard_token) {
+continue;
+}
+if (expansion[n].token(0) == ruleToken) {
+return 1;
+}
+}
+return 0;
+}
+static void validate_conflict_stack(void) {
+int *ctp = list_base;
+int cfsd = tis();
+int sx = 0;
+int nsn = ctp[sx++];
+int kr = 0;
+int kr_lim = new_rule_derivation->nt;
+int rn, rx;
+LOGSECTION("validate_conflict_stack");
+LOGV(sx) LCV(cfsd);
+if (sx >= cfsd) return;
+#ifdef INCLUDE_LOGGING
+{
+int i;
+for (i = 0; i < cfsd; i++) {
+LOGV(i) LCV(ctp[i]);
+}
+int *sb = new_rule_derivation->sb;
+for (i = 0; i < kr_lim; i += 2) {
+LOGV(sb[i]) LCV(sb[i+1]);
+}
+}
+#endif
+check_tsd(new_rule_derivation);
+xtxf(new_rule_derivation,kr, &rn, &rx);
+LOGV(rn) LCV(rx);
+while (rx == 0) {
+xtxf(new_rule_derivation, ++kr, &rn, &rx);
+rx--;
+LOGV(rn) LCV(rx);
+}
+if (sx < cfsd) {
+while (1) {
+Token charToken = map_state_number[nsn].char_token;
+int sn = ctp[sx++];
+LOGV(kr) LCV(kr_lim);
+assert(kr <  kr_lim);
+//assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx]));
+Token ruleToken = Rule(rn).token(--rx);
+LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken);
+//assert(equiv_token(charToken, Rule(rn).token(--rx)));
+assert(equiv_token(charToken, ruleToken));
+LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken);
+//int nextState = new_next_state(sn, ruleToken);
+//LOGV(nextState);
+//assert(nsn == nextState);
+assert(equiv_token(charToken, transitionToken(sn,nsn)));
+LOGV(sx) LCV(cfsd);
+if (sx >= cfsd) {
+	break;
+}
+nsn = sn;
+while (rx == 0) {
+	xtxf(new_rule_derivation, ++kr, &rn, &rx);
+	rx--;
+}
+}
+}
+LOGS("Exiting validate_conflict_stack");
+}
+/*
+static int check_leading_token(unsigned hlt, unsigned llt) {
+unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens);
+int nlt = map_token_number[hlt].n_leading_tokens;
+if (hlt == llt) {
+return 1;
+}
+while (nlt--) {
+if (ltp[nlt] == llt) {
+return 1;
+}
+}
+return 0;
+}
+*/
+static int reduction_state_path(int sn) {
+LOGSECTION("reduction_state_path");
+LOGV(sn);
+int *is = dict_str(isht_dict, sn);
+int nis = (*is++ - 1)/2;
+while (nis --) {
+int stateNumber = sn;
+Rule rn = *is++;
+unsigned rx = *is++;
+LOGV(stateNumber) LCV(rn) LCV(rx);
+iws();
+while (rx < rn->length()) {
+Token tn = rn.token(rx);
+//int flag = check_leading_token(tn,conflict_token);
+int flag = tn.isLeadingToken(conflict_token);
+aws(stateNumber);
+LOGV(stateNumber) LCV(tis());
+if (flag) {
+at(new_rule_derivation, (int) rn, rx);
+return 1;
+}
+//if (!map_token_number[tn].zero_length_flag) {
+//  break;
+//}
+if (!tn->zero_length_flag) {
+	break;
+}
+LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
+stateNumber = new_next_state(stateNumber,tn);
+if (stateNumber == 0) {
+	break;
+}
+rx++;
+}
+rws();
+}
+return 0;
+}
+static int ct_accessible(int sn) {
+LOGSECTION("ct_accessible");
+LOGV(sn);
+int *is = dict_str(isht_dict, sn);
+int nis = (*is++ - 1)/2;
+while (nis--) {
+Rule rn = *is++;
+unsigned rx = *is++;
+while (rx < rn->length()) {
+//int tn = lstptr(map_form_number[rn], tokens)[rx];
+Token tn = rn.token(rx);
+//int flag = check_leading_token(tn,conflict_token);
+int flag = tn.isLeadingToken(conflict_token);
+if (flag) {
+return 1;
+}
+//if (!map_token_number[tn].zero_length_flag) {
+//  break;
+//}
+if (!tn->zero_length_flag) {
+	break;
+}
+LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
+sn = new_next_state(sn,tn);
+if (sn == 0) {
+	break;
+}
+rx++;
+}
+}
+return 0;
+}
+static int anomalous_keyword_path(int sn) {
+int *is = dict_str(isht_dict, sn);
+int nis = (*is++ - 1)/2;
+while (nis--) {
+Rule rn = *is++;
+unsigned rx = *is++;
+if (rx >= rn->non_vanishing_length) {
+return 0;
+}
+}
+is = dict_str(isht_dict, sn);
+nis = (*is++ - 1)/2;
+while (nis--) {
+Rule rn = *is++;
+unsigned rx = *is++;
+if (rx < rn->length()) {
+sws(sn);
+at(new_rule_derivation, (int) rn, rx);
+return 1;
+}
+}
+return 0;
+}
+static int hw_reduce(int sn, int rn, int rx);
+static int nulls_remaining(int sn, int nsn) {
+LOGSECTION("nulls_remaining");
+LOGV(sn) LCV(nsn);
+int *is = dict_str(isht_dict, nsn);
+int nis = (*is++ - 1)/2;
+while (nis --) {
+Rule rn = *is++;
+unsigned rx = *is++;
+if (rx < rn->non_vanishing_length) {
+continue;
+}
+at(new_rule_derivation, (int) rn, rx);
+if (hw_reduce(sn, rn, rx - 1)) {
+return 1;
+}
+new_rule_derivation->nt--;
+}
+return 0;
+}
+static int hw_reduce(int sn, int rn, int rx) {
+LOGSECTION("hw_reduce");
+//if (triples.insert(IntegerTriple(sn, rn, rx))) {
+//  return 0;
+//}
+if (triples.insert(Triple<int>(sn, rn, rx))) {
+return 0;
+}
+LOGV(sn) LCV(rn) LCV(rx);
+if (rx) {
+int i, j;
+unsigned *p = lstptr(map_state_number[sn],previous_states);
+int nt = map_state_number[sn].n_previous_states;
+//int *sorted = local_array(nt, int);
+LocalArray<int> sorted(nt);
+for (i = 0; i < nt; i++) {
+sorted[i] = p[i];
+LOGV(p[i]);
+}
+for (i = 0; i < nt; i++) {
+//aws(p[i]);
+//if (xws(sorted[i])) continue;
+for (j = i+1; j < nt; j++) {
+if (sorted[i] > sorted[j]) {
+int temp = sorted[i];
+sorted[i] = sorted[j];
+sorted[j] = temp;
+}
+}
+LOGV(sorted[i]);
+//if (xws(sorted[i])) continue;
+aws(sorted[i]);
+#ifdef INCLUDE_LOGGING
+{
+	for (int index = 0; index < tis(); index++) {
+	  LOGV(index) LCV(list_base[index]);
+	}
+}
+LOGV(sorted[i]) LCV(tis());
+#endif
+if (hw_reduce(sorted[i], rn, rx-1)) {
+	return 1;
+}
+fws();
+}
+return 0;
+}
+{
+int *sp = ibnfs + ibnfb[rn];
+int m = ibnfn[rn];
+int i;
+for (i = 0; i < m; i++) {
+unsigned *gtp = lstptr(map_state_number[sn],gotos);
+int ngt  = map_state_number[sn].n_gotos;
+unsigned tn = sp[i];
+int nsn, crn, crx;
+int j, k;
+for (j = k = 0; j < ngt; j++, k+=2) {
+	if (gtp[k] == tn) break;
+}
+if (j < ngt) {
+nsn = gtp[k+1];
+if (keyword_switch) {
+if (ct_accessible(nsn)) {
+	    continue;
+	  }
+if (nulls_remaining(sn, nsn)) {
+	    return 1;
+	  }
+if (anomalous_keyword_path(nsn)) {
+	    return 1;
+	  }
+}
+else {
+if (reduction_state_path(nsn)) {
+	    return 1;
+	  }
+if (nulls_remaining(sn, nsn)) {
+	    return 1;
+	  }
+}
+continue;
+}
+gtp = lstptr(map_state_number[sn], completions);
+ngt = map_state_number[sn].n_completions;
+for (j = k = 0; j < ngt; j++, k += 2) {
+	if (gtp[k] == tn) {
+	  break;
+	}
+}
+if (j == ngt) {
+	continue;
+}
+crn = gtp[k+1];
+crx = Rule(crn)->length();
+int tx = new_rule_derivation->nt;
+while (tx--) {
+	int trn, trx;
+	xtx(new_rule_derivation, tx, &trn, &trx);
+	if (trn == crn && trx == crx) {
+	  break;
+	}
+}
+if (tx >= 0) {
+	continue;
+}
+at(new_rule_derivation, crn, crx);
+if (hw_reduce(sn, crn, crx - 1)) {
+	return 1;
+}
+new_rule_derivation->nt--;
+}
+}
+return 0;
+}
+/*
+static void trim_ct(void) {
+int nt = new_rule_derivation->nt;
+int k = nt - 1;
+int i, j;
+LOGSECTION("trim_ct");
+AgArray<int> list(buildStaticList());
+LOGV(list.size());
+int *cs = list.pointer();
+int ncs = list.size();
+// cs is list of states in reverse order
+int rn,rx, tn;
+unsigned *rl;
+int nrl;
+check_tsd(new_rule_derivation);
+xtxf(new_rule_derivation, k, &rn, &rx);
+tn = lstptr(map_form_number[rn],tokens)[rx-1];
+rl = lstptr(map_token_number[tn],forms);
+nrl = map_token_number[tn].n_forms;
+iws();
+while (--k >= 0) {
+for (i = 0; i < k; i++) {
+unsigned srn, srx;
+int psx;
+xtxf(new_rule_derivation, i, &srn, &srx);
+for (j = 0; j < nrl; j++) if (srn == rl[j]) break;
+if (j == nrl) continue;
+psx = ncs;
+for (j = i+1; j < k + 1; j++) {
+int jrn, jrx;
+xtxf(new_rule_derivation, j, &jrn, &jrx);
+psx -= jrx-1;
+}
+if (psx <= 1) continue;
+if (cs[ncs-1] != cs[psx-1]) continue;
+ncs = psx;
+break;
+}
+if (i < k) {
+memmove(&new_rule_derivation->sb[2*(i+1)],
+&new_rule_derivation->sb[2*(k+1)],
+(nt - k - 1)*2*sizeof(int));
+nt -= k-i;
+new_rule_derivation->nt = nt;
+k = i;
+}
+if (k <= 0) break;
+xtxf(new_rule_derivation, k, &rn, &rx);
+tn = lstptr(map_form_number[rn],tokens)[--rx];
+rl = lstptr(map_token_number[tn],forms);
+nrl = map_token_number[tn].n_forms;
+while (rx--) aws(cs[--ncs]);
+}
+while (ncs--) aws(cs[ncs]);
+list = buildStaticList();
+cs = list.pointer();
+ncs = list.size();
+iws();
+while (ncs--) aws(cs[ncs]);
+}
+*/
+static int new_analysis(int sn, int rn) {
+int length = Rule(rn)->length();
+LOGSECTION("new_analysis");
+LOGV(sn) LCV(rn);
+new_rule_derivation = init_tsd(2);
+at(new_rule_derivation, rn, length);
+sws(sn);
+if (hw_reduce(sn,rn,length)) {
+int fs;
+new_reduction_stack = buildStaticList();
+new_reduction_stack_depth = new_reduction_stack.size();
+fs = fws();
+//if (tis()) trim_ct();
+validate_conflict_stack();
+sws(fs);
+simple_path(0,fs);
+concat_list();
+new_conflict_stack = buildStaticList();
+new_conflict_stack_depth = new_conflict_stack.size();
+iws();
+while(new_reduction_stack_depth--) {
+aws(new_reduction_stack[new_reduction_stack_depth]);
+}
+sws(fs);
+simple_path(0, fs);
+concat_list();
+new_reduction_stack = buildStaticList();
+new_reduction_stack_depth = new_reduction_stack.size();
+if (!keyword_switch) {
+int fn, fx, k;
+k = new_rule_derivation->nt - 1;
+xtxf(new_rule_derivation, k, &fn, &fx);
+token_derivation = init_tsd(2);
+tried = ZALLOCATE(ntkns+1, char);
+//memset(tried,0,ntkns+1);
+//x9x(lstptr(map_form_number[fn], tokens)[fx]);
+x9x(Rule(fn).token(fx));
+at(token_derivation, fn, fx);
+DEALLOCATE(tried);
+}
+return 1;
+}
+rws();
+return 0;
+}

Mercurial > ~dholland > hg > ag > index.cgi

comparison anagram/agcore/nckwtr.cpp @ 0:13d2b8934445