diff anagram/agcore/nckwtr.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/anagram/agcore/nckwtr.cpp	Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,572 @@
+/*
+ * AnaGram, A System for Syntax Directed Programming
+ * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
+ * See the file COPYING for license and usage terms.
+ *
+ * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility
+ */
+
+
+#include "agbaltree.h"
+#include "arrays.h"
+#include "assert.h"
+#include "cd.h"
+#include "data.h"
+#include "dict.h"
+#include "lexeme.h"
+#include "myalloc.h"
+#include "nckwtr.h"
+#include "q1glbl.h"
+#include "rule.h"
+#include "stacks.h"
+#include "token.h"
+#include "tsd.h"
+
+//#define INCLUDE_LOGGING
+#include "log.h"
+
+static int conflict_rule;
+static int conflict_state;
+
+
+/*
+ * Strategy
+ *
+ * The data given are two state numbers: the conflict state and the
+ * reduction state. In addition, the conflict rule is given.
+ *
+ * The approach is to work backward from the reduction state to a
+ * state that leads to the conflict state.
+ *
+ * If the characteristic token for the reduction state includes the
+ * conflict rule as an expansion rule, then we don't have to worry
+ * about null productions. Otherwise, there has been a shift of some
+ * zero-length production to get us to the reduction state.
+ *
+ * The first routine to build is a function which will call itself
+ * recursively to find the fork state. It will fail if it cannot find
+ * a fork state from its present state.
+ */
+
+static int keyword_switch = 0;
+
+static AgBalancedTree<Triple<int> > triples;
+
+AgArray<int> new_reduction_stack;
+int new_reduction_stack_depth;
+AgArray<int> new_conflict_stack;
+int new_conflict_stack_depth;
+tsd *new_rule_derivation;
+
+
+static int simple_path(int from, int to) {
+  unsigned *p = lstptr(map_state_number[to],previous_states);
+  int nt = map_state_number[to].n_previous_states;
+  int i;
+
+  if (from == to) return 1;
+  for (i = 0; i < nt; i++) {
+    if (xws(p[i])) {
+      continue;
+    }
+    if (simple_path(from, p[i])) {
+      return 1;
+    }
+    fws();
+  }
+  return 0;
+}
+
+void clear_conflict_expansion(void) {
+  if (conflict_token == 0) {
+    return;
+  }
+  new_rule_derivation = delete_tsd(new_rule_derivation);
+  token_derivation = delete_tsd(token_derivation);
+  new_reduction_stack = AgArray<int>();
+  new_conflict_stack = AgArray<int>();
+  conflict_token = 0;
+}
+
+static int new_analysis(int sn, int rn);
+
+void analyze_conflict(int csn, int tn, int cfn, int kws) {
+  int flag;
+
+  LOGSECTION("analyze_conflict");
+  kws = kws!=0;
+  if (conflict_state == csn
+      && conflict_token == tn
+      && keyword_switch == kws
+      && conflict_rule == cfn) {
+    return;
+  }
+  clear_conflict_expansion();
+  conflict_state = csn;
+  conflict_token = tn;
+  conflict_rule = cfn;
+  keyword_switch = kws;
+
+  flag = new_analysis(conflict_state, conflict_rule);
+  LOGV(flag);
+  assert(flag);
+  triples.reset();
+}
+
+static int equiv_token(Token charToken, Token ruleToken) {
+  if (charToken == ruleToken) {
+    return 1;
+  }
+  AgArray<Rule> expansion = charToken->expansionRuleList;
+  int n = expansion.size();
+  while (n--) {
+    if (expansion[n]->length() == 0 ) {
+      continue;
+    }
+    if (expansion[n]->length() > 1
+	&& (unsigned) expansion[n].token(1) != disregard_token) {
+      continue;
+    }
+    if (expansion[n].token(0) == ruleToken) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+static void validate_conflict_stack(void) {
+  int *ctp = list_base;
+  int cfsd = tis();
+  int sx = 0;
+  int nsn = ctp[sx++];
+  int kr = 0;
+  int kr_lim = new_rule_derivation->nt;
+  int rn, rx;
+
+  LOGSECTION("validate_conflict_stack");
+  LOGV(sx) LCV(cfsd);
+  if (sx >= cfsd) return;
+#ifdef INCLUDE_LOGGING
+  { 
+    int i;
+    for (i = 0; i < cfsd; i++) {
+      LOGV(i) LCV(ctp[i]);
+    }
+    int *sb = new_rule_derivation->sb;
+    for (i = 0; i < kr_lim; i += 2) {
+      LOGV(sb[i]) LCV(sb[i+1]);
+    }
+  }
+#endif
+  check_tsd(new_rule_derivation);
+  xtxf(new_rule_derivation,kr, &rn, &rx);
+  LOGV(rn) LCV(rx);
+  while (rx == 0) {
+    xtxf(new_rule_derivation, ++kr, &rn, &rx);
+    rx--;
+    LOGV(rn) LCV(rx);
+  }
+  if (sx < cfsd) {
+    while (1) {
+      Token charToken = map_state_number[nsn].char_token;
+      int sn = ctp[sx++];
+      LOGV(kr) LCV(kr_lim);
+      assert(kr <  kr_lim);
+      //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx]));
+      Token ruleToken = Rule(rn).token(--rx);
+      LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken);
+      //assert(equiv_token(charToken, Rule(rn).token(--rx)));
+      assert(equiv_token(charToken, ruleToken));
+      LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken);
+      //int nextState = new_next_state(sn, ruleToken);
+      //LOGV(nextState);
+      //assert(nsn == nextState);
+      assert(equiv_token(charToken, transitionToken(sn,nsn)));
+      LOGV(sx) LCV(cfsd);
+      if (sx >= cfsd) {
+	break;
+      }
+      nsn = sn;
+      while (rx == 0) {
+	xtxf(new_rule_derivation, ++kr, &rn, &rx);
+	rx--;
+      }
+    }
+  }
+  LOGS("Exiting validate_conflict_stack");
+}
+
+/*
+static int check_leading_token(unsigned hlt, unsigned llt) {
+  unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens);
+  int nlt = map_token_number[hlt].n_leading_tokens;
+  if (hlt == llt) {
+    return 1;
+  }
+  while (nlt--) {
+    if (ltp[nlt] == llt) {
+      return 1;
+    }
+  }
+  return 0;
+}
+*/
+
+static int reduction_state_path(int sn) {
+  LOGSECTION("reduction_state_path");
+  LOGV(sn);
+  int *is = dict_str(isht_dict, sn);
+  int nis = (*is++ - 1)/2;
+  while (nis --) {
+    int stateNumber = sn;
+    Rule rn = *is++;
+    unsigned rx = *is++;
+    LOGV(stateNumber) LCV(rn) LCV(rx);
+    iws();
+    while (rx < rn->length()) {
+      Token tn = rn.token(rx);
+      //int flag = check_leading_token(tn,conflict_token);
+      int flag = tn.isLeadingToken(conflict_token);
+      aws(stateNumber);
+      LOGV(stateNumber) LCV(tis());
+      if (flag) {
+        at(new_rule_derivation, (int) rn, rx);
+        return 1;
+      }
+      //if (!map_token_number[tn].zero_length_flag) {
+      //  break;
+      //}
+      if (!tn->zero_length_flag) {
+	break;
+      }
+      LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
+      stateNumber = new_next_state(stateNumber,tn);
+      if (stateNumber == 0) {
+	break;
+      }
+      rx++;
+    }
+    rws();
+  }
+  return 0;
+}
+
+static int ct_accessible(int sn) {
+  LOGSECTION("ct_accessible");
+  LOGV(sn);
+  int *is = dict_str(isht_dict, sn);
+  int nis = (*is++ - 1)/2;
+  while (nis--) {
+    Rule rn = *is++;
+    unsigned rx = *is++;
+    while (rx < rn->length()) {
+      //int tn = lstptr(map_form_number[rn], tokens)[rx];
+      Token tn = rn.token(rx);
+      //int flag = check_leading_token(tn,conflict_token);
+      int flag = tn.isLeadingToken(conflict_token);
+      if (flag) {
+        return 1;
+      }
+      //if (!map_token_number[tn].zero_length_flag) {
+      //  break;
+      //}
+      if (!tn->zero_length_flag) {
+	break;
+      }
+      LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
+      sn = new_next_state(sn,tn);
+      if (sn == 0) {
+	break;
+      }
+      rx++;
+    }
+  }
+  return 0;
+}
+
+static int anomalous_keyword_path(int sn) {
+  int *is = dict_str(isht_dict, sn);
+  int nis = (*is++ - 1)/2;
+  while (nis--) {
+    Rule rn = *is++;
+    unsigned rx = *is++;
+    if (rx >= rn->non_vanishing_length) {
+      return 0;
+    }
+  }
+  is = dict_str(isht_dict, sn);
+  nis = (*is++ - 1)/2;
+  while (nis--) {
+    Rule rn = *is++;
+    unsigned rx = *is++;
+    if (rx < rn->length()) {
+      sws(sn);
+      at(new_rule_derivation, (int) rn, rx);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+static int hw_reduce(int sn, int rn, int rx);
+
+
+static int nulls_remaining(int sn, int nsn) {
+  LOGSECTION("nulls_remaining");
+  LOGV(sn) LCV(nsn);
+  int *is = dict_str(isht_dict, nsn);
+  int nis = (*is++ - 1)/2;
+  while (nis --) {
+    Rule rn = *is++;
+    unsigned rx = *is++;
+    if (rx < rn->non_vanishing_length) {
+      continue;
+    }
+    at(new_rule_derivation, (int) rn, rx);
+    if (hw_reduce(sn, rn, rx - 1)) {
+      return 1;
+    }
+    new_rule_derivation->nt--;
+  }
+  return 0;
+}
+
+static int hw_reduce(int sn, int rn, int rx) {
+  LOGSECTION("hw_reduce");
+  //if (triples.insert(IntegerTriple(sn, rn, rx))) {
+  //  return 0;
+  //}
+  if (triples.insert(Triple<int>(sn, rn, rx))) {
+    return 0;
+  }
+  LOGV(sn) LCV(rn) LCV(rx);
+  if (rx) {
+    int i, j;
+    unsigned *p = lstptr(map_state_number[sn],previous_states);
+    int nt = map_state_number[sn].n_previous_states;
+    //int *sorted = local_array(nt, int);
+    LocalArray<int> sorted(nt);
+    for (i = 0; i < nt; i++) {
+      sorted[i] = p[i];
+      LOGV(p[i]);
+    }
+    for (i = 0; i < nt; i++) {
+      //aws(p[i]);
+      //if (xws(sorted[i])) continue;
+      for (j = i+1; j < nt; j++) {
+        if (sorted[i] > sorted[j]) {
+          int temp = sorted[i];
+          sorted[i] = sorted[j];
+          sorted[j] = temp;
+        }
+      }
+      LOGV(sorted[i]);
+      //if (xws(sorted[i])) continue;
+      aws(sorted[i]);
+
+#ifdef INCLUDE_LOGGING
+      {
+	for (int index = 0; index < tis(); index++) {
+	  LOGV(index) LCV(list_base[index]);
+	}
+      }
+      LOGV(sorted[i]) LCV(tis());
+#endif
+
+      if (hw_reduce(sorted[i], rn, rx-1)) {
+	return 1;
+      }
+      fws();
+    }
+    return 0;
+  }
+
+
+  {
+    int *sp = ibnfs + ibnfb[rn];
+    int m = ibnfn[rn];
+    int i;
+
+    for (i = 0; i < m; i++) {
+      unsigned *gtp = lstptr(map_state_number[sn],gotos);
+      int ngt  = map_state_number[sn].n_gotos;
+      unsigned tn = sp[i];
+      int nsn, crn, crx;
+      int j, k;
+
+      for (j = k = 0; j < ngt; j++, k+=2) {
+	if (gtp[k] == tn) break;
+      }
+
+      if (j < ngt) {
+        nsn = gtp[k+1];
+        if (keyword_switch) {
+          if (ct_accessible(nsn)) {
+	    continue;
+	  }
+          if (nulls_remaining(sn, nsn)) {
+	    return 1;
+	  }
+          if (anomalous_keyword_path(nsn)) {
+	    return 1;
+	  }
+        }
+        else {
+          if (reduction_state_path(nsn)) {
+	    return 1;
+	  }
+          if (nulls_remaining(sn, nsn)) {
+	    return 1;
+	  }
+        }
+        continue;
+      }
+      gtp = lstptr(map_state_number[sn], completions);
+      ngt = map_state_number[sn].n_completions;
+      for (j = k = 0; j < ngt; j++, k += 2) {
+	if (gtp[k] == tn) {
+	  break;
+	}
+      }
+      if (j == ngt) {
+	continue;
+      }
+      crn = gtp[k+1];
+      crx = Rule(crn)->length();
+      int tx = new_rule_derivation->nt;
+      while (tx--) {
+	int trn, trx;
+	xtx(new_rule_derivation, tx, &trn, &trx);
+	if (trn == crn && trx == crx) {
+	  break;
+	}
+      }
+      if (tx >= 0) {
+	continue;
+      }
+      at(new_rule_derivation, crn, crx);
+      if (hw_reduce(sn, crn, crx - 1)) {
+	return 1;
+      }
+      new_rule_derivation->nt--;
+    }
+  }
+  return 0;
+}
+
+/*
+static void trim_ct(void) {
+  int nt = new_rule_derivation->nt;
+  int k = nt - 1;
+  int i, j;
+  LOGSECTION("trim_ct");
+  AgArray<int> list(buildStaticList());
+  LOGV(list.size());
+  int *cs = list.pointer();
+  int ncs = list.size();
+// cs is list of states in reverse order
+
+  int rn,rx, tn;
+  unsigned *rl;
+  int nrl;
+
+  check_tsd(new_rule_derivation);
+  xtxf(new_rule_derivation, k, &rn, &rx);
+  tn = lstptr(map_form_number[rn],tokens)[rx-1];
+  rl = lstptr(map_token_number[tn],forms);
+  nrl = map_token_number[tn].n_forms;
+
+  iws();
+  while (--k >= 0) {
+
+    for (i = 0; i < k; i++) {
+      unsigned srn, srx;
+      int psx;
+
+      xtxf(new_rule_derivation, i, &srn, &srx);
+      for (j = 0; j < nrl; j++) if (srn == rl[j]) break;
+      if (j == nrl) continue;
+      psx = ncs;
+      for (j = i+1; j < k + 1; j++) {
+        int jrn, jrx;
+        xtxf(new_rule_derivation, j, &jrn, &jrx);
+        psx -= jrx-1;
+      }
+      if (psx <= 1) continue;
+      if (cs[ncs-1] != cs[psx-1]) continue;
+      ncs = psx;
+      break;
+    }
+    if (i < k) {
+      memmove(&new_rule_derivation->sb[2*(i+1)],
+              &new_rule_derivation->sb[2*(k+1)],
+              (nt - k - 1)*2*sizeof(int));
+      nt -= k-i;
+      new_rule_derivation->nt = nt;
+      k = i;
+    }
+    if (k <= 0) break;
+    xtxf(new_rule_derivation, k, &rn, &rx);
+    tn = lstptr(map_form_number[rn],tokens)[--rx];
+    rl = lstptr(map_token_number[tn],forms);
+    nrl = map_token_number[tn].n_forms;
+    while (rx--) aws(cs[--ncs]);
+  }
+  while (ncs--) aws(cs[ncs]);
+  list = buildStaticList();
+  cs = list.pointer();
+  ncs = list.size();
+  iws();
+  while (ncs--) aws(cs[ncs]);
+}
+*/
+
+static int new_analysis(int sn, int rn) {
+  int length = Rule(rn)->length();
+
+  LOGSECTION("new_analysis");
+  LOGV(sn) LCV(rn);
+  new_rule_derivation = init_tsd(2);
+  at(new_rule_derivation, rn, length);
+  sws(sn);
+  if (hw_reduce(sn,rn,length)) {
+    int fs;
+
+    new_reduction_stack = buildStaticList();
+    new_reduction_stack_depth = new_reduction_stack.size();
+    fs = fws();
+    //if (tis()) trim_ct();
+    validate_conflict_stack();
+    sws(fs);
+    simple_path(0,fs);
+    concat_list();
+    new_conflict_stack = buildStaticList();
+    new_conflict_stack_depth = new_conflict_stack.size();
+    iws();
+    while(new_reduction_stack_depth--) {
+      aws(new_reduction_stack[new_reduction_stack_depth]);
+    }
+    sws(fs);
+    simple_path(0, fs);
+    concat_list();
+    new_reduction_stack = buildStaticList();
+    new_reduction_stack_depth = new_reduction_stack.size();
+
+    if (!keyword_switch) {
+      int fn, fx, k;
+      k = new_rule_derivation->nt - 1;
+      xtxf(new_rule_derivation, k, &fn, &fx);
+      token_derivation = init_tsd(2);
+      tried = ZALLOCATE(ntkns+1, char);
+      //memset(tried,0,ntkns+1);
+      //x9x(lstptr(map_form_number[fn], tokens)[fx]);
+      x9x(Rule(fn).token(fx));
+      at(token_derivation, fn, fx);
+      DEALLOCATE(tried);
+    }
+    return 1;
+  }
+  rws();
+  return 0;
+}
+