view anagram/agcore/nckwtr.cpp @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility
 */


#include "agbaltree.h"
#include "arrays.h"
#include "assert.h"
#include "cd.h"
#include "data.h"
#include "dict.h"
#include "lexeme.h"
#include "myalloc.h"
#include "nckwtr.h"
#include "q1glbl.h"
#include "rule.h"
#include "stacks.h"
#include "token.h"
#include "tsd.h"

//#define INCLUDE_LOGGING
#include "log.h"

static int conflict_rule;
static int conflict_state;


/*
 * Strategy
 *
 * The data given are two state numbers: the conflict state and the
 * reduction state. In addition, the conflict rule is given.
 *
 * The approach is to work backward from the reduction state to a
 * state that leads to the conflict state.
 *
 * If the characteristic token for the reduction state includes the
 * conflict rule as an expansion rule, then we don't have to worry
 * about null productions. Otherwise, there has been a shift of some
 * zero-length production to get us to the reduction state.
 *
 * The first routine to build is a function which will call itself
 * recursively to find the fork state. It will fail if it cannot find
 * a fork state from its present state.
 */

static int keyword_switch = 0;

static AgBalancedTree<Triple<int> > triples;

AgArray<int> new_reduction_stack;
int new_reduction_stack_depth;
AgArray<int> new_conflict_stack;
int new_conflict_stack_depth;
tsd *new_rule_derivation;


static int simple_path(int from, int to) {
  unsigned *p = lstptr(map_state_number[to],previous_states);
  int nt = map_state_number[to].n_previous_states;
  int i;

  if (from == to) return 1;
  for (i = 0; i < nt; i++) {
    if (xws(p[i])) {
      continue;
    }
    if (simple_path(from, p[i])) {
      return 1;
    }
    fws();
  }
  return 0;
}

void clear_conflict_expansion(void) {
  if (conflict_token == 0) {
    return;
  }
  new_rule_derivation = delete_tsd(new_rule_derivation);
  token_derivation = delete_tsd(token_derivation);
  new_reduction_stack = AgArray<int>();
  new_conflict_stack = AgArray<int>();
  conflict_token = 0;
}

static int new_analysis(int sn, int rn);

void analyze_conflict(int csn, int tn, int cfn, int kws) {
  int flag;

  LOGSECTION("analyze_conflict");
  kws = kws!=0;
  if (conflict_state == csn
      && conflict_token == tn
      && keyword_switch == kws
      && conflict_rule == cfn) {
    return;
  }
  clear_conflict_expansion();
  conflict_state = csn;
  conflict_token = tn;
  conflict_rule = cfn;
  keyword_switch = kws;

  flag = new_analysis(conflict_state, conflict_rule);
  LOGV(flag);
  assert(flag);
  triples.reset();
}

static int equiv_token(Token charToken, Token ruleToken) {
  if (charToken == ruleToken) {
    return 1;
  }
  AgArray<Rule> expansion = charToken->expansionRuleList;
  int n = expansion.size();
  while (n--) {
    if (expansion[n]->length() == 0 ) {
      continue;
    }
    if (expansion[n]->length() > 1
	&& (unsigned) expansion[n].token(1) != disregard_token) {
      continue;
    }
    if (expansion[n].token(0) == ruleToken) {
      return 1;
    }
  }
  return 0;
}

static void validate_conflict_stack(void) {
  int *ctp = list_base;
  int cfsd = tis();
  int sx = 0;
  int nsn = ctp[sx++];
  int kr = 0;
  int kr_lim = new_rule_derivation->nt;
  int rn, rx;

  LOGSECTION("validate_conflict_stack");
  LOGV(sx) LCV(cfsd);
  if (sx >= cfsd) return;
#ifdef INCLUDE_LOGGING
  { 
    int i;
    for (i = 0; i < cfsd; i++) {
      LOGV(i) LCV(ctp[i]);
    }
    int *sb = new_rule_derivation->sb;
    for (i = 0; i < kr_lim; i += 2) {
      LOGV(sb[i]) LCV(sb[i+1]);
    }
  }
#endif
  check_tsd(new_rule_derivation);
  xtxf(new_rule_derivation,kr, &rn, &rx);
  LOGV(rn) LCV(rx);
  while (rx == 0) {
    xtxf(new_rule_derivation, ++kr, &rn, &rx);
    rx--;
    LOGV(rn) LCV(rx);
  }
  if (sx < cfsd) {
    while (1) {
      Token charToken = map_state_number[nsn].char_token;
      int sn = ctp[sx++];
      LOGV(kr) LCV(kr_lim);
      assert(kr <  kr_lim);
      //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx]));
      Token ruleToken = Rule(rn).token(--rx);
      LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken);
      //assert(equiv_token(charToken, Rule(rn).token(--rx)));
      assert(equiv_token(charToken, ruleToken));
      LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken);
      //int nextState = new_next_state(sn, ruleToken);
      //LOGV(nextState);
      //assert(nsn == nextState);
      assert(equiv_token(charToken, transitionToken(sn,nsn)));
      LOGV(sx) LCV(cfsd);
      if (sx >= cfsd) {
	break;
      }
      nsn = sn;
      while (rx == 0) {
	xtxf(new_rule_derivation, ++kr, &rn, &rx);
	rx--;
      }
    }
  }
  LOGS("Exiting validate_conflict_stack");
}

/*
static int check_leading_token(unsigned hlt, unsigned llt) {
  unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens);
  int nlt = map_token_number[hlt].n_leading_tokens;
  if (hlt == llt) {
    return 1;
  }
  while (nlt--) {
    if (ltp[nlt] == llt) {
      return 1;
    }
  }
  return 0;
}
*/

static int reduction_state_path(int sn) {
  LOGSECTION("reduction_state_path");
  LOGV(sn);
  int *is = dict_str(isht_dict, sn);
  int nis = (*is++ - 1)/2;
  while (nis --) {
    int stateNumber = sn;
    Rule rn = *is++;
    unsigned rx = *is++;
    LOGV(stateNumber) LCV(rn) LCV(rx);
    iws();
    while (rx < rn->length()) {
      Token tn = rn.token(rx);
      //int flag = check_leading_token(tn,conflict_token);
      int flag = tn.isLeadingToken(conflict_token);
      aws(stateNumber);
      LOGV(stateNumber) LCV(tis());
      if (flag) {
        at(new_rule_derivation, (int) rn, rx);
        return 1;
      }
      //if (!map_token_number[tn].zero_length_flag) {
      //  break;
      //}
      if (!tn->zero_length_flag) {
	break;
      }
      LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
      stateNumber = new_next_state(stateNumber,tn);
      if (stateNumber == 0) {
	break;
      }
      rx++;
    }
    rws();
  }
  return 0;
}

static int ct_accessible(int sn) {
  LOGSECTION("ct_accessible");
  LOGV(sn);
  int *is = dict_str(isht_dict, sn);
  int nis = (*is++ - 1)/2;
  while (nis--) {
    Rule rn = *is++;
    unsigned rx = *is++;
    while (rx < rn->length()) {
      //int tn = lstptr(map_form_number[rn], tokens)[rx];
      Token tn = rn.token(rx);
      //int flag = check_leading_token(tn,conflict_token);
      int flag = tn.isLeadingToken(conflict_token);
      if (flag) {
        return 1;
      }
      //if (!map_token_number[tn].zero_length_flag) {
      //  break;
      //}
      if (!tn->zero_length_flag) {
	break;
      }
      LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
      sn = new_next_state(sn,tn);
      if (sn == 0) {
	break;
      }
      rx++;
    }
  }
  return 0;
}

static int anomalous_keyword_path(int sn) {
  int *is = dict_str(isht_dict, sn);
  int nis = (*is++ - 1)/2;
  while (nis--) {
    Rule rn = *is++;
    unsigned rx = *is++;
    if (rx >= rn->non_vanishing_length) {
      return 0;
    }
  }
  is = dict_str(isht_dict, sn);
  nis = (*is++ - 1)/2;
  while (nis--) {
    Rule rn = *is++;
    unsigned rx = *is++;
    if (rx < rn->length()) {
      sws(sn);
      at(new_rule_derivation, (int) rn, rx);
      return 1;
    }
  }
  return 0;
}

static int hw_reduce(int sn, int rn, int rx);


static int nulls_remaining(int sn, int nsn) {
  LOGSECTION("nulls_remaining");
  LOGV(sn) LCV(nsn);
  int *is = dict_str(isht_dict, nsn);
  int nis = (*is++ - 1)/2;
  while (nis --) {
    Rule rn = *is++;
    unsigned rx = *is++;
    if (rx < rn->non_vanishing_length) {
      continue;
    }
    at(new_rule_derivation, (int) rn, rx);
    if (hw_reduce(sn, rn, rx - 1)) {
      return 1;
    }
    new_rule_derivation->nt--;
  }
  return 0;
}

static int hw_reduce(int sn, int rn, int rx) {
  LOGSECTION("hw_reduce");
  //if (triples.insert(IntegerTriple(sn, rn, rx))) {
  //  return 0;
  //}
  if (triples.insert(Triple<int>(sn, rn, rx))) {
    return 0;
  }
  LOGV(sn) LCV(rn) LCV(rx);
  if (rx) {
    int i, j;
    unsigned *p = lstptr(map_state_number[sn],previous_states);
    int nt = map_state_number[sn].n_previous_states;
    //int *sorted = local_array(nt, int);
    LocalArray<int> sorted(nt);
    for (i = 0; i < nt; i++) {
      sorted[i] = p[i];
      LOGV(p[i]);
    }
    for (i = 0; i < nt; i++) {
      //aws(p[i]);
      //if (xws(sorted[i])) continue;
      for (j = i+1; j < nt; j++) {
        if (sorted[i] > sorted[j]) {
          int temp = sorted[i];
          sorted[i] = sorted[j];
          sorted[j] = temp;
        }
      }
      LOGV(sorted[i]);
      //if (xws(sorted[i])) continue;
      aws(sorted[i]);

#ifdef INCLUDE_LOGGING
      {
	for (int index = 0; index < tis(); index++) {
	  LOGV(index) LCV(list_base[index]);
	}
      }
      LOGV(sorted[i]) LCV(tis());
#endif

      if (hw_reduce(sorted[i], rn, rx-1)) {
	return 1;
      }
      fws();
    }
    return 0;
  }


  {
    int *sp = ibnfs + ibnfb[rn];
    int m = ibnfn[rn];
    int i;

    for (i = 0; i < m; i++) {
      unsigned *gtp = lstptr(map_state_number[sn],gotos);
      int ngt  = map_state_number[sn].n_gotos;
      unsigned tn = sp[i];
      int nsn, crn, crx;
      int j, k;

      for (j = k = 0; j < ngt; j++, k+=2) {
	if (gtp[k] == tn) break;
      }

      if (j < ngt) {
        nsn = gtp[k+1];
        if (keyword_switch) {
          if (ct_accessible(nsn)) {
	    continue;
	  }
          if (nulls_remaining(sn, nsn)) {
	    return 1;
	  }
          if (anomalous_keyword_path(nsn)) {
	    return 1;
	  }
        }
        else {
          if (reduction_state_path(nsn)) {
	    return 1;
	  }
          if (nulls_remaining(sn, nsn)) {
	    return 1;
	  }
        }
        continue;
      }
      gtp = lstptr(map_state_number[sn], completions);
      ngt = map_state_number[sn].n_completions;
      for (j = k = 0; j < ngt; j++, k += 2) {
	if (gtp[k] == tn) {
	  break;
	}
      }
      if (j == ngt) {
	continue;
      }
      crn = gtp[k+1];
      crx = Rule(crn)->length();
      int tx = new_rule_derivation->nt;
      while (tx--) {
	int trn, trx;
	xtx(new_rule_derivation, tx, &trn, &trx);
	if (trn == crn && trx == crx) {
	  break;
	}
      }
      if (tx >= 0) {
	continue;
      }
      at(new_rule_derivation, crn, crx);
      if (hw_reduce(sn, crn, crx - 1)) {
	return 1;
      }
      new_rule_derivation->nt--;
    }
  }
  return 0;
}

/*
static void trim_ct(void) {
  int nt = new_rule_derivation->nt;
  int k = nt - 1;
  int i, j;
  LOGSECTION("trim_ct");
  AgArray<int> list(buildStaticList());
  LOGV(list.size());
  int *cs = list.pointer();
  int ncs = list.size();
// cs is list of states in reverse order

  int rn,rx, tn;
  unsigned *rl;
  int nrl;

  check_tsd(new_rule_derivation);
  xtxf(new_rule_derivation, k, &rn, &rx);
  tn = lstptr(map_form_number[rn],tokens)[rx-1];
  rl = lstptr(map_token_number[tn],forms);
  nrl = map_token_number[tn].n_forms;

  iws();
  while (--k >= 0) {

    for (i = 0; i < k; i++) {
      unsigned srn, srx;
      int psx;

      xtxf(new_rule_derivation, i, &srn, &srx);
      for (j = 0; j < nrl; j++) if (srn == rl[j]) break;
      if (j == nrl) continue;
      psx = ncs;
      for (j = i+1; j < k + 1; j++) {
        int jrn, jrx;
        xtxf(new_rule_derivation, j, &jrn, &jrx);
        psx -= jrx-1;
      }
      if (psx <= 1) continue;
      if (cs[ncs-1] != cs[psx-1]) continue;
      ncs = psx;
      break;
    }
    if (i < k) {
      memmove(&new_rule_derivation->sb[2*(i+1)],
              &new_rule_derivation->sb[2*(k+1)],
              (nt - k - 1)*2*sizeof(int));
      nt -= k-i;
      new_rule_derivation->nt = nt;
      k = i;
    }
    if (k <= 0) break;
    xtxf(new_rule_derivation, k, &rn, &rx);
    tn = lstptr(map_form_number[rn],tokens)[--rx];
    rl = lstptr(map_token_number[tn],forms);
    nrl = map_token_number[tn].n_forms;
    while (rx--) aws(cs[--ncs]);
  }
  while (ncs--) aws(cs[ncs]);
  list = buildStaticList();
  cs = list.pointer();
  ncs = list.size();
  iws();
  while (ncs--) aws(cs[ncs]);
}
*/

static int new_analysis(int sn, int rn) {
  int length = Rule(rn)->length();

  LOGSECTION("new_analysis");
  LOGV(sn) LCV(rn);
  new_rule_derivation = init_tsd(2);
  at(new_rule_derivation, rn, length);
  sws(sn);
  if (hw_reduce(sn,rn,length)) {
    int fs;

    new_reduction_stack = buildStaticList();
    new_reduction_stack_depth = new_reduction_stack.size();
    fs = fws();
    //if (tis()) trim_ct();
    validate_conflict_stack();
    sws(fs);
    simple_path(0,fs);
    concat_list();
    new_conflict_stack = buildStaticList();
    new_conflict_stack_depth = new_conflict_stack.size();
    iws();
    while(new_reduction_stack_depth--) {
      aws(new_reduction_stack[new_reduction_stack_depth]);
    }
    sws(fs);
    simple_path(0, fs);
    concat_list();
    new_reduction_stack = buildStaticList();
    new_reduction_stack_depth = new_reduction_stack.size();

    if (!keyword_switch) {
      int fn, fx, k;
      k = new_rule_derivation->nt - 1;
      xtxf(new_rule_derivation, k, &fn, &fx);
      token_derivation = init_tsd(2);
      tried = ZALLOCATE(ntkns+1, char);
      //memset(tried,0,ntkns+1);
      //x9x(lstptr(map_form_number[fn], tokens)[fx]);
      x9x(Rule(fn).token(fx));
      at(token_derivation, fn, fx);
      DEALLOCATE(tried);
    }
    return 1;
  }
  rws();
  return 0;
}