view anagram/agcore/nckwtr.cpp @ 21:1c9dac05d040

Add lint-style FALLTHROUGH annotations to fallthrough cases. (in the parse engine and thus the output code) Document this, because the old output causes warnings with gcc10.
author David A. Holland
date Mon, 13 Jun 2022 00:04:38 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility
 */


#include "agbaltree.h"
#include "arrays.h"
#include "assert.h"
#include "cd.h"
#include "data.h"
#include "dict.h"
#include "lexeme.h"
#include "myalloc.h"
#include "nckwtr.h"
#include "q1glbl.h"
#include "rule.h"
#include "stacks.h"
#include "token.h"
#include "tsd.h"

//#define INCLUDE_LOGGING
#include "log.h"

static int conflict_rule;
static int conflict_state;


/*
 * Strategy
 *
 * The data given are two state numbers: the conflict state and the
 * reduction state. In addition, the conflict rule is given.
 *
 * The approach is to work backward from the reduction state to a
 * state that leads to the conflict state.
 *
 * If the characteristic token for the reduction state includes the
 * conflict rule as an expansion rule, then we don't have to worry
 * about null productions. Otherwise, there has been a shift of some
 * zero-length production to get us to the reduction state.
 *
 * The first routine to build is a function which will call itself
 * recursively to find the fork state. It will fail if it cannot find
 * a fork state from its present state.
 */

static int keyword_switch = 0;

static AgBalancedTree<Triple<int> > triples;

AgArray<int> new_reduction_stack;
int new_reduction_stack_depth;
AgArray<int> new_conflict_stack;
int new_conflict_stack_depth;
tsd *new_rule_derivation;


static int simple_path(int from, int to) {
  unsigned *p = lstptr(map_state_number[to],previous_states);
  int nt = map_state_number[to].n_previous_states;
  int i;

  if (from == to) return 1;
  for (i = 0; i < nt; i++) {
    if (xws(p[i])) {
      continue;
    }
    if (simple_path(from, p[i])) {
      return 1;
    }
    fws();
  }
  return 0;
}

void clear_conflict_expansion(void) {
  if (conflict_token == 0) {
    return;
  }
  new_rule_derivation = delete_tsd(new_rule_derivation);
  token_derivation = delete_tsd(token_derivation);
  new_reduction_stack = AgArray<int>();
  new_conflict_stack = AgArray<int>();
  conflict_token = 0;
}

static int new_analysis(int sn, int rn);

void analyze_conflict(int csn, int tn, int cfn, int kws) {
  int flag;

  LOGSECTION("analyze_conflict");
  kws = kws!=0;
  if (conflict_state == csn
      && conflict_token == tn
      && keyword_switch == kws
      && conflict_rule == cfn) {
    return;
  }
  clear_conflict_expansion();
  conflict_state = csn;
  conflict_token = tn;
  conflict_rule = cfn;
  keyword_switch = kws;

  flag = new_analysis(conflict_state, conflict_rule);
  LOGV(flag);
  assert(flag);
  triples.reset();
}

static int equiv_token(Token charToken, Token ruleToken) {
  if (charToken == ruleToken) {
    return 1;
  }
  AgArray<Rule> expansion = charToken->expansionRuleList;
  int n = expansion.size();
  while (n--) {
    if (expansion[n]->length() == 0 ) {
      continue;
    }
    if (expansion[n]->length() > 1
	&& (unsigned) expansion[n].token(1) != disregard_token) {
      continue;
    }
    if (expansion[n].token(0) == ruleToken) {
      return 1;
    }
  }
  return 0;
}

static void validate_conflict_stack(void) {
  int *ctp = list_base;
  int cfsd = tis();
  int sx = 0;
  int nsn = ctp[sx++];
  int kr = 0;
  int kr_lim = new_rule_derivation->nt;
  int rn, rx;

  LOGSECTION("validate_conflict_stack");
  LOGV(sx) LCV(cfsd);
  if (sx >= cfsd) return;
#ifdef INCLUDE_LOGGING
  { 
    int i;
    for (i = 0; i < cfsd; i++) {
      LOGV(i) LCV(ctp[i]);
    }
    int *sb = new_rule_derivation->sb;
    for (i = 0; i < kr_lim; i += 2) {
      LOGV(sb[i]) LCV(sb[i+1]);
    }
  }
#endif
  check_tsd(new_rule_derivation);
  xtxf(new_rule_derivation,kr, &rn, &rx);
  LOGV(rn) LCV(rx);
  while (rx == 0) {
    xtxf(new_rule_derivation, ++kr, &rn, &rx);
    rx--;
    LOGV(rn) LCV(rx);
  }
  if (sx < cfsd) {
    while (1) {
      Token charToken = map_state_number[nsn].char_token;
      int sn = ctp[sx++];
      LOGV(kr) LCV(kr_lim);
      assert(kr <  kr_lim);
      //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx]));
      Token ruleToken = Rule(rn).token(--rx);
      LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken);
      //assert(equiv_token(charToken, Rule(rn).token(--rx)));
      assert(equiv_token(charToken, ruleToken));
      LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken);
      //int nextState = new_next_state(sn, ruleToken);
      //LOGV(nextState);
      //assert(nsn == nextState);
      assert(equiv_token(charToken, transitionToken(sn,nsn)));
      LOGV(sx) LCV(cfsd);
      if (sx >= cfsd) {
	break;
      }
      nsn = sn;
      while (rx == 0) {
	xtxf(new_rule_derivation, ++kr, &rn, &rx);
	rx--;
      }
    }
  }
  LOGS("Exiting validate_conflict_stack");
}

/*
static int check_leading_token(unsigned hlt, unsigned llt) {
  unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens);
  int nlt = map_token_number[hlt].n_leading_tokens;
  if (hlt == llt) {
    return 1;
  }
  while (nlt--) {
    if (ltp[nlt] == llt) {
      return 1;
    }
  }
  return 0;
}
*/

static int reduction_state_path(int sn) {
  LOGSECTION("reduction_state_path");
  LOGV(sn);
  int *is = dict_str(isht_dict, sn);
  int nis = (*is++ - 1)/2;
  while (nis --) {
    int stateNumber = sn;
    Rule rn = *is++;
    unsigned rx = *is++;
    LOGV(stateNumber) LCV(rn) LCV(rx);
    iws();
    while (rx < rn->length()) {
      Token tn = rn.token(rx);
      //int flag = check_leading_token(tn,conflict_token);
      int flag = tn.isLeadingToken(conflict_token);
      aws(stateNumber);
      LOGV(stateNumber) LCV(tis());
      if (flag) {
        at(new_rule_derivation, (int) rn, rx);
        return 1;
      }
      //if (!map_token_number[tn].zero_length_flag) {
      //  break;
      //}
      if (!tn->zero_length_flag) {
	break;
      }
      LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
      stateNumber = new_next_state(stateNumber,tn);
      if (stateNumber == 0) {
	break;
      }
      rx++;
    }
    rws();
  }
  return 0;
}

static int ct_accessible(int sn) {
  LOGSECTION("ct_accessible");
  LOGV(sn);
  int *is = dict_str(isht_dict, sn);
  int nis = (*is++ - 1)/2;
  while (nis--) {
    Rule rn = *is++;
    unsigned rx = *is++;
    while (rx < rn->length()) {
      //int tn = lstptr(map_form_number[rn], tokens)[rx];
      Token tn = rn.token(rx);
      //int flag = check_leading_token(tn,conflict_token);
      int flag = tn.isLeadingToken(conflict_token);
      if (flag) {
        return 1;
      }
      //if (!map_token_number[tn].zero_length_flag) {
      //  break;
      //}
      if (!tn->zero_length_flag) {
	break;
      }
      LOGV(rn) LCV(rx) LCV(sn) LCV(tn);
      sn = new_next_state(sn,tn);
      if (sn == 0) {
	break;
      }
      rx++;
    }
  }
  return 0;
}

static int anomalous_keyword_path(int sn) {
  int *is = dict_str(isht_dict, sn);
  int nis = (*is++ - 1)/2;
  while (nis--) {
    Rule rn = *is++;
    unsigned rx = *is++;
    if (rx >= rn->non_vanishing_length) {
      return 0;
    }
  }
  is = dict_str(isht_dict, sn);
  nis = (*is++ - 1)/2;
  while (nis--) {
    Rule rn = *is++;
    unsigned rx = *is++;
    if (rx < rn->length()) {
      sws(sn);
      at(new_rule_derivation, (int) rn, rx);
      return 1;
    }
  }
  return 0;
}

static int hw_reduce(int sn, int rn, int rx);


static int nulls_remaining(int sn, int nsn) {
  LOGSECTION("nulls_remaining");
  LOGV(sn) LCV(nsn);
  int *is = dict_str(isht_dict, nsn);
  int nis = (*is++ - 1)/2;
  while (nis --) {
    Rule rn = *is++;
    unsigned rx = *is++;
    if (rx < rn->non_vanishing_length) {
      continue;
    }
    at(new_rule_derivation, (int) rn, rx);
    if (hw_reduce(sn, rn, rx - 1)) {
      return 1;
    }
    new_rule_derivation->nt--;
  }
  return 0;
}

static int hw_reduce(int sn, int rn, int rx) {
  LOGSECTION("hw_reduce");
  //if (triples.insert(IntegerTriple(sn, rn, rx))) {
  //  return 0;
  //}
  if (triples.insert(Triple<int>(sn, rn, rx))) {
    return 0;
  }
  LOGV(sn) LCV(rn) LCV(rx);
  if (rx) {
    int i, j;
    unsigned *p = lstptr(map_state_number[sn],previous_states);
    int nt = map_state_number[sn].n_previous_states;
    //int *sorted = local_array(nt, int);
    LocalArray<int> sorted(nt);
    for (i = 0; i < nt; i++) {
      sorted[i] = p[i];
      LOGV(p[i]);
    }
    for (i = 0; i < nt; i++) {
      //aws(p[i]);
      //if (xws(sorted[i])) continue;
      for (j = i+1; j < nt; j++) {
        if (sorted[i] > sorted[j]) {
          int temp = sorted[i];
          sorted[i] = sorted[j];
          sorted[j] = temp;
        }
      }
      LOGV(sorted[i]);
      //if (xws(sorted[i])) continue;
      aws(sorted[i]);

#ifdef INCLUDE_LOGGING
      {
	for (int index = 0; index < tis(); index++) {
	  LOGV(index) LCV(list_base[index]);
	}
      }
      LOGV(sorted[i]) LCV(tis());
#endif

      if (hw_reduce(sorted[i], rn, rx-1)) {
	return 1;
      }
      fws();
    }
    return 0;
  }


  {
    int *sp = ibnfs + ibnfb[rn];
    int m = ibnfn[rn];
    int i;

    for (i = 0; i < m; i++) {
      unsigned *gtp = lstptr(map_state_number[sn],gotos);
      int ngt  = map_state_number[sn].n_gotos;
      unsigned tn = sp[i];
      int nsn, crn, crx;
      int j, k;

      for (j = k = 0; j < ngt; j++, k+=2) {
	if (gtp[k] == tn) break;
      }

      if (j < ngt) {
        nsn = gtp[k+1];
        if (keyword_switch) {
          if (ct_accessible(nsn)) {
	    continue;
	  }
          if (nulls_remaining(sn, nsn)) {
	    return 1;
	  }
          if (anomalous_keyword_path(nsn)) {
	    return 1;
	  }
        }
        else {
          if (reduction_state_path(nsn)) {
	    return 1;
	  }
          if (nulls_remaining(sn, nsn)) {
	    return 1;
	  }
        }
        continue;
      }
      gtp = lstptr(map_state_number[sn], completions);
      ngt = map_state_number[sn].n_completions;
      for (j = k = 0; j < ngt; j++, k += 2) {
	if (gtp[k] == tn) {
	  break;
	}
      }
      if (j == ngt) {
	continue;
      }
      crn = gtp[k+1];
      crx = Rule(crn)->length();
      int tx = new_rule_derivation->nt;
      while (tx--) {
	int trn, trx;
	xtx(new_rule_derivation, tx, &trn, &trx);
	if (trn == crn && trx == crx) {
	  break;
	}
      }
      if (tx >= 0) {
	continue;
      }
      at(new_rule_derivation, crn, crx);
      if (hw_reduce(sn, crn, crx - 1)) {
	return 1;
      }
      new_rule_derivation->nt--;
    }
  }
  return 0;
}

/*
static void trim_ct(void) {
  int nt = new_rule_derivation->nt;
  int k = nt - 1;
  int i, j;
  LOGSECTION("trim_ct");
  AgArray<int> list(buildStaticList());
  LOGV(list.size());
  int *cs = list.pointer();
  int ncs = list.size();
// cs is list of states in reverse order

  int rn,rx, tn;
  unsigned *rl;
  int nrl;

  check_tsd(new_rule_derivation);
  xtxf(new_rule_derivation, k, &rn, &rx);
  tn = lstptr(map_form_number[rn],tokens)[rx-1];
  rl = lstptr(map_token_number[tn],forms);
  nrl = map_token_number[tn].n_forms;

  iws();
  while (--k >= 0) {

    for (i = 0; i < k; i++) {
      unsigned srn, srx;
      int psx;

      xtxf(new_rule_derivation, i, &srn, &srx);
      for (j = 0; j < nrl; j++) if (srn == rl[j]) break;
      if (j == nrl) continue;
      psx = ncs;
      for (j = i+1; j < k + 1; j++) {
        int jrn, jrx;
        xtxf(new_rule_derivation, j, &jrn, &jrx);
        psx -= jrx-1;
      }
      if (psx <= 1) continue;
      if (cs[ncs-1] != cs[psx-1]) continue;
      ncs = psx;
      break;
    }
    if (i < k) {
      memmove(&new_rule_derivation->sb[2*(i+1)],
              &new_rule_derivation->sb[2*(k+1)],
              (nt - k - 1)*2*sizeof(int));
      nt -= k-i;
      new_rule_derivation->nt = nt;
      k = i;
    }
    if (k <= 0) break;
    xtxf(new_rule_derivation, k, &rn, &rx);
    tn = lstptr(map_form_number[rn],tokens)[--rx];
    rl = lstptr(map_token_number[tn],forms);
    nrl = map_token_number[tn].n_forms;
    while (rx--) aws(cs[--ncs]);
  }
  while (ncs--) aws(cs[ncs]);
  list = buildStaticList();
  cs = list.pointer();
  ncs = list.size();
  iws();
  while (ncs--) aws(cs[ncs]);
}
*/

static int new_analysis(int sn, int rn) {
  int length = Rule(rn)->length();

  LOGSECTION("new_analysis");
  LOGV(sn) LCV(rn);
  new_rule_derivation = init_tsd(2);
  at(new_rule_derivation, rn, length);
  sws(sn);
  if (hw_reduce(sn,rn,length)) {
    int fs;

    new_reduction_stack = buildStaticList();
    new_reduction_stack_depth = new_reduction_stack.size();
    fs = fws();
    //if (tis()) trim_ct();
    validate_conflict_stack();
    sws(fs);
    simple_path(0,fs);
    concat_list();
    new_conflict_stack = buildStaticList();
    new_conflict_stack_depth = new_conflict_stack.size();
    iws();
    while(new_reduction_stack_depth--) {
      aws(new_reduction_stack[new_reduction_stack_depth]);
    }
    sws(fs);
    simple_path(0, fs);
    concat_list();
    new_reduction_stack = buildStaticList();
    new_reduction_stack_depth = new_reduction_stack.size();

    if (!keyword_switch) {
      int fn, fx, k;
      k = new_rule_derivation->nt - 1;
      xtxf(new_rule_derivation, k, &fn, &fx);
      token_derivation = init_tsd(2);
      tried = ZALLOCATE(ntkns+1, char);
      //memset(tried,0,ntkns+1);
      //x9x(lstptr(map_form_number[fn], tokens)[fx]);
      x9x(Rule(fn).token(fx));
      at(token_derivation, fn, fx);
      DEALLOCATE(tried);
    }
    return 1;
  }
  rws();
  return 0;
}