view anagram/agcore/q8.cpp @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * q8.cpp
 */

#include "agbaltree.h"
#include "arrays.h"
#include "assert.h"
#include "bitmap.h"
#include "dict.h"
#include "q1glbl.h"
#include "q8.h"
#include "rule.h"
#include "stacks.h"
#include "token.h"

//#define INCLUDE_LOGGING
#include "log.h"


int external_reduction;

static void find_free_states(int s) {
  LOGSECTION("find_free_states");
  LOGV(s);
  state_number_map *msn = &map_state_number[s];
  unsigned *p = lstptr(*msn, gotos);
  int n = msn->n_gotos;
  while (n--) {
    int t = *p++;
    int ns = *p++;
    if (map_token_number[t].zero_length_flag) {
      //if (distinguish_lexemes && t == disregard_token) continue;
      if (isws(ns)) {
	continue;
      }
      LOGV(t) LCV(disregard_token);
      find_free_states(ns);
    }
  }
}

static void frss13(int sn, int f) {
  LOGSECTION("frss13");
  LOGV(sn) LCV(f);
  int flag, kn, g, s, nf, n, *sp, m;
  unsigned t;
  unsigned *p;
  unsigned *px;
  state_number_map *msn = &map_state_number[sn];

  if (f == 0) {
    isws(0);
    return;
  }
  sp = ibnfs + ibnfb[f];
  m = ibnfn[f];
  while (m--) {
    int i;

    t = sp[m];
    if (map_token_number[t].subgrammar) {
      external_reduction = 1;
      continue;
    }
    flag = 0;
    px = lstptr(*msn,completions);
    i = 2*msn->n_completions;
    while (i) {
      i-=2;
      if (px[i] != t) {
	continue;
      }
      //g = px[i+1];
      Rule rule(px[i+1]);
      add_tuple_dict_new(frss_dict, sn, (int) rule, rule->length()-1);
      flag++;
      break;
    }
    if (flag) {
      continue;
    }
    px = lstptr(*msn, gotos);
    for (kn = msn->n_gotos; kn--; px +=2) {
      if (t == *px) {
	break;
      }
    }
    if (kn < 0) continue;
    s = px[1];
    isws(s);
    p = (unsigned *) dict_str(isht_dict,s);
    nf = *p++ - 1;
    p += nf;
    nf /= 2;
    while (nf--) {
      n = *--p;
      g = *--p;
      Rule rule = g;
      if ((unsigned) n < rule->non_vanishing_length) {
	continue;
      }
      //if (distinguish_lexemes && n < rule->length() && 
      //    (int) rule.token(n) == disregard_token) {
      //  continue;
      //}
      LOGV(sn) LCV(g) LCV(n);
      add_tuple_dict_new(frss_dict, sn, g, n-1);
    }
    find_free_states(s);
  }
}

void frss12(int sn, int f, int n) {
  unsigned k, ps;
  unsigned *p, nt;
  unsigned kk;

  LOGSECTION("frss12");
  LOGV(sn) LCV(f) LCV(n);
  add_tuple_dict_new(frss_dict,sn,f,n);
  external_reduction = 0;
  iws();
  k = 0;
  while (k < frss_dict->nsx) {
    completed_form_map *mcf;
    extract_tuple_dict(frss_dict, k, &sn, &f, &n);
    LOGV(k) LCV(frss_dict->nsx) LCV(sn) LCV(f) LCV(n) ;
    k++;
    if (f == 0) {
      continue;
    }
    int length = Rule(f)->length();
    assert(n <= length);
    LOGV(f) LCV(length) LCV(n);
    if (n == length) {
      kk = add_tuple_dict_new(completed_form_dict, sn, f);
      check_size(map_completed_form, kk, kk);
      mcf = &map_completed_form[kk];
      external_reduction |= mcf->external_reduction;
      if (mcf->reduction_states_index) {
        p = lstptr(*mcf, reduction_states);
        nt = mcf->n_reduction_states;
        while (nt--) {
	  isws(*p++);
	}
        continue;
      }
    }
    if (n) {
      LOGSECTION("Previous states");
      state_number_map *msn = &map_state_number[sn];
      unsigned *p = lstptr(*msn,previous_states);
      int nt = msn->n_previous_states;
      while (nt--) {
        ps = *p++;
        add_tuple_dict_new(frss_dict, ps, f, n-1);
        LOGV(ps) LCV(f) LCV(n-1);
      }
      continue;
    }
    frss13(sn, f);
  }
  reset_tuple_dict(frss_dict);
}

static AgArray< AgBalancedTree<int> > digraphList;

//static void grstt3(unsigned t, unsigned ft) {
static void grstt3(Token token, Token follower) {
  LOGSECTION("grstt3");
  LOGV((int) token) LCV((int) follower);
  unsigned nt;
  unsigned n;
  //Token token = t;
  //Token follower = ft;

  if (digraphList[token].insert(follower)) return;
  n = token->ruleList.size();
  while (n--) {
    Rule rule = token.rule(n);
    RuleDescriptor &ruleDescriptor(rule);

    if ((nt = ruleDescriptor.length()) == 0) {
      continue;
    }
    Token ruleToken;
    do {
      //token = rule.token(--nt);
      ruleToken = ruleDescriptor.token(--nt);
      if (!ruleToken->non_terminal_flag) {
	break;
      }
      grstt3(ruleToken, follower);
    } while (nt && ruleToken->zero_length_flag);
  }
}

void bfst3(void) {
  LOGSECTION("bfst3");
  unsigned n, k, kk;
  Rule ruleZero(0);

  int last = ruleZero->length() - 1;
  if (last < 0) {
    return;
  }
  LOGV(last) LCV(ruleZero->length()) LCV(ruleZero->elementList.size());
  if (ruleZero.token(last).isNull()) {
    return;
  }
  digraphList = AgArray< AgBalancedTree<int> >(Token::count());
  grstt3(ruleZero.token(last), Token());

  for (Each<Rule> rule; rule.loopNotFinished(); rule.getNext()) {
    RuleDescriptor &ruleDescriptor(rule);
    //n = rule->length();
    n = ruleDescriptor.length();
    if (n < 2) {
      continue;
    }
    k = 0;
    while (1) {
      LOGV(rule) LCV(rule->length()) LCV(k);
      //Token token = rule.token(k);

      //k++;
      if (++k >= n) {
	break;
      }
      Token token = ruleDescriptor.token(k-1);
      token_number_map &tokenDescriptor(token);
      //if (!token->non_terminal_flag &&
      //    !token->zero_length_flag) continue;
      if (!tokenDescriptor.non_terminal_flag &&
          !tokenDescriptor.zero_length_flag) {
	continue;
      }
      kk = k;
      while (kk < n) {
        LOGV(rule) LCV(rule->length()) LCV(kk);
        //Token followingToken = rule.token(kk);
        Token followingToken = ruleDescriptor.token(kk);
        grstt3(token, followingToken);
        if (followingToken->zero_length_flag == 0) {
	  break;
	}
	kk++;
      }
    }
  }
  for (Each<Token> t; t.loopNotFinished(); t.getNext()) {
    AgBalancedTree<int> &list = digraphList[t];
    Bitmap map(Token::count());
    AgStack<Token> followers;
    for (unsigned i = 0; i < list.size(); i++) {
      if (map.setBit(list[i])) {
	continue;
      }
      followers.push(list[i]);
    }
    t->followerList = followers;
  }
  digraphList.reset();
}

void ivgtt(void) {
  LOGSECTION("ivgtt");
  unsigned ns, i;

  if (nits == 0) {
    return;
  }
  LOGV(n_gotos);

  AgArray< AgBalancedTree< int > > predecessor(nits);
  for (i = 0; i < nits; i++){
    state_number_map *sp = &map_state_number[i];
    unsigned *p = lstptr(*sp, gotos);
    int nt = sp->n_gotos;
    LOGV(i) LCV(nt);
    while (nt--) {
      p++;
      ns = *p++;
      assert(ns < nits);
      predecessor[ns].insert(i);
    }
  }
  for (i = 0; i < nits; i++) {
    state_number_map *sp = &map_state_number[i];
    AgBalancedTree<int> &list = predecessor[i];
    int n = list.size();
    iws();
    LOGS("State") LS(i);
    for (int j = 0; j < n; j++) {
#ifdef INCLUDE_LOGGING
      LOGX() LS(list[j]);
#endif
      aws(list[j]);
    }
    sp->previous_states_index = store_list(previous_states_list);
    int nps = rws();
    sp->n_previous_states = nps;
  }
  LOGV(n_gotos);
}