view anagram/agcore/q1a.cpp @ 3:e23ad76d0588

Document the LaTeX packages used in the install notes.
author David A. Holland
date Sun, 26 Apr 2009 18:23:33 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-1999 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * q1a.cpp
 */

#include "agarray.h"
#include "arrays.h"
#include "assert.h"
#include "binsort.h"
#include "bitmap.h"
#include "config.h"
#include "cs.h"
#include "dict.h"
#include "error.h"
#include "keyword.h"
#include "lexeme.h"
#include "myalloc.h"
#include "q1a.h"
#include "q1glbl.h"
#include "q5.h"
#include "q8.h"
#include "rproc.h"
#include "rule.h"
#include "stacks.h"
#include "token.h"
#include "tsd.h"
#include "ut.h"

//#define INCLUDE_LOGGING
#include "log.h"


/*
 The principal functions in Q1A are
   .  summarize_grammar()
   .  q1()

 summarize_grammar() is an umbrella procedure that is used to call all
 the functions that are used to create all the tables that describe
 a grammar. summarize_grammar() is called after the input file has
 been successfully parsed.

 q1() is a wrapper function which calls lalr() and rlalr(), both in Q5.
 lalr() generates the parser states and the goto table. rlalr() generates
 reduction tokens and the reduce table.
*/

int n_states_est;

const char *badRecursionFlag = 0;

int token_name_length = 0;
int item_length = 0;

static unsigned libnfs;


static AgArray<Token> findLeadingTokens(Token t) {
  LOGSECTION("findLeadingTokens");
  if (!t->non_terminal_flag) {
    return AgArray<Token>();
  }
  LOGV(t);
  //LOGV(myallocs) LCV(myfrees) LV(myallocBytes) LCV(myfreeBytes);
  Bitmap bitmap(Token::count());
  AgStack<Token> terminalStack;
  AgStack<Token> nonTerminalStack;
  bitmap.setBit(t);
  nonTerminalStack.push(t);
  for (unsigned i = 0; i < nonTerminalStack.size(); i++) {
    Token token = nonTerminalStack[i];
    int nRules = token->ruleList.size();
    for (int j = 0; j < nRules; j++) {
      Rule rule = token.rule(j);
      RuleDescriptor &ruleDescriptor(rule);
      //int length = rule->length();
      int length = ruleDescriptor.length();
      for (int k = 0; k < length; k++) {
        //Token ruleToken = rule.token(k);
        Token ruleToken = ruleDescriptor.token(k);
        token_number_map &ruleTokenDescriptor(ruleToken);
        if (!bitmap.setBit(ruleToken)) {
          if (ruleTokenDescriptor.non_terminal_flag) {
	    nonTerminalStack.push(ruleToken);
	  }
          else {
	    terminalStack.push(ruleToken);
	  }
        }
        if (ruleTokenDescriptor.zero_length_flag) {
	  continue;
	}
        break;
      }
    }
  }
  return terminalStack;
}


static void s8(Token token) {       /* build list of leading tokens */
  LOGSECTION("s8");
  LOGV(token);
  token->leadingTokenList = findLeadingTokens(token);
  if (token->leadingTokenList.size()) {
    return;
  }

  // No terminal tokens for token t,  Is a diagnostic needed?
  Bitmap map(Token::count());
  while (1) {
    if (token->ruleList.size() != 1) {
      // Diagnose if more than one rule
      break;
    }
    Rule rule = token.rule(0);
    int lf = rule->length();
    if (lf == 0) {
      // No diagnostic for null production
      return;
    }
    if (lf != 1) {
      // Diagnose if not a shell production
      break;
    }
    // shell production, get nested token
    Token producedToken = rule.token(0);
    if (producedToken->immediate_action) {
      return;
    }
    if (map.setBit(producedToken)) {
      // don't loop forever
      break;
    }
    // since leading token count is zero.
    assert(producedToken->non_terminal_flag);
  }
  ssprintf("No terminal tokens in expansion of T%03d: ", (int) token);
  atkn(token);
  log_error();
}

/*
 s7a() builds a list of (left) expansion rules for the token t.
 This is done by building a list of expansion rules, and for each
 rule in the list, adding its expansion rules to the list if they
 are not already there. This process continues until all rules in
 the list have been expanded.
*/
/*
static void s7a(Token token) {
  LOGSECTION("s7a");
  int n = token->ruleList.size();

  LOGV(token) LCV(n);
  iws();
  for (int i = 0; i < n; i++) {
    aws(token.rule(i));
  }
  for (n = 0; n < tis(); n++) {
    Rule rule(list_base[n]);
    int nr;
    LOGV(n) LCV(tis()) LCV(list_base[n]) LCV(nforms);
    assert(list_base[n] <= nforms);
    if (rule->length() == 0) continue;
    Token token(rule->token(0));

    if (!token->non_terminal_flag) continue;
    nr = token->ruleList.size();
    for (int i = 0; i < nr; i++) xws(token.rule(i));
  }
  token->expansionRuleList = makeArray(list_base, rws());
}
*/
static void findExpansionRules(Token t) {
  LOGSECTION("findExpansionRules");
  if (!t->non_terminal_flag) return;
  LOGV(t);
  Bitmap map(Rule::count());
  AgStack<Rule> ruleStack;
  int nRules = t->ruleList.size();
  for (int j = 0; j < nRules; j++) {
    Rule rule = t.rule(j);
    if (map.setBit(rule)) {
      continue;
    }
    ruleStack.push(rule);
  }
  for (unsigned i = 0; i < ruleStack.size(); i++) {
    Rule rule = ruleStack[i];
    int length = rule->length();
    if (length == 0) {
      continue;
    }
    Token token = rule.token(0);
/*
    if (length == 1 && token == t) {
      ssprintf("Circular definition of T%03d: ", (int) token);
      atkn(token);
      ssprintf(" in ");
      append_item(rule, -1);
      concat_string();
      log_error(rule->line, rule->col);
      badRecursionFlag = "circular definition";
    }
*/
    if (!token->non_terminal_flag) {
      continue;
    }
    int nRules = token->ruleList.size();
    for (int j = 0; j < nRules; j++) {
      Rule tokenRule = token.rule(j);
      if (map.setBit(tokenRule)) {
	continue;
      }
      ruleStack.push(tokenRule);
    }
  }
  t->expansionRuleList = ruleStack;
}

static void s5(Token token, char *sta) {
  int n, k;

  ok_ptr(sta);
  if (sta[token]) return;
  LOGSECTION("s5");
  LOGV(token) LCV(token->ruleList.size()) LCV(token->immediate_action);
  sta[token] = 1;
  if (token->ruleList.size() == 0) {
    token->non_terminal_flag = 0;
    assert(perm_index <= ntkns);
    token_perm[perm_index] = token;
    perm_index++;
    return;
  }
  token->non_terminal_flag = 1;
  n = token->ruleList.size();
  while (n--) {
    if (token.rule(n)->length()) {
      continue;
    }
    token->zero_length_flag = 1;
    break;
  }
  n = token->ruleList.size();
  for (int i = 0; i < n; i++) {
    Rule rule = token.rule(i);
    k = rule->length();
    if (k == 0) continue;
    int tokenIndex = 0;
    while (k--) {
      Token ruleToken = rule.token(tokenIndex++);
      s5(ruleToken,sta);
      if (ruleToken->zero_length_flag == 0) {
	break;
      }
    }
    if (k == -1) {
      token->zero_length_flag = 1;
    }
  }
  assert(perm_index <= ntkns);
  token_perm[perm_index] = token;
  perm_index++;
}


static int s6(Token token, char *sta) {
  LOGSECTION("s6");
  int zc, n, k;

  zc = 0;
  ok_ptr(sta);
  if (sta[token]) {
    return zc;
  }
  if (token->zero_length_flag) {
    return zc;
  }
  if (!token->non_terminal_flag) {
    return zc;
  }

  sta[token] = 1;
  n = token->ruleList.size();
  for (int i = 0; i < n; i++) {
    Rule rule = token.rule(i);
    if ((k = rule->length()) == 0) {
      continue;
    }
    int tokenIndex = 0;
    while (k--) {
      Token ruleToken = rule.token(tokenIndex++);
      s6(ruleToken,sta);
      if (ruleToken->zero_length_flag==0) {
	break;
      }
    }
    if (k == -1) {
      token->zero_length_flag = 1;
      zc++;
    }
  }
  return zc;
}

static void s4(void) {
  LOGSECTION("s4");
  int n, t, zc;
  char *sta;

  perm_index = 0;
  n = ntkns + 1;
  sta = ZALLOCATE(n, char);
  Each<Token> token;
  for (token.restart(); token.loopNotFinished(); token.getNext()) {
    s5(token, sta);
  }
  for (zc = 1; zc; ) {
    zc = 0;
    ok_ptr(sta);
    memset(sta, 0, n*sizeof(*sta));
    for (t = 0; t < n; t++) {
      zc += s6(token_perm[t],sta);
    }
  }
  DEALLOCATE(sta);
}

/*
 s3() builds the inverted bnf table from the bnf table.
 The ibnf table provides the mapping from rule to
 reduction token list.
*/

static void s3(void) {
  LOGSECTION("s3");
  //unsigned t, f, n, cf;
  unsigned n, cf;
  unsigned sp;
  int *p;
  unsigned nt;

  cf = 0;
  sp = 0;
  ibnfb[0] = sp;
  n = 0;
  p = ibnf_table->sb;
  nt = ibnf_table->nt;
  while (nt--) {
    Rule rule = *p++;
    Token token = *p++;
    if (token->sem_det_flag) {
      rule->fast_loop = 0;
    }
    if ((unsigned)rule != cf) {
      ibnfn[cf] = (int) n;
      ibnfb[rule] = sp;
      cf = rule;
      n = 0;
    }
    assert(sp < libnfs);
    ibnfs[sp] = token;
    sp++;
    n++;
  }
  ibnfn[cf] = (int) n;
  ibnf_table = delete_tsd(ibnf_table);
}

static void makeRuleLists(void) {
  LOGSECTION("makeRuleLists");
  AgArray< AgBalancedTree<int> > tree(Token::count());
  int *bnf = bnf_table->sb;
  int nbnf = bnf_table->nt;
  while (nbnf--) {
    Token token = *bnf++;
    Rule rule = *bnf++;
    tree[token].insert(rule);
  }
  Each<Token> token;
  for (token.restart(); token.loopNotFinished(); token.getNext()) {
    int n = tree[token].size();
    if (n == 0) {
      continue;
    }
    AgArray<Rule> list(n);
    while (n--) {
      list[n] = tree[token][n];
    }
    token->ruleList = list;
  }
}

static int stack_size_arbitrary;
int stack_size;

static int find_token_length(int tn) {
  Token token = tn;
  int nf;
  int length = 0;
  if (tut[token]) {
    return tut[token];
  }
  ok_ptr(tut);
  tut[token] = -1;
  nf = token->ruleList.size();
  for (int i = 0; i < nf; i++) {
    Rule rule = token.rule(i);
    int form_length = rule->length();
    int fx = 0;
    while (fx < form_length) {
      int k = find_token_length(rule.token(fx));
      if (k == -1) {
        if (fx) {
	  stack_size_arbitrary = 1;
	}
        k = 0;
      }
      k += fx;
      if (length < k) {
	length = k;
      }
      fx++;
    }
  }
  tut[tn] = length;
  return length;
}

unsigned disregard_cont_rule;
unsigned disregard_skip_rule;

void summarize_grammar(void) {
  LOGSECTION("summarize_grammar");
  unsigned n, t, tt; //, f;
  unsigned k;

  if (errorResynchDiagnostic) {
    auto_resynch = 0;
  }

  LOGS("Scan rules to finish up intermediate actions");
  Each<Rule> rule;
  for (rule.restart(); rule.loopNotFinished(); rule.getNext()) {
    LOGSECTION("Intermediate action cleanup");
    LOGV(rule);
    Procedure proc = rule->proc_name;
    Token primaryToken = rule->prim_tkn;

    if (primaryToken->sem_det_flag) {
      rule->fast_loop = 0;
    }
    if (rule->immediate_proc) {
      continue;
    }
    if (proc.isNotNull()) {
      int type = primaryToken->value_type;
      if (type == 0) {
	type = primaryToken->value_type = default_token_type;
      }
      if (proc->cSegment.length() == 0) {
	type = void_token_type;
      }
      proc->cast = type;
      //finish_proc_def(proc, rule, rule->length(), type);
    }
  }
  LOGS("Prod definitions finished");
  ZALLOCATE_ST(token_perm, ntkns+1);
  tut = ZALLOCATE(ntkns+1, int);

  makeRuleLists();
  s4();                                /* identify zero length tokens */
  LOGS("s4 finished");
  for (rule.restart(); rule.loopNotFinished(); rule.getNext()) {
    int i, length;

    if (rule->immediate_proc) {
      continue;
    }
    length = rule->length();
    LOGV(rule) LCV(rule->length()) LCV(rule->elementList.size());
    for (i = 0; i < length; i++) {
      Token ruleToken = rule.token(i);
      LOGV(ruleToken) LCV(ruleToken->immediate_action)
	LCV(ruleToken->ruleList.size());
      if (!ruleToken->immediate_action) {
	continue;
      }
      Rule actionRule = ruleToken.rule(0);
      LOGV(actionRule);
      Procedure proc = actionRule->proc_name;
      int type = default_token_type;
      if (proc->cSegment.length() == 0) {
	type = void_token_type;
      }
      proc->cast = type;
    }
  }
  LOGS("Immediate actions finished");
  if (disregard_token &&
      map_token_number[disregard_token].non_terminal_flag) {
    Token token = disregard_token;
    int nf = token->ruleList.size();
    for (int i = 0; i < nf; i++) {
      Rule rule = token.rule(i);
      if (rule->length() == 0) {
	disregard_skip_rule = rule;
      }
      else {
	disregard_cont_rule = rule;
      }
    }
  }
  LOGS("Disregard token analysis complete");

  n = nforms + 1;
  ibnfn = ZALLOCATE(n, int);
  ALLOCATE_ST(ibnfb, n);
  n = bnf_table->nt;
  resize_tsd(bnf_table,n);
  ALLOCATE_ST(ibnfs, n);
  libnfs = n;
  badRecursionFlag = 0;
  for (t = 0; t++ < ntkns; ) {
    tt = token_perm[t];
    memset(tut, 0, sizeof(*tut) * (ntkns+1));
    if (map_token_number[tt].non_terminal_flag) {
      //checkRecursion(tt);
      s8(tt);                          // expand forms
    }
  }
  memset(tut, 0, sizeof(*tut) * (ntkns+1));
  stack_size = stack_size_arbitrary = 0;
  Each<Token> token;
  for (token.restart(); token.loopNotFinished(); token.getNext()) {
    int length = find_token_length(token);

    if (stack_size < length) {
      stack_size = length;
    }
    Token permutedToken = token_perm[token];
    if (!permutedToken->non_terminal_flag) {
      continue;
    }
    //s7a(permutedToken);
    findExpansionRules(permutedToken);
  }
  s3();                                /* make inverted bnf tables */

  n_states_est = -(int)nforms;
  memset(tut, 0, sizeof(*tut) * (ntkns+1));
  for (rule.restart(); rule.loopNotFinished(); rule.getNext()) {
    int nr = ibnfn[rule];
    int *rl = ibnfs + ibnfb[rule];
    k = n = rule->length();
    n_states_est += n;
    //LOGV(rule) LCV(rule->length());
    while (k--) {
      int t = rule.token(k);
      int j = 0;
      while (j < nr) {
	if (rl[j] != t) {
	  break;
	}
	else {
	  j++;
	}
      }
      if (j != nr) {
	tut[t] = 1;
      }
    }
    k = n;
    //LOGV(rule) LCV(rule->precedence_level);
    if (rule->precedence_level == 0) {
      while (k--) {
        Token token = rule.token(k);
	token_number_map &td = map_token_number[token];
	if (td.non_terminal_flag && !td.operatorCandidate && 
	    td.parse_tree.isNull()) {
	  continue;
	}
        int pl = token->precedence_level;
        if (pl != 0) {
          rule->precedence_level = pl;
          rule->left_associative = token->left_associative;
          rule->right_associative = token->right_associative;
        }
	break;
      }
    }
    //LOGV(n);
    while (n) {
      n--;
      if (rule.token(n)->zero_length_flag) {
	continue;
      }
      n++;
      break;
    }
    assert(n <= rule->length());
    rule->non_vanishing_length = n;
  }
  for (rule.restart(); rule.loopNotFinished(); rule.getNext()) {
    int *rl = ibnfs + ibnfb[rule];
    int nr = ibnfn[rule];
    int length = rule->length();
    int fx;

    if (nr <= 1 || length == 0) {
      continue;
    }
    while (nr) {
      int tn = *rl++;
      if (tut[tn]) {
	break;
      }
      nr--;
    }
    if (nr == 0) {
      continue;
    }
    for (fx = 0; fx < length; ) {
      tut[rule.token(fx++)] = 1;
    }
  }
  for (t = 0; t++ < ntkns;){
    if (!tut[t]) {
      if ((int) t == error_token) {
        error_token = 0;
        continue;
      }
      if ((int) t == grammar_token) {
	continue;
      }
      ssprintf("Token not used, T%03d: ",t);
      atkn(t);
      log_error();
    }
  }
  for (Each<Keyword> keyword; keyword.loopNotFinished(); keyword.getNext()) {
    int pt = keyword->reserve;
    if (pt == 0) {
      continue;
    }
    keyword->reserve = build_char_set(pt);
  }
  LOGS("Reserve sets built");
  if (n_states_est <= 0) {
    n_states_est = 1;
  }
  bfst3();
  DEALLOCATE(tut);
  LOGS("tut deallocated");
  if ((auto_resynch || error_token) && eof_token == 0) {
    log_error("eof token not defined");
  }
  token_name_length = 0;
  for (token.restart(); token.loopNotFinished(); token.getNext()) {
    int n;
    ics();
    atkn(token);
    n = rcs();
    if (token_name_length < n) {
      token_name_length = n;
    }
  }
  LOGS("name length loop finished");
  for (token.restart(); token.loopNotFinished(); token.getNext()) {
    AgStack<Token> testTokenList;
    LOGV((int) token);
    testTokenList.push(token);
    unsigned i;
    int badRule = 0;
    for (i = 0; !badRule && i < testTokenList.size(); i++) {
      AgArray<Rule> &ruleList = testTokenList[i]->ruleList;
      LOGV(i) LCV((int) testTokenList[i]);
      unsigned j;
      for (j = 0; !badRule && j < ruleList.size(); j++) {
        Rule &r = ruleList[j];
        if (r->length() == 0) {
	  continue;
	}
        unsigned k;
        int nNonZeroLength = 0;
        int kDerived = -1;
        for (k = 0; k < r->length(); k++) {
          if (r.token(k)->zero_length_flag) {
	    continue;
	  }
          kDerived = k;
          nNonZeroLength++;
        }
        if (nNonZeroLength > 1) {
	  continue;
	}
        Token testToken;
        LOGV(nNonZeroLength);
        if (nNonZeroLength) {
          LOGV(kDerived);
          testToken = r.token(kDerived);
          LOGV((int) testToken);
          if (testToken == token) {
	    badRule = (int) r;
	  }
          LOGV(badRule);
        }
        else {
          for (k = 0; k < r->length(); k++) {
            testToken = r.token(k);
            if (testToken == token) {
              badRule = (int) r;
              break;
            }
          }
        }
        LOGV((int) testToken) LCV(nNonZeroLength);
        LOGV(badRule);
        if (badRule) {
          LOGV(r->length());
          if (r->length() > 1) {
            ssprintf("Empty recursion: Rule R%03d in expansion of T%03d: ",
		     (int) r, (int) token);
            atkn(token);
            badRecursionFlag = "empty recursion";
          }
          else {
            ssprintf("Circular definition of T%03d: ", (int) token);
            atkn(token);
            ssprintf(" in ");
            append_item(r, -1);
            concat_string();
            badRecursionFlag = "circular definition";
          }
          LOGV(badRecursionFlag);
          log_error(r->line, r->col);
        }
        else {
          LOGV(testTokenList.size());
          for (k = 0; k < testTokenList.size(); k++) {
	    if (testTokenList[k] == testToken) {
	      break;
	    }
	  }
          LOGV(k);
          if (k == testTokenList.size()) {
	    testTokenList.push(testToken);
	  }
        }
        LOGV(badRecursionFlag);
      }
    }
  }

  item_length = 0;
  for (rule.restart(); rule.loopNotFinished(); rule.getNext()) {
    int n;
    ics();
    append_item(rule, -1);
    n = rcs();
    if (item_length < n) {
      item_length = n;
    }
  }
  item_length++;
  for (rule.restart(); rule.loopNotFinished(); rule.getNext()) {
    if (rule->proc_name) {
      rule->reductionRequired = 1;
      continue;
    }
    int n = rule->length();
    while (n-- > 1) {
      Token t = rule->elementList[n].token;
      Cast c = t->value_type;
      if (!c.wrapperRequired()) {
	continue;
      }
      rule->reductionRequired = 1;
      break;
    }
  }
}

int get_reduction_states(int s, int f, int n) {
  LOGSECTION("get_reduction_states");
  LOGV(s) LCV(f) LCV(n);
  int k = add_tuple_dict_new(completed_form_dict,s,f);
  completed_form_map *cfp;

  check_size(map_completed_form,k,k);
  cfp = &map_completed_form[k];
  if (cfp->reduction_states_index == 0) {
    LOGS("Calling frss12)");
    frss12(s,f,n);
    cfp = &map_completed_form[k];
    cfp->external_reduction = external_reduction;
    cfp->reduction_states_index = store_list(reduction_states_list);
    cfp->n_reduction_states = rws();
  }
  return k;
}

static void find_reduction_states(tsd *rrc) {
  LOGSECTION("find_reduction_states");
  LOGV((int) rrc);
  int *p, nt;
  sort_tuples(rrc,2);
  p = rrc->sb;
  nt = rrc->nt;
  while (nt--) {
    int s, f;
    unsigned n;

    s = *p++;
    p++;
    f = *p++;
    n = *p++;

    if (n != (Rule(f)->length())) {
      continue;
    }
    LOGV(nt);
    get_reduction_states(s, f, n);
  }
  LOGS("find_reduction_states finished");
}

void q1(void) {
  LOGSECTION("q1");
  LOGV(ntkns);
  tut = ZALLOCATE(ntkns+1,int);
  LOGS("call lalr");
  lalr();
  //State::createStates(Rule());
  LOGS("call rlalr");
  rlalr();
  LOGV(nstates);
  map_state_number = (state_number_map *)
    set_array_size(map_state_number, nstates);

  LOGS("call find_reduction_states");
  if (res_con->nt) {
    find_reduction_states(res_con);
  }
  LOGS("returned from first call to find_reduction_states");

  if (unres_con->nt) {
    find_reduction_states(unres_con);
  }
  LOGS("returned from second call to find_reduction_states");

  DEALLOCATE(tut);
  empty_tuple_dict(frss_dict);
  LOGS("q1 finished");
}