view anagram/agcore/cs.cpp @ 21:1c9dac05d040

Add lint-style FALLTHROUGH annotations to fallthrough cases (in the parse engine and thus in the output code). Document this, because the old output causes warnings with gcc10.
author David A. Holland
date Mon, 13 Jun 2022 00:04:38 -0400
parents 13d2b8934445

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * cs.cpp
 */

#include "arrays.h"
#include "assert.h"
#include "bpu.h"
#include "config.h"
#include "cs.h"
#include "csexp.h"
#include "dict.h"
#include "error.h"
#include "myalloc.h"
#include "q1glbl.h"
#include "rpk.h"
#include "rule.h"
#include "stacks.h"
#include "symbol.h"
#include "token.h"
#include "tree.h"
#include "tsd.h"
#include "ut.h"

//#define INCLUDE_LOGGING
#include "log.h"


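/*
 * Error status from building character sets, tested in build_sets()
 * below. A value of 5 marks an undefined token; other values (e.g. 1,
 * "Error defining") are presumably set elsewhere by the expression /
 * bitmap code.
 */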
static int char_set_error;

static void build_partition_sets(list_dict *d);
static void partition_set(int ts, list_dict *d, int *ps, char *fa);


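/*
 * Establish the character universe for the grammar: scan every parse
 * tree to determine min_char_number and max_char_number (clamping the
 * maximum to 0x10000), attach named tokens to the parse trees their
 * aliases reference via shell productions, resolve the "grammar" and
 * "eof" tokens, and size map_char_number to cover the whole range.
 */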
void set_universe(void) {
  //unsigned tn;

  LOGSECTION("set_universe");
  LOGV(ntkns);
  LOGV(nforms);
  //min_char_number = 0;
  max_char_number = -1;
/*
  for (int ptIndex = 0; ptIndex++ < ntrees;) {
    map_parse_tree[ptIndex].expression->checkRange();
  }
*/
  for (ParseTree pt; pt.next(); ) {
    pt->expression->checkRange();
  }
  CharSetExpression::checkMinimum(min_char_number);
  if (max_char_number > 0x10000) {
    max_char_number = 0x10000;
  }
  LOGV(min_char_number) LCV(max_char_number);

  //for (tn = 0; tn++ < ntkns; )
  for (Each<Token> token; token.loopNotFinished(); token.getNext()) {
    //token_number_map *tp = &map_token_number[tn];
    //Token token(tn);
    //int pt = tp->parse_tree;
    ParseTree tree = token->parse_tree;
    //parse_tree_map *ptp;
    //int alias, name;

    //if (pt) continue;
    if (tree.isNotNull()) {
      continue;
    }
    Symbol alias = token->token_name;
    if (alias.isNull()) {
      continue;
    }
    //pt = map_token_name[alias].parse_tree;
    tree = alias->parse_tree;
    if (tree.isNull()) {
      continue;
    }
    //ptp = &map_parse_tree[pt];

    CharSetExpression *exp = tree->expression;
    if (exp && exp->nameNode()) {
      Symbol name = ((NamedCharSet *)exp)->name;
      Token referencedToken = name->token_number;
      if (referencedToken.isNull()) {
	continue;
      }
      shell_production(token, referencedToken);
      tree->token_number = token;
    }
  }

  // if (error_token == 0) {
  //   error_token = find_token_number("error");
  // }
  if (grammar_token == 0) {
    grammar_token = find_token_number("grammar");
  }
  if (eof_token == 0) {
    eof_token = find_token_number("eof");
    if (eof_token == 0) {
      //int id = identify_string("eof", tkn_dict);
      Symbol id("eof");
      if (id && (auto_resynch || error_token)) {
	eof_token = id_token(id);
      }
    }
  }
  nforms_base = nforms;
  if (min_char_number > max_char_number) {
    return;
  }
  if (max_char_number < 255) {
    max_char_number = 255;
  }
  n_chars = max_char_number - min_char_number + 1;
  check_size(map_char_number, n_chars, n_chars);
  LOGV(ntkns);
  LOGV(nforms);
}

static unsigned ntc_size;

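/*
 * Build character sets for the terminal tokens. First decide which
 * parse tree each named token owns: a tree referenced by exactly one
 * named token is assigned that token, while a tree shared by several
 * named tokens gets a token of its own and the named tokens become
 * shell productions. Then build a character set for every remaining
 * plain terminal and pass char_set_dict to build_partition_sets().
 */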
void build_sets(void) {
  int k;
  unsigned i;
  int id;
  char ts[100];
  int *ntc;
  unsigned ntk = ntkns;

  if (charRangeDiagnostic || min_char_number > max_char_number) {
    return;
  }

  LOGSECTION("build_sets");
  LOGV(ntkns) LCV(nforms) LCV(error_token);
  /*
   * Count the number of named tokens that point to a given parse tree.
   * If there is only one such named token, it can be assigned to the
   * parse tree. Otherwise, the parse tree needs its own token, and
   * the named tokens have to have shells created.
   */
  //ntc_size = ntrees+1;
  ntc_size = ParseTree::count();
  ZALLOCATE_ST(ntc, ntc_size);
  LOGV(ntc_size);
  ParseTree tree;
  LOGS("Scanning trees");
  for (tree = ParseTree(); tree.next(); ) {
    int name = tree->token_name;
    if (name && tree->expression->char_node()) {
      int value = ((IndividualCode *)tree->expression)->value;
      assert(value >= min_char_number);
      LOGV(value) LCV(min_char_number) LCV(name);
      map_char_number[value - min_char_number].token_name = name;
    }
  }
  LOGS("finished tree loop");
  for (i = 0; i++ < ntkns; ) {
    token_number_map *tp = &map_token_number[i];
    int name = tp->token_name;
    unsigned pt;

    if (tp->non_terminal_flag) {
      continue;
    }
    if (name == 0) {
      continue;
    }
    pt = map_token_name[name].parse_tree;
    LOGV(pt) LCV(ParseTree(pt)->token_number);
    LOGV((int) map_parse_tree[pt].expression);
    if (map_parse_tree[pt].token_number) {
      continue;
    }
    assert(pt < ntc_size);
    ntc[pt]++;
  }
  LOGS("finished token loop");

  /*
   * Now check those trees that have more than one named token. Create
   * a specific token for the tree.
   */

  for (tree = ParseTree(); tree.next();) {
    int tn;

    if (ntc[tree] <= 1) {
      ntc[tree] = 0;
      continue;
    }
    tn = tree->token_number;
    if (tn == 0) {
      if (tree->expression->nameNode()) {
        int name = ((NamedCharSet *)tree->expression)->name;
        //LOGV(netAllocations());
        tn = id_token(name);
        Token(tn)->value_type = default_input_type;
      }
      else {
	tn = form_element_1(tree->expression);
      }
    }
    ntc[tree] = tn;
  }
  LOGS("trees scanned again");

  /*
   * Now check the named terminals again and either make up the
   * tree-token links or build shell productions.
   */

  for (i = 0; i++ < ntk;) {
    token_number_map *tp = &map_token_number[i];
    int name = tp->token_name;
    int pt;
    int tn;
    parse_tree_map *ptp;

    if (tp->non_terminal_flag) {
      continue;
    }
    if (name == 0) {
      continue;
    }
    pt = map_token_name[name].parse_tree;
    if (pt == 0) {
      continue;
    }
    ptp = &map_parse_tree[pt];
    tn = ptp->token_number;
    if (tn == 0) {
      tn = ntc[pt];
    }
    if (tn == 0) {
      ptp->token_number = i;
      continue;
    }
    shell_production(i, tn);
  }
  DEALLOCATE(ntc);
  LOGS("tokens scanned again");
  nforms_base = nforms;

  LOGV(ntkns) LCV(nforms);
  LOGV(error_token);
  for (i = 0; i++ < ntkns;) {
    token_number_map *tp = &map_token_number[i];
    int nn;
    if (tp->non_terminal_flag ||
        (int) i == error_token ||
        tp->key ||
        tp->token_set_id) {
      continue;
    }
    nn = tp->parse_tree;
    LOGV(i) LCV(nn);
    if (nn == 0 && (k = tp->token_name) != 0) {
        nn = map_token_name[k].parse_tree;
    }
    LOGV(nn);
    if (nn == 0) {
      char_set_error = 5;
    }
    else {
      tp->token_set_id = build_char_set(nn);
    }
    if (char_set_error == 0) {
      continue;
    }

    id = tp->token_name;
    if (id) {
      sprintf(ts," = %s", Symbol(id)->string.pointer());
    }
    else {
      ts[0] = 0;
    }
    switch (char_set_error) {
    case 1:
      ssprintf("Error defining T%03d: ",i);
      break;
    case 5:
      ssprintf("Undefined token T%03d: ",i);
      break;
    }
    atkn(i);
    log_error();
  }
  build_partition_sets(char_set_dict);
  LOGV(ntkns);
  LOGV(nforms);
}

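/*
 * Partition the character range according to the character sets in
 * dictionary d: two characters land in the same partition exactly
 * when they belong to the same sets. Each partition is then given (or
 * reuses) a token, and a token whose character set spans several
 * partitions is turned into a nonterminal with one generated rule per
 * partition.
 */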
static void build_partition_sets(list_dict *d) {
  LOGSECTION("build_partition_sets");
  int i;
  unsigned j, n;
  int *db;
  unsigned *ndx;
  int *lb;
  int m;
  //int *ps;
  int ts;
  unsigned ntk;
  //char *fa;
  unsigned *fl;
  int nf;
  unsigned cn;
  int *list;
  unsigned pn,nc;
  unsigned tn;

  if (max_char_number == -1 && min_char_number == 0) {
    return;
  }
  //ps = local_array(n_chars, int);
  LocalArray<int> ps(n_chars);
  n = d->nsx;
  db = d->text;
  ndx = d->ndx;
  //set = local_array(n, int *);
  LocalArray<int *> set(n);
  //k = local_array(n, int);
  LocalArray<int> k(n);
  //l = local_array(n, int);
  LocalArray<int> l(n);
  for (i = 1; (unsigned) i < n; i++) {
    set[i] = (db + ndx[i]);
    l[i] = *(set[i]++) - 1;
    k[i] = 0;
  }

  /*
   * For each character in the character range, scan all the character
   * sets. Make a list of the sets to which this particular character
   * belongs. This list represents a unique partition of the character
   * range. Use a dictionary to give each unique partition an
   * identifying number. Map each character to the partition to which
   * it belongs.
   */

  LOGS("Scanning characters");
  for (i = min_char_number; i <= max_char_number; i++) {
    int ix = i - min_char_number;
    iws();
    for (j = 1; j < n; j++) {
      while (k[j] < l[j] && set[j][k[j]] < i) {
	k[j]++;
      }
      if (k[j] == l[j] || set[j][k[j]] > i) {
	continue;
      }
      k[j]++;
      aws(j);
    }
    lb = list_base;
    m = rws();
    map_char_number[ix].part_number = ps[ix] = add_list_dict(lb,m,part_dict);
    LOGV(ix) LCV(ps[ix]);
  }

  /*
   * Scan each partition. For each partition, make a list of all the
   * characters that belong to this partition.
   */

  LOGS("Scanning partitions");
  m = part_dict->nsx;
  check_size(map_part_number, m, m);
  for (i = 0; (unsigned) i < part_dict->nsx; i++) {
    iws();
    for (j = 0; j < (unsigned) n_chars; j++) {
      if ((unsigned) i == map_char_number[j].part_number) {
	aws(j + min_char_number);
      }
    }
    map_part_number[i].chars_index = store_list(chars_list);
    fl = lstptr(map_part_number[i],chars);
    m = map_part_number[i].size = rws();
#ifdef INCLUDE_LOGGING
    if ((unsigned)(*fl - min_char_number) > 
	(max_char_number - min_char_number)) {
      LOGV(*fl) LCV(min_char_number) LCV(max_char_number);
    }
#endif
    assert((unsigned)(*fl - min_char_number) <= 
	   (unsigned) (max_char_number - min_char_number));
    if (m == 1 
	&& (j = map_char_number[*fl - min_char_number].token_number) != 0) {
      map_part_number[i].token_number = j;
    }
    LOGV(*fl) LCV(min_char_number) LCV(i) LCV(j);
  }

  m = part_dict->nsx;
  //fa = local_array(m, char);
  LocalArray<char> fa(m);
  m = char_set_dict->nsx;
  check_size(map_char_set,m,m);
  for (i = 1; i<m; i++) {
    char_set_map *csp = &map_char_set[i];
    partition_set(i, char_set_dict, ps, fa);
    csp->part_index = store_list(part_list);
    csp->nparts = rws();
  }


  ntk = ntkns+1;
  //assert(ok_index(map_token_number, ntkns));
  m = part_dict->nsx;

  LOGS("Scanning tokens");
  for (i = 1; (unsigned) i < ntk; i++) {
    part_number_map *ppn;
    unsigned tn;
    //int ctn = i;
    Token ctn(i);
    unsigned fn;
    ts = map_token_number[i].token_set_id;
    if (ts == 0) {
      continue;
    }
    nf = map_char_set[ts].nparts;
    if (nf != 1) {
      continue;
    }
    fl = lstptr(map_char_set[ts], part);
    ppn = &map_part_number[pn = *fl];
    assert(ok_index(map_part_number, pn));
    list = (int *) lstptr(*ppn, chars);
    //assert (pn > 0 && pn < m);
    assert(pn < (unsigned) m);
    nc = ppn->size;
    tn = ppn->token_number;
    if (tn == 0) {
      while (nc--) {
        cn = *list++ - min_char_number;
        assert(cn < n_chars);
        map_char_number[cn].token_number = i;
        LOGV(cn) LCV(i);
      }
      assert(pn < part_dict->nsx);
      ppn->token_number = i;
      map_token_number[i].part_number = pn;
      continue;
    }
    if (tn < ntk) {
      //ntkns++;
      //check_size(map_token_number, ntkns, ntkns);
      //ctn = ntkns;
      ctn = Token::create();
      ctn->value_type = default_input_type;
      //pf1x(ctn);
      //fn = form2();
      fn = makeRule(ctn);
      at(bnf_table, tn, fn);
      Rule(fn)->prim_tkn = tn;
      ppn->token_number = ctn;
      Token(tn)->operatorCandidate = ctn->operatorCandidate = 1;
      while (nc--) {
        cn = *list++ - min_char_number;
        assert(cn < n_chars);
        map_char_number[cn].token_number = ctn;
        LOGV(cn) LCV(ctn);
      }
      map_token_number[ctn].part_number = pn;
      map_token_number[ctn].value_type = default_input_type;
      map_token_number[tn].value_type = default_input_type;
      map_token_number[tn].part_number = 0;
      map_token_number[tn].non_terminal_flag = 1;
      tn = ctn;
    }
    //pf1x(tn);
    //fn = form2();
    fn = makeRule(tn);
    LOGV(i) LCV(fn) LCV(tn);
    at(bnf_table, i, fn);
    Rule(fn)->prim_tkn = i;
    map_token_number[i].non_terminal_flag = 1;
    ctn = tn;
  }
  for (i = 1; (unsigned) i < ntk; i++) {
    token_number_map *mti = &map_token_number[i];
    ts = mti->token_set_id;
    if (ts == 0) {
      continue;
    }
    nf = map_char_set[ts].nparts;
    if (nf <= 1) {
      continue;
    }
    fl = lstptr(map_char_set[ts], part);
    iws();
    j = 0;
    while (j < (unsigned) nf) {

      pn = fl[j];
      assert (pn > 0 && pn < (unsigned) m);
      tn = map_part_number[pn].token_number;
      if (tn == 0) {
        //ntkns++;
        //check_size(map_token_number,ntkns,ntkns);
	list = (int *) lstptr(map_part_number[pn], chars);
        nc = map_part_number[pn].size;
        tn = Token::create();
        while (nc--) {
          cn = *list++ - min_char_number;
          assert(cn < n_chars);
          map_char_number[cn].token_number = tn;
          LOGV(cn) LCV(tn);
        }
        //tn = ntkns;
        assert(pn < part_dict->nsx);
        map_part_number[pn].token_number = tn;
        map_token_number[tn].part_number = pn;
        map_token_number[tn].value_type = default_input_type;
	map_token_number[tn].operatorCandidate = 1;
      }
      //pf1x(tn);
      //aws(form2());
      aws(makeRule(tn));
      j++;
    }
    fl = (unsigned *) list_base;
    nf = rws();
    map_token_number[i].non_terminal_flag = 1;
    for (j = 0; j < (unsigned) nf; j++) {
      LOGV(i) LCV(fl[j]);
      at(bnf_table, i, fl[j]);
      //assert(ok_index(map_form_number, fl[j]));
      Rule rule(fl[j]);
      rule->prim_tkn = i;
    }
  }
}

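/*
 * Compute the partitions covered by character set ts: flag, in the
 * scratch array fa, the partition (looked up in the per-character map
 * ps) of every character in the set, then collect the flagged
 * partition numbers into the current work list (iws/aws).
 */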
static void partition_set(int ts, list_dict *d, int *ps, char *fa) {
  int *list;
  int n, np, i;

  np = part_dict->nsx;
  memset(fa, 0, np);
  list = d->text + d->ndx[ts];
  n = *(list++) - 1;
  for (i = 0; i<n; i++) {
    fa[ps[list[i] - min_char_number]] = 1;
  }
  iws();
  for (i = 0; i < np; i++) {
    if (fa[i]) {
      aws(i);
    }
  }
}

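/*
 * Build the character set for parse tree nn, or return the cached one
 * from a previous call: take the bitmap of the tree's expression,
 * fold case when the grammar is not case sensitive, and enter the
 * resulting character list in char_set_dict.
 */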
int build_char_set(int nn) {
  LOGSECTION("build_char_set");
  //char *v;
  //int i, k;
  int cs = map_parse_tree[nn].char_set;
  LOGV(cs) LCV(nn);

  if (cs) {
    return cs;
  }
  char_set_error = 0;
  LOGV(n_chars);

  //int *list = local_array(n_chars, int);
  LocalArray<int> list(n_chars);
  CharBitmap bitmap = map_parse_tree[nn].expression->bitmap();
  if (!case_sensitive) {
    bitmap.blurCase();
  }
  int count = bitmap.list(list);
  LOGV(count);
  cs = add_list_dict(list, count, char_set_dict);
  LOGV(cs);
  check_size(map_char_set, cs, cs);
  map_char_set[cs].parse_tree = nn;
  map_parse_tree[nn].char_set = cs;
  return cs;
}

static void log_undef_sym(int tn) {
  ssprintf("Undefined symbol: %s", Symbol(tn)->string.pointer());
  log_error();
}

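/*
 * Find the parse tree for the symbol <name>: use the tree attached to
 * the name itself, fall back to the tree of the corresponding token,
 * and report an undefined-symbol error (returning 0) if neither
 * exists.
 */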
int getParseTree(int name) {
  LOGSECTION("getParseTree");
  int pt = map_token_name[name].parse_tree;
  LOGV(pt);
  if (pt) {
    return pt;
  }

  int tn = map_token_name[name].token_number;
  if (tn) {
    pt = map_token_number[tn].parse_tree;
  }
  LOGV(tn) LCV(pt);
  if (pt) {
    return pt;
  }

  log_undef_sym(name);
  return 0;
}