view anagram/agcore/keyword.cpp @ 21:1c9dac05d040

Add lint-style FALLTHROUGH annotations to fallthrough cases. (in the parse engine and thus the output code) Document this, because the old output causes warnings with gcc10.
author David A. Holland
date Mon, 13 Jun 2022 00:04:38 -0400
parents 607e3be6bad8
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * keyword.cpp
 */

#include "arrays.h"
#include "bitmap.h"
#include "bpe3.h"
#include "csexp.h"
#include "dict.h"
#include "ftpar.h"
#include "keyword.h"
#include "myalloc.h"
#include "q1glbl.h"
#include "stacks.h"
#include "tsd.h"
#include "ut.h"

//#define INCLUDE_LOGGING
#include "log.h"


static tsd *key_work_table;
static int n_ends = 0;


static void tokensTo(unsigned sn, AgStack<int> &tokenStack) {
  LOGSECTION("tokensTo");
  AgBalancedTree<int> tree;
  while (sn) {
    tree.insert(sn);
    state_number_map *sp = &map_state_number[sn];
    unsigned *list = lstptr(*sp, previous_states);
    int n = sp->n_previous_states;
    LOGV(sn) LCV(sp->n_previous_states);
    while (n--) {
      LOGV(n) LCV(*list) LCV(map_state_number[*list].char_token);
      while (tree.includes(*list)) {
	list++;
      }
      int k = map_state_number[*list].n_actions;
      while (k--) {
        if (lstptr(map_state_number[*list], p_actions)[k] != sn) {
	  continue;
	}
        if (lstptr(map_state_number[*list], a_actions)[k] != pe_go_to) {
	  continue;
	}
        break;
      }
      assert(k >= 0);
      tokenStack.push(lstptr(map_state_number[*list], t_actions)[k]);
      sn = *list;
      LOGV(n) LCV(sn);
      break;
    }
    assert(n >= 0);
  }
}

/*
 * Each time it is necessary for the parser to get a new token, it
 * begins by checking the key_word_index table indexed by the current
 * state. If the entry is zero, there are no keywords to be identified.
 * If the entry is non_zero, the entry identifies a location in the
 * key_word identification table. The id_key_word function is then
 * invoked to determine whether there is a keyword present.
 *
 * The id_key_word function is a table driven, finite state automaton.
 * The actions are as follows:
 *    jump to next state
 *      The string in hand is not itself a key, but matches the
 *      beginning of one or more keys.
 *    set key and jump to next state
 *      The string in hand might be a key, but it also matches the
 *      beginning of at least one other key. Save the token number and read
 *      pointer for this key in case we do not get a complete match on
 *      the longer keys.
 *    accept key
 *      The string in hand is a key, and there are no others which match
 *      partially.
 *    end key
 *      The string in hand is the beginning of a key, there are no other
 *      matches, and it is only necessary to see if we get a match for the
 *      remainder of the key.
 *    no match key
 *      The string in hand does not match any key but there is a
 *      substring which matched. Go return it.
 */

typedef enum {
  accept_key,
  set_key,
  jmp_key,
  end_key,
  no_match_key,
  cf_accept_key,
  cf_set_key,
  cf_end_key
} key_words;


static int build_key_table_state(char *sp[], int *tkn, unsigned ns) {
  int n;
  int nw = key_work_table->nt;
  unsigned i = 0, j, k;
  int ch, act, parm = 0, jmp = 0;

  check_tsd(key_work_table);
  while (i < ns) {
    ch = *sp[i]++;
    parm = jmp = 0;
    if (ch == 0) {
      continue;
    }
    for (j = i+1, k = 0; j < ns && *sp[j] == ch;) {
      sp[j++]++;
      k++;
    }
    if (*sp[i] == 0) {
      parm = tkn[i];
      if (k == 0) {
         act = accept_key;
      }
      else {
        act = set_key;
        jmp = build_key_table_state(&sp[i+1], &tkn[i+1], k);
      }
    }
    else if (k == 0) {
      act = end_key;
      parm = tkn[i];
      jmp = n_ends;
      ass(sp[i]);
      acs(0);
      n_ends += strlen(sp[i]) + 1;
    }
    else {
      act = jmp_key;
      jmp = build_key_table_state(&sp[i], &tkn[i], k+1);
    }
    at(key_work_table, ch, act, parm, jmp);
    i = j;
  }
  n = key_table->nt;
  k = nw;
  while (k < key_work_table->nt) {
    xtxf(key_work_table, k, &ch, &act, &parm, &jmp);
    at(key_table, ch, act, parm, jmp);
    k++;
  }
  at(key_table, 255, no_match_key, 0, 0);
  key_work_table->nt = nw;
  return n;
}

static int build_key_table(char *sp[], int tkn[], int ns) {
  int i, j;
  char *p;
  int t;
  LOGSECTION("build_key_table");

  for (i = 0; i < ns; i++) {
    for (j = i+1; j < ns; j++) {
      if (strcmp(sp[j], sp[i]) < 0) {
        p = sp[i];
        sp[i] = sp[j];
        sp[j] = p;
        t = tkn[i];
        tkn[i] = tkn[j];
        tkn[j] = t;
      }
    }
  }
  return build_key_table_state(sp, tkn, ns);
}

void build_key_tables(void) {
  unsigned i;
  unsigned nki = key_list_dict->nsx;
  //int *ki;
  extern char *key_ends;
  extern unsigned n_key_ends;
  extern unsigned nstates;

  LOGSECTION("build_key_tables");

  if (Keyword::count() <= 1) {
    return;
  }
  //ki = local_array(nki, int);
  LocalArray<int> ki(nki);
  memset(ki, 0, nki*sizeof(*ki));

  key_table = init_tsd(4);
  key_work_table = init_tsd(4);
  at(key_table, 0, 0, 0, 0);
  ics();
  n_ends = 0;
  ki[0] = 0;

  for (i = 1; i < key_list_dict->nsx; i++) {
    int *lb = dict_str(key_list_dict, i);
    int ns = *lb++ - 1;
    char **strings = ALLOCATE(ns, char *);
    int *tokens = ALLOCATE(ns, int);
    int j;

    for (j = 0; j < ns; j++) {
      Keyword key = map_token_number[lb[j]].key;
      strings[j] = key->string.pointer();
      tokens[j] = lb[j];
    }
    ki[i] = build_key_table(strings, tokens, ns);
    DEALLOCATE(strings);
    DEALLOCATE(tokens);
  }
  delete_tsd(key_work_table);
  n_key_ends = tis();
  key_ends = build_string();
  for (i = 0; i < nstates; i++) {
    state_number_map *sp = &map_state_number[i];
    int k = sp->key_list;

    if (k == 0) {
      continue;
    }
    sp->key_index = (unsigned short) ki[k];
  }
}

void check_key_reserve(void) {
  int n_keys = Keyword::count();
  int k = distinguishSets.size();

  if (n_keys <= 1 || k == 0) {
    return;
  }
  while (k--) {
    int pt = distinguishSets[k];
    CharBitmap bitmap = map_parse_tree[pt].expression->bitmap();
    for (Each<Keyword> keyword; keyword.loopNotFinished(); keyword.getNext()) {
      KeywordDescriptor &keywordDescriptor(keyword);
      //unsigned char *str = (unsigned char *) keyword->string.pointer();
      unsigned char *str = (unsigned char *) keywordDescriptor.string.pointer();
      //if (keyword->reserve) continue;
      if (keywordDescriptor.reserve) {
	continue;
      }
      while (*str && bitmap.getBit(*str)) {
	str++;
      }
      if (*str) {
	continue;
      }
      //keyword->reserve = pt;
      keywordDescriptor.reserve = pt;
    }
  }
}


void append_key(int kn) {
  unsigned char *s = (unsigned char *) Keyword(kn)->string.pointer();
  while (*s) {
    append_ascii_char(*s++);
  }
}

//int keyword_problem(unsigned sn,const unsigned char *p, int key){
//int keyword_problem(unsigned sn,const unsigned char *p, Keyword key){
//int keyword_problem(unsigned sn,const AgArray<int> input, Keyword key){
int keyword_problem(unsigned sn, const AgArray<int> input, Keyword) {
  LOGSECTION("keyword_problem");

  AgStack<int> tokens;

  // Get a path to this state
  tokensTo(sn, tokens);

#ifdef INCLUDE_LOGGING
  {
    for (int i = 0; i < tokens.size(); i++) {
      LOGV(tokens[i]);
    }
  }
#endif

  FtParser parser;

  // Advance to current state
  while (tokens.size()) {
    parser.parseToken(tokens.pop());
  }

/*
  LOGV(key);
  // Disable keyword under test
  parser.ignoreKeyword(key);
  parser.parse((const char *) p);
  traceCounts.discardData();
  if (parser.processState < FtParser::syntaxError) {
    return 1;
  }
  return -1;
*/

  int n = input.size();
  int flag = 1;
  for (int i = 0; flag == 1 && i < n; i++) {
    parser.parseToken(input[i]);
    if (parser.processState > FtParser::running) {
      flag = -1;
    }
  }
  traceCounts.discardData();
  return flag;
}

Keyword::Keyword()
  : KeyedObjectRegister<KeywordDescriptor>()
{}

Keyword::Keyword(unsigned x)
  : KeyedObjectRegister<KeywordDescriptor>(x)
{}

Keyword::Keyword(const Keyword &t)
  : KeyedObjectRegister<KeywordDescriptor>(t)
{}

void Keyword::operator = (const Keyword &t) {
  KeyedObjectRegister<KeywordDescriptor>::operator = (t);
}

Keyword::Keyword(const char *x)
  : KeyedObjectRegister<KeywordDescriptor>(KeywordDescriptor(x))
{}

void Keyword::reset() {
  LOGSECTION("Keyword::reset");
  KeyedObjectRegister<KeywordDescriptor>::reset();
  Keyword("");
  AgString string = Keyword((unsigned) 0)->string;
  LOGV(string.pointer());
  LOGV(descriptorList.size()) LCV(Keyword((unsigned) 0)->string);
  LOGS("reset complete");
}

Keyword Keyword::sorted(int x) {
  LOGSECTION("Keyword::sorted");
  LOGV(x);
  Keyword keyword = tree[x];
  LOGV(keyword) LCV(keyword->string);
  return keyword;
}

Keyword Keyword::create() {
  LOGSECTION("Keyword::create");
  tss();
  LOGV(string_base);
  Keyword keyword(string_base);
  rcs();
  LOGV(keyword);
  return keyword;
}

Keyword add_string_dict(const char *s, Keyword::Dict &d) {
  return d.add_string(s);
}

//Keyword::Map map_token_name;

KeywordDescriptor &Keyword::Map::operator [] (unsigned x) {
  LOGSECTION("Keyword::Map::operator []");
  LOGV(x) LCV(descriptorList.size());
  assert(x < Keyword::descriptorList.size());
  return Keyword::descriptorList[x];
}

Keyword Keyword::Dict::add_string(const char *s) {
  return Keyword(s);
}

KeywordDescriptor::KeywordDescriptor()
  : reserve(0)
{
  // Nothing to do
}

KeywordDescriptor::KeywordDescriptor(const char *s)
  : string(s), reserve(0)
{
  // Nothing to do
}

int KeywordDescriptor::operator < (const KeywordDescriptor &d) const {
  return string < d.string;
}

const int Keyword::first = 1;