Mercurial > ~dholland > hg > ag > index.cgi
view anagram/agcore/keyword.cpp @ 12:aab9ff6af791
Strengthen the build hack for non-DOS targets.
author | David A. Holland |
---|---|
date | Tue, 31 May 2022 00:58:42 -0400 |
parents | 607e3be6bad8 |
children |
line wrap: on
line source
/* * AnaGram, A System for Syntax Directed Programming * Copyright 1993-2002 Parsifal Software. All Rights Reserved. * See the file COPYING for license and usage terms. * * keyword.cpp */ #include "arrays.h" #include "bitmap.h" #include "bpe3.h" #include "csexp.h" #include "dict.h" #include "ftpar.h" #include "keyword.h" #include "myalloc.h" #include "q1glbl.h" #include "stacks.h" #include "tsd.h" #include "ut.h" //#define INCLUDE_LOGGING #include "log.h" static tsd *key_work_table; static int n_ends = 0; static void tokensTo(unsigned sn, AgStack<int> &tokenStack) { LOGSECTION("tokensTo"); AgBalancedTree<int> tree; while (sn) { tree.insert(sn); state_number_map *sp = &map_state_number[sn]; unsigned *list = lstptr(*sp, previous_states); int n = sp->n_previous_states; LOGV(sn) LCV(sp->n_previous_states); while (n--) { LOGV(n) LCV(*list) LCV(map_state_number[*list].char_token); while (tree.includes(*list)) { list++; } int k = map_state_number[*list].n_actions; while (k--) { if (lstptr(map_state_number[*list], p_actions)[k] != sn) { continue; } if (lstptr(map_state_number[*list], a_actions)[k] != pe_go_to) { continue; } break; } assert(k >= 0); tokenStack.push(lstptr(map_state_number[*list], t_actions)[k]); sn = *list; LOGV(n) LCV(sn); break; } assert(n >= 0); } } /* * Each time it is necessary for the parser to get a new token, it * begins by checking the key_word_index table indexed by the current * state. If the entry is zero, there are no keywords to be identified. * If the entry is non_zero, the entry identifies a location in the * key_word identification table. The id_key_word function is then * invoked to determine whether there is a keyword present. * * The id_key_word function is a table driven, finite state automaton. * The actions are as follows: * jump to next state * The string in hand is not itself a key, but matches the * beginning of one or more keys. * set key and jump to next state * The string in hand might be a key, but it also matches the * beginning of at least one other key. Save the token number and read * pointer for this key in case we do not get a complete match on * the longer keys. * accept key * The string in hand is a key, and there are no others which match * partially. * end key * The string in hand is the beginning of a key, there are no other * matches, and it is only necessary to see if we get a match for the * remainder of the key. * no match key * The string in hand does not match any key but there is a * substring which matched. Go return it. */ typedef enum { accept_key, set_key, jmp_key, end_key, no_match_key, cf_accept_key, cf_set_key, cf_end_key } key_words; static int build_key_table_state(char *sp[], int *tkn, unsigned ns) { int n; int nw = key_work_table->nt; unsigned i = 0, j, k; int ch, act, parm = 0, jmp = 0; check_tsd(key_work_table); while (i < ns) { ch = *sp[i]++; parm = jmp = 0; if (ch == 0) { continue; } for (j = i+1, k = 0; j < ns && *sp[j] == ch;) { sp[j++]++; k++; } if (*sp[i] == 0) { parm = tkn[i]; if (k == 0) { act = accept_key; } else { act = set_key; jmp = build_key_table_state(&sp[i+1], &tkn[i+1], k); } } else if (k == 0) { act = end_key; parm = tkn[i]; jmp = n_ends; ass(sp[i]); acs(0); n_ends += strlen(sp[i]) + 1; } else { act = jmp_key; jmp = build_key_table_state(&sp[i], &tkn[i], k+1); } at(key_work_table, ch, act, parm, jmp); i = j; } n = key_table->nt; k = nw; while (k < key_work_table->nt) { xtxf(key_work_table, k, &ch, &act, &parm, &jmp); at(key_table, ch, act, parm, jmp); k++; } at(key_table, 255, no_match_key, 0, 0); key_work_table->nt = nw; return n; } static int build_key_table(char *sp[], int tkn[], int ns) { int i, j; char *p; int t; LOGSECTION("build_key_table"); for (i = 0; i < ns; i++) { for (j = i+1; j < ns; j++) { if (strcmp(sp[j], sp[i]) < 0) { p = sp[i]; sp[i] = sp[j]; sp[j] = p; t = tkn[i]; tkn[i] = tkn[j]; tkn[j] = t; } } } return build_key_table_state(sp, tkn, ns); } void build_key_tables(void) { unsigned i; unsigned nki = key_list_dict->nsx; //int *ki; extern char *key_ends; extern unsigned n_key_ends; extern unsigned nstates; LOGSECTION("build_key_tables"); if (Keyword::count() <= 1) { return; } //ki = local_array(nki, int); LocalArray<int> ki(nki); memset(ki, 0, nki*sizeof(*ki)); key_table = init_tsd(4); key_work_table = init_tsd(4); at(key_table, 0, 0, 0, 0); ics(); n_ends = 0; ki[0] = 0; for (i = 1; i < key_list_dict->nsx; i++) { int *lb = dict_str(key_list_dict, i); int ns = *lb++ - 1; char **strings = ALLOCATE(ns, char *); int *tokens = ALLOCATE(ns, int); int j; for (j = 0; j < ns; j++) { Keyword key = map_token_number[lb[j]].key; strings[j] = key->string.pointer(); tokens[j] = lb[j]; } ki[i] = build_key_table(strings, tokens, ns); DEALLOCATE(strings); DEALLOCATE(tokens); } delete_tsd(key_work_table); n_key_ends = tis(); key_ends = build_string(); for (i = 0; i < nstates; i++) { state_number_map *sp = &map_state_number[i]; int k = sp->key_list; if (k == 0) { continue; } sp->key_index = (unsigned short) ki[k]; } } void check_key_reserve(void) { int n_keys = Keyword::count(); int k = distinguishSets.size(); if (n_keys <= 1 || k == 0) { return; } while (k--) { int pt = distinguishSets[k]; CharBitmap bitmap = map_parse_tree[pt].expression->bitmap(); for (Each<Keyword> keyword; keyword.loopNotFinished(); keyword.getNext()) { KeywordDescriptor &keywordDescriptor(keyword); //unsigned char *str = (unsigned char *) keyword->string.pointer(); unsigned char *str = (unsigned char *) keywordDescriptor.string.pointer(); //if (keyword->reserve) continue; if (keywordDescriptor.reserve) { continue; } while (*str && bitmap.getBit(*str)) { str++; } if (*str) { continue; } //keyword->reserve = pt; keywordDescriptor.reserve = pt; } } } void append_key(int kn) { unsigned char *s = (unsigned char *) Keyword(kn)->string.pointer(); while (*s) { append_ascii_char(*s++); } } //int keyword_problem(unsigned sn,const unsigned char *p, int key){ //int keyword_problem(unsigned sn,const unsigned char *p, Keyword key){ //int keyword_problem(unsigned sn,const AgArray<int> input, Keyword key){ int keyword_problem(unsigned sn, const AgArray<int> input, Keyword) { LOGSECTION("keyword_problem"); AgStack<int> tokens; // Get a path to this state tokensTo(sn, tokens); #ifdef INCLUDE_LOGGING { for (int i = 0; i < tokens.size(); i++) { LOGV(tokens[i]); } } #endif FtParser parser; // Advance to current state while (tokens.size()) { parser.parseToken(tokens.pop()); } /* LOGV(key); // Disable keyword under test parser.ignoreKeyword(key); parser.parse((const char *) p); traceCounts.discardData(); if (parser.processState < FtParser::syntaxError) { return 1; } return -1; */ int n = input.size(); int flag = 1; for (int i = 0; flag == 1 && i < n; i++) { parser.parseToken(input[i]); if (parser.processState > FtParser::running) { flag = -1; } } traceCounts.discardData(); return flag; } Keyword::Keyword() : KeyedObjectRegister<KeywordDescriptor>() {} Keyword::Keyword(unsigned x) : KeyedObjectRegister<KeywordDescriptor>(x) {} Keyword::Keyword(const Keyword &t) : KeyedObjectRegister<KeywordDescriptor>(t) {} void Keyword::operator = (const Keyword &t) { KeyedObjectRegister<KeywordDescriptor>::operator = (t); } Keyword::Keyword(const char *x) : KeyedObjectRegister<KeywordDescriptor>(KeywordDescriptor(x)) {} void Keyword::reset() { LOGSECTION("Keyword::reset"); KeyedObjectRegister<KeywordDescriptor>::reset(); Keyword(""); AgString string = Keyword((unsigned) 0)->string; LOGV(string.pointer()); LOGV(descriptorList.size()) LCV(Keyword((unsigned) 0)->string); LOGS("reset complete"); } Keyword Keyword::sorted(int x) { LOGSECTION("Keyword::sorted"); LOGV(x); Keyword keyword = tree[x]; LOGV(keyword) LCV(keyword->string); return keyword; } Keyword Keyword::create() { LOGSECTION("Keyword::create"); tss(); LOGV(string_base); Keyword keyword(string_base); rcs(); LOGV(keyword); return keyword; } Keyword add_string_dict(const char *s, Keyword::Dict &d) { return d.add_string(s); } //Keyword::Map map_token_name; KeywordDescriptor &Keyword::Map::operator [] (unsigned x) { LOGSECTION("Keyword::Map::operator []"); LOGV(x) LCV(descriptorList.size()); assert(x < Keyword::descriptorList.size()); return Keyword::descriptorList[x]; } Keyword Keyword::Dict::add_string(const char *s) { return Keyword(s); } KeywordDescriptor::KeywordDescriptor() : reserve(0) { // Nothing to do } KeywordDescriptor::KeywordDescriptor(const char *s) : string(s), reserve(0) { // Nothing to do } int KeywordDescriptor::operator < (const KeywordDescriptor &d) const { return string < d.string; } const int Keyword::first = 1;