diff anagram/agcore/cs.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children | |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anagram/agcore/cs.cpp Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,615 @@ +/* + * AnaGram, A System for Syntax Directed Programming + * Copyright 1993-2002 Parsifal Software. All Rights Reserved. + * See the file COPYING for license and usage terms. + * + * cs.cpp + */ + +#include "arrays.h" +#include "assert.h" +#include "bpu.h" +#include "config.h" +#include "cs.h" +#include "csexp.h" +#include "dict.h" +#include "error.h" +#include "myalloc.h" +#include "q1glbl.h" +#include "rpk.h" +#include "rule.h" +#include "stacks.h" +#include "symbol.h" +#include "token.h" +#include "tree.h" +#include "tsd.h" +#include "ut.h" + +//#define INCLUDE_LOGGING +#include "log.h" + + +static int char_set_error; + +static void build_partition_sets(list_dict *d); +static void partition_set(int ts, list_dict *d, int *ps, char *fa); + + +void set_universe(void) { + //unsigned tn; + + LOGSECTION("set_universe"); + LOGV(ntkns); + LOGV(nforms); + //min_char_number = 0; + max_char_number = -1; +/* + for (int ptIndex = 0; ptIndex++ < ntrees;) { + map_parse_tree[ptIndex].expression->checkRange(); + } +*/ + for (ParseTree pt; pt.next(); ) { + pt->expression->checkRange(); + } + CharSetExpression::checkMinimum(min_char_number); + if (max_char_number > 0x10000) { + max_char_number = 0x10000; + } + LOGV(min_char_number) LCV(max_char_number); + + //for (tn = 0; tn++ < ntkns; ) + for (Each<Token> token; token.loopNotFinished(); token.getNext()) { + //token_number_map *tp = &map_token_number[tn]; + //Token token(tn); + //int pt = tp->parse_tree; + ParseTree tree = token->parse_tree; + //parse_tree_map *ptp; + //int alias, name; + + //if (pt) continue; + if (tree.isNotNull()) { + continue; + } + Symbol alias = token->token_name; + if (alias.isNull()) { + continue; + } + //pt = map_token_name[alias].parse_tree; + tree = alias->parse_tree; + if (tree.isNull()) { + continue; + } + //ptp = &map_parse_tree[pt]; + + CharSetExpression *exp = tree->expression; + if (exp && exp->nameNode()) { + Symbol name = ((NamedCharSet *)exp)->name; + Token referencedToken = name->token_number; + if (referencedToken.isNull()) { + continue; + } + shell_production(token, referencedToken); + tree->token_number = token; + } + } + + // if (error_token == 0) { + // error_token = find_token_number("error"); + // } + if (grammar_token == 0) { + grammar_token = find_token_number("grammar"); + } + if (eof_token == 0) { + eof_token = find_token_number("eof"); + if (eof_token == 0) { + //int id = identify_string("eof", tkn_dict); + Symbol id("eof"); + if (id && (auto_resynch || error_token)) { + eof_token = id_token(id); + } + } + } + nforms_base = nforms; + if (min_char_number > max_char_number) { + return; + } + if (max_char_number < 255) { + max_char_number = 255; + } + n_chars = max_char_number - min_char_number + 1; + check_size(map_char_number, n_chars, n_chars); + LOGV(ntkns); + LOGV(nforms); +} + +static unsigned ntc_size; + +void build_sets(void) { + int k; + unsigned i; + int id; + char ts[100]; + int *ntc; + unsigned ntk = ntkns; + + if (charRangeDiagnostic || min_char_number > max_char_number) { + return; + } + + LOGSECTION("build_sets"); + LOGV(ntkns) LCV(nforms) LCV(error_token); + /* + * count the number of named tokens that point to a given parse tree. + * If there is only one such named token, it can be assigned to the + * parse tree. Otherwise, the parse tree needs its own token, and + * the named tokens have to have shells created. 
+ */ + //ntc_size = ntrees+1; + ntc_size = ParseTree::count(); + ZALLOCATE_ST(ntc, ntc_size); + LOGV(ntc_size); + ParseTree tree; + LOGS("Scanning trees"); + for (tree = ParseTree(); tree.next(); ) { + int name = tree->token_name; + if (name && tree->expression->char_node()) { + int value = ((IndividualCode *)tree->expression)->value; + assert(value >= min_char_number); + LOGV(value) LCV(min_char_number) LCV(name); + map_char_number[value - min_char_number].token_name = name; + } + } + LOGS("finished tree loop"); + for (i = 0; i++ < ntkns; ) { + token_number_map *tp = &map_token_number[i]; + int name = tp->token_name; + unsigned pt; + + if (tp->non_terminal_flag) { + continue; + } + if (name == 0) { + continue; + } + pt = map_token_name[name].parse_tree; + LOGV(pt) LCV(ParseTree(pt)->token_number); + LOGV((int) map_parse_tree[pt].expression); + if (map_parse_tree[pt].token_number) { + continue; + } + assert(pt < ntc_size); + ntc[pt]++; + } + LOGS("finished token loop"); + + /* + * Now check those trees that have more than one named token. Create + * a specific token for the tree. + */ + + for (tree = ParseTree(); tree.next();) { + int tn; + + if (ntc[tree] <= 1) { + ntc[tree] = 0; + continue; + } + tn = tree->token_number; + if (tn == 0) { + if (tree->expression->nameNode()) { + int name = ((NamedCharSet *)tree->expression)->name; + //LOGV(netAllocations()); + tn = id_token(name); + Token(tn)->value_type = default_input_type; + } + else { + tn = form_element_1(tree->expression); + } + } + ntc[tree] = tn; + } + LOGS("trees scanned again"); + + /* + * Now check the named terminals again and either make up the + * tree-token links or build shell productions + */ + + for (i = 0; i++ < ntk;) { + token_number_map *tp = &map_token_number[i]; + int name = tp->token_name; + int pt; + int tn; + parse_tree_map *ptp; + + if (tp->non_terminal_flag) { + continue; + } + if (name == 0) { + continue; + } + pt = map_token_name[name].parse_tree; + if (pt == 0) { + continue; + } + ptp = &map_parse_tree[pt]; + tn = ptp->token_number; + if (tn == 0) { + tn = ntc[pt]; + } + if (tn == 0) { + ptp->token_number = i; + continue; + } + shell_production(i, tn); + } + DEALLOCATE(ntc); + LOGS("tokens scanned again"); + nforms_base = nforms; + + LOGV(ntkns) LCV(nforms); + LOGV(error_token); + for (i = 0; i++ < ntkns;) { + token_number_map *tp = &map_token_number[i]; + int nn; + if (tp->non_terminal_flag || + (int) i == error_token || + tp->key || + tp->token_set_id) { + continue; + } + nn = tp->parse_tree; + LOGV(i) LCV(nn); + if (nn == 0 && (k = tp->token_name) != 0) { + nn = map_token_name[k].parse_tree; + } + LOGV(nn); + if (nn == 0) { + char_set_error = 5; + } + else { + tp->token_set_id = build_char_set(nn); + } + if (char_set_error == 0) { + continue; + } + + id = tp->token_name; + if (id) { + sprintf(ts," = %s", Symbol(id)->string.pointer()); + } + else { + ts[0] = 0; + } + switch (char_set_error) { + case 1: + ssprintf("Error defining T%03d: ",i); + break; + case 5: + ssprintf("Undefined token T%03d: ",i); + break; + } + atkn(i); + log_error(); + } + build_partition_sets(char_set_dict); + LOGV(ntkns); + LOGV(nforms); +} + +static void build_partition_sets(list_dict *d) { + LOGSECTION("build_partition_sets"); + int i; + unsigned j, n; + int *db; + unsigned *ndx; + int *lb; + int m; + //int *ps; + int ts; + unsigned ntk; + //char *fa; + unsigned *fl; + int nf; + unsigned cn; + int *list; + unsigned pn,nc; + unsigned tn; + + if (max_char_number == -1 && min_char_number == 0) { + return; + } + //ps = 
local_array(n_chars, int); + LocalArray<int> ps(n_chars); + n = d->nsx; + db = d->text; + ndx = d->ndx; + //set = local_array(n, int *); + LocalArray<int *> set(n); + //k = local_array(n, int); + LocalArray<int> k(n); + //l = local_array(n, int); + LocalArray<int> l(n); + for (i = 1; (unsigned) i < n; i++) { + set[i] = (db + ndx[i]); + l[i] = *(set[i]++) - 1; + k[i] = 0; + } + + /* + * For each character in the character range, scan all the character + * sets. Make a list of the sets to which this particular character + * belongs. This list represents a unique partition of the character + * range. Use a dictionary to give each unique partition an + * identifying number. Map each character to the partition to which + * it belongs. + */ + + LOGS("Scanning characters"); + for (i = min_char_number; i <= max_char_number; i++) { + int ix = i - min_char_number; + iws(); + for (j = 1; j < n; j++) { + while (k[j] < l[j] && set[j][k[j]] < i) { + k[j]++; + } + if (k[j] == l[j] || set[j][k[j]] > i) { + continue; + } + k[j]++; + aws(j); + } + lb = list_base; + m = rws(); + map_char_number[ix].part_number = ps[ix] = add_list_dict(lb,m,part_dict); + LOGV(ix) LCV(ps[ix]); + } + + /* + * Scan each partition. For each partition, make a list of all the + * characters that belong to this partition. + */ + + LOGS("Scanning partitions"); + m = part_dict->nsx; + check_size(map_part_number, m, m); + for (i = 0; (unsigned) i < part_dict->nsx; i++) { + iws(); + for (j = 0; j < (unsigned) n_chars; j++) { + if ((unsigned) i == map_char_number[j].part_number) { + aws(j + min_char_number); + } + } + map_part_number[i].chars_index = store_list(chars_list); + fl = lstptr(map_part_number[i],chars); + m = map_part_number[i].size = rws(); +#ifdef INCLUDE_LOGGING + if ((unsigned)(*fl - min_char_number) > + (max_char_number - min_char_number)) { + LOGV(*fl) LCV(min_char_number) LCV(max_char_number); + } +#endif + assert((unsigned)(*fl - min_char_number) <= + (unsigned) (max_char_number - min_char_number)); + if (m == 1 + && (j = map_char_number[*fl - min_char_number].token_number) != 0) { + map_part_number[i].token_number = j; + } + LOGV(*fl) LCV(min_char_number) LCV(i) LCV(j); + } + + m = part_dict->nsx; + //fa = local_array(m, char); + LocalArray<char> fa(m); + m = char_set_dict->nsx; + check_size(map_char_set,m,m); + for (i = 1; i<m; i++) { + char_set_map *csp = &map_char_set[i]; + partition_set(i, char_set_dict, ps, fa); + csp->part_index = store_list(part_list); + csp->nparts = rws(); + } + + + ntk = ntkns+1; + //assert(ok_index(map_token_number, ntkns)); + m = part_dict->nsx; + + LOGS("Scanning tokens"); + for (i = 1; (unsigned) i < ntk; i++) { + part_number_map *ppn; + unsigned tn; + //int ctn = i; + Token ctn(i); + unsigned fn; + ts = map_token_number[i].token_set_id; + if (ts == 0) { + continue; + } + nf = map_char_set[ts].nparts; + if (nf != 1) { + continue; + } + fl = lstptr(map_char_set[ts], part); + ppn = &map_part_number[pn = *fl]; + assert(ok_index(map_part_number, pn)); + list = (int *) lstptr(*ppn, chars); + //assert (pn > 0 && pn < m); + assert(pn < (unsigned) m); + nc = ppn->size; + tn = ppn->token_number; + if (tn == 0) { + while (nc--) { + cn = *list++ - min_char_number; + assert(cn < n_chars); + map_char_number[cn].token_number = i; + LOGV(cn) LCV(i); + } + assert(pn < part_dict->nsx); + ppn->token_number = i; + map_token_number[i].part_number = pn; + continue; + } + if (tn < ntk) { + //ntkns++; + //check_size(map_token_number, ntkns, ntkns); + //ctn = ntkns; + ctn = Token::create(); + ctn->value_type = 
default_input_type; + //pf1x(ctn); + //fn = form2(); + fn = makeRule(ctn); + at(bnf_table, tn, fn); + Rule(fn)->prim_tkn = tn; + ppn->token_number = ctn; + Token(tn)->operatorCandidate = ctn->operatorCandidate = 1; + while (nc--) { + cn = *list++ - min_char_number; + assert(cn < n_chars); + map_char_number[cn].token_number = ctn; + LOGV(cn) LCV(ctn); + } + map_token_number[ctn].part_number = pn; + map_token_number[ctn].value_type = default_input_type; + map_token_number[tn].value_type = default_input_type; + map_token_number[tn].part_number = 0; + map_token_number[tn].non_terminal_flag = 1; + tn = ctn; + } + //pf1x(tn); + //fn = form2(); + fn = makeRule(tn); + LOGV(i) LCV(fn) LCV(tn); + at(bnf_table, i, fn); + Rule(fn)->prim_tkn = i; + map_token_number[i].non_terminal_flag = 1; + ctn = tn; + } + for (i = 1; (unsigned) i < ntk; i++) { + token_number_map *mti = &map_token_number[i]; + ts = mti->token_set_id; + if (ts == 0) { + continue; + } + nf = map_char_set[ts].nparts; + if (nf <= 1) { + continue; + } + fl = lstptr(map_char_set[ts], part); + iws(); + j = 0; + while (j < (unsigned) nf) { + + pn = fl[j]; + assert (pn > 0 && pn < (unsigned) m); + tn = map_part_number[pn].token_number; + if (tn == 0) { + //ntkns++; + //check_size(map_token_number,ntkns,ntkns); + list = (int *) lstptr(map_part_number[pn], chars); + nc = map_part_number[pn].size; + tn = Token::create(); + while (nc--) { + cn = *list++ - min_char_number; + assert(cn < n_chars); + map_char_number[cn].token_number = tn; + LOGV(cn) LCV(tn); + } + //tn = ntkns; + assert(pn < part_dict->nsx); + map_part_number[pn].token_number = tn; + map_token_number[tn].part_number = pn; + map_token_number[tn].value_type = default_input_type; + map_token_number[tn].operatorCandidate = 1; + } + //pf1x(tn); + //aws(form2()); + aws(makeRule(tn)); + j++; + } + fl = (unsigned *) list_base; + nf = rws(); + map_token_number[i].non_terminal_flag = 1; + for (j = 0; j < (unsigned) nf; j++) { + LOGV(i) LCV(fl[j]); + at(bnf_table, i, fl[j]); + //assert(ok_index(map_form_number, fl[j])); + Rule rule(fl[j]); + rule->prim_tkn = i; + } + } +} + +static void partition_set(int ts, list_dict *d, int *ps, char *fa) { + int *list; + int n, np, i; + + np = part_dict->nsx; + memset(fa, 0, np); + list = d->text + d->ndx[ts]; + n = *(list++) - 1; + for (i = 0; i<n; i++) { + fa[ps[list[i] - min_char_number]] = 1; + } + iws(); + for (i = 0; i < np; i++) { + if (fa[i]) { + aws(i); + } + } +} + +int build_char_set(int nn) { + LOGSECTION("build_char_set"); + //char *v; + //int i, k; + int cs = map_parse_tree[nn].char_set; + LOGV(cs) LCV(nn); + + if (cs) { + return cs; + } + char_set_error = 0; + LOGV(n_chars); + + //int *list = local_array(n_chars, int); + LocalArray<int> list(n_chars); + CharBitmap bitmap = map_parse_tree[nn].expression->bitmap(); + if (!case_sensitive) { + bitmap.blurCase(); + } + int count = bitmap.list(list); + LOGV(count); + cs = add_list_dict(list, count, char_set_dict); + LOGV(cs); + check_size(map_char_set, cs, cs); + map_char_set[cs].parse_tree = nn; + map_parse_tree[nn].char_set = cs; + return cs; +} + +static void log_undef_sym(int tn) { + ssprintf("Undefined symbol: %s", Symbol(tn)->string.pointer()); + log_error(); +} + +int getParseTree(int name) { + LOGSECTION("getParseTree"); + int pt = map_token_name[name].parse_tree; + LOGV(pt); + if (pt) { + return pt; + } + + int tn = map_token_name[name].token_number; + if (tn) { + pt = map_token_number[tn].parse_tree; + } + LOGV(tn) LCV(pt); + if (pt) { + return pt; + } + + log_undef_sym(name); + return 0; 
+}
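The comment block in build_partition_sets() above describes the core trick: every character in the range is keyed by the list of character sets that contain it, each distinct membership list gets a partition number via a dictionary, and each character set then reduces to a union of partitions. The standalone sketch below illustrates that idea in isolation; it is not part of the AnaGram sources, and all names in it (CharSet, partitionCharacters, partition_sketch.cpp) are hypothetical stand-ins for the dictionaries and maps the real code uses.

// partition_sketch.cpp -- illustrative only, not part of cs.cpp.
#include <cstdio>
#include <map>
#include <set>
#include <vector>

using CharSet = std::set<int>;   // a character set as a set of character codes

// For every character code in [lo, hi], return the partition number it
// belongs to.  Two characters land in the same partition exactly when they
// are members of the same subset of 'sets' -- the membership list serves as
// the dictionary key, analogous to add_list_dict() keying part_dict above.
static std::vector<int> partitionCharacters(const std::vector<CharSet> &sets,
                                            int lo, int hi) {
  std::map<std::vector<int>, int> partDict;  // membership list -> partition id
  std::vector<int> partOf(hi - lo + 1);
  for (int c = lo; c <= hi; c++) {
    std::vector<int> membership;
    for (std::size_t j = 0; j < sets.size(); j++) {
      if (sets[j].count(c)) {
        membership.push_back(static_cast<int>(j));
      }
    }
    auto it = partDict.find(membership);
    if (it == partDict.end()) {
      it = partDict.emplace(membership, static_cast<int>(partDict.size())).first;
    }
    partOf[c - lo] = it->second;
  }
  return partOf;
}

int main() {
  // {a,b,c} and {b,c,d} split the range a..d into three partitions:
  // {a} (first set only), {b,c} (both sets), {d} (second set only).
  std::vector<CharSet> sets = { {'a', 'b', 'c'}, {'b', 'c', 'd'} };
  std::vector<int> part = partitionCharacters(sets, 'a', 'd');
  for (int c = 'a'; c <= 'd'; c++) {
    std::printf("%c -> partition %d\n", c, part[c - 'a']);
  }
  return 0;
}

Compiled and run, the sketch prints a -> 0, b -> 1, c -> 1, d -> 2: characters that no character set distinguishes share a partition, which is the same grouping the real code records in map_char_number[].part_number.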