Mercurial > ~dholland > hg > ag > index.cgi
view anagram/agcore/cs.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line source
/* * AnaGram, A System for Syntax Directed Programming * Copyright 1993-2002 Parsifal Software. All Rights Reserved. * See the file COPYING for license and usage terms. * * cs.cpp */ #include "arrays.h" #include "assert.h" #include "bpu.h" #include "config.h" #include "cs.h" #include "csexp.h" #include "dict.h" #include "error.h" #include "myalloc.h" #include "q1glbl.h" #include "rpk.h" #include "rule.h" #include "stacks.h" #include "symbol.h" #include "token.h" #include "tree.h" #include "tsd.h" #include "ut.h" //#define INCLUDE_LOGGING #include "log.h" static int char_set_error; static void build_partition_sets(list_dict *d); static void partition_set(int ts, list_dict *d, int *ps, char *fa); void set_universe(void) { //unsigned tn; LOGSECTION("set_universe"); LOGV(ntkns); LOGV(nforms); //min_char_number = 0; max_char_number = -1; /* for (int ptIndex = 0; ptIndex++ < ntrees;) { map_parse_tree[ptIndex].expression->checkRange(); } */ for (ParseTree pt; pt.next(); ) { pt->expression->checkRange(); } CharSetExpression::checkMinimum(min_char_number); if (max_char_number > 0x10000) { max_char_number = 0x10000; } LOGV(min_char_number) LCV(max_char_number); //for (tn = 0; tn++ < ntkns; ) for (Each<Token> token; token.loopNotFinished(); token.getNext()) { //token_number_map *tp = &map_token_number[tn]; //Token token(tn); //int pt = tp->parse_tree; ParseTree tree = token->parse_tree; //parse_tree_map *ptp; //int alias, name; //if (pt) continue; if (tree.isNotNull()) { continue; } Symbol alias = token->token_name; if (alias.isNull()) { continue; } //pt = map_token_name[alias].parse_tree; tree = alias->parse_tree; if (tree.isNull()) { continue; } //ptp = &map_parse_tree[pt]; CharSetExpression *exp = tree->expression; if (exp && exp->nameNode()) { Symbol name = ((NamedCharSet *)exp)->name; Token referencedToken = name->token_number; if (referencedToken.isNull()) { continue; } shell_production(token, referencedToken); tree->token_number = token; } } // if (error_token == 0) { // error_token = find_token_number("error"); // } if (grammar_token == 0) { grammar_token = find_token_number("grammar"); } if (eof_token == 0) { eof_token = find_token_number("eof"); if (eof_token == 0) { //int id = identify_string("eof", tkn_dict); Symbol id("eof"); if (id && (auto_resynch || error_token)) { eof_token = id_token(id); } } } nforms_base = nforms; if (min_char_number > max_char_number) { return; } if (max_char_number < 255) { max_char_number = 255; } n_chars = max_char_number - min_char_number + 1; check_size(map_char_number, n_chars, n_chars); LOGV(ntkns); LOGV(nforms); } static unsigned ntc_size; void build_sets(void) { int k; unsigned i; int id; char ts[100]; int *ntc; unsigned ntk = ntkns; if (charRangeDiagnostic || min_char_number > max_char_number) { return; } LOGSECTION("build_sets"); LOGV(ntkns) LCV(nforms) LCV(error_token); /* * count the number of named tokens that point to a given parse tree. * If there is only one such named token, it can be assigned to the * parse tree. Otherwise, the parse tree needs its own token, and * the named tokens have to have shells created. */ //ntc_size = ntrees+1; ntc_size = ParseTree::count(); ZALLOCATE_ST(ntc, ntc_size); LOGV(ntc_size); ParseTree tree; LOGS("Scanning trees"); for (tree = ParseTree(); tree.next(); ) { int name = tree->token_name; if (name && tree->expression->char_node()) { int value = ((IndividualCode *)tree->expression)->value; assert(value >= min_char_number); LOGV(value) LCV(min_char_number) LCV(name); map_char_number[value - min_char_number].token_name = name; } } LOGS("finished tree loop"); for (i = 0; i++ < ntkns; ) { token_number_map *tp = &map_token_number[i]; int name = tp->token_name; unsigned pt; if (tp->non_terminal_flag) { continue; } if (name == 0) { continue; } pt = map_token_name[name].parse_tree; LOGV(pt) LCV(ParseTree(pt)->token_number); LOGV((int) map_parse_tree[pt].expression); if (map_parse_tree[pt].token_number) { continue; } assert(pt < ntc_size); ntc[pt]++; } LOGS("finished token loop"); /* * Now check those trees that have more than one named token. Create * a specific token for the tree. */ for (tree = ParseTree(); tree.next();) { int tn; if (ntc[tree] <= 1) { ntc[tree] = 0; continue; } tn = tree->token_number; if (tn == 0) { if (tree->expression->nameNode()) { int name = ((NamedCharSet *)tree->expression)->name; //LOGV(netAllocations()); tn = id_token(name); Token(tn)->value_type = default_input_type; } else { tn = form_element_1(tree->expression); } } ntc[tree] = tn; } LOGS("trees scanned again"); /* * Now check the named terminals again and either make up the * tree-token links or build shell productions */ for (i = 0; i++ < ntk;) { token_number_map *tp = &map_token_number[i]; int name = tp->token_name; int pt; int tn; parse_tree_map *ptp; if (tp->non_terminal_flag) { continue; } if (name == 0) { continue; } pt = map_token_name[name].parse_tree; if (pt == 0) { continue; } ptp = &map_parse_tree[pt]; tn = ptp->token_number; if (tn == 0) { tn = ntc[pt]; } if (tn == 0) { ptp->token_number = i; continue; } shell_production(i, tn); } DEALLOCATE(ntc); LOGS("tokens scanned again"); nforms_base = nforms; LOGV(ntkns) LCV(nforms); LOGV(error_token); for (i = 0; i++ < ntkns;) { token_number_map *tp = &map_token_number[i]; int nn; if (tp->non_terminal_flag || (int) i == error_token || tp->key || tp->token_set_id) { continue; } nn = tp->parse_tree; LOGV(i) LCV(nn); if (nn == 0 && (k = tp->token_name) != 0) { nn = map_token_name[k].parse_tree; } LOGV(nn); if (nn == 0) { char_set_error = 5; } else { tp->token_set_id = build_char_set(nn); } if (char_set_error == 0) { continue; } id = tp->token_name; if (id) { sprintf(ts," = %s", Symbol(id)->string.pointer()); } else { ts[0] = 0; } switch (char_set_error) { case 1: ssprintf("Error defining T%03d: ",i); break; case 5: ssprintf("Undefined token T%03d: ",i); break; } atkn(i); log_error(); } build_partition_sets(char_set_dict); LOGV(ntkns); LOGV(nforms); } static void build_partition_sets(list_dict *d) { LOGSECTION("build_partition_sets"); int i; unsigned j, n; int *db; unsigned *ndx; int *lb; int m; //int *ps; int ts; unsigned ntk; //char *fa; unsigned *fl; int nf; unsigned cn; int *list; unsigned pn,nc; unsigned tn; if (max_char_number == -1 && min_char_number == 0) { return; } //ps = local_array(n_chars, int); LocalArray<int> ps(n_chars); n = d->nsx; db = d->text; ndx = d->ndx; //set = local_array(n, int *); LocalArray<int *> set(n); //k = local_array(n, int); LocalArray<int> k(n); //l = local_array(n, int); LocalArray<int> l(n); for (i = 1; (unsigned) i < n; i++) { set[i] = (db + ndx[i]); l[i] = *(set[i]++) - 1; k[i] = 0; } /* * For each character in the character range, scan all the character * sets. Make a list of the sets to which this particular character * belongs. This list represents a unique partition of the character * range. Use a dictionary to give each unique partition an * identifying number. Map each character to the partition to which * it belongs. */ LOGS("Scanning characters"); for (i = min_char_number; i <= max_char_number; i++) { int ix = i - min_char_number; iws(); for (j = 1; j < n; j++) { while (k[j] < l[j] && set[j][k[j]] < i) { k[j]++; } if (k[j] == l[j] || set[j][k[j]] > i) { continue; } k[j]++; aws(j); } lb = list_base; m = rws(); map_char_number[ix].part_number = ps[ix] = add_list_dict(lb,m,part_dict); LOGV(ix) LCV(ps[ix]); } /* * Scan each partition. For each partition, make a list of all the * characters that belong to this partition. */ LOGS("Scanning partitions"); m = part_dict->nsx; check_size(map_part_number, m, m); for (i = 0; (unsigned) i < part_dict->nsx; i++) { iws(); for (j = 0; j < (unsigned) n_chars; j++) { if ((unsigned) i == map_char_number[j].part_number) { aws(j + min_char_number); } } map_part_number[i].chars_index = store_list(chars_list); fl = lstptr(map_part_number[i],chars); m = map_part_number[i].size = rws(); #ifdef INCLUDE_LOGGING if ((unsigned)(*fl - min_char_number) > (max_char_number - min_char_number)) { LOGV(*fl) LCV(min_char_number) LCV(max_char_number); } #endif assert((unsigned)(*fl - min_char_number) <= (unsigned) (max_char_number - min_char_number)); if (m == 1 && (j = map_char_number[*fl - min_char_number].token_number) != 0) { map_part_number[i].token_number = j; } LOGV(*fl) LCV(min_char_number) LCV(i) LCV(j); } m = part_dict->nsx; //fa = local_array(m, char); LocalArray<char> fa(m); m = char_set_dict->nsx; check_size(map_char_set,m,m); for (i = 1; i<m; i++) { char_set_map *csp = &map_char_set[i]; partition_set(i, char_set_dict, ps, fa); csp->part_index = store_list(part_list); csp->nparts = rws(); } ntk = ntkns+1; //assert(ok_index(map_token_number, ntkns)); m = part_dict->nsx; LOGS("Scanning tokens"); for (i = 1; (unsigned) i < ntk; i++) { part_number_map *ppn; unsigned tn; //int ctn = i; Token ctn(i); unsigned fn; ts = map_token_number[i].token_set_id; if (ts == 0) { continue; } nf = map_char_set[ts].nparts; if (nf != 1) { continue; } fl = lstptr(map_char_set[ts], part); ppn = &map_part_number[pn = *fl]; assert(ok_index(map_part_number, pn)); list = (int *) lstptr(*ppn, chars); //assert (pn > 0 && pn < m); assert(pn < (unsigned) m); nc = ppn->size; tn = ppn->token_number; if (tn == 0) { while (nc--) { cn = *list++ - min_char_number; assert(cn < n_chars); map_char_number[cn].token_number = i; LOGV(cn) LCV(i); } assert(pn < part_dict->nsx); ppn->token_number = i; map_token_number[i].part_number = pn; continue; } if (tn < ntk) { //ntkns++; //check_size(map_token_number, ntkns, ntkns); //ctn = ntkns; ctn = Token::create(); ctn->value_type = default_input_type; //pf1x(ctn); //fn = form2(); fn = makeRule(ctn); at(bnf_table, tn, fn); Rule(fn)->prim_tkn = tn; ppn->token_number = ctn; Token(tn)->operatorCandidate = ctn->operatorCandidate = 1; while (nc--) { cn = *list++ - min_char_number; assert(cn < n_chars); map_char_number[cn].token_number = ctn; LOGV(cn) LCV(ctn); } map_token_number[ctn].part_number = pn; map_token_number[ctn].value_type = default_input_type; map_token_number[tn].value_type = default_input_type; map_token_number[tn].part_number = 0; map_token_number[tn].non_terminal_flag = 1; tn = ctn; } //pf1x(tn); //fn = form2(); fn = makeRule(tn); LOGV(i) LCV(fn) LCV(tn); at(bnf_table, i, fn); Rule(fn)->prim_tkn = i; map_token_number[i].non_terminal_flag = 1; ctn = tn; } for (i = 1; (unsigned) i < ntk; i++) { token_number_map *mti = &map_token_number[i]; ts = mti->token_set_id; if (ts == 0) { continue; } nf = map_char_set[ts].nparts; if (nf <= 1) { continue; } fl = lstptr(map_char_set[ts], part); iws(); j = 0; while (j < (unsigned) nf) { pn = fl[j]; assert (pn > 0 && pn < (unsigned) m); tn = map_part_number[pn].token_number; if (tn == 0) { //ntkns++; //check_size(map_token_number,ntkns,ntkns); list = (int *) lstptr(map_part_number[pn], chars); nc = map_part_number[pn].size; tn = Token::create(); while (nc--) { cn = *list++ - min_char_number; assert(cn < n_chars); map_char_number[cn].token_number = tn; LOGV(cn) LCV(tn); } //tn = ntkns; assert(pn < part_dict->nsx); map_part_number[pn].token_number = tn; map_token_number[tn].part_number = pn; map_token_number[tn].value_type = default_input_type; map_token_number[tn].operatorCandidate = 1; } //pf1x(tn); //aws(form2()); aws(makeRule(tn)); j++; } fl = (unsigned *) list_base; nf = rws(); map_token_number[i].non_terminal_flag = 1; for (j = 0; j < (unsigned) nf; j++) { LOGV(i) LCV(fl[j]); at(bnf_table, i, fl[j]); //assert(ok_index(map_form_number, fl[j])); Rule rule(fl[j]); rule->prim_tkn = i; } } } static void partition_set(int ts, list_dict *d, int *ps, char *fa) { int *list; int n, np, i; np = part_dict->nsx; memset(fa, 0, np); list = d->text + d->ndx[ts]; n = *(list++) - 1; for (i = 0; i<n; i++) { fa[ps[list[i] - min_char_number]] = 1; } iws(); for (i = 0; i < np; i++) { if (fa[i]) { aws(i); } } } int build_char_set(int nn) { LOGSECTION("build_char_set"); //char *v; //int i, k; int cs = map_parse_tree[nn].char_set; LOGV(cs) LCV(nn); if (cs) { return cs; } char_set_error = 0; LOGV(n_chars); //int *list = local_array(n_chars, int); LocalArray<int> list(n_chars); CharBitmap bitmap = map_parse_tree[nn].expression->bitmap(); if (!case_sensitive) { bitmap.blurCase(); } int count = bitmap.list(list); LOGV(count); cs = add_list_dict(list, count, char_set_dict); LOGV(cs); check_size(map_char_set, cs, cs); map_char_set[cs].parse_tree = nn; map_parse_tree[nn].char_set = cs; return cs; } static void log_undef_sym(int tn) { ssprintf("Undefined symbol: %s", Symbol(tn)->string.pointer()); log_error(); } int getParseTree(int name) { LOGSECTION("getParseTree"); int pt = map_token_name[name].parse_tree; LOGV(pt); if (pt) { return pt; } int tn = map_token_name[name].token_number; if (tn) { pt = map_token_number[tn].parse_tree; } LOGV(tn) LCV(pt); if (pt) { return pt; } log_undef_sym(name); return 0; }