Mercurial > ~dholland > hg > ag > index.cgi
diff anagram/agcore/q1a.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author   | David A. Holland
---------|---------------------------------
date     | Sat, 22 Dec 2007 17:52:45 -0500
parents  |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anagram/agcore/q1a.cpp Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,862 @@ +/* + * AnaGram, A System for Syntax Directed Programming + * Copyright 1993-1999 Parsifal Software. All Rights Reserved. + * See the file COPYING for license and usage terms. + * + * q1a.cpp + */ + +#include "agarray.h" +#include "arrays.h" +#include "assert.h" +#include "binsort.h" +#include "bitmap.h" +#include "config.h" +#include "cs.h" +#include "dict.h" +#include "error.h" +#include "keyword.h" +#include "lexeme.h" +#include "myalloc.h" +#include "q1a.h" +#include "q1glbl.h" +#include "q5.h" +#include "q8.h" +#include "rproc.h" +#include "rule.h" +#include "stacks.h" +#include "token.h" +#include "tsd.h" +#include "ut.h" + +//#define INCLUDE_LOGGING +#include "log.h" + + +/* + The principal functions in Q1A are + . summarize_grammar() + . q1() + + summarize_grammar() is an umbrella procedure that is used to call all + the functions that are used to create all the tables that describe + a grammar. summarize_grammar() is called after the input file has + been successfully parsed. + + q1() is a wrapper function which calls lalr() and rlalr(), both in Q5. + lalr() generates the parser states and the goto table. rlalr() generates + reduction tokens and the reduce table. 
+*/ + +int n_states_est; + +const char *badRecursionFlag = 0; + +int token_name_length = 0; +int item_length = 0; + +static unsigned libnfs; + + +static AgArray<Token> findLeadingTokens(Token t) { + LOGSECTION("findLeadingTokens"); + if (!t->non_terminal_flag) { + return AgArray<Token>(); + } + LOGV(t); + //LOGV(myallocs) LCV(myfrees) LV(myallocBytes) LCV(myfreeBytes); + Bitmap bitmap(Token::count()); + AgStack<Token> terminalStack; + AgStack<Token> nonTerminalStack; + bitmap.setBit(t); + nonTerminalStack.push(t); + for (unsigned i = 0; i < nonTerminalStack.size(); i++) { + Token token = nonTerminalStack[i]; + int nRules = token->ruleList.size(); + for (int j = 0; j < nRules; j++) { + Rule rule = token.rule(j); + RuleDescriptor &ruleDescriptor(rule); + //int length = rule->length(); + int length = ruleDescriptor.length(); + for (int k = 0; k < length; k++) { + //Token ruleToken = rule.token(k); + Token ruleToken = ruleDescriptor.token(k); + token_number_map &ruleTokenDescriptor(ruleToken); + if (!bitmap.setBit(ruleToken)) { + if (ruleTokenDescriptor.non_terminal_flag) { + nonTerminalStack.push(ruleToken); + } + else { + terminalStack.push(ruleToken); + } + } + if (ruleTokenDescriptor.zero_length_flag) { + continue; + } + break; + } + } + } + return terminalStack; +} + + +static void s8(Token token) { /* build list of leading tokens */ + LOGSECTION("s8"); + LOGV(token); + token->leadingTokenList = findLeadingTokens(token); + if (token->leadingTokenList.size()) { + return; + } + + // No terminal tokens for token t, Is a diagnostic needed? 
+ Bitmap map(Token::count()); + while (1) { + if (token->ruleList.size() != 1) { + // Diagnose if more than one rule + break; + } + Rule rule = token.rule(0); + int lf = rule->length(); + if (lf == 0) { + // No diagnostic for null production + return; + } + if (lf != 1) { + // Diagnose if not a shell production + break; + } + // shell production, get nested token + Token producedToken = rule.token(0); + if (producedToken->immediate_action) { + return; + } + if (map.setBit(producedToken)) { + // don't loop forever + break; + } + // since leading token count is zero. + assert(producedToken->non_terminal_flag); + } + ssprintf("No terminal tokens in expansion of T%03d: ", (int) token); + atkn(token); + log_error(); +} + +/* + s7a() builds a list of (left) expansion rules for the token t. + This is done by building a list of expansion rules, and for each + rule in the list, adding its expansion rules to the list if they + are not already there. This process continues until all rules in + the list have been expanded. 
+*/ +/* +static void s7a(Token token) { + LOGSECTION("s7a"); + int n = token->ruleList.size(); + + LOGV(token) LCV(n); + iws(); + for (int i = 0; i < n; i++) { + aws(token.rule(i)); + } + for (n = 0; n < tis(); n++) { + Rule rule(list_base[n]); + int nr; + LOGV(n) LCV(tis()) LCV(list_base[n]) LCV(nforms); + assert(list_base[n] <= nforms); + if (rule->length() == 0) continue; + Token token(rule->token(0)); + + if (!token->non_terminal_flag) continue; + nr = token->ruleList.size(); + for (int i = 0; i < nr; i++) xws(token.rule(i)); + } + token->expansionRuleList = makeArray(list_base, rws()); +} +*/ +static void findExpansionRules(Token t) { + LOGSECTION("findExpansionRules"); + if (!t->non_terminal_flag) return; + LOGV(t); + Bitmap map(Rule::count()); + AgStack<Rule> ruleStack; + int nRules = t->ruleList.size(); + for (int j = 0; j < nRules; j++) { + Rule rule = t.rule(j); + if (map.setBit(rule)) { + continue; + } + ruleStack.push(rule); + } + for (unsigned i = 0; i < ruleStack.size(); i++) { + Rule rule = ruleStack[i]; + int length = rule->length(); + if (length == 0) { + continue; + } + Token token = rule.token(0); +/* + if (length == 1 && token == t) { + ssprintf("Circular definition of T%03d: ", (int) token); + atkn(token); + ssprintf(" in "); + append_item(rule, -1); + concat_string(); + log_error(rule->line, rule->col); + badRecursionFlag = "circular definition"; + } +*/ + if (!token->non_terminal_flag) { + continue; + } + int nRules = token->ruleList.size(); + for (int j = 0; j < nRules; j++) { + Rule tokenRule = token.rule(j); + if (map.setBit(tokenRule)) { + continue; + } + ruleStack.push(tokenRule); + } + } + t->expansionRuleList = ruleStack; +} + +static void s5(Token token, char *sta) { + int n, k; + + ok_ptr(sta); + if (sta[token]) return; + LOGSECTION("s5"); + LOGV(token) LCV(token->ruleList.size()) LCV(token->immediate_action); + sta[token] = 1; + if (token->ruleList.size() == 0) { + token->non_terminal_flag = 0; + assert(perm_index <= ntkns); + 
token_perm[perm_index] = token; + perm_index++; + return; + } + token->non_terminal_flag = 1; + n = token->ruleList.size(); + while (n--) { + if (token.rule(n)->length()) { + continue; + } + token->zero_length_flag = 1; + break; + } + n = token->ruleList.size(); + for (int i = 0; i < n; i++) { + Rule rule = token.rule(i); + k = rule->length(); + if (k == 0) continue; + int tokenIndex = 0; + while (k--) { + Token ruleToken = rule.token(tokenIndex++); + s5(ruleToken,sta); + if (ruleToken->zero_length_flag == 0) { + break; + } + } + if (k == -1) { + token->zero_length_flag = 1; + } + } + assert(perm_index <= ntkns); + token_perm[perm_index] = token; + perm_index++; +} + + +static int s6(Token token, char *sta) { + LOGSECTION("s6"); + int zc, n, k; + + zc = 0; + ok_ptr(sta); + if (sta[token]) { + return zc; + } + if (token->zero_length_flag) { + return zc; + } + if (!token->non_terminal_flag) { + return zc; + } + + sta[token] = 1; + n = token->ruleList.size(); + for (int i = 0; i < n; i++) { + Rule rule = token.rule(i); + if ((k = rule->length()) == 0) { + continue; + } + int tokenIndex = 0; + while (k--) { + Token ruleToken = rule.token(tokenIndex++); + s6(ruleToken,sta); + if (ruleToken->zero_length_flag==0) { + break; + } + } + if (k == -1) { + token->zero_length_flag = 1; + zc++; + } + } + return zc; +} + +static void s4(void) { + LOGSECTION("s4"); + int n, t, zc; + char *sta; + + perm_index = 0; + n = ntkns + 1; + sta = ZALLOCATE(n, char); + Each<Token> token; + for (token.restart(); token.loopNotFinished(); token.getNext()) { + s5(token, sta); + } + for (zc = 1; zc; ) { + zc = 0; + ok_ptr(sta); + memset(sta, 0, n*sizeof(*sta)); + for (t = 0; t < n; t++) { + zc += s6(token_perm[t],sta); + } + } + DEALLOCATE(sta); +} + +/* + s3() builds the inverted bnf table from the bnf table. + The ibnf table provides the mapping from rule to + reduction token list. 
+*/ + +static void s3(void) { + LOGSECTION("s3"); + //unsigned t, f, n, cf; + unsigned n, cf; + unsigned sp; + int *p; + unsigned nt; + + cf = 0; + sp = 0; + ibnfb[0] = sp; + n = 0; + p = ibnf_table->sb; + nt = ibnf_table->nt; + while (nt--) { + Rule rule = *p++; + Token token = *p++; + if (token->sem_det_flag) { + rule->fast_loop = 0; + } + if ((unsigned)rule != cf) { + ibnfn[cf] = (int) n; + ibnfb[rule] = sp; + cf = rule; + n = 0; + } + assert(sp < libnfs); + ibnfs[sp] = token; + sp++; + n++; + } + ibnfn[cf] = (int) n; + ibnf_table = delete_tsd(ibnf_table); +} + +static void makeRuleLists(void) { + LOGSECTION("makeRuleLists"); + AgArray< AgBalancedTree<int> > tree(Token::count()); + int *bnf = bnf_table->sb; + int nbnf = bnf_table->nt; + while (nbnf--) { + Token token = *bnf++; + Rule rule = *bnf++; + tree[token].insert(rule); + } + Each<Token> token; + for (token.restart(); token.loopNotFinished(); token.getNext()) { + int n = tree[token].size(); + if (n == 0) { + continue; + } + AgArray<Rule> list(n); + while (n--) { + list[n] = tree[token][n]; + } + token->ruleList = list; + } +} + +static int stack_size_arbitrary; +int stack_size; + +static int find_token_length(int tn) { + Token token = tn; + int nf; + int length = 0; + if (tut[token]) { + return tut[token]; + } + ok_ptr(tut); + tut[token] = -1; + nf = token->ruleList.size(); + for (int i = 0; i < nf; i++) { + Rule rule = token.rule(i); + int form_length = rule->length(); + int fx = 0; + while (fx < form_length) { + int k = find_token_length(rule.token(fx)); + if (k == -1) { + if (fx) { + stack_size_arbitrary = 1; + } + k = 0; + } + k += fx; + if (length < k) { + length = k; + } + fx++; + } + } + tut[tn] = length; + return length; +} + +unsigned disregard_cont_rule; +unsigned disregard_skip_rule; + +void summarize_grammar(void) { + LOGSECTION("summarize_grammar"); + unsigned n, t, tt; //, f; + unsigned k; + + if (errorResynchDiagnostic) { + auto_resynch = 0; + } + + LOGS("Scan rules to finish up intermediate 
actions"); + Each<Rule> rule; + for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { + LOGSECTION("Intermediate action cleanup"); + LOGV(rule); + Procedure proc = rule->proc_name; + Token primaryToken = rule->prim_tkn; + + if (primaryToken->sem_det_flag) { + rule->fast_loop = 0; + } + if (rule->immediate_proc) { + continue; + } + if (proc.isNotNull()) { + int type = primaryToken->value_type; + if (type == 0) { + type = primaryToken->value_type = default_token_type; + } + if (proc->cSegment.length() == 0) { + type = void_token_type; + } + proc->cast = type; + //finish_proc_def(proc, rule, rule->length(), type); + } + } + LOGS("Prod definitions finished"); + ZALLOCATE_ST(token_perm, ntkns+1); + tut = ZALLOCATE(ntkns+1, int); + + makeRuleLists(); + s4(); /* identify zero length tokens */ + LOGS("s4 finished"); + for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { + int i, length; + + if (rule->immediate_proc) { + continue; + } + length = rule->length(); + LOGV(rule) LCV(rule->length()) LCV(rule->elementList.size()); + for (i = 0; i < length; i++) { + Token ruleToken = rule.token(i); + LOGV(ruleToken) LCV(ruleToken->immediate_action) + LCV(ruleToken->ruleList.size()); + if (!ruleToken->immediate_action) { + continue; + } + Rule actionRule = ruleToken.rule(0); + LOGV(actionRule); + Procedure proc = actionRule->proc_name; + int type = default_token_type; + if (proc->cSegment.length() == 0) { + type = void_token_type; + } + proc->cast = type; + } + } + LOGS("Immediate actions finished"); + if (disregard_token && + map_token_number[disregard_token].non_terminal_flag) { + Token token = disregard_token; + int nf = token->ruleList.size(); + for (int i = 0; i < nf; i++) { + Rule rule = token.rule(i); + if (rule->length() == 0) { + disregard_skip_rule = rule; + } + else { + disregard_cont_rule = rule; + } + } + } + LOGS("Disregard token analysis complete"); + + n = nforms + 1; + ibnfn = ZALLOCATE(n, int); + ALLOCATE_ST(ibnfb, n); + n = bnf_table->nt; + 
resize_tsd(bnf_table,n); + ALLOCATE_ST(ibnfs, n); + libnfs = n; + badRecursionFlag = 0; + for (t = 0; t++ < ntkns; ) { + tt = token_perm[t]; + memset(tut, 0, sizeof(*tut) * (ntkns+1)); + if (map_token_number[tt].non_terminal_flag) { + //checkRecursion(tt); + s8(tt); // expand forms + } + } + memset(tut, 0, sizeof(*tut) * (ntkns+1)); + stack_size = stack_size_arbitrary = 0; + Each<Token> token; + for (token.restart(); token.loopNotFinished(); token.getNext()) { + int length = find_token_length(token); + + if (stack_size < length) { + stack_size = length; + } + Token permutedToken = token_perm[token]; + if (!permutedToken->non_terminal_flag) { + continue; + } + //s7a(permutedToken); + findExpansionRules(permutedToken); + } + s3(); /* make inverted bnf tables */ + + n_states_est = -(int)nforms; + memset(tut, 0, sizeof(*tut) * (ntkns+1)); + for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { + int nr = ibnfn[rule]; + int *rl = ibnfs + ibnfb[rule]; + k = n = rule->length(); + n_states_est += n; + //LOGV(rule) LCV(rule->length()); + while (k--) { + int t = rule.token(k); + int j = 0; + while (j < nr) { + if (rl[j] != t) { + break; + } + else { + j++; + } + } + if (j != nr) { + tut[t] = 1; + } + } + k = n; + //LOGV(rule) LCV(rule->precedence_level); + if (rule->precedence_level == 0) { + while (k--) { + Token token = rule.token(k); + token_number_map &td = map_token_number[token]; + if (td.non_terminal_flag && !td.operatorCandidate && + td.parse_tree.isNull()) { + continue; + } + int pl = token->precedence_level; + if (pl != 0) { + rule->precedence_level = pl; + rule->left_associative = token->left_associative; + rule->right_associative = token->right_associative; + } + break; + } + } + //LOGV(n); + while (n) { + n--; + if (rule.token(n)->zero_length_flag) { + continue; + } + n++; + break; + } + assert(n <= rule->length()); + rule->non_vanishing_length = n; + } + for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { + int *rl = ibnfs + ibnfb[rule]; + 
int nr = ibnfn[rule]; + int length = rule->length(); + int fx; + + if (nr <= 1 || length == 0) { + continue; + } + while (nr) { + int tn = *rl++; + if (tut[tn]) { + break; + } + nr--; + } + if (nr == 0) { + continue; + } + for (fx = 0; fx < length; ) { + tut[rule.token(fx++)] = 1; + } + } + for (t = 0; t++ < ntkns;){ + if (!tut[t]) { + if ((int) t == error_token) { + error_token = 0; + continue; + } + if ((int) t == grammar_token) { + continue; + } + ssprintf("Token not used, T%03d: ",t); + atkn(t); + log_error(); + } + } + for (Each<Keyword> keyword; keyword.loopNotFinished(); keyword.getNext()) { + int pt = keyword->reserve; + if (pt == 0) { + continue; + } + keyword->reserve = build_char_set(pt); + } + LOGS("Reserve sets built"); + if (n_states_est <= 0) { + n_states_est = 1; + } + bfst3(); + DEALLOCATE(tut); + LOGS("tut deallocated"); + if ((auto_resynch || error_token) && eof_token == 0) { + log_error("eof token not defined"); + } + token_name_length = 0; + for (token.restart(); token.loopNotFinished(); token.getNext()) { + int n; + ics(); + atkn(token); + n = rcs(); + if (token_name_length < n) { + token_name_length = n; + } + } + LOGS("name length loop finished"); + for (token.restart(); token.loopNotFinished(); token.getNext()) { + AgStack<Token> testTokenList; + LOGV((int) token); + testTokenList.push(token); + unsigned i; + int badRule = 0; + for (i = 0; !badRule && i < testTokenList.size(); i++) { + AgArray<Rule> &ruleList = testTokenList[i]->ruleList; + LOGV(i) LCV((int) testTokenList[i]); + unsigned j; + for (j = 0; !badRule && j < ruleList.size(); j++) { + Rule &r = ruleList[j]; + if (r->length() == 0) { + continue; + } + unsigned k; + int nNonZeroLength = 0; + int kDerived = -1; + for (k = 0; k < r->length(); k++) { + if (r.token(k)->zero_length_flag) { + continue; + } + kDerived = k; + nNonZeroLength++; + } + if (nNonZeroLength > 1) { + continue; + } + Token testToken; + LOGV(nNonZeroLength); + if (nNonZeroLength) { + LOGV(kDerived); + testToken = 
r.token(kDerived); + LOGV((int) testToken); + if (testToken == token) { + badRule = (int) r; + } + LOGV(badRule); + } + else { + for (k = 0; k < r->length(); k++) { + testToken = r.token(k); + if (testToken == token) { + badRule = (int) r; + break; + } + } + } + LOGV((int) testToken) LCV(nNonZeroLength); + LOGV(badRule); + if (badRule) { + LOGV(r->length()); + if (r->length() > 1) { + ssprintf("Empty recursion: Rule R%03d in expansion of T%03d: ", + (int) r, (int) token); + atkn(token); + badRecursionFlag = "empty recursion"; + } + else { + ssprintf("Circular definition of T%03d: ", (int) token); + atkn(token); + ssprintf(" in "); + append_item(r, -1); + concat_string(); + badRecursionFlag = "circular definition"; + } + LOGV(badRecursionFlag); + log_error(r->line, r->col); + } + else { + LOGV(testTokenList.size()); + for (k = 0; k < testTokenList.size(); k++) { + if (testTokenList[k] == testToken) { + break; + } + } + LOGV(k); + if (k == testTokenList.size()) { + testTokenList.push(testToken); + } + } + LOGV(badRecursionFlag); + } + } + } + + item_length = 0; + for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { + int n; + ics(); + append_item(rule, -1); + n = rcs(); + if (item_length < n) { + item_length = n; + } + } + item_length++; + for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { + if (rule->proc_name) { + rule->reductionRequired = 1; + continue; + } + int n = rule->length(); + while (n-- > 1) { + Token t = rule->elementList[n].token; + Cast c = t->value_type; + if (!c.wrapperRequired()) { + continue; + } + rule->reductionRequired = 1; + break; + } + } +} + +int get_reduction_states(int s, int f, int n) { + LOGSECTION("get_reduction_states"); + LOGV(s) LCV(f) LCV(n); + int k = add_tuple_dict_new(completed_form_dict,s,f); + completed_form_map *cfp; + + check_size(map_completed_form,k,k); + cfp = &map_completed_form[k]; + if (cfp->reduction_states_index == 0) { + LOGS("Calling frss12)"); + frss12(s,f,n); + cfp = &map_completed_form[k]; + 
cfp->external_reduction = external_reduction; + cfp->reduction_states_index = store_list(reduction_states_list); + cfp->n_reduction_states = rws(); + } + return k; +} + +static void find_reduction_states(tsd *rrc) { + LOGSECTION("find_reduction_states"); + LOGV((int) rrc); + int *p, nt; + sort_tuples(rrc,2); + p = rrc->sb; + nt = rrc->nt; + while (nt--) { + int s, f; + unsigned n; + + s = *p++; + p++; + f = *p++; + n = *p++; + + if (n != (Rule(f)->length())) { + continue; + } + LOGV(nt); + get_reduction_states(s, f, n); + } + LOGS("find_reduction_states finished"); +} + +void q1(void) { + LOGSECTION("q1"); + LOGV(ntkns); + tut = ZALLOCATE(ntkns+1,int); + LOGS("call lalr"); + lalr(); + //State::createStates(Rule()); + LOGS("call rlalr"); + rlalr(); + LOGV(nstates); + map_state_number = (state_number_map *) + set_array_size(map_state_number, nstates); + + LOGS("call find_reduction_states"); + if (res_con->nt) { + find_reduction_states(res_con); + } + LOGS("returned from first call to find_reduction_states"); + + if (unres_con->nt) { + find_reduction_states(unres_con); + } + LOGS("returned from second call to find_reduction_states"); + + DEALLOCATE(tut); + empty_tuple_dict(frss_dict); + LOGS("q1 finished"); +} +