Mercurial > ~dholland > hg > ag > index.cgi
view anagram/agcore/nckwtr.cpp @ 24:a4899cdfc2d6 default tip
Obfuscate the regexps to strip off the IBM compiler's copyright banners.
I don't want bots scanning github to think they're real copyright
notices because that could cause real problems.
author | David A. Holland |
---|---|
date | Mon, 13 Jun 2022 00:40:23 -0400 |
parents | 13d2b8934445 |
children |
line wrap: on
line source
/* * AnaGram, A System for Syntax Directed Programming * Copyright 1993-2002 Parsifal Software. All Rights Reserved. * See the file COPYING for license and usage terms. * * nckwtr.cpp - Conflict/Keyword Anomaly Trace Utility */ #include "agbaltree.h" #include "arrays.h" #include "assert.h" #include "cd.h" #include "data.h" #include "dict.h" #include "lexeme.h" #include "myalloc.h" #include "nckwtr.h" #include "q1glbl.h" #include "rule.h" #include "stacks.h" #include "token.h" #include "tsd.h" //#define INCLUDE_LOGGING #include "log.h" static int conflict_rule; static int conflict_state; /* * Strategy * * The data given are two state numbers: the conflict state and the * reduction state. In addition, the conflict rule is given. * * The approach is to work backward from the reduction state to a * state that leads to the conflict state. * * If the characteristic token for the reduction state includes the * conflict rule as an expansion rule, then we don't have to worry * about null productions. Otherwise, there has been a shift of some * zero-length production to get us to the reduction state. * * The first routine to build is a function which will call itself * recursively to find the fork state. It will fail if it cannot find * a fork state from its present state. */ static int keyword_switch = 0; static AgBalancedTree<Triple<int> > triples; AgArray<int> new_reduction_stack; int new_reduction_stack_depth; AgArray<int> new_conflict_stack; int new_conflict_stack_depth; tsd *new_rule_derivation; static int simple_path(int from, int to) { unsigned *p = lstptr(map_state_number[to],previous_states); int nt = map_state_number[to].n_previous_states; int i; if (from == to) return 1; for (i = 0; i < nt; i++) { if (xws(p[i])) { continue; } if (simple_path(from, p[i])) { return 1; } fws(); } return 0; } void clear_conflict_expansion(void) { if (conflict_token == 0) { return; } new_rule_derivation = delete_tsd(new_rule_derivation); token_derivation = delete_tsd(token_derivation); new_reduction_stack = AgArray<int>(); new_conflict_stack = AgArray<int>(); conflict_token = 0; } static int new_analysis(int sn, int rn); void analyze_conflict(int csn, int tn, int cfn, int kws) { int flag; LOGSECTION("analyze_conflict"); kws = kws!=0; if (conflict_state == csn && conflict_token == tn && keyword_switch == kws && conflict_rule == cfn) { return; } clear_conflict_expansion(); conflict_state = csn; conflict_token = tn; conflict_rule = cfn; keyword_switch = kws; flag = new_analysis(conflict_state, conflict_rule); LOGV(flag); assert(flag); triples.reset(); } static int equiv_token(Token charToken, Token ruleToken) { if (charToken == ruleToken) { return 1; } AgArray<Rule> expansion = charToken->expansionRuleList; int n = expansion.size(); while (n--) { if (expansion[n]->length() == 0 ) { continue; } if (expansion[n]->length() > 1 && (unsigned) expansion[n].token(1) != disregard_token) { continue; } if (expansion[n].token(0) == ruleToken) { return 1; } } return 0; } static void validate_conflict_stack(void) { int *ctp = list_base; int cfsd = tis(); int sx = 0; int nsn = ctp[sx++]; int kr = 0; int kr_lim = new_rule_derivation->nt; int rn, rx; LOGSECTION("validate_conflict_stack"); LOGV(sx) LCV(cfsd); if (sx >= cfsd) return; #ifdef INCLUDE_LOGGING { int i; for (i = 0; i < cfsd; i++) { LOGV(i) LCV(ctp[i]); } int *sb = new_rule_derivation->sb; for (i = 0; i < kr_lim; i += 2) { LOGV(sb[i]) LCV(sb[i+1]); } } #endif check_tsd(new_rule_derivation); xtxf(new_rule_derivation,kr, &rn, &rx); LOGV(rn) LCV(rx); while (rx == 0) { xtxf(new_rule_derivation, ++kr, &rn, &rx); rx--; LOGV(rn) LCV(rx); } if (sx < cfsd) { while (1) { Token charToken = map_state_number[nsn].char_token; int sn = ctp[sx++]; LOGV(kr) LCV(kr_lim); assert(kr < kr_lim); //assert(equiv_token(tn, lstptr(map_form_number[rn], tokens)[--rx])); Token ruleToken = Rule(rn).token(--rx); LOGV(rn) LCV(Rule(rn)->length()) LCV(rx) LCV(ruleToken); //assert(equiv_token(charToken, Rule(rn).token(--rx))); assert(equiv_token(charToken, ruleToken)); LOGV(nsn) LCV(sx) LCV(sn) LCV(charToken); //int nextState = new_next_state(sn, ruleToken); //LOGV(nextState); //assert(nsn == nextState); assert(equiv_token(charToken, transitionToken(sn,nsn))); LOGV(sx) LCV(cfsd); if (sx >= cfsd) { break; } nsn = sn; while (rx == 0) { xtxf(new_rule_derivation, ++kr, &rn, &rx); rx--; } } } LOGS("Exiting validate_conflict_stack"); } /* static int check_leading_token(unsigned hlt, unsigned llt) { unsigned *ltp = lstptr(map_token_number[hlt],leading_tokens); int nlt = map_token_number[hlt].n_leading_tokens; if (hlt == llt) { return 1; } while (nlt--) { if (ltp[nlt] == llt) { return 1; } } return 0; } */ static int reduction_state_path(int sn) { LOGSECTION("reduction_state_path"); LOGV(sn); int *is = dict_str(isht_dict, sn); int nis = (*is++ - 1)/2; while (nis --) { int stateNumber = sn; Rule rn = *is++; unsigned rx = *is++; LOGV(stateNumber) LCV(rn) LCV(rx); iws(); while (rx < rn->length()) { Token tn = rn.token(rx); //int flag = check_leading_token(tn,conflict_token); int flag = tn.isLeadingToken(conflict_token); aws(stateNumber); LOGV(stateNumber) LCV(tis()); if (flag) { at(new_rule_derivation, (int) rn, rx); return 1; } //if (!map_token_number[tn].zero_length_flag) { // break; //} if (!tn->zero_length_flag) { break; } LOGV(rn) LCV(rx) LCV(sn) LCV(tn); stateNumber = new_next_state(stateNumber,tn); if (stateNumber == 0) { break; } rx++; } rws(); } return 0; } static int ct_accessible(int sn) { LOGSECTION("ct_accessible"); LOGV(sn); int *is = dict_str(isht_dict, sn); int nis = (*is++ - 1)/2; while (nis--) { Rule rn = *is++; unsigned rx = *is++; while (rx < rn->length()) { //int tn = lstptr(map_form_number[rn], tokens)[rx]; Token tn = rn.token(rx); //int flag = check_leading_token(tn,conflict_token); int flag = tn.isLeadingToken(conflict_token); if (flag) { return 1; } //if (!map_token_number[tn].zero_length_flag) { // break; //} if (!tn->zero_length_flag) { break; } LOGV(rn) LCV(rx) LCV(sn) LCV(tn); sn = new_next_state(sn,tn); if (sn == 0) { break; } rx++; } } return 0; } static int anomalous_keyword_path(int sn) { int *is = dict_str(isht_dict, sn); int nis = (*is++ - 1)/2; while (nis--) { Rule rn = *is++; unsigned rx = *is++; if (rx >= rn->non_vanishing_length) { return 0; } } is = dict_str(isht_dict, sn); nis = (*is++ - 1)/2; while (nis--) { Rule rn = *is++; unsigned rx = *is++; if (rx < rn->length()) { sws(sn); at(new_rule_derivation, (int) rn, rx); return 1; } } return 0; } static int hw_reduce(int sn, int rn, int rx); static int nulls_remaining(int sn, int nsn) { LOGSECTION("nulls_remaining"); LOGV(sn) LCV(nsn); int *is = dict_str(isht_dict, nsn); int nis = (*is++ - 1)/2; while (nis --) { Rule rn = *is++; unsigned rx = *is++; if (rx < rn->non_vanishing_length) { continue; } at(new_rule_derivation, (int) rn, rx); if (hw_reduce(sn, rn, rx - 1)) { return 1; } new_rule_derivation->nt--; } return 0; } static int hw_reduce(int sn, int rn, int rx) { LOGSECTION("hw_reduce"); //if (triples.insert(IntegerTriple(sn, rn, rx))) { // return 0; //} if (triples.insert(Triple<int>(sn, rn, rx))) { return 0; } LOGV(sn) LCV(rn) LCV(rx); if (rx) { int i, j; unsigned *p = lstptr(map_state_number[sn],previous_states); int nt = map_state_number[sn].n_previous_states; //int *sorted = local_array(nt, int); LocalArray<int> sorted(nt); for (i = 0; i < nt; i++) { sorted[i] = p[i]; LOGV(p[i]); } for (i = 0; i < nt; i++) { //aws(p[i]); //if (xws(sorted[i])) continue; for (j = i+1; j < nt; j++) { if (sorted[i] > sorted[j]) { int temp = sorted[i]; sorted[i] = sorted[j]; sorted[j] = temp; } } LOGV(sorted[i]); //if (xws(sorted[i])) continue; aws(sorted[i]); #ifdef INCLUDE_LOGGING { for (int index = 0; index < tis(); index++) { LOGV(index) LCV(list_base[index]); } } LOGV(sorted[i]) LCV(tis()); #endif if (hw_reduce(sorted[i], rn, rx-1)) { return 1; } fws(); } return 0; } { int *sp = ibnfs + ibnfb[rn]; int m = ibnfn[rn]; int i; for (i = 0; i < m; i++) { unsigned *gtp = lstptr(map_state_number[sn],gotos); int ngt = map_state_number[sn].n_gotos; unsigned tn = sp[i]; int nsn, crn, crx; int j, k; for (j = k = 0; j < ngt; j++, k+=2) { if (gtp[k] == tn) break; } if (j < ngt) { nsn = gtp[k+1]; if (keyword_switch) { if (ct_accessible(nsn)) { continue; } if (nulls_remaining(sn, nsn)) { return 1; } if (anomalous_keyword_path(nsn)) { return 1; } } else { if (reduction_state_path(nsn)) { return 1; } if (nulls_remaining(sn, nsn)) { return 1; } } continue; } gtp = lstptr(map_state_number[sn], completions); ngt = map_state_number[sn].n_completions; for (j = k = 0; j < ngt; j++, k += 2) { if (gtp[k] == tn) { break; } } if (j == ngt) { continue; } crn = gtp[k+1]; crx = Rule(crn)->length(); int tx = new_rule_derivation->nt; while (tx--) { int trn, trx; xtx(new_rule_derivation, tx, &trn, &trx); if (trn == crn && trx == crx) { break; } } if (tx >= 0) { continue; } at(new_rule_derivation, crn, crx); if (hw_reduce(sn, crn, crx - 1)) { return 1; } new_rule_derivation->nt--; } } return 0; } /* static void trim_ct(void) { int nt = new_rule_derivation->nt; int k = nt - 1; int i, j; LOGSECTION("trim_ct"); AgArray<int> list(buildStaticList()); LOGV(list.size()); int *cs = list.pointer(); int ncs = list.size(); // cs is list of states in reverse order int rn,rx, tn; unsigned *rl; int nrl; check_tsd(new_rule_derivation); xtxf(new_rule_derivation, k, &rn, &rx); tn = lstptr(map_form_number[rn],tokens)[rx-1]; rl = lstptr(map_token_number[tn],forms); nrl = map_token_number[tn].n_forms; iws(); while (--k >= 0) { for (i = 0; i < k; i++) { unsigned srn, srx; int psx; xtxf(new_rule_derivation, i, &srn, &srx); for (j = 0; j < nrl; j++) if (srn == rl[j]) break; if (j == nrl) continue; psx = ncs; for (j = i+1; j < k + 1; j++) { int jrn, jrx; xtxf(new_rule_derivation, j, &jrn, &jrx); psx -= jrx-1; } if (psx <= 1) continue; if (cs[ncs-1] != cs[psx-1]) continue; ncs = psx; break; } if (i < k) { memmove(&new_rule_derivation->sb[2*(i+1)], &new_rule_derivation->sb[2*(k+1)], (nt - k - 1)*2*sizeof(int)); nt -= k-i; new_rule_derivation->nt = nt; k = i; } if (k <= 0) break; xtxf(new_rule_derivation, k, &rn, &rx); tn = lstptr(map_form_number[rn],tokens)[--rx]; rl = lstptr(map_token_number[tn],forms); nrl = map_token_number[tn].n_forms; while (rx--) aws(cs[--ncs]); } while (ncs--) aws(cs[ncs]); list = buildStaticList(); cs = list.pointer(); ncs = list.size(); iws(); while (ncs--) aws(cs[ncs]); } */ static int new_analysis(int sn, int rn) { int length = Rule(rn)->length(); LOGSECTION("new_analysis"); LOGV(sn) LCV(rn); new_rule_derivation = init_tsd(2); at(new_rule_derivation, rn, length); sws(sn); if (hw_reduce(sn,rn,length)) { int fs; new_reduction_stack = buildStaticList(); new_reduction_stack_depth = new_reduction_stack.size(); fs = fws(); //if (tis()) trim_ct(); validate_conflict_stack(); sws(fs); simple_path(0,fs); concat_list(); new_conflict_stack = buildStaticList(); new_conflict_stack_depth = new_conflict_stack.size(); iws(); while(new_reduction_stack_depth--) { aws(new_reduction_stack[new_reduction_stack_depth]); } sws(fs); simple_path(0, fs); concat_list(); new_reduction_stack = buildStaticList(); new_reduction_stack_depth = new_reduction_stack.size(); if (!keyword_switch) { int fn, fx, k; k = new_rule_derivation->nt - 1; xtxf(new_rule_derivation, k, &fn, &fx); token_derivation = init_tsd(2); tried = ZALLOCATE(ntkns+1, char); //memset(tried,0,ntkns+1); //x9x(lstptr(map_form_number[fn], tokens)[fx]); x9x(Rule(fn).token(fx)); at(token_derivation, fn, fx); DEALLOCATE(tried); } return 1; } rws(); return 0; }