Mercurial > ~dholland > hg > ag > index.cgi
view anagram/agcore/lexeme.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line source
/* * AnaGram, A System for Syntax Directed Programming * Copyright 1993-1999 Parsifal Software. All Rights Reserved. * See the file COPYING for license and usage terms. * * lexeme.cpp - lexeme analysis */ #include "arrays.h" #include "config.h" #include "data.h" #include "dict.h" #include "keyword.h" #include "lexeme.h" #include "q1glbl.h" #include "q5.h" #include "rpk.h" #include "rule.h" #include "stacks.h" #include "token.h" #include "tree.h" #include "tsd.h" //#define INCLUDE_LOGGING #include "log.h" #define FIX3 AgStack<int> disregardList; unsigned disregard_token; // Find and mark the lexical rules. static void find_lexical_rules(void) { LOGSECTION("find_lexical_rules"); unsigned ku; int k; iws(); LOGS("disregard list tokens"); // First, all the disregard tokens for (ku = disregardList.size(); ku--;) { //xws(disregard_list[ku]); xws(disregardList[ku]); //Token(disregardList[ku])->disregard = 1; LOGV(disregardList[ku]); } // Then all the lexemes LOGS("lexemes"); for (ku = 0; ku++ < ntkns;) if (map_token_number[ku].lexeme) { xws(ku); LOGV(ku); } // rules produced by disregard tokens and lexemes are // lexical rules. Any rule produced by a token found // in a lexical rule is also a lexical rule. // This loop, in other words, implements a closure LOGS("lexical rules"); for (k = 0; k < tis(); k++) { int tn = list_base[k]; int *bnf = bnf_table->sb; int nbnf = bnf_table->nt; while (nbnf--) { int t = *bnf++, f = *bnf++, n; if (t != tn) { continue; } Rule rule(f); n = rule->length(); rule->lexical = 1; LOGV(rule) LCV(rule->lexical); while (n--) { Token token = rule.token(n); if (token->non_terminal_flag) { xws(token); LOGV(token); } } } } rws(); } static void build_noise_token(void) { LOGSECTION("build_noise_token"); LOGV(disregardList.size()); if (disregardList.size() == 1) { disregard_token = vp_6(disregardList[0]); Token token = disregardList[0]; token->disregard = 1; } else { int n = disregardList.size();; //int *lb = disregard_list; iws(); int i; for (i = 0; i < n; i++) { ruleElementStack .push(AgStack<RuleElement>()) .top() .push(RuleElement(disregardList[i],0)); aws(vp_form3(0)); } disregard_token = vp_4(); } extern Token vpRepeatToken; Token disregard = Token(disregard_token); disregard->disregard = 1; vpRepeatToken->disregard = 1; LOGV(disregard); LOGV((int) vpRepeatToken); } static int in_disregard_list(int tn) { LOGSECTION("in_disregard_list"); int n = disregardList.size(); while (n--) { if (tn == disregardList[n]) { return 1; } } return 0; } static void subs_bnf(int tn, int nt) { int *p = bnf_table->sb; int n = bnf_table->nt; for (; n--; p += 2) { if (*p != tn) { continue; } *p = nt; Rule rule(p[1]); if ((int)rule->prim_tkn == tn) { rule->prim_tkn = nt; } } } static int alias(Token token) { LOGSECTION("alias"); Token pureToken = Token::create(); LOGV(token) LCV(pureToken); map_token_number[pureToken] = map_token_number[token]; LOGV(token) LCV(token->value_type) LCV(token->immediate_action); LOGV(pureToken) LCV(pureToken->value_type) LCV(token->immediate_action); if (token->key) { Keyword keyword =token->key; keyword->token_number = pureToken; } pureToken->pure = 1; LOGV(token->non_terminal_flag) LCV(token->token_set_id); if (token->non_terminal_flag) { LOGS("Substituting") LCV(token) LCV(pureToken); subs_bnf(token,pureToken); } token->junky = 1; if (token->token_set_id) { LOGV(token->token_set_id); pureToken->token_set_id = token->token_set_id; token->token_set_id = 0; int n = part_dict->nsx; while (n--) if (map_part_number[n].token_number == (unsigned) token) { map_part_number[n].token_number = pureToken; LOGV(n); break; } for (Each<Rule> rule; rule.loopNotFinished(); rule.getNext()) { if ((int) rule->prim_tkn == token) { rule->prim_tkn = pureToken; LOGV(rule); } } for (unsigned i = 0; i < n_chars; i++) { if (map_char_number[i].token_number == (unsigned) token) { map_char_number[i].token_number = pureToken; } } } else if (token->part_number) { LOGV(token->part_number); pureToken->part_number = token->part_number; map_part_number[token->part_number].token_number = pureToken; token->part_number = 0; for (Each<Rule> rule; rule.loopNotFinished(); rule.getNext()) { if ((int) rule->prim_tkn == token) { rule->prim_tkn = pureToken; LOGV(rule); } } for (unsigned i = 0; i < n_chars; i++) { if (map_char_number[i].token_number == (unsigned) token) { map_char_number[i].token_number = pureToken; } } } Rule rule = makeRule(pureToken, disregard_token); at(bnf_table, (int)token, (int)rule); token->non_terminal_flag = 1; rule->prim_tkn = token; ParseTree parseTree = token->parse_tree; if (parseTree) { parseTree->token_number = pureToken; } LOGV((int) token) LCV(token->value_type); LOGV((int) pureToken) LCV(pureToken->value_type); return pureToken; } #ifdef NOT_FIX3 static AgStack<int> findRules(Token token) { AgStack<int> rules; int *p = bnf_table->sb; int n = bnf_table->nt; for (; n--; p += 2) { if (*p != (int) token) continue; rules.push(p[1]); } return rules; } #endif /* scan rules, and and mark token usage as lexical, non-lexical or both then, for each token that has both lexical and non lexical usage, make a clone */ void set_lexemes(void) { int nf = nforms; nInputRules = nforms; LOGSECTION("set_lexemes"); LOGV(nforms); disregard_token = 0; if (disregardList.size() == 0) return; LocalArray<int> newTokenNumber(ntkns+1); #ifdef NOT_FIX3 int maxTokenNumber = ntkns; #endif memset(newTokenNumber, 0, (ntkns+1)*sizeof(*newTokenNumber)); Each<Rule> rule; #ifdef INCLUDE_LOGGING for (rule.restart(); (int) rule <= nf; rule.getNext()) { LOGV(rule) LCV(rule->lexical); } #endif find_lexical_rules(); #ifdef INCLUDE_LOGGING for (rule.restart(); (int) rule <= nf; rule.getNext()) { LOGV(rule) LCV(rule->lexical); } #endif build_noise_token(); //#ifdef FIX3 // mark lexical tokens for (rule.restart(); (int) rule <= nf; rule.getNext()) { int n = rule->length(); LOGV(rule) LCV(rule->lexical); if (n == 0 || !rule->lexical) { continue; } while (n--) { Token token = rule.token(n); token->lexical = 1; } } //#endif // Scan rules which are _not_ lexical for (rule.restart(); (int) rule <= nf; rule.getNext()) { int n = rule->length(); LOGV(rule) LCV(rule->lexical); if (n == 0 || rule->lexical) { continue; } LOGSECTION("Scanning non-lexical rule"); LOGV(rule); while (n--) { Token token = rule.token(n); LOGV(token) LCV(token->token_set_id) LCV(token->non_terminal_flag); LOGV(token->lexeme) LCV(token->lexical) LCV(in_disregard_list(token)); LOGV(token->disregard); if (newTokenNumber[token] || in_disregard_list(token) || (token->non_terminal_flag && token->token_set_id) || token->disregard || (int) token == eof_token || (int) token == error_token) { continue; } if (token->non_terminal_flag && !token->lexeme) { continue; } // newTokenNumber is the pure token newTokenNumber[token] = alias(token); LOGV(token) LCV(newTokenNumber[token]); } } #ifdef FIX3 for (rule.restart(); (int) rule <= nf; rule.getNext()) { int n = rule->length(); LOGV(rule) LCV(rule->lexical); if (n == 0 || rule->lexical) { continue; } LOGSECTION("Scanning non-lexical rule"); LOGV(rule); while (n--) { Token token = rule.token(n); LOGV(token) LCV(token->token_set_id) LCV(token->non_terminal_flag); LOGV(token->lexeme) LCV(token->lexical) LCV(in_disregard_list(token)); LOGV(token->disregard); if (newTokenNumber[token] || in_disregard_list(token) || (token->non_terminal_flag && token->token_set_id) || token->disregard || (int) token == eof_token || (int) token == error_token) { continue; } if (token->non_terminal_flag && !token->lexical) { continue; } // newTokenNumber is the pure token newTokenNumber[token] = alias(token); LOGV(token) LCV(newTokenNumber[token]); } } #endif #ifdef NOT_FIX3 for (rule.restart(); (int) rule <= nf; rule.getNext()) { int n = rule->length(); if (n == 0 || rule->lexical) { continue; } while (n-- > 0) { Token token = rule.token(n); if ((int) token >= maxTokenNumber || newTokenNumber[token]) continue; if (newTokenNumber[token] || in_disregard_list(token) || (int) token == eof_token || (token->non_terminal_flag && token->token_set_id) || (int) token == error_token) { continue; } if (token->non_terminal_flag && !token->lexical) { continue; } AgStack<int> ruleList = findRules(token); Token newToken = Token::create(); map_token_number[newToken] = map_token_number[token]; subs_bnf(token, newToken); newTokenNumber[token] = newToken; newToken->pure = 1; int i; for (i = 0; i < ruleList.size(); i++) { Rule oldRule = ruleList[i]; Rule newRule = Rule::create(); map_form_number[newRule] = map_form_number[oldRule]; int k = oldRule->elementList.size(); newRule->elementList = AgArray<RuleElement>(k); while (k--) { newRule->elementList[k] = oldRule->elementList[k]; } newRule->lexical = 0; at(bnf_table,(int)token,(int)newRule); token->non_terminal_flag = 1; newRule->prim_tkn = token; } } } #endif LOGS("alias loop complete"); for (rule.restart(); rule.loopNotFinished(); rule.getNext()) { int n = rule->length(); LOGV(rule) LCV(rule->lexical); if (n == 0) continue; if (!rule->lexical) continue; LOGSECTION("Substitution loop"); while (n-- > 0) { Token token = rule.token(n); if (newTokenNumber[token] == 0) { continue; } rule.token(n) = newTokenNumber[token]; LOGV(token) LCV(newTokenNumber[token]); } } LOGS("Rule loop complete"); nforms_base = nforms; }