Mercurial > ~dholland > hg > ag > index.cgi
diff anagram/agcore/csexp.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anagram/agcore/csexp.cpp Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,578 @@ +/* + * AnaGram, A System for Syntax Directed Programming + * Copyright 1993-2002 Parsifal Software. All Rights Reserved. + * See the file COPYING for license and usage terms. + * + * csexp.cpp + */ + +#include "config.h" +#include "cs.h" +#include "csexp.h" +#include "error.h" +#include "symbol.h" + +//#define INCLUDE_LOGGING +#include "log.h" + + +#define MAX_N_CHARS 0x10001 + +static char caseTable[32] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0 +}; + +unsigned char agToUpper(unsigned char c) { + if (c >= 'a' && c <= 'z') { + return c ^= 0x20; + } + if (iso_latin_1 && (unsigned) c >= 0xe0) { + c ^= caseTable[c-0xe0]; + } + return c; +} + + +//asString + +AgString CharSetExpression::asString(Precedence p) const { + LOGSECTION_OFF("CharSetExpression::asString(Precedence)"); + //LOGSECTION("CharSetExpression::asString(Precedence)"); + LOGV(p) LCV(precedence); + if (p < precedence) { + return asString(); + } + AgString value = AgString("(").concat(asString()).concat(")"); + LOGV(value) LCV((int) p); + return value; +} + +AgString CharRange::asString() const { + LOGSECTION_OFF("CharRange::asString"); + char buf[20]; + sprintf(buf, "'%c-%c'", first, last); + LOGV(first) LCV(last) LCV(buf); + return AgString(buf); +} + +AgString CodeRange::asString() const { + LOGSECTION_OFF("CodeRange::asString"); + char buf[20]; + sprintf(buf, "%d..%d", first, last); + LOGV(first) LCV(last) LCV(buf); + return AgString(buf); +} + +AgString IndividualChar::asString() const { + LOGSECTION_OFF("IndividualChar::asString"); + LOGV(asciiValue); + char buf[10]; + if (asciiValue < ' ') { + switch (asciiValue) { + case '\a': sprintf(buf, "'\\a'"); break; + case '\b': sprintf(buf, "'\\b'"); break; + case '\f': sprintf(buf, "'\\f'"); break; + case '\n': sprintf(buf, "'\\n'"); break; + case '\r': sprintf(buf, "'\\r'"); break; + case '\t': sprintf(buf, "'\\t'"); break; + case '\v': sprintf(buf, "'\\v'"); break; + default: sprintf(buf, "'\\%03o'", asciiValue); break; + } + } + else { + switch (asciiValue) { + case '\\': sprintf(buf, "'\\\\'"); break; + case '\?': sprintf(buf, "'\\?'"); break; + case '\'': sprintf(buf, "'\\''"); break; + case '\"': sprintf(buf, "'\\\"'"); break; + default: sprintf(buf, "'%c'", asciiValue); break; + } + } + LOGV(asciiValue) LCV(buf); + return AgString(buf); +} + +AgString IndividualCode::asString() const { + LOGSECTION_OFF("IndividualCode::asString"); + LOGV(value); + char buf[20]; + sprintf(buf, "%d", value); + LOGV(value) LCV(buf); + return AgString(buf); +} + +AgString NamedCharSet::asString() const { + LOGSECTION_OFF("NamedCharSet::asString"); + return Symbol(name)->string; + //return AgString(dict_str(tkn_dict, name)); +} + +AgString CharSetUnion::asString() const { + LOGSECTION_OFF("CharSetUnion::asString"); + //LOGSECTION("CharSetUnion::asString"); + return left->asString().concat(" + ").concat(right->asString()); +} + +AgString CharSetIntersection::asString() const { + LOGSECTION_OFF("CharSetIntersection::asString"); + return left->asString(multiplicative).concat(" * "). + concat(right->asString(multiplicative)); +} + +AgString CharSetDifference::asString() const { + LOGSECTION_OFF("CharSetDifference::asString"); + AgString value = left->asString().concat(" - "). + concat(right->asString(additive)); + LOGV(value); + return value; +} + +AgString CharSetComplement::asString() const { + LOGSECTION_OFF("CharSetComplement::asString"); + //LOGSECTION_ON("CharSetComplement::asString"); + AgString value("~"); + LOGV(value); + value = value.concat(operand->asString(unary)); + LOGV(value); + return value; +} + +//bitmap + +CharBitmap CharRange::bitmap() const { + LOGSECTION("CharRange::bitmap"); + LOGV(asString()); + int i = first; + CharBitmap returnValue; + while (i <= last) { + returnValue.setBit(translateTable[i]); + if (!case_sensitive) { + returnValue.setBit(translateTable[agToUpper((char) i)]); + } + i++; + } + return returnValue; +} + +CharBitmap CodeRange::bitmap() const { + LOGSECTION("CodeRange::bitmap"); + LOGV(asString()); + LOGV(first) LCV(last); + return CharBitmap().setRange(first, last); +} + +CharBitmap IndividualChar::bitmap() const { + LOGSECTION("IndividualChar::bitmap"); + LOGV(asString()); + CharBitmap map; + map.setBit(translateTable[asciiValue]); + if (!case_sensitive) { + map.setBit(translateTable[agToUpper((char) asciiValue)]); + } + return map; +} + +CharBitmap IndividualCode::bitmap() const { + LOGSECTION("IndividualCode::bitmap"); + LOGV(asString()); + CharBitmap map; + map.setBit(value); + return map; +} + +CharBitmap NamedCharSet::bitmap() const { + LOGSECTION("NamedCharSet::bitmap"); + LOGV(asString()); + int parseTree = getParseTree(name); + LOGV(parseTree); + if (parseTree == 0) { + return CharBitmap(); + } + LOGV((int)(map_parse_tree[parseTree].expression)); + CharSetExpression *expression = map_parse_tree[parseTree].expression; + + return expression->bitmap(); +} + +CharBitmap CharSetUnion::bitmap() const { + LOGSECTION("CharSetUnion::bitmap"); + LOGV(asString()); + CharBitmap returnValue = left->bitmap(); + returnValue |= right->bitmap(); + return returnValue; +} + +CharBitmap CharSetIntersection::bitmap() const { + LOGSECTION("CharSetIntersection::bitmap"); + LOGV(asString()); + CharBitmap returnValue = left->bitmap(); + returnValue &= right->bitmap(); + return returnValue; +} + +CharBitmap CharSetDifference::bitmap() const { + LOGSECTION("CharSetDifference::bitmap"); + LOGV(asString()); + CharBitmap returnValue = left->bitmap(); + returnValue -= right->bitmap(); + return returnValue; +} + +CharBitmap CharSetComplement::bitmap() const { + LOGSECTION("CharSetComplement::bitmap"); + LOGV(asString()); + return ~operand->bitmap(); +} + +// checkMinimum + +void CharSetExpression::checkMinimum(int c) { + if (!negativeCharDiagnostic && c < 0 && pointer_input) { + // would be nice to dig up the line on which it was defined + errorList.push(Error("Negative character code in pointer mode")); + negativeCharDiagnostic = 1; + } +} + +// checkRange() + +void CharRange::checkRange() { + LOGSECTION("CharRange::checkRange"); + LOGV(asString()); + for (int i = first; i < last; i++) { + LOGV(i) LCV(translateTable[i]); + if (min_char_number > translateTable[i]) { + min_char_number = translateTable[i]; + } + if (max_char_number < translateTable[i]) { + max_char_number = translateTable[i]; + } + } + n_chars = max_char_number - min_char_number + 1; + LOGV(first) LCV(last); + LOGV(min_char_number) LCV(max_char_number); +} + +void CodeRange::checkRange() { + LOGSECTION("CodeRange::checkRange"); + LOGV(asString()); + + if (min_char_number > first) { + min_char_number = first; + } + if (max_char_number < last) { + max_char_number = last; + } + + n_chars = max_char_number - min_char_number + 1; + LOGV(first) LCV(last); + LOGV(min_char_number) LCV(max_char_number); +} + +void IndividualChar::checkRange() { + LOGSECTION("IndividualChar::checkRange"); + LOGV(asString()); + + if (min_char_number > translateTable[asciiValue]) { + min_char_number = translateTable[asciiValue]; + } + if (max_char_number < translateTable[asciiValue]) { + max_char_number = translateTable[asciiValue]; + } + + n_chars = max_char_number - min_char_number + 1; + LOGV(asciiValue); + LOGV(min_char_number) LCV(max_char_number); +} + +void IndividualCode::checkRange() { + LOGSECTION("IndividualCode::checkRange"); + LOGV(asString()); + + if (value > 0xffff) { + value = 0xffff; + log_error("Only 16 bit characters supported"); + } + + if (min_char_number > value) { + min_char_number = value; + } + if (max_char_number < value) { + max_char_number = value; + } + + n_chars = max_char_number - min_char_number + 1; + LOGV(value); + LOGV(min_char_number) LCV(max_char_number); +} + +void CharSetUnion::checkRange() { + LOGSECTION("CharSetUnion::checkRange"); + LOGV(asString()); + left->checkRange(); + right->checkRange(); + LOGV(min_char_number) LCV(max_char_number); +} + +void CharSetIntersection::checkRange() { + LOGSECTION("CharSetIntersection::checkRange"); + LOGV(asString()); + left->checkRange(); + right->checkRange(); + LOGV(min_char_number) LCV(max_char_number); +} + +void CharSetDifference::checkRange() { + LOGSECTION("CharSetDifference::checkRange"); + LOGV(asString()); + left->checkRange(); + right->checkRange(); + LOGV(min_char_number) LCV(max_char_number); +} + +void CharSetComplement::checkRange() { + LOGSECTION("CharSetComplement::checkRange"); + LOGV(asString()); + operand->checkRange(); + LOGV(min_char_number) LCV(max_char_number); +} + +//translate table + +CharSetExpression::TranslateTable CharSetExpression::translateTable; + + +// Comparisons + +int CharRange::operator == (const CharSetExpression &x) const { + LOGSECTION_OFF("CharRange::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const CharRange &arg = dynamic_cast<const CharRange &>(x); + const CharRange &arg = (const CharRange &) x; + return first == arg.first && last == arg.last; +} + +int CharRange::operator < (const CharSetExpression &x) const { + LOGSECTION_OFF("CharRange::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const CharRange &arg = dynamic_cast<const CharRange &>(x); + const CharRange &arg = (const CharRange &) x; + int flag = (first == arg.first) ? last < arg.last : first < arg.first; + LOGV(flag); + return flag; +} + +int CodeRange::operator == (const CharSetExpression &x) const { + LOGSECTION("CodeRange::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const CodeRange &arg = dynamic_cast<const CodeRange &>(x); + const CodeRange &arg = (const CodeRange &) x; + return first == arg.first && last == arg.last; +} + +int CodeRange::operator < (const CharSetExpression &x) const { + LOGSECTION("CodeRange::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const CodeRange &arg = dynamic_cast<const CodeRange &>(x); + const CodeRange &arg = (const CodeRange &) x; + int flag = first == arg.first ? last < arg.last : first < arg.first; + LOGV(flag); + return flag; +} + +int IndividualChar::operator == (const CharSetExpression &x) const { + LOGSECTION("IndividualChar::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const IndividualChar &arg = dynamic_cast<const IndividualChar &>(x); + const IndividualChar &arg = (const IndividualChar &) x; + return asciiValue == arg.asciiValue; +} +int IndividualChar::operator < (const CharSetExpression &x) const { + LOGSECTION("IndividualChar::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const IndividualChar &arg = dynamic_cast<const IndividualChar &>(x); + const IndividualChar &arg = (const IndividualChar &) x; + int flag = asciiValue < arg.asciiValue; + LOGV(flag); + return flag; +} + +int IndividualCode::operator == (const CharSetExpression &x) const { + LOGSECTION("IndividualCode::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const IndividualCode &arg = dynamic_cast<const IndividualCode &>(x); + const IndividualCode &arg = (const IndividualCode &) x; + return value == arg.value; +} + +int IndividualCode::operator < (const CharSetExpression &x) const { + LOGSECTION("IndividualCode::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const IndividualCode &arg = dynamic_cast<const IndividualCode &>(x); + const IndividualCode &arg = (const IndividualCode &) x; + int flag = value < arg.value; + LOGV(flag); + return flag; +} + +int NamedCharSet::operator == (const CharSetExpression &x) const { + LOGSECTION("NamedCharSet::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const NamedCharSet &arg = dynamic_cast<const NamedCharSet &>(x); + const NamedCharSet &arg = (const NamedCharSet &) x; + return name == arg.name; +} + +int NamedCharSet::operator < (const CharSetExpression &x) const { + LOGSECTION("NamedCharSet::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const NamedCharSet &arg = dynamic_cast<const NamedCharSet &>(x); + const NamedCharSet &arg = (const NamedCharSet &) x; + int flag = name < arg.name; + LOGV(flag); + return flag; +} + +int CharSetUnion::operator == (const CharSetExpression &x) const { + LOGSECTION("CharSetUnion::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const CharSetUnion &arg = dynamic_cast<const CharSetUnion &>(x); + const CharSetUnion &arg = (const CharSetUnion &) x; + return (*left == *arg.left && *right == *arg.right) || + (*left == *arg.right && *right == *arg.left); +} + +int CharSetUnion::operator < (const CharSetExpression &x) const { + LOGSECTION("CharSetUnion::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) { + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + LOGS("Same type"); + //const CharSetUnion &arg = dynamic_cast<const CharSetUnion &>(x); + const CharSetUnion &arg = (const CharSetUnion &) x; + LOGS("Dynamic cast successful"); + int flag = (*this == arg) ? 0 : + (*left == *arg.left ? *right < *arg.right : *left < *arg.left); + LOGV(flag); + return flag; +} + +int CharSetIntersection::operator == (const CharSetExpression &x) const { + LOGSECTION("CharSetIntersection::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const CharSetIntersection &arg = dynamic_cast<const CharSetIntersection &>(x); + const CharSetIntersection &arg = (const CharSetIntersection &) x; + return (*left == *arg.left && *right == *arg.right) + || (*left == *arg.right && *right == *arg.left); +} + +int CharSetIntersection::operator < (const CharSetExpression &x) const { + LOGSECTION("CharSetIntersection::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const CharSetIntersection &arg = dynamic_cast<const CharSetIntersection &>(x); + const CharSetIntersection &arg = (const CharSetIntersection &) x; + int flag = (*this == arg) ? 0 : + (*left == *arg.left ? *right < *arg.right : *left < *arg.left); + LOGV(flag); + return flag; +} + +int CharSetDifference::operator == (const CharSetExpression &x) const { + LOGSECTION("CharSetDifference::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const CharSetDifference &arg = dynamic_cast<const CharSetDifference &>(x); + const CharSetDifference &arg = (const CharSetDifference &) x; + return (*left == *arg.left && *right == *arg.right); +} +int CharSetDifference::operator < (const CharSetExpression &x) const { + LOGSECTION("CharSetDifference::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const CharSetDifference &arg = dynamic_cast<const CharSetDifference &>(x); + const CharSetDifference &arg = (const CharSetDifference &) x; + int flag = (*this == arg) ? 0 : + (*left == *arg.left ? *right < *arg.right : *left < *arg.left); + LOGV(flag); + return flag; +} + +int CharSetComplement::operator == (const CharSetExpression &x) const { + LOGSECTION("CharSetComplement::operator==(CharSetExpression &)"); + //if (typeid(*this) != typeid(x)) return 0; + if (!sameType(x)) { + return 0; + } + //const CharSetComplement &arg = dynamic_cast<const CharSetComplement &>(x); + const CharSetComplement &arg = (const CharSetComplement &) x; + return *operand == *arg.operand; +} + +int CharSetComplement::operator < (const CharSetExpression &x) const { + LOGSECTION("CharSetComplement::operator <"); + LOGV(asString()) LCV(x.asString()); + //if (typeid(*this) != typeid(x)) return CharSetExpression::operator < (x); + if (!sameType(x)) { + return CharSetExpression::operator < (x); + } + //const CharSetComplement &arg = dynamic_cast<const CharSetComplement &>(x); + const CharSetComplement &arg = (const CharSetComplement &) x; + int flag = *operand < *arg.operand; + LOGV(flag); + return flag; +}