diff anagram/agcore/csexp.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/anagram/agcore/csexp.cpp	Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,578 @@
+/*
+ * AnaGram, A System for Syntax Directed Programming
+ * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
+ * See the file COPYING for license and usage terms.
+ *
+ * csexp.cpp
+ */
+
+#include "config.h"
+#include "cs.h"
+#include "csexp.h"
+#include "error.h"
+#include "symbol.h"
+
+//#define INCLUDE_LOGGING
+#include "log.h"
+
+
+#define MAX_N_CHARS 0x10001
+
+// XOR masks applied by agToUpper() to codes 0xe0..0xff, indexed by
+// (code - 0xe0).  A mask of 0x20 toggles bit 5; a mask of 0 leaves the
+// code unchanged (the entries for 0xf7 and 0xff).
+static char caseTable[32] = {
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,    0,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,    0
+};
+
+// Returns c with 'a'..'z' mapped to 'A'..'Z'.  When iso_latin_1 is set,
+// codes 0xe0 and above are additionally XORed with their caseTable mask.
+// Every other code is returned unchanged.
+unsigned char agToUpper(unsigned char c) {
+  const bool asciiLower = (c >= 'a' && c <= 'z');
+  if (asciiLower) {
+    return (unsigned char) (c ^ 0x20);
+  }
+  if (iso_latin_1 && c >= 0xe0) {
+    return (unsigned char) (c ^ caseTable[c - 0xe0]);
+  }
+  return c;
+}
+
+
+//asString
+
+// Renders this expression for a context of precedence p: the plain
+// asString() text when p < precedence, otherwise that text wrapped in
+// parentheses.
+AgString CharSetExpression::asString(Precedence p) const {
+  LOGSECTION_OFF("CharSetExpression::asString(Precedence)");
+  LOGV(p) LCV(precedence);
+  if (!(p < precedence)) {
+    AgString value = AgString("(").concat(asString()).concat(")");
+    LOGV(value) LCV((int) p);
+    return value;
+  }
+  return asString();
+}
+
+// Formats the range as a quoted pair of characters separated by '-',
+// e.g. 'a-z'.
+AgString CharRange::asString() const {
+  LOGSECTION_OFF("CharRange::asString");
+  char buf[20];
+  sprintf(buf, "'%c-%c'", first, last);
+  LOGV(first) LCV(last) LCV(buf);
+  AgString text(buf);
+  return text;
+}
+
+// Formats the range as "first..last" with both bounds in decimal.
+AgString CodeRange::asString() const {
+  LOGSECTION_OFF("CodeRange::asString");
+  // Assuming first/last are ints (they are printed with %d), each bound
+  // can need up to 11 characters with a 32-bit int; together with ".."
+  // and the terminating NUL that is 25 bytes.  A 20-byte buffer overflows
+  // for large codes, so leave comfortable headroom.
+  char buf[32];
+  sprintf(buf, "%d..%d", first, last);
+  LOGV(first) LCV(last) LCV(buf);
+  return AgString(buf);
+}
+
+// Renders the character as a quoted character literal:
+//  - the named escapes \a \b \f \n \r \t \v for those control codes,
+//  - a three-digit octal escape for any other value below ' ',
+//  - backslash escapes for backslash, '?', single and double quote,
+//  - the character itself (via %c) for everything else.
+AgString IndividualChar::asString() const {
+  LOGSECTION_OFF("IndividualChar::asString");
+  LOGV(asciiValue);
+  // Sized for the worst case of the octal branch: should asciiValue ever
+  // be negative, "%03o" prints 11 octal digits (32-bit int), which with
+  // the two quotes, the backslash and the terminating NUL needs 15 bytes.
+  // A 10-byte buffer would overflow in that case.
+  // NOTE(review): asciiValue's declared type is not visible here; if it
+  // can never be negative the extra room is simply unused.
+  char buf[16];
+  if (asciiValue < ' ') {
+    switch (asciiValue) {
+      case '\a': sprintf(buf, "'\\a'"); break;
+      case '\b': sprintf(buf, "'\\b'"); break;
+      case '\f': sprintf(buf, "'\\f'"); break;
+      case '\n': sprintf(buf, "'\\n'"); break;
+      case '\r': sprintf(buf, "'\\r'"); break;
+      case '\t': sprintf(buf, "'\\t'"); break;
+      case '\v': sprintf(buf, "'\\v'"); break;
+      default:   sprintf(buf, "'\\%03o'", asciiValue); break;
+    }
+  }
+  else {
+    switch (asciiValue) {
+      case '\\': sprintf(buf, "'\\\\'"); break;
+      case '\?': sprintf(buf, "'\\?'"); break;
+      case '\'': sprintf(buf, "'\\''"); break;
+      case '\"': sprintf(buf, "'\\\"'"); break;
+      default:   sprintf(buf, "'%c'", asciiValue); break;
+    }
+  }
+  LOGV(asciiValue) LCV(buf);
+  return AgString(buf);
+}
+
+// Formats the character code as a decimal number.
+AgString IndividualCode::asString() const {
+  LOGSECTION_OFF("IndividualCode::asString");
+  LOGV(value);
+  char buf[20];
+  sprintf(buf, "%d", value);
+  LOGV(value) LCV(buf);
+  AgString text(buf);
+  return text;
+}
+
+// Returns the string stored on the Symbol constructed from this set's name.
+AgString NamedCharSet::asString() const {
+  LOGSECTION_OFF("NamedCharSet::asString");
+  Symbol symbol(name);
+  return symbol->string;
+}
+
+// Renders the union as "<left> + <right>"; neither operand is
+// parenthesized here.
+AgString CharSetUnion::asString() const {
+  LOGSECTION_OFF("CharSetUnion::asString");
+  AgString leftText = left->asString();
+  AgString rightText = right->asString();
+  return leftText.concat(" + ").concat(rightText);
+}
+
+// Renders the intersection as "<left> * <right>", asking each operand for
+// its text at multiplicative precedence.
+AgString CharSetIntersection::asString() const {
+  LOGSECTION_OFF("CharSetIntersection::asString");
+  AgString leftText = left->asString(multiplicative);
+  AgString rightText = right->asString(multiplicative);
+  return leftText.concat(" * ").concat(rightText);
+}
+
+// Renders the difference as "<left> - <right>".  The left operand is
+// rendered plainly; the right operand is requested at additive precedence.
+AgString CharSetDifference::asString() const {
+  LOGSECTION_OFF("CharSetDifference::asString");
+  AgString leftText = left->asString();
+  AgString rightText = right->asString(additive);
+  AgString value = leftText.concat(" - ").concat(rightText);
+  LOGV(value);
+  return value;
+}
+
+// Renders the complement as "~" followed by the operand's text at unary
+// precedence.
+AgString CharSetComplement::asString() const {
+  LOGSECTION_OFF("CharSetComplement::asString");
+  AgString value("~");
+  LOGV(value);
+  const AgString operandText = operand->asString(unary);
+  value = value.concat(operandText);
+  LOGV(value);
+  return value;
+}
+
+//bitmap
+
+// Builds the bitmap for the inclusive range first..last.  Each character
+// contributes its translateTable entry; when case_sensitive is off, the
+// entry for its agToUpper() image is set as well.
+CharBitmap CharRange::bitmap() const {
+  LOGSECTION("CharRange::bitmap");
+  LOGV(asString());
+  CharBitmap result;
+  for (int code = first; code <= last; code++) {
+    result.setBit(translateTable[code]);
+    if (!case_sensitive) {
+      result.setBit(translateTable[agToUpper((char) code)]);
+    }
+  }
+  return result;
+}
+
+// Builds the bitmap by calling setRange(first, last) on an empty
+// CharBitmap; the codes are not passed through translateTable.
+CharBitmap CodeRange::bitmap() const {
+  LOGSECTION("CodeRange::bitmap");
+  LOGV(asString());
+  LOGV(first) LCV(last);
+  CharBitmap range;
+  return range.setRange(first, last);
+}
+
+// Builds a bitmap holding the translateTable entry for this character,
+// plus the entry for its agToUpper() image when case_sensitive is off.
+CharBitmap IndividualChar::bitmap() const {
+  LOGSECTION("IndividualChar::bitmap");
+  LOGV(asString());
+  CharBitmap result;
+  result.setBit(translateTable[asciiValue]);
+  if (case_sensitive) {
+    return result;
+  }
+  result.setBit(translateTable[agToUpper((char) asciiValue)]);
+  return result;
+}
+
+// Builds a bitmap with the single bit for this code set; the code is used
+// directly, without going through translateTable.
+CharBitmap IndividualCode::bitmap() const {
+  LOGSECTION("IndividualCode::bitmap");
+  LOGV(asString());
+  CharBitmap single;
+  single.setBit(value);
+  return single;
+}
+
+// Looks up the parse tree for this name and returns the bitmap of the
+// expression recorded for it in map_parse_tree.  When getParseTree()
+// yields 0, an empty CharBitmap is returned instead.
+CharBitmap NamedCharSet::bitmap() const {
+  LOGSECTION("NamedCharSet::bitmap");
+  LOGV(asString());
+  int parseTree = getParseTree(name);
+  LOGV(parseTree);
+  if (parseTree != 0) {
+    LOGV((int)(map_parse_tree[parseTree].expression));
+    CharSetExpression *definition = map_parse_tree[parseTree].expression;
+    return definition->bitmap();
+  }
+  return CharBitmap();
+}
+
+// Starts from the left operand's bitmap and merges the right operand's
+// bitmap into it with |=.
+CharBitmap CharSetUnion::bitmap() const {
+  LOGSECTION("CharSetUnion::bitmap");
+  LOGV(asString());
+  CharBitmap combined = left->bitmap();
+  combined |= right->bitmap();
+  return combined;
+}
+
+// Starts from the left operand's bitmap and applies &= with the right
+// operand's bitmap.
+CharBitmap CharSetIntersection::bitmap() const {
+  LOGSECTION("CharSetIntersection::bitmap");
+  LOGV(asString());
+  CharBitmap common = left->bitmap();
+  common &= right->bitmap();
+  return common;
+}
+
+// Starts from the left operand's bitmap and applies -= with the right
+// operand's bitmap.
+CharBitmap CharSetDifference::bitmap() const {
+  LOGSECTION("CharSetDifference::bitmap");
+  LOGV(asString());
+  CharBitmap remaining = left->bitmap();
+  remaining -= right->bitmap();
+  return remaining;
+}
+
+// Returns the result of applying operator~ to the operand's bitmap.
+CharBitmap CharSetComplement::bitmap() const {
+  LOGSECTION("CharSetComplement::bitmap");
+  LOGV(asString());
+  CharBitmap inner = operand->bitmap();
+  return ~inner;
+}
+
+// checkMinimum
+
+void CharSetExpression::checkMinimum(int c) {
+  if (!negativeCharDiagnostic && c < 0 && pointer_input) {
+    // would be nice to dig up the line on which it was defined
+    errorList.push(Error("Negative character code in pointer mode"));
+    negativeCharDiagnostic = 1;
+  }
+}
+
+// checkRange()
+
+// Widens min_char_number / max_char_number so that they cover the
+// translateTable entry of every character in the range first..last, then
+// recomputes n_chars from the resulting bounds.
+void CharRange::checkRange() {
+  LOGSECTION("CharRange::checkRange");
+  LOGV(asString());
+  // The range is inclusive of 'last' (bitmap() sets bits for first..last),
+  // so the final character has to be examined as well.  Stopping at
+  // i < last would skip it, and would skip a one-character range entirely.
+  for (int i = first; i <= last; i++) {
+    LOGV(i) LCV(translateTable[i]);
+    if (min_char_number > translateTable[i]) {
+      min_char_number = translateTable[i];
+    }
+    if (max_char_number < translateTable[i]) {
+      max_char_number = translateTable[i];
+    }
+  }
+  n_chars = max_char_number - min_char_number + 1;
+  LOGV(first) LCV(last);
+  LOGV(min_char_number) LCV(max_char_number);
+}
+
+// Widens min_char_number / max_char_number to include first and last
+// (used as raw codes, without translateTable) and recomputes n_chars.
+void CodeRange::checkRange() {
+  LOGSECTION("CodeRange::checkRange");
+  LOGV(asString());
+
+  // Lower bound: pull the minimum down to 'first' if needed.
+  if (first < min_char_number) {
+    min_char_number = first;
+  }
+  // Upper bound: push the maximum up to 'last' if needed.
+  if (last > max_char_number) {
+    max_char_number = last;
+  }
+  n_chars = max_char_number - min_char_number + 1;
+
+  LOGV(first) LCV(last);
+  LOGV(min_char_number) LCV(max_char_number);
+}
+
+// Widens min_char_number / max_char_number to include this character's
+// translateTable entry and recomputes n_chars.
+void IndividualChar::checkRange() {
+  LOGSECTION("IndividualChar::checkRange");
+  LOGV(asString());
+
+  if (translateTable[asciiValue] < min_char_number) {
+    min_char_number = translateTable[asciiValue];
+  }
+  if (translateTable[asciiValue] > max_char_number) {
+    max_char_number = translateTable[asciiValue];
+  }
+  n_chars = max_char_number - min_char_number + 1;
+
+  LOGV(asciiValue);
+  LOGV(min_char_number) LCV(max_char_number);
+}
+
+void IndividualCode::checkRange() {
+  LOGSECTION("IndividualCode::checkRange");
+  LOGV(asString());
+
+  if (value > 0xffff) {
+    value = 0xffff;
+    log_error("Only 16 bit characters supported");
+  }
+
+  if (min_char_number > value) {
+    min_char_number = value;
+  }
+  if (max_char_number < value) {
+    max_char_number = value;
+  }
+
+  n_chars = max_char_number - min_char_number + 1;
+  LOGV(value);
+  LOGV(min_char_number) LCV(max_char_number);
+}
+
+void CharSetUnion::checkRange() {  // range-checks both operands of the union
+  LOGSECTION("CharSetUnion::checkRange");
+  LOGV(asString());
+  left->checkRange();   // left operand first,
+  right->checkRange();  // then the right operand
+  LOGV(min_char_number) LCV(max_char_number);  // log the bounds after both checks
+}
+
+void CharSetIntersection::checkRange() {  // range-checks both operands of the intersection
+  LOGSECTION("CharSetIntersection::checkRange");
+  LOGV(asString());
+  left->checkRange();   // left operand first,
+  right->checkRange();  // then the right operand
+  LOGV(min_char_number) LCV(max_char_number);  // log the bounds after both checks
+}
+
+void CharSetDifference::checkRange() {  // range-checks both operands of the difference
+  LOGSECTION("CharSetDifference::checkRange");
+  LOGV(asString());
+  left->checkRange();   // left operand first,
+  right->checkRange();  // then the right (subtracted) operand
+  LOGV(min_char_number) LCV(max_char_number);  // log the bounds after both checks
+}
+
+void CharSetComplement::checkRange() {  // range-checks the complemented operand
+  LOGSECTION("CharSetComplement::checkRange");
+  LOGV(asString());
+  operand->checkRange();  // delegate to the single operand
+  LOGV(min_char_number) LCV(max_char_number);  // log the bounds after the check
+}
+
+//translate table
+
+CharSetExpression::TranslateTable CharSetExpression::translateTable;  // out-of-class definition of the static member indexed as translateTable[code] above
+
+
+// Comparisons
+
+// Two CharRanges are equal when sameType(x) holds and both bounds match;
+// returns 0 whenever sameType(x) fails.
+int CharRange::operator == (const CharSetExpression &x) const {
+  LOGSECTION_OFF("CharRange::operator==(CharSetExpression &)");
+  if (sameType(x)) {
+    // sameType() has passed, so x is treated as a CharRange.
+    const CharRange &other = static_cast<const CharRange &>(x);
+    return first == other.first && last == other.last;
+  }
+  return 0;
+}
+
+// Orders CharRanges by first, then by last.  When sameType(x) fails the
+// comparison is delegated to CharSetExpression::operator <.
+int CharRange::operator < (const CharSetExpression &x) const {
+  LOGSECTION_OFF("CharRange::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (!sameType(x)) {
+    return CharSetExpression::operator < (x);
+  }
+  const CharRange &other = static_cast<const CharRange &>(x);
+  int flag;
+  if (first == other.first) {
+    flag = last < other.last;
+  }
+  else {
+    flag = first < other.first;
+  }
+  LOGV(flag);
+  return flag;
+}
+
+// Two CodeRanges are equal when sameType(x) holds and both bounds match;
+// returns 0 whenever sameType(x) fails.
+int CodeRange::operator == (const CharSetExpression &x) const {
+  LOGSECTION("CodeRange::operator==(CharSetExpression &)");
+  if (sameType(x)) {
+    // sameType() has passed, so x is treated as a CodeRange.
+    const CodeRange &other = static_cast<const CodeRange &>(x);
+    return first == other.first && last == other.last;
+  }
+  return 0;
+}
+
+// Orders CodeRanges by first, then by last.  When sameType(x) fails the
+// comparison is delegated to CharSetExpression::operator <.
+int CodeRange::operator < (const CharSetExpression &x) const {
+  LOGSECTION("CodeRange::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (!sameType(x)) {
+    return CharSetExpression::operator < (x);
+  }
+  const CodeRange &other = static_cast<const CodeRange &>(x);
+  int flag;
+  if (first == other.first) {
+    flag = last < other.last;
+  }
+  else {
+    flag = first < other.first;
+  }
+  LOGV(flag);
+  return flag;
+}
+
+// Two IndividualChars are equal when sameType(x) holds and their
+// asciiValue fields match; returns 0 whenever sameType(x) fails.
+int IndividualChar::operator == (const CharSetExpression &x) const {
+  LOGSECTION("IndividualChar::operator==(CharSetExpression &)");
+  if (sameType(x)) {
+    // sameType() has passed, so x is treated as an IndividualChar.
+    const IndividualChar &other = static_cast<const IndividualChar &>(x);
+    return asciiValue == other.asciiValue;
+  }
+  return 0;
+}
+// Orders IndividualChars by asciiValue.  When sameType(x) fails the
+// comparison is delegated to CharSetExpression::operator <.
+int IndividualChar::operator < (const CharSetExpression &x) const {
+  LOGSECTION("IndividualChar::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (sameType(x)) {
+    const IndividualChar &other = static_cast<const IndividualChar &>(x);
+    int flag = asciiValue < other.asciiValue;
+    LOGV(flag);
+    return flag;
+  }
+  return CharSetExpression::operator < (x);
+}
+
+// Two IndividualCodes are equal when sameType(x) holds and their value
+// fields match; returns 0 whenever sameType(x) fails.
+int IndividualCode::operator == (const CharSetExpression &x) const {
+  LOGSECTION("IndividualCode::operator==(CharSetExpression &)");
+  if (sameType(x)) {
+    // sameType() has passed, so x is treated as an IndividualCode.
+    const IndividualCode &other = static_cast<const IndividualCode &>(x);
+    return value == other.value;
+  }
+  return 0;
+}
+
+// Orders IndividualCodes by value.  When sameType(x) fails the comparison
+// is delegated to CharSetExpression::operator <.
+int IndividualCode::operator < (const CharSetExpression &x) const {
+  LOGSECTION("IndividualCode::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (sameType(x)) {
+    const IndividualCode &other = static_cast<const IndividualCode &>(x);
+    int flag = value < other.value;
+    LOGV(flag);
+    return flag;
+  }
+  return CharSetExpression::operator < (x);
+}
+
+// Two NamedCharSets are equal when sameType(x) holds and their name
+// fields match; returns 0 whenever sameType(x) fails.
+int NamedCharSet::operator == (const CharSetExpression &x) const {
+  LOGSECTION("NamedCharSet::operator==(CharSetExpression &)");
+  if (sameType(x)) {
+    // sameType() has passed, so x is treated as a NamedCharSet.
+    const NamedCharSet &other = static_cast<const NamedCharSet &>(x);
+    return name == other.name;
+  }
+  return 0;
+}
+
+// Orders NamedCharSets by their name field.  When sameType(x) fails the
+// comparison is delegated to CharSetExpression::operator <.
+int NamedCharSet::operator < (const CharSetExpression &x) const {
+  LOGSECTION("NamedCharSet::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (sameType(x)) {
+    const NamedCharSet &other = static_cast<const NamedCharSet &>(x);
+    int flag = name < other.name;
+    LOGV(flag);
+    return flag;
+  }
+  return CharSetExpression::operator < (x);
+}
+
+// Two unions are equal when sameType(x) holds and their operands match
+// either in the same order or swapped; returns 0 whenever sameType(x)
+// fails.
+int CharSetUnion::operator == (const CharSetExpression &x) const {
+  LOGSECTION("CharSetUnion::operator==(CharSetExpression &)");
+  if (!sameType(x)) {
+    return 0;
+  }
+  const CharSetUnion &other = static_cast<const CharSetUnion &>(x);
+  // Operands in the same positions...
+  if (*left == *other.left && *right == *other.right) {
+    return 1;
+  }
+  // ...or with left and right exchanged.
+  return *left == *other.right && *right == *other.left;
+}
+
+// Ordering for unions: 0 when the two compare equal under operator ==,
+// otherwise the left operands decide, with the right operands breaking
+// ties.  When sameType(x) fails the comparison is delegated to
+// CharSetExpression::operator <.
+int CharSetUnion::operator < (const CharSetExpression &x) const {
+  LOGSECTION("CharSetUnion::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (!sameType(x)) {
+    return CharSetExpression::operator < (x);
+  }
+  LOGS("Same type");
+  const CharSetUnion &other = static_cast<const CharSetUnion &>(x);
+  LOGS("Dynamic cast successful");
+  int flag = 0;
+  if (!(*this == other)) {
+    if (*left == *other.left) {
+      flag = *right < *other.right;
+    }
+    else {
+      flag = *left < *other.left;
+    }
+  }
+  LOGV(flag);
+  return flag;
+}
+
+// Two intersections are equal when sameType(x) holds and their operands
+// match either in the same order or swapped; returns 0 whenever
+// sameType(x) fails.
+int CharSetIntersection::operator == (const CharSetExpression &x) const {
+  LOGSECTION("CharSetIntersection::operator==(CharSetExpression &)");
+  if (!sameType(x)) {
+    return 0;
+  }
+  const CharSetIntersection &other =
+    static_cast<const CharSetIntersection &>(x);
+  // Operands in the same positions...
+  if (*left == *other.left && *right == *other.right) {
+    return 1;
+  }
+  // ...or with left and right exchanged.
+  return *left == *other.right && *right == *other.left;
+}
+
+// Ordering for intersections: 0 when the two compare equal under
+// operator ==, otherwise the left operands decide, with the right operands
+// breaking ties.  When sameType(x) fails the comparison is delegated to
+// CharSetExpression::operator <.
+int CharSetIntersection::operator < (const CharSetExpression &x) const {
+  LOGSECTION("CharSetIntersection::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (!sameType(x)) {
+    return CharSetExpression::operator < (x);
+  }
+  const CharSetIntersection &other =
+    static_cast<const CharSetIntersection &>(x);
+  int flag = 0;
+  if (!(*this == other)) {
+    if (*left == *other.left) {
+      flag = *right < *other.right;
+    }
+    else {
+      flag = *left < *other.left;
+    }
+  }
+  LOGV(flag);
+  return flag;
+}
+
+// Two differences are equal when sameType(x) holds and their left and
+// right operands match position by position (no swapped match, unlike
+// union and intersection); returns 0 whenever sameType(x) fails.
+int CharSetDifference::operator == (const CharSetExpression &x) const {
+  LOGSECTION("CharSetDifference::operator==(CharSetExpression &)");
+  if (!sameType(x)) {
+    return 0;
+  }
+  const CharSetDifference &other = static_cast<const CharSetDifference &>(x);
+  if (!(*left == *other.left)) {
+    return 0;
+  }
+  return (*right == *other.right) ? 1 : 0;
+}
+// Ordering for differences: 0 when the two compare equal under
+// operator ==, otherwise the left operands decide, with the right operands
+// breaking ties.  When sameType(x) fails the comparison is delegated to
+// CharSetExpression::operator <.
+int CharSetDifference::operator < (const CharSetExpression &x) const {
+  LOGSECTION("CharSetDifference::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (!sameType(x)) {
+    return CharSetExpression::operator < (x);
+  }
+  const CharSetDifference &other = static_cast<const CharSetDifference &>(x);
+  int flag = 0;
+  if (!(*this == other)) {
+    if (*left == *other.left) {
+      flag = *right < *other.right;
+    }
+    else {
+      flag = *left < *other.left;
+    }
+  }
+  LOGV(flag);
+  return flag;
+}
+
+// Two complements are equal when sameType(x) holds and their operands
+// compare equal; returns 0 whenever sameType(x) fails.
+int CharSetComplement::operator == (const CharSetExpression &x) const {
+  LOGSECTION("CharSetComplement::operator==(CharSetExpression &)");
+  if (sameType(x)) {
+    // sameType() has passed, so x is treated as a CharSetComplement.
+    const CharSetComplement &other =
+      static_cast<const CharSetComplement &>(x);
+    return *operand == *other.operand;
+  }
+  return 0;
+}
+
+// Orders complements by comparing their operands.  When sameType(x) fails
+// the comparison is delegated to CharSetExpression::operator <.
+int CharSetComplement::operator < (const CharSetExpression &x) const {
+  LOGSECTION("CharSetComplement::operator <");
+  LOGV(asString()) LCV(x.asString());
+  if (sameType(x)) {
+    const CharSetComplement &other =
+      static_cast<const CharSetComplement &>(x);
+    int flag = *operand < *other.operand;
+    LOGV(flag);
+    return flag;
+  }
+  return CharSetExpression::operator < (x);
+}