view anagram/agcore/csexp.cpp @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * csexp.cpp
 */

#include "config.h"
#include "cs.h"
#include "csexp.h"
#include "error.h"
#include "symbol.h"

//#define INCLUDE_LOGGING
#include "log.h"


#define MAX_N_CHARS 0x10001

/*
 * XOR masks for the ISO Latin-1 codes 0xe0..0xff, indexed by code - 0xe0.
 * A mask of 0x20 flips bit 0x20 of the code; the two zero entries
 * (indices 23 and 31, i.e. codes 0xf7 and 0xff) leave the code untouched.
 */
static char caseTable[32] = {
  /* 0xe0.. */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 0xe8.. */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 0xf0.. */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,    0,
  /* 0xf8.. */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,    0
};

/*
 * Upper-case conversion used when building case-insensitive bitmaps.
 * ASCII 'a'..'z' always map to 'A'..'Z'. Codes from 0xe0 upward are
 * folded through caseTable, but only while iso_latin_1 is set. Any
 * other code is returned unchanged.
 */
unsigned char agToUpper(unsigned char c) {
  const bool asciiLower = (c >= 'a' && c <= 'z');
  if (asciiLower) {
    return (unsigned char) (c ^ 0x20);
  }
  if (!iso_latin_1 || (unsigned) c < 0xe0) {
    return c;
  }
  return (unsigned char) (c ^ caseTable[c - 0xe0]);
}


//asString

/*
 * Render this expression as an operand in a context of precedence p.
 * The text is wrapped in parentheses unless p < precedence.
 */
AgString CharSetExpression::asString(Precedence p) const {
  LOGSECTION_OFF("CharSetExpression::asString(Precedence)");
  LOGV(p) LCV(precedence);
  if (!(p < precedence)) {
    AgString value = AgString("(")
      .concat(asString())
      .concat(")");
    LOGV(value) LCV((int) p);
    return value;
  }
  // No parentheses required.
  return asString();
}

/*
 * Text form of a character range: the two end points printed as
 * characters, separated by '-' and enclosed in single quotes.
 */
AgString CharRange::asString() const {
  LOGSECTION_OFF("CharRange::asString");
  // Output is always six characters plus the terminating NUL.
  char buf[20];
  sprintf(buf, "'%c-%c'", first, last);
  LOGV(first) LCV(last) LCV(buf);
  AgString text(buf);
  return text;
}

/*
 * Text form of a numeric code range: "first..last" in decimal.
 */
AgString CodeRange::asString() const {
  LOGSECTION_OFF("CodeRange::asString");
  // Worst case for a pair of 32-bit ints is "-2147483648..-2147483648":
  // 24 characters plus the terminating NUL.  The former 20-byte buffer
  // could be overrun by such values, so size it with room to spare.
  char buf[32];
  sprintf(buf, "%d..%d", first, last);
  LOGV(first) LCV(last) LCV(buf);
  return AgString(buf);
}

/*
 * Text form of a single character as a quoted C-style character literal.
 * The usual control characters and the characters \ ? ' " are written as
 * their backslash escapes, any other value below ' ' as a three-digit
 * (minimum) octal escape, and everything else as the character itself.
 */
AgString IndividualChar::asString() const {
  LOGSECTION_OFF("IndividualChar::asString");
  LOGV(asciiValue);
  // NOTE(review): if asciiValue can ever be negative it takes the octal
  // branch below and prints up to 11 digits ("'\37777777777'" is 14
  // characters plus NUL), which overran the former 10-byte buffer.
  // 16 bytes covers that worst case for a 32-bit value.
  char buf[16];
  switch (asciiValue) {
    case '\a': sprintf(buf, "'\\a'"); break;
    case '\b': sprintf(buf, "'\\b'"); break;
    case '\f': sprintf(buf, "'\\f'"); break;
    case '\n': sprintf(buf, "'\\n'"); break;
    case '\r': sprintf(buf, "'\\r'"); break;
    case '\t': sprintf(buf, "'\\t'"); break;
    case '\v': sprintf(buf, "'\\v'"); break;
    case '\\': sprintf(buf, "'\\\\'"); break;
    case '\?': sprintf(buf, "'\\?'"); break;
    case '\'': sprintf(buf, "'\\''"); break;
    case '\"': sprintf(buf, "'\\\"'"); break;
    default:
      if (asciiValue < ' ') {
        // %o expects an unsigned argument.
        sprintf(buf, "'\\%03o'", (unsigned) asciiValue);
      }
      else {
        sprintf(buf, "'%c'", asciiValue);
      }
      break;
  }
  LOGV(asciiValue) LCV(buf);
  return AgString(buf);
}

/*
 * Text form of a single numeric character code: its decimal value.
 */
AgString IndividualCode::asString() const {
  LOGSECTION_OFF("IndividualCode::asString");
  LOGV(value);
  char buf[20];
  sprintf(buf, "%d", value);
  LOGV(value) LCV(buf);
  AgString text(buf);
  return text;
}

/*
 * Text form of a named character set: the string stored for the
 * symbol identified by name.
 */
AgString NamedCharSet::asString() const {
  LOGSECTION_OFF("NamedCharSet::asString");
  Symbol symbol(name);
  return symbol->string;
}

/*
 * Text form of a union: "<left> + <right>", with neither operand
 * parenthesized.
 */
AgString CharSetUnion::asString() const {
  LOGSECTION_OFF("CharSetUnion::asString");
  AgString text = left->asString().concat(" + ");
  return text.concat(right->asString());
}

/*
 * Text form of an intersection: "<left> * <right>", with both operands
 * rendered at multiplicative precedence.
 */
AgString CharSetIntersection::asString() const {
  LOGSECTION_OFF("CharSetIntersection::asString");
  AgString text = left->asString(multiplicative).concat(" * ");
  return text.concat(right->asString(multiplicative));
}

/*
 * Text form of a difference: "<left> - <right>".  Only the right
 * operand is rendered at additive precedence; the left one is printed
 * as is.
 */
AgString CharSetDifference::asString() const {
  LOGSECTION_OFF("CharSetDifference::asString");
  AgString value = left->asString().concat(" - ");
  value = value.concat(right->asString(additive));
  LOGV(value);
  return value;
}

/*
 * Text form of a complement: "~" followed by the operand rendered at
 * unary precedence.
 */
AgString CharSetComplement::asString() const {
  LOGSECTION_OFF("CharSetComplement::asString");
  AgString value("~");
  LOGV(value);
  AgString operandText = operand->asString(unary);
  value = value.concat(operandText);
  LOGV(value);
  return value;
}

//bitmap

/*
 * Bitmap for a character range: the translated code of every character
 * in first..last (inclusive).  When case_sensitive is off, the
 * translated code of the agToUpper() form of each character is added
 * as well.
 */
CharBitmap CharRange::bitmap() const {
  LOGSECTION("CharRange::bitmap");
  LOGV(asString());
  CharBitmap charset;
  for (int code = first; code <= last; ++code) {
    charset.setBit(translateTable[code]);
    if (case_sensitive) {
      continue;
    }
    charset.setBit(translateTable[agToUpper((char) code)]);
  }
  return charset;
}

/*
 * Bitmap for a numeric code range: setRange(first, last) applied to an
 * empty bitmap.  No translation or case folding is done here.
 */
CharBitmap CodeRange::bitmap() const {
  LOGSECTION("CodeRange::bitmap");
  LOGV(asString());
  LOGV(first) LCV(last);
  CharBitmap codes;
  return codes.setRange(first, last);
}

/*
 * Bitmap for a single character: its translated code, plus the
 * translated code of its agToUpper() form when case_sensitive is off.
 */
CharBitmap IndividualChar::bitmap() const {
  LOGSECTION("IndividualChar::bitmap");
  LOGV(asString());
  CharBitmap bits;
  bits.setBit(translateTable[asciiValue]);
  if (case_sensitive) {
    return bits;
  }
  bits.setBit(translateTable[agToUpper((char) asciiValue)]);
  return bits;
}

/*
 * Bitmap for a single numeric code: exactly the bit for value, with no
 * translation or case folding.
 */
CharBitmap IndividualCode::bitmap() const {
  LOGSECTION("IndividualCode::bitmap");
  LOGV(asString());
  CharBitmap bits;
  bits.setBit(value);
  return bits;
}

/*
 * Bitmap for a named character set: the bitmap of the expression stored
 * in map_parse_tree for this name, or an empty bitmap when
 * getParseTree() returns 0 for the name.
 */
CharBitmap NamedCharSet::bitmap() const {
  LOGSECTION("NamedCharSet::bitmap");
  LOGV(asString());
  int parseTree = getParseTree(name);
  LOGV(parseTree);
  if (parseTree != 0) {
    LOGV((int)(map_parse_tree[parseTree].expression));
    CharSetExpression *definition = map_parse_tree[parseTree].expression;
    return definition->bitmap();
  }
  return CharBitmap();
}

/*
 * Bitmap for a union: the left operand's bitmap with the right
 * operand's bitmap merged in through operator |=.
 */
CharBitmap CharSetUnion::bitmap() const {
  LOGSECTION("CharSetUnion::bitmap");
  LOGV(asString());
  CharBitmap combined(left->bitmap());
  CharBitmap rhs(right->bitmap());
  combined |= rhs;
  return combined;
}

/*
 * Bitmap for an intersection: the left operand's bitmap reduced by
 * operator &= with the right operand's bitmap.
 */
CharBitmap CharSetIntersection::bitmap() const {
  LOGSECTION("CharSetIntersection::bitmap");
  LOGV(asString());
  CharBitmap combined(left->bitmap());
  CharBitmap rhs(right->bitmap());
  combined &= rhs;
  return combined;
}

/*
 * Bitmap for a difference: the left operand's bitmap with the right
 * operand's bitmap removed through operator -=.
 */
CharBitmap CharSetDifference::bitmap() const {
  LOGSECTION("CharSetDifference::bitmap");
  LOGV(asString());
  CharBitmap remaining(left->bitmap());
  CharBitmap removed(right->bitmap());
  remaining -= removed;
  return remaining;
}

/*
 * Bitmap for a complement: operator ~ applied to the operand's bitmap.
 */
CharBitmap CharSetComplement::bitmap() const {
  LOGSECTION("CharSetComplement::bitmap");
  LOGV(asString());
  CharBitmap inner(operand->bitmap());
  return ~inner;
}

// checkMinimum

/*
 * Report a negative character code c when pointer_input is set.  The
 * error is queued at most once: negativeCharDiagnostic records that it
 * has already been issued.
 */
void CharSetExpression::checkMinimum(int c) {
  if (negativeCharDiagnostic) {
    return;
  }
  if (c >= 0) {
    return;
  }
  if (!pointer_input) {
    return;
  }
  // would be nice to dig up the line on which it was defined
  errorList.push(Error("Negative character code in pointer mode"));
  negativeCharDiagnostic = 1;
}

// checkRange()

/*
 * Widen the shared [min_char_number, max_char_number] window so that it
 * covers the translated code of every character in this range, then
 * recompute n_chars from the window.
 */
void CharRange::checkRange() {
  LOGSECTION("CharRange::checkRange");
  LOGV(asString());
  // The range is inclusive (bitmap() walks first..last with <=), so the
  // loop must visit 'last' too.  Stopping at i < last left the final
  // character, and a one-character range entirely, out of the min/max
  // computation.
  for (int i = first; i <= last; i++) {
    LOGV(i) LCV(translateTable[i]);
    if (min_char_number > translateTable[i]) {
      min_char_number = translateTable[i];
    }
    if (max_char_number < translateTable[i]) {
      max_char_number = translateTable[i];
    }
  }
  n_chars = max_char_number - min_char_number + 1;
  LOGV(first) LCV(last);
  LOGV(min_char_number) LCV(max_char_number);
}

/*
 * Widen the shared [min_char_number, max_char_number] window to include
 * first and last, then recompute n_chars from the window.
 */
void CodeRange::checkRange() {
  LOGSECTION("CodeRange::checkRange");
  LOGV(asString());

  // Lower bound of the range may extend the window downward ...
  if (first < min_char_number) {
    min_char_number = first;
  }
  // ... and the upper bound may extend it upward.
  if (last > max_char_number) {
    max_char_number = last;
  }
  n_chars = max_char_number - min_char_number + 1;

  LOGV(first) LCV(last);
  LOGV(min_char_number) LCV(max_char_number);
}

/*
 * Widen the shared [min_char_number, max_char_number] window to include
 * the translated code of this character, then recompute n_chars.
 */
void IndividualChar::checkRange() {
  LOGSECTION("IndividualChar::checkRange");
  LOGV(asString());

  // Translated code below the current minimum?
  if (translateTable[asciiValue] < min_char_number) {
    min_char_number = translateTable[asciiValue];
  }
  // Translated code above the current maximum?
  if (translateTable[asciiValue] > max_char_number) {
    max_char_number = translateTable[asciiValue];
  }
  n_chars = max_char_number - min_char_number + 1;

  LOGV(asciiValue);
  LOGV(min_char_number) LCV(max_char_number);
}

/*
 * Clamp value to 0xffff (logging an error when it was larger), widen
 * the shared [min_char_number, max_char_number] window to include it,
 * and recompute n_chars.
 */
void IndividualCode::checkRange() {
  LOGSECTION("IndividualCode::checkRange");
  LOGV(asString());

  // Codes above 16 bits are cut back to the largest supported code.
  if (value > 0xffff) {
    value = 0xffff;
    log_error("Only 16 bit characters supported");
  }

  if (value < min_char_number) {
    min_char_number = value;
  }
  if (value > max_char_number) {
    max_char_number = value;
  }
  n_chars = max_char_number - min_char_number + 1;

  LOGV(value);
  LOGV(min_char_number) LCV(max_char_number);
}

// Range check for a union: delegates to checkRange() on the left operand
// and then on the right operand.  Nothing is updated here directly; the
// final log line only reports min_char_number / max_char_number as they
// stand after both calls.
void CharSetUnion::checkRange() {
  LOGSECTION("CharSetUnion::checkRange");
  LOGV(asString());
  left->checkRange();
  right->checkRange();
  LOGV(min_char_number) LCV(max_char_number);
}

// Range check for an intersection: delegates to checkRange() on the left
// operand and then on the right operand.  Nothing is updated here
// directly; the final log line only reports min_char_number /
// max_char_number as they stand after both calls.
void CharSetIntersection::checkRange() {
  LOGSECTION("CharSetIntersection::checkRange");
  LOGV(asString());
  left->checkRange();
  right->checkRange();
  LOGV(min_char_number) LCV(max_char_number);
}

// Range check for a difference: delegates to checkRange() on the left
// operand and then on the right operand (the subtracted operand is
// checked too).  Nothing is updated here directly; the final log line
// only reports min_char_number / max_char_number after both calls.
void CharSetDifference::checkRange() {
  LOGSECTION("CharSetDifference::checkRange");
  LOGV(asString());
  left->checkRange();
  right->checkRange();
  LOGV(min_char_number) LCV(max_char_number);
}

// Range check for a complement: delegates to checkRange() on the single
// operand.  Nothing is updated here directly; the final log line only
// reports min_char_number / max_char_number after that call.
void CharSetComplement::checkRange() {
  LOGSECTION("CharSetComplement::checkRange");
  LOGV(asString());
  operand->checkRange();
  LOGV(min_char_number) LCV(max_char_number);
}

//translate table

// Definition of the class-level translateTable member.  The bitmap() and
// checkRange() implementations for CharRange and IndividualChar index it
// by character value to obtain the code they actually record.
CharSetExpression::TranslateTable CharSetExpression::translateTable;


// Comparisons

/*
 * Equality against an arbitrary expression: non-zero only when x passes
 * sameType() and both end points match.
 */
int CharRange::operator == (const CharSetExpression &x) const {
  LOGSECTION_OFF("CharRange::operator==(CharSetExpression &)");
  if (sameType(x)) {
    // sameType() succeeded, so x is treated as a CharRange.
    const CharRange &other = static_cast<const CharRange &>(x);
    return first == other.first && last == other.last;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <; two ranges
 * are ordered by first, with last breaking ties.
 */
int CharRange::operator < (const CharSetExpression &x) const {
  LOGSECTION_OFF("CharRange::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const CharRange &other = static_cast<const CharRange &>(x);
    int flag;
    if (first == other.first) {
      flag = last < other.last;
    }
    else {
      flag = first < other.first;
    }
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression: non-zero only when x passes
 * sameType() and both end points match.
 */
int CodeRange::operator == (const CharSetExpression &x) const {
  LOGSECTION("CodeRange::operator==(CharSetExpression &)");
  if (sameType(x)) {
    // sameType() succeeded, so x is treated as a CodeRange.
    const CodeRange &other = static_cast<const CodeRange &>(x);
    return first == other.first && last == other.last;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <; two code
 * ranges are ordered by first, with last breaking ties.
 */
int CodeRange::operator < (const CharSetExpression &x) const {
  LOGSECTION("CodeRange::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const CodeRange &other = static_cast<const CodeRange &>(x);
    int flag;
    if (first == other.first) {
      flag = last < other.last;
    }
    else {
      flag = first < other.first;
    }
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression: non-zero only when x passes
 * sameType() and holds the same asciiValue.
 */
int IndividualChar::operator == (const CharSetExpression &x) const {
  LOGSECTION("IndividualChar::operator==(CharSetExpression &)");
  if (sameType(x)) {
    // sameType() succeeded, so x is treated as an IndividualChar.
    const IndividualChar &other = static_cast<const IndividualChar &>(x);
    return asciiValue == other.asciiValue;
  }
  return 0;
}
/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <; two
 * individual characters are ordered by asciiValue.
 */
int IndividualChar::operator < (const CharSetExpression &x) const {
  LOGSECTION("IndividualChar::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const IndividualChar &other = static_cast<const IndividualChar &>(x);
    int flag = (asciiValue < other.asciiValue);
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression: non-zero only when x passes
 * sameType() and holds the same value.
 */
int IndividualCode::operator == (const CharSetExpression &x) const {
  LOGSECTION("IndividualCode::operator==(CharSetExpression &)");
  if (sameType(x)) {
    // sameType() succeeded, so x is treated as an IndividualCode.
    const IndividualCode &other = static_cast<const IndividualCode &>(x);
    return value == other.value;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <; two
 * individual codes are ordered by value.
 */
int IndividualCode::operator < (const CharSetExpression &x) const {
  LOGSECTION("IndividualCode::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const IndividualCode &other = static_cast<const IndividualCode &>(x);
    int flag = (value < other.value);
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression: non-zero only when x passes
 * sameType() and refers to the same name.
 */
int NamedCharSet::operator == (const CharSetExpression &x) const {
  LOGSECTION("NamedCharSet::operator==(CharSetExpression &)");
  if (sameType(x)) {
    // sameType() succeeded, so x is treated as a NamedCharSet.
    const NamedCharSet &other = static_cast<const NamedCharSet &>(x);
    return name == other.name;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <; two named
 * sets are ordered by comparing their name members.
 */
int NamedCharSet::operator < (const CharSetExpression &x) const {
  LOGSECTION("NamedCharSet::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const NamedCharSet &other = static_cast<const NamedCharSet &>(x);
    int flag = (name < other.name);
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression.  Two unions are equal when
 * their operands match either in the same order or swapped; anything
 * that fails sameType() is unequal.
 */
int CharSetUnion::operator == (const CharSetExpression &x) const {
  LOGSECTION("CharSetUnion::operator==(CharSetExpression &)");
  if (sameType(x)) {
    const CharSetUnion &other = static_cast<const CharSetUnion &>(x);
    // Operands in the same order.
    if (*left == *other.left && *right == *other.right) {
      return 1;
    }
    // Operands swapped.
    return *left == *other.right && *right == *other.left;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <.  Two unions
 * that compare equal are never "less"; otherwise they are ordered by
 * their left operands, with the right operands breaking ties.
 */
int CharSetUnion::operator < (const CharSetExpression &x) const {
  LOGSECTION("CharSetUnion::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    LOGS("Same type");
    const CharSetUnion &arg = static_cast<const CharSetUnion &>(x);
    LOGS("Dynamic cast successful");
    int flag;
    if (*this == arg) {
      flag = 0;
    }
    else if (*left == *arg.left) {
      flag = *right < *arg.right;
    }
    else {
      flag = *left < *arg.left;
    }
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression.  Two intersections are
 * equal when their operands match either in the same order or swapped;
 * anything that fails sameType() is unequal.
 */
int CharSetIntersection::operator == (const CharSetExpression &x) const {
  LOGSECTION("CharSetIntersection::operator==(CharSetExpression &)");
  if (sameType(x)) {
    const CharSetIntersection &other =
      static_cast<const CharSetIntersection &>(x);
    // Operands in the same order.
    if (*left == *other.left && *right == *other.right) {
      return 1;
    }
    // Operands swapped.
    return *left == *other.right && *right == *other.left;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <.  Two
 * intersections that compare equal are never "less"; otherwise they are
 * ordered by their left operands, with the right operands breaking ties.
 */
int CharSetIntersection::operator < (const CharSetExpression &x) const {
  LOGSECTION("CharSetIntersection::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const CharSetIntersection &arg =
      static_cast<const CharSetIntersection &>(x);
    int flag;
    if (*this == arg) {
      flag = 0;
    }
    else if (*left == *arg.left) {
      flag = *right < *arg.right;
    }
    else {
      flag = *left < *arg.left;
    }
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression.  Two differences are equal
 * only when both operands match in the same order; anything that fails
 * sameType() is unequal.
 */
int CharSetDifference::operator == (const CharSetExpression &x) const {
  LOGSECTION("CharSetDifference::operator==(CharSetExpression &)");
  if (sameType(x)) {
    const CharSetDifference &other =
      static_cast<const CharSetDifference &>(x);
    return *left == *other.left && *right == *other.right;
  }
  return 0;
}
/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <.  Two
 * differences that compare equal are never "less"; otherwise they are
 * ordered by their left operands, with the right operands breaking ties.
 */
int CharSetDifference::operator < (const CharSetExpression &x) const {
  LOGSECTION("CharSetDifference::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const CharSetDifference &arg =
      static_cast<const CharSetDifference &>(x);
    int flag;
    if (*this == arg) {
      flag = 0;
    }
    else if (*left == *arg.left) {
      flag = *right < *arg.right;
    }
    else {
      flag = *left < *arg.left;
    }
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}

/*
 * Equality against an arbitrary expression: non-zero only when x passes
 * sameType() and its operand compares equal to this one's.
 */
int CharSetComplement::operator == (const CharSetExpression &x) const {
  LOGSECTION("CharSetComplement::operator==(CharSetExpression &)");
  if (sameType(x)) {
    // sameType() succeeded, so x is treated as a CharSetComplement.
    const CharSetComplement &other =
      static_cast<const CharSetComplement &>(x);
    return *operand == *other.operand;
  }
  return 0;
}

/*
 * Ordering against an arbitrary expression.  Expressions that fail
 * sameType() are ordered by CharSetExpression::operator <; two
 * complements are ordered by their operands.
 */
int CharSetComplement::operator < (const CharSetExpression &x) const {
  LOGSECTION("CharSetComplement::operator <");
  LOGV(asString()) LCV(x.asString());
  if (sameType(x)) {
    const CharSetComplement &other =
      static_cast<const CharSetComplement &>(x);
    int flag = (*operand < *other.operand);
    LOGV(flag);
    return flag;
  }
  return CharSetExpression::operator < (x);
}