view anagram/agcore/textfile.cpp @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * textfile.cpp
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

#include "port.h"

#include "agstack.h"
#include "config.h"
#include "file.h"
#include "minmax.h"
//#include "stacks.h"
#include "textfile.h"

//#define INCLUDE_LOGGING
#include "log.h"


#define BUF_SIZE 0x4000000

/*
 * Copy constructor.
 *
 * Duplicates every field of t, including the truncated flag, so that a
 * copy-constructed object agrees with one produced by operator=. The
 * file itself is not re-read.
 */
text_file::text_file(const text_file &t)
  : name(t.name), text(t.text), lx(t.lx), width(t.width)
  , stringLength(t.stringLength)
  , truncated(t.truncated)   // was hard-wired to 0, losing the flag on copy
  , readFlags(t.readFlags)
{
  LOGSECTION("text_file::text_file");
  LOGV(name) LCV(width);
  LOGV((int) text.pointer());
}

/*
 * Assignment operator.
 *
 * Takes over every field of t; the file is not re-read.
 * Returns a reference to this object.
 */
text_file &text_file::operator =(const text_file &t) {
  // Bookkeeping produced by read_file()
  readFlags    = t.readFlags;     /* flags the file is opened with */
  stringLength = t.stringLength;  /* number of bytes read into text */
  truncated    = t.truncated;     /* set when the reported size was not fully consumed */

  // Results of find_lines()
  width = t.width;                /* widest line, in columns, tabs expanded */
  lx    = t.lx;                   /* offset of the start of each line */

  // The file proper
  text = t.text;                  /* body of file */
  name = t.name;                  /* name of file */

  return *this;
}


/*
 * Construct from a C-string file name and read the file immediately,
 * using the default flags O_TEXT|O_RDONLY.
 *
 * If the file cannot be opened, read_file() returns early and the
 * object keeps its empty initial state.
 */
text_file::text_file(const char *n)
  : name(n)
  , text()
  , lx()
  , width(0)
  , stringLength(0)
  , truncated(0)
  , readFlags(O_TEXT|O_RDONLY)
{
  LOGSECTION("text_file::text_file");
  LOGV(name);
  LOGV((int) text.pointer());
  // The zero-argument overload reads with readFlags, which was just set above.
  read_file();
}

/*
 * Construct from an AgString file name and read the file immediately,
 * using the default flags O_TEXT|O_RDONLY.
 *
 * If the file cannot be opened, read_file() returns early and the
 * object keeps its empty initial state.
 */
text_file::text_file(const AgString n)
  : name(n)
  , text()
  , lx()
  , width(0)
  , stringLength(0)
  , truncated(0)
  , readFlags(O_TEXT|O_RDONLY)
{
  LOGSECTION("text_file::text_file");
  LOGV(name.pointer());
  LOGV((int) text.pointer());
  LOGV((int) readFlags);
  // The zero-argument overload reads with readFlags, which was just set above.
  read_file();
}

/*
 * Construct from a C-string file name and read the file immediately
 * with caller-supplied open flags.
 *
 * n:     name of the file to load
 * flags: open flags; remembered in readFlags so that a later
 *        read_file() uses the same ones
 */
text_file::text_file(const char *n, int flags)
  : name(n)
  , text()
  , lx()
  , width(0)
  , stringLength(0)
  , truncated(0)
  , readFlags(flags)
{
  LOGSECTION("text_file::text_file");
  LOGV(name);
  LOGV((int) text.pointer());
  read_file(flags);
}

/*
 * Construct from an AgString file name and read the file immediately
 * with caller-supplied open flags.
 *
 * n:     name of the file to load
 * flags: open flags; remembered in readFlags so that a later
 *        read_file() uses the same ones
 */
text_file::text_file(const AgString n, int flags)
  : name(n)
  , text()
  , lx()
  , width(0)
  , stringLength(0)
  , truncated(0)
  , readFlags(flags)
{
  LOGSECTION("text_file::text_file");
  LOGV(name.pointer());
  LOGV((int) text.pointer());
  LOGV((int) flags);
  read_file(flags);
}

/*
 * Build the line index for the text currently held in `text`.
 *
 * Walks the buffer once, pushing the offset of the start of each line
 * (relative to the start of the buffer) and measuring each line's
 * display width with tabs advanced to the next multiple of
 * tab_spacing. On exit `lx` is built from the collected offsets and
 * `width` holds the largest width seen (0 if there was no text).
 *
 * The buffer is modified temporarily (each newline is overwritten with
 * a NUL while its line is measured) but every newline is restored
 * before the function moves on.
 */
void text_file::find_lines(void) {
  char *p, *b, *bl, *q;
  int w, wmax;
  LOGSECTION("text_file::find_lines");
  AgStack<int> lineStack;

  wmax = w = 0;
  p = b = text.pointer();
  LOGV(p);
  if (p != NULL) {
    do {
      // bl marks the start of the current line; p moves to its
      // terminating newline, or becomes NULL if there is none.
      p = strchr(bl = p,'\n');
      lineStack.push((int)(bl-b));
      if (p != NULL) {
	// Temporarily terminate the line so it reads as a C string.
	*p = 0;
      }
      else {
	// No newline: this is the last line. If it is empty there is
	// nothing to measure. Its offset has already been pushed, so
	// completely empty text still yields one line entry.
	q = strchr(bl, 0);
	if (q == bl) {
	  break;
	}
      }
      q = bl;
      w = 0;
      LOGV(lineStack.size()) LCV(bl);
      // Measure the line: a tab advances to the next tab stop, any
      // other character counts as one column.
      while (*q) {
	w += (*q++ == '\t') ? tab_spacing - w%tab_spacing : 1;
      }
      if (w > wmax) {
	wmax = w;
      }
      if (p == NULL) {
	break;
      }
      // Put the newline back and step past it. The loop ends here if
      // the text stops right after this newline, so a trailing newline
      // does not produce an extra empty line.
      *p++ = '\n';
    } while(*p);
  }

  LOGS("Line scan complete");
  // NOTE(review): lineStack is empty when the text pointer was NULL, in
  // which case these expressions index an empty stack. Whether they are
  // evaluated depends on how the logging macros expand -- confirm.
  LOGV((int) &lineStack[0]) LCV((int) &lineStack[lineStack.size() - 1]);

  lx = AgArray<int>(lineStack);
  LOGS("Line array completed");

  width = wmax;
}

/*
 * (Re)load the file using the open flags remembered in readFlags.
 */
void text_file::read_file() {
  read_file(readFlags);
}

/*
 * Load the file called `name` into `text` and rebuild the line index.
 *
 * flags: open flags handed to open_shared_any().
 *
 * If the file cannot be opened or stat'ed, the function returns without
 * modifying the object. Otherwise a buffer of at most
 * MAX_BYTES - 1 - sizeof(short) bytes is requested, the file is read
 * into it in chunks of at most BUF_SIZE bytes, stringLength is set to
 * the number of bytes actually read, and find_lines() is called.
 *
 * `truncated` is set when the size reported by fstat() has not been
 * fully covered by the read requests issued. Note that the remaining
 * size is reduced by the amount *requested*, not by the amount read.
 * NOTE(review): presumably this is so that text-mode reads, which can
 * return fewer bytes than requested, still count as complete -- confirm
 * before changing the accounting.
 */
void text_file::read_file(int flags) {
  int fh;
  long n;

  LOGSECTION("text_file::read_file");
  LOGV(name);
  LOGV(flags);
  fh = open_shared_any(name.pointer(),flags);
  LOGV(fh);
  if (fh < 0) {
    LOGV(errno);
    LOGV(strerror(errno));
    return;
  }
  LOGV(name);
  char *buf, *bufptr;
  long length;
  long buffer_length;
  struct stat statbuf;

  // Without a valid stat result the size below would be garbage, so
  // treat a failed fstat() like a failed open (and don't leak fh).
  if (fstat(fh, &statbuf) != 0) {
    LOGV(errno);
    LOGV(strerror(errno));
    close(fh);
    return;
  }
  //file_time_stamp = statbuf.st_atime;
  length = statbuf.st_size;
  stringLength = 0;
  LOGV(length);
  // One extra byte for the terminating NUL, capped at the size limit.
  buffer_length = min((long)(length+1), (long)(MAX_BYTES - 1 - sizeof(short)));
  LOGV((int) text.pointer());
  text = AgString(buffer_length);
  LOGV((int) text.pointer());
  buf = bufptr = text.pointer();
  LOGV((int) text.pointer());
  n = 0;
  while (1) {
    // Ask for no more than fits in the buffer, one chunk, or what the
    // file is still believed to contain.
    unsigned read_request = (unsigned) min(buffer_length, (long) BUF_SIZE);
    LOGV(read_request);
    if ((long)read_request > length) {
      read_request = (unsigned) length;
    }
    length -= read_request;
    LOGV(read_request) LCV(length);
    // Keep the result signed: read() reports errors as -1, and that
    // must be checked before the value is used as an index.
    long k = read(fh, bufptr, read_request);
    LOGV(k) LCV(stringLength);
    if (k <= 0) {
      // End of data or read error: terminate what we have and stop.
      *bufptr = 0;
      break;
    }
    bufptr[k] = 0;
    stringLength += k;
    LOGV(k) LCV(stringLength);
    bufptr += k;
    LOGV((int) (bufptr- 10)) LCV(bufptr - 10);
    buffer_length -= k;
    n += k;
  }
  truncated = length != 0;
  LOGV(stringLength) LCV(n) LCV(length);
  // Only peek at the last five bytes if there are that many.
  if (n >= 5) {
    LOGV(buf[n-5]) LCV(buf[n-4]) LCV(buf[n-3]) LCV(buf[n-2]) LCV(buf[n-1]);
  }
  buf[(unsigned)n] = 0;
  close(fh);
  find_lines();
  LOGV(lx.size()) LCV((int) lx.pointer());
  //printf("text_file::read_file\n");
}

/*
 * Search forward from the cursor for the key s.
 *
 * loc: cursor; y is a line number (index into lx), x an offset within
 *      that line. On success it is updated to the line and offset of
 *      the position returned by the forward scan.
 * s:   search key, installed in searchProcess before scanning.
 *
 * The scan begins one character past the cursor.
 *
 * Returns 1 if the scan reported a position, 0 if the cursor is
 * already at the end of the text or the scan found nothing.
 */
int text_file::findNext(cint &loc, AgString s) {
  LOGSECTION("text_file::findNext");
  LOGV(name);
  LOGV(loc) LCV(s);
  searchProcess.setKey(s);
  assert((unsigned) loc.y < lx.size());

  // Absolute offset of the cursor within the text.
  unsigned start = lx[loc.y] + loc.x;
  LOGV(lx[loc.y]) LCV(start) LCV(stringLength);
  assert(start <= stringLength);
  if (start == stringLength) {
    return 0;
  }

  char *hit = searchProcess.scanForward(text.pointer() + start + 1,
					stringLength - start);
  if (!hit) {
    return 0;
  }

  // Translate the hit back into (line, column): advance to the first
  // line starting at or after the hit...
  unsigned offset = hit - text.pointer();
  for (; (unsigned) loc.y < lx.size() && (unsigned) lx[loc.y] < offset; loc.y++) {
  }
  // ...and step back one line unless that line starts exactly at the hit.
  if (!((unsigned) loc.y < lx.size() && (unsigned) lx[loc.y] <= offset)) {
    loc.y--;
  }
  loc.x = offset - lx[loc.y];
  return 1;
}

/*
 * Search backward from the cursor for the key s.
 *
 * loc: cursor; y is a line number (index into lx), x an offset within
 *      that line. On success it is updated to the line and offset of
 *      the position returned by the reverse scan.
 * s:   search key, installed in searchProcess before scanning.
 *
 * Returns 1 if the scan reported a position, 0 if the cursor is at the
 * very start of the text or the scan found nothing.
 */
int text_file::findPrev(cint &loc, AgString s) {
  LOGSECTION("text_file::findPrev");
  LOGV(loc) LCV(s);
  searchProcess.setKey(s);
  assert((unsigned) loc.y < lx.size());

  // Absolute offset of the cursor. With the cursor at offset 0 there is
  // nothing in front of it to search, and "offset - 1" stored in an
  // unsigned would wrap to a huge scan length, so bail out first.
  int cursor = lx[loc.y] + loc.x;
  if (cursor <= 0) {
    return 0;
  }
  unsigned length = cursor - 1;
  LOGV(length);
  char *ptr = searchProcess.scanReverse(text.pointer(), length);
  if (ptr == 0) {
    return 0;
  }

  // Walk back to the line containing the hit.
  unsigned index = ptr - text.pointer();
  while (loc.y && (unsigned) lx[loc.y] > index) {
    loc.y--;
  }
  loc.x = index - lx[loc.y];
  LOGV(loc) LCV(index);
  return 1;
}