view anagram/agcore/checksum.cpp @ 24:a4899cdfc2d6 default tip

Obfuscate the regexps to strip off the IBM compiler's copyright banners. I don't want bots scanning github to think they're real copyright notices because that could cause real problems.
author David A. Holland
date Mon, 13 Jun 2022 00:40:23 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-1999 Parsifal Software. All Rights Reserved.
 * Copyright 2006 David A. Holland. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * checksum.cpp - self-checksum module
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "port.h"

#ifdef AG_ON_WINDOWS
#include <windows.h>
#endif

#include "agstring.h"
#include "assert.h"
#include "checksum.h"
#include "file.h"

//#define INCLUDE_LOGGING
#include "log.h"


/*
 * Fletcher's check-sum routines for AnaGram
 *
 * Freely adapted from routines published in Dr. Dobbs Journal
 * May 1992, p. 64
 */

/* Number of bytes checkSum() asks read() for on each pass through its loop. */
#define BLOCK_SIZE 0X4000U

/*
 * Directory that the Unix open_binary() prepends to the binary names;
 * assigned by sum_remember_argv0().
 */
static AgString basedir;

/*
 * Compute the length and the Fletcher-style check-sum of an open file.
 *
 *   fh       - open file descriptor; if negative, both results are 0.
 *   offset   - if nonzero, summing starts at byte offset+512, runs to the
 *              end of the file, then wraps around and sums bytes
 *              [0, offset). The 512 bytes starting at `offset` are thus
 *              left out (presumably where the expected sum is stored --
 *              confirm against the caller). If zero, the whole file is
 *              summed from the beginning.
 *   res_sum  - receives the check-sum.
 *   res_len  - receives the file length as reported by tell().
 *
 * The file is consumed as native-endian 16-bit words; a trailing odd
 * byte in a chunk is padded with a zero byte. A read error or premature
 * end of file ends the summing early and the sum of what was read so
 * far is reported.
 */
static void checkSum(int fh, int offset, u_long *res_sum, u_long *res_len) {
  LOGSECTION("checkSum");
  char *buf;
  unsigned short k1, k2;

  *res_sum = 0;
  *res_len = 0;

  //LOGV(fh);

  if (fh < 0) {
    return;
  }
  lseek(fh, 0, SEEK_END);
  int length = tell(fh);
  *res_len = length;
  //LOGV(length);
  /* one spare byte so an odd-sized chunk can be zero-padded */
  buf = new char[BLOCK_SIZE + 1];

  /* the two running sums are seeded from the file length */
  k1 = (unsigned short) (length >> 16);
  k2 = (unsigned short) length;

  if (offset) {
    lseek(fh, offset+512, SEEK_SET);
    length -= offset+512;
    //LOGV(length);
  }
  else {
    lseek(fh, 0, SEEK_SET);
  }

  if (k1 == 0) k1++;
  if (k2 == 0) k2++;

  while (length > 0) {
    unsigned want = BLOCK_SIZE;
    if ((long) want > length) {
      want = (unsigned) length;
    }

    /*
     * Keep the result signed: read() returns -1 on error, and storing
     * that in an unsigned would turn it into a huge count and make the
     * padding store below write far outside the buffer.
     */
    int got = read(fh, buf, want);
    if (got <= 0) {
      break;
    }
    unsigned n = (unsigned) got;

    /* pad a trailing odd byte with zero */
    buf[n] = 0;
    unsigned ni = (n+1)/2;
    for (unsigned i = 0; i < ni; i++) {
      /* fetch the word with memcpy rather than through a cast pointer */
      unsigned short w;
      memcpy(&w, buf + i * sizeof(w), sizeof(w));

      /* 16-bit additions with the carry folded back in */
      k1 += w;
      if (k1 < w) {
        k1++;
      }
      k2 += k1;
      if (k2 < k1) {
        k2++;
      }
    }

    length -= n;
    if (length <= 0 && offset!=0) {
      /* tail is done; wrap around and sum the part before `offset` */
      //LOGV(k1) LCV(k2);
      length = offset;
      offset = 0;
      lseek(fh, 0, SEEK_SET);
    }
  }
  k1 ^= (unsigned short) -1;
  k2 ^= (unsigned short) -1;
  /*
   * NOTE(review): the multiplier is 65535, not 65536. It is kept as is
   * because any previously recorded sums would depend on it.
   */
  *res_sum = 65535L*k2 + k1;

  LOGV(*res_sum);
  //DEALLOCATE(buf);
  delete [] buf;
}

#ifdef AG_ON_WINDOWS
/*
 * Windows: open the on-disk image of one of the AnaGram binaries for
 * reading.
 *
 * The path is taken from GetModuleFileName(): the "ag1" module for
 * SUM_AG1, the running executable otherwise. When the running
 * executable is ag.exe and agcl.exe is wanted (or the other way
 * around), the file name at the end of the path is swapped.
 *
 * Returns a file descriptor from open_shared_denywrite(), or -1 if the
 * module path cannot be obtained or does not fit in the buffer.
 */
static int open_binary(summable what) {
  char buf[_MAX_PATH];
  HMODULE module = 0;

  switch (what) {
    case SUM_AG1:  module = GetModuleHandle("ag1"); break;
    case SUM_AG:   module = 0; break;
    case SUM_AGCL: module = 0; break;
  }

  /*
   * Two bytes are held back because replacing "ag.exe" with "agcl.exe"
   * below makes the string two characters longer.
   */
  const DWORD room = sizeof(buf)-2;
  DWORD got = GetModuleFileName(module, buf, room);
  if (got == 0 || got >= room) {
    /*
     * Failure leaves buf unset, and a truncated result may lack its
     * terminating NUL; either way buf is not safe to pass to strlen().
     */
    return -1;
  }

  if (what != SUM_AG1) {
    size_t len = strlen(buf);
    if (len >= 6 && !stricmp(buf+len-6, "ag.exe") && what == SUM_AGCL) {
      strcpy(buf+len-6, "agcl.exe");
    }
    else if (len >= 8 && !stricmp(buf+len-8, "agcl.exe") && what == SUM_AG) {
      strcpy(buf+len-8, "ag.exe");
    }
  }

  return open_shared_denywrite(buf, O_BINARY|O_RDONLY);
}
#endif

#ifdef AG_ON_UNIX
/*
 * Unix: open one of the AnaGram binaries for reading.
 *
 * The path is "<basedir>/<name>", where <name> is "ag1.so", "ag" or
 * "agcl" depending on `what`.
 *
 * Returns the descriptor from open(), or -1 if `what` is not one of the
 * known values, the path does not fit in PATH_MAX bytes, or open()
 * fails.
 */
static int open_binary(summable what) {
  char buf[PATH_MAX];
  const char *name = NULL;
  switch (what) {
    case SUM_AG1:  name = "ag1.so"; break;
    case SUM_AG:   name = "ag"; break;
    case SUM_AGCL: name = "agcl"; break;
  }
  if (name == NULL) {
    /* unknown value; never hand a null pointer to %s */
    return -1;
  }

  int n = snprintf(buf, sizeof(buf), "%s/%s", basedir.pointer(), name);
  if (n < 0 || (size_t) n >= sizeof(buf)) {
    /* a truncated path would name some other file; refuse to open it */
    return -1;
  }
  return open(buf, O_RDONLY);
}
#endif

/*
 * Open the object identified by `what` for summing. Every currently
 * known kind is a binary and goes through open_binary(); any other
 * value yields -1.
 */
static int open_summable(summable what) {
  if (what == SUM_AG1 || what == SUM_AG || what == SUM_AGCL) {
    return open_binary(what);
  }
  return -1;
}

/*
 * Release a descriptor obtained from open_summable(). The descriptor is
 * closed for the known binary kinds; for any other `what` nothing is
 * done.
 */
static void close_summable(summable what, int fd) {
  const bool is_binary =
    (what == SUM_AG1 || what == SUM_AG || what == SUM_AGCL);

  if (is_binary) {
    close(fd);
  }
}

/*
 * Fill in s->observed with the check-sum and length actually found for
 * the object s->what, using s->offset as the checkSum() offset. If the
 * object cannot be opened, both observed values are set to 0.
 */
void observeSum(sumentry *s) {
  LOGSECTION("observeSum");

  int fd = open_summable(s->what);
  if (fd < 0) {
    /* could not open it: report an empty observation */
    s->observed.sum = 0;
    s->observed.length = 0;
    return;
  }

  LOGV(fd);
  checkSum(fd, s->offset, &s->observed.sum, &s->observed.length);
  close_summable(s->what, fd);
}

/*
 * Work out the directory the program was started from, based on
 * argv[0], and record it in `basedir` for later use when opening the
 * binaries to be summed.
 */
void sum_remember_argv0(const char *argv0) {
  LOGSECTION("sum_remember_argv0");
  LOGV(argv0);
  AgString av0(argv0);

#ifdef AG_ON_WINDOWS
  /* keep what lies to the left of the last path separator */
  basedir = av0.lastCut("\\/:").leftX();
  LOGV(basedir);
#endif

#ifdef AG_ON_UNIX
  /*
   * Unix hands us whatever the parent process passed along, which is
   * usually what the user typed at the shell: an absolute path, a
   * relative path, or a bare command name. A bare name has to be
   * looked up in $PATH.
   *
   * Both argv[0] and $PATH are under the user's control, so an
   * adversarial user can make us find a different file from the one
   * that is actually executing. That no longer matters now that AG is
   * open source.
   */

  if (strchr(argv0, '/') != NULL) {
    /*
     * Absolute or relative path. A relative one is good enough because
     * we don't chdir before the summing is done.
     */
    basedir = av0.lastCut("\\/:").leftX();
    LOGV(basedir);
    return;
  }

  /* bare command name: try each $PATH entry in turn */
  const char *p = getenv("PATH");
  LOGV(p);
  if (p != NULL) {
    AgString path(p);  // private copy, since strtok writes into it
    char *dir = strtok(path.pointer(), ":");
    while (dir != NULL) {
      char tmp[PATH_MAX];
      struct stat st;

      snprintf(tmp, sizeof(tmp), "%s/%s", dir, argv0);
      LOGV(tmp);
      if (stat(tmp, &st) == 0) {
        basedir = dir;  // copies the string
        LOGV(basedir);
        return;
      }
      dir = strtok(NULL, ":");
    }
  }

  /* not found anywhere - fall back on a guess */
  basedir = "/usr/local/lib/anagram";
  LOGV(basedir);
#endif
}