diff anagram/agcore/dict.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/anagram/agcore/dict.cpp	Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,710 @@
+/*
+ * AnaGram, A System for Syntax Directed Programming
+ * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
+ * See the file COPYING for license and usage terms.
+ *
+ * dict.cpp - old dictionary module
+ */
+
+#include <stdarg.h>
+#include "port.h"
+
+#include "arrays.h"
+#include "assert.h"
+#include "dict.h"
+#include "minmax.h"
+#include "myalloc.h"
+#include "stacks.h"
+
+//#define INCLUDE_LOGGING
+#include "log.h"
+
+
+#define HASH_TABLE_MAX 0x1000000
+
+#define EVEN_UP(x) (x) /* + ((x)%2) */
+
+static int find_str_entry(const char *s, string_dict *d);
+static int find_lst_entry(const int *s, list_dict *d);
+static int find_tpl_entry(const int *s, tuple_dict *d);
+
+
+#if 0 /* unused */
+void sort_string_dict(string_dict *d){
+  unsigned i;
+  //unsigned short k;
+  unsigned k;
+
+  if (d->nss >= d->nsx) return;
+  REALLOCATE_ST(d->st,d->nsx);
+{
+  unsigned *dest = d->st + d->nss;
+  unsigned *src = d->ndx + d->nss;
+  //k = (unsigned short) (d->nsx - d->nss);
+  k = (d->nsx - d->nss);
+  while (k--) dest[k] = src[k];
+}
+
+  /* 10/1/06 note: these two funcs are/were defined in merge.cpp */
+  sort_strings(d->text,d->st+d->nss,d->nsx-d->nss);
+  merge_strings(d->text,d->st, d->nss, d->nsx-d->nss);
+  REALLOCATE_ST(d->pt,d->nsx);
+  d->nss = d->nsx;
+  for (i = 0; i< d->nss; i++) {
+    k = d->st[i]-2;
+    //k = *((unsigned short *)(d->text+k));
+		memcpy(&k, d->text+k, sizeof(unsigned short));
+    d->pt[k] = i;
+  }
+}
+#endif /* 0 */
+
+list_dict *reset_list_dict(list_dict *d) {
+  int k;
+  ok_ptr(d);
+  if (d->nxs) {
+    ok_ptr(d->ndx);
+  }
+  if (d->nts) {
+    ok_ptr(d->text);
+  }
+  if (d->nhs) {
+    ok_ptr(d->ht);
+  }
+  d->nsx = 0;
+  d->ntc = 0;
+/*
+  k = sizeof(short)*d->nhs;
+  if (k) {
+    size_ok(d->ht, k, __FILE__, __LINE__);
+  }
+  memset(d->ht, 0xff, k);
+*/
+  k = sizeof(unsigned)*d->nhs;
+  if (k) {
+    size_ok(d->ht, k, __FILE__, __LINE__);
+  }
+  memset(d->ht, 0xff, k);
+  return NULL;
+}
+
+tuple_dict *reset_tuple_dict(tuple_dict *d) {
+  unsigned k;
+  ok_ptr(d);
+  if (d->nxs) {
+    ok_ptr(d->ndx);
+  }
+  if (d->nts) {
+    ok_ptr(d->text);
+  }
+  if (d->nhs) {
+    ok_ptr(d->ht);
+  }
+  d->nsx = 0;
+  d->ntc = 0;
+/*
+  k = sizeof(short)*d->nhs;
+  if (k) {
+    size_ok(d->ht, k, __FILE__, __LINE__);
+  }
+  memset(d->ht, 0xff, k);
+*/
+  k = sizeof(unsigned)*d->nhs;
+  if (k) {
+    size_ok(d->ht, k, __FILE__, __LINE__);
+  }
+  memset(d->ht, 0xff, k);
+  return NULL;
+}
+
+static void hash_string_dict(string_dict *d){
+  unsigned i,x;
+  const char *s;
+
+  //memset(d->ht, 0xff, sizeof(short)*d->nhs);
+  memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
+  for (i = 0; i < d->nsx; i++) {
+    s = &d->text[d->ndx[i]];
+    x = find_str_entry(s,d);
+    //d->ht[x] = (unsigned short) i;
+    d->ht[x] = i;
+  }
+}
+
+static void hash_list_dict(list_dict *d){
+  unsigned i,x;
+  const int *s;
+
+  //memset(d->ht, 0xff, sizeof(short)*d->nhs);
+  memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
+  for (i = 0; i < d->nsx; i++) {
+    s = &d->text[d->ndx[i]];
+    x = find_lst_entry(s,d);
+    //d->ht[x] = (unsigned short) i;
+    d->ht[x] = i;
+  }
+}
+
+static void hash_tpl_dict(tuple_dict *d){
+  unsigned i,x;
+  const int *s;
+  int n;
+
+  //memset(d->ht, 0xff, sizeof(short)*d->nhs);
+  memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
+  s = d->text;
+  n = d->tw;
+  for (i = 0; i < d->nsx; i++) {
+    x = find_tpl_entry(s,d);
+    //d->ht[x] = (unsigned short) i;
+    d->ht[x] = i;
+    s += n;
+  }
+}
+
+list_dict *delete_list_dict(list_dict *da) {
+  list_dict d;
+
+  if (da == NULL) {
+    return NULL;
+  }
+  d = *da;
+  DEALLOCATE(da);
+  if (d.ndx != NULL) DEALLOCATE(d.ndx);
+  if (d.text != NULL) DEALLOCATE(d.text);
+  if (d.ht != NULL) DEALLOCATE(d.ht);
+  if (d.st != NULL) DEALLOCATE(d.st);
+  if (d.pt != NULL) DEALLOCATE(d.pt);
+  return NULL;
+}
+
+tuple_dict *delete_tuple_dict(tuple_dict *da) {
+  tuple_dict d;
+
+  if (da == NULL) {
+    return NULL;
+  }
+  d = *da;
+  DEALLOCATE(da);
+  if (d.ndx != NULL) DEALLOCATE(d.ndx);
+  if (d.text != NULL) DEALLOCATE(d.text);
+  if (d.ht != NULL) DEALLOCATE(d.ht);
+  if (d.st != NULL) DEALLOCATE(d.st);
+  if (d.pt != NULL) DEALLOCATE(d.pt);
+  return NULL;
+}
+
+void empty_tuple_dict(tuple_dict *da) {
+  tuple_dict d;
+
+  if (da == NULL) {
+    return;
+  }
+  d = *da;
+  if (d.ndx != NULL) DEALLOCATE(d.ndx);
+  if (d.text != NULL) DEALLOCATE(d.text);
+  if (d.ht != NULL) DEALLOCATE(d.ht);
+  if (d.st != NULL) DEALLOCATE(d.st);
+  if (d.pt != NULL) DEALLOCATE(d.pt);
+  memset(&d, 0, sizeof(d));
+  d.tw = da->tw;
+  *da = d;
+}
+
+string_dict *delete_string_dict(string_dict *da) {
+  string_dict d;
+
+  if (da == NULL) {
+    return NULL;
+  }
+  d = *da;
+  DEALLOCATE(da);
+  if (d.ndx != NULL) DEALLOCATE(d.ndx);
+  if (d.text != NULL) DEALLOCATE(d.text);
+  if (d.ht != NULL) DEALLOCATE(d.ht);
+  if (d.st != NULL) DEALLOCATE(d.st);
+  if (d.pt != NULL) DEALLOCATE(d.pt);
+  return NULL;
+}
+
+int identify_string(const char *s, string_dict *d) {
+  unsigned he, x;
+
+  check_stack;
+  he = find_str_entry(s, d);
+  if ((x = d->ht[he]) >= d->nsx) {
+    x = 0;
+  }
+  return x;
+}
+
+
+int add_string_dict(const char *s,string_dict *d){
+  int len;
+  int he;
+  unsigned x;
+  long nhs, nsx;
+
+  check_stack;
+  len = strlen(s)+3;
+  if (d->ntc+len > d->nts) {
+    unsigned long k = d->ntc+len;
+    k += k/2;
+    k = max(200UL, min((unsigned long) MAX_BYTES,k));
+    LOGSECTION("add_string_dict::resize");
+    LOGV(k) LCV(d->ntc + len);
+    assert(k >= d->ntc + len);
+    REALLOCATE_ST(d->text, (unsigned) k);
+    d->nts = (unsigned) k;
+  }
+  if (d->nsx +1 > d->nxs) {
+    unsigned long k = d->nsx+1;
+    k += k/2;
+    k = max(100UL, min((unsigned long) MAX_INTS, k));
+    LOGSECTION("add_string_dict::resize");
+    LOGV(k) LCV(d->nsx + 1);
+    assert(k >= d->nsx + 1);
+    REALLOCATE_ST(d->ndx, (unsigned) k);
+    d->nxs = (unsigned) k;
+  }
+  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
+    nhs = max(nhs, 256L);
+    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
+      nhs *= 2;
+    }
+    LOGSECTION("add_string_dict::resize");
+    LOGV(nhs) LCV(d->nsx);
+    assert((unsigned) nhs > d->nsx);
+    REALLOCATE_ST(d->ht,(unsigned) nhs);
+    d->nhs = (unsigned) nhs;
+    hash_string_dict(d);
+  }
+  s = strcpy(d->text+d->ntc+2,s);
+  he = find_str_entry(s, d);
+  if ((x = d->ht[he]) >= d->nsx) {
+    //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx;
+    unsigned short length = d->nsx;
+    //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short));
+    memcpy(d->text+d->ntc, &length, sizeof(unsigned short));
+    d->ntc += 2;
+    x = d->nsx++;
+    //d->ndx[d->ht[he] = (unsigned short) x] = d->ntc;
+    d->ndx[d->ht[he] = x] = d->ntc;
+    d->ntc += EVEN_UP(len-2);
+  }
+  return x;
+}
+/*
+void close_list_dict(list_dict *d){
+  unsigned len;
+
+  if (d == NULL) return;
+  len = 2*(d->ntc/d->nsx) + d->ntc;
+  if (len < d->nts) {
+    REALLOCATE_ST(d->text,len);
+    d->nts = len;
+  }
+  if (d->nsx + 1< d->nxs) {
+    unsigned k = d->nsx + 1;
+    REALLOCATE_ST(d->ndx,k);
+    d->nxs = k;
+  }
+}
+*/
+int list_in_dict(int *s, int n, list_dict *d){
+  int k = d->nsx;
+  check_stack;
+  return k != add_list_dict(s, n, d);
+}
+
+int add_list_dict(const int *s,int n,list_dict *d){
+  LOGSECTION("add_list_dict");
+  unsigned len;
+  int he;
+  unsigned x;
+  long nhs, nsx;
+  int *sc;
+
+  check_stack;
+  len = n+3;
+  if (d->ntc+len > d->nts) {
+    unsigned long k = d->ntc + len;
+    k += k/2;
+    k = min((unsigned long)MAX_INTS, max(200UL, k));
+    LOGSECTION("add_list_dict::resize");
+    LOGV(k) LCV(d->ntc + len);
+    assert(k >= d->ntc + len);
+    REALLOCATE_ST(d->text,(unsigned) k);
+    d->nts = (unsigned) k;
+  }
+  if (d->nsx + 1 > d->nxs) {
+    unsigned long k = d->nsx + 1;
+    k += k/2;
+    k = min((unsigned long) MAX_INTS, max(200UL,k));
+    LOGSECTION("add_list_dict::resize");
+    LOGV(k) LCV(d->nsx + 1);
+    assert(k >= d->nsx+1);
+    REALLOCATE_ST(d->ndx, (unsigned) k);
+    d->nxs = (unsigned) k;
+  }
+  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
+    nhs = max(nhs,256L);
+    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
+      nhs *= 2;
+    }
+    LOGSECTION("add_list_dict::resize");
+    LOGV(nhs) LCV(d->nsx);
+    assert((unsigned)nhs > d->nsx);
+    REALLOCATE_ST(d->ht, (unsigned) nhs);
+    d->nhs = (unsigned) nhs;
+    hash_list_dict(d);
+  }
+  sc = (int *) memcpy(d->text+d->ntc+2, s, n*sizeof(int));
+  *--sc = n+1;
+  he = find_lst_entry(sc,d);
+  if ((x = d->ht[he]) >= d->nsx) {
+    //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx;
+    //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short));
+    //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned));
+    d->text[d->ntc] = d->nsx;
+    //d->ndx[d->ht[he] = (unsigned short) (x = d->nsx++)] = ++d->ntc;
+    d->ndx[d->ht[he] = (x = d->nsx++)] = ++d->ntc;
+    d->ntc += n+1;
+  }
+  return x;
+}
+
+void extract_tuple_dict(tuple_dict *d, unsigned k, ...) {
+  va_list ap;
+  const int *p;
+  unsigned n;
+
+  assert(k < d->nsx);
+  n = d->tw;
+  p = d->text + n*k;
+  va_start(ap,k);
+  while (n--) {
+    *va_arg(ap,int*) = *p++;
+  }
+  va_end(ap);
+}
+
+int insert_tuple_dict(tuple_dict *d,...){
+  int he;
+  va_list s;
+  unsigned n;
+  long nsx,nhs;
+  //int *tpl;
+  unsigned i;
+
+  check_stack;
+  va_start(s, d);
+  n = d->tw;
+  //tpl = local_array(n,int);
+  LocalArray<int> tpl(n);
+  for (i = 0; i < n; i++) tpl[i] = va_arg(s,int);
+  va_end(s);
+  if (d->ntc+n > d->nts) {
+    unsigned long k = d->ntc + n;
+    k += k/2;
+    k = min((unsigned long) MAX_INTS, max(200UL,k));
+    LOGSECTION("insert_tuple_dict::resize");
+    LOGV(k) LCV(d->ntc + n);
+    assert(k >= d->ntc + n);
+    REALLOCATE_ST(d->text, (unsigned) k);
+    d->nts = (unsigned) k;
+  }
+  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
+    nhs = max(nhs, 256L);
+    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
+      nhs *= 2;
+    }
+    LOGSECTION("insert_tuple_dict::resize");
+    LOGV(nhs) LCV(d->nsx);
+    assert((unsigned)nhs > d->nsx);
+    REALLOCATE_ST(d->ht, (unsigned) nhs);
+    d->nhs = (unsigned) nhs;
+    hash_tpl_dict(d);
+  }
+  he = find_tpl_entry(tpl, d);
+  if ((unsigned)(d->ht[he]) >= d->nsx) {
+    //d->ht[he] = (unsigned short) d->nsx++;
+    d->ht[he] = d->nsx++;
+    memcpy(d->text+d->ntc, tpl, n*sizeof(int));
+    d->ntc += n;
+    return 0;
+  }
+  return 1;
+}
+
+
+int add_tuple_dict_new(tuple_dict *d, ...) {
+  int he;
+  unsigned x;
+  va_list s;
+  unsigned n;
+  long nsx, nhs;
+  //int *tpl;
+  unsigned i;
+
+  check_stack;
+  va_start(s, d);
+  n = d->tw;
+  //tpl = local_array(n, int);
+  LocalArray<int> tpl(n);
+  for (i = 0; i < n; i++) tpl[i] = va_arg(s, int);
+  va_end(s);
+  if (d->ntc+n > d->nts) {
+    unsigned long k = d->ntc + n;
+
+    k += k/2;
+    k = min((unsigned long) MAX_INTS, max(200UL,k));
+    LOGSECTION("add_tuple_dict_new::resize");
+    LOGV(k) LCV(d->ntc + n);
+    assert(k >= d->ntc + n);
+    REALLOCATE_ST(d->text, (unsigned)k);
+    d->nts = (unsigned) k;
+  }
+  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
+    nhs = max(nhs, 256L);
+    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
+      nhs *= 2;
+    }
+    LOGSECTION("add_tuple_dict_new::resize");
+    LOGV(nhs) LCV(d->nsx);
+    assert((unsigned) nhs > d->nsx);
+    REALLOCATE_ST(d->ht, (unsigned) nhs);
+    d->nhs = (unsigned) nhs;
+    hash_tpl_dict(d);
+  }
+  he = find_tpl_entry(tpl, d);
+  if ((x = d->ht[he]) >= d->nsx) {
+    //d->ht[he] = (unsigned short) (x = d->nsx++);
+    d->ht[he] = (x = d->nsx++);
+    memcpy(d->text+d->ntc, tpl, n*sizeof(int));
+    d->ntc += n;
+  }
+  return x;
+}
+
+/*
+int identify_tuple(tuple_dict *d, ...) {
+  int he;
+  unsigned x;
+  va_list s;
+  int n = d->tw;
+  int *tpl, i;
+
+  check_stack;
+  va_start(s,d);
+  tpl = local_array(n, int);
+  for (i = 0; i < n; i++) {
+    tpl[i] = va_arg(s, int);
+  }
+  va_end(s);
+  he = find_tpl_entry(tpl,d);
+  if ((x = d->ht[he]) >= d->nsx) {
+    return 0;
+  }
+  return x;
+}
+*/
+
+/*
+ * Use a variant of Fletcher's checksum
+ * See DDJ, May, 1992, p32.
+ */
+
+static int nstr = 0, nstrx = 0;
+static int nlst = 0, nlstx = 0;
+static int ntpl = 0, ntplx = 0;
+
+static int find_str_entry(const char *s, string_dict *d) {
+  unsigned x,m;
+  int he,dx;
+  const char *sp;
+
+  sp = s;
+  if (d->ignore_case) {
+    unsigned char k1 = 0, k2 = 0, c;
+    while ((c = *sp++) != 0) {
+      c &= 0337;
+      k1 += c;
+      if (k1) k1 ^= (unsigned char) 0xff;
+      k2 += k1;
+      if (k2) k2 ^= (unsigned char) 0xff;
+    }
+    he = 255*k1 + k2;
+    dx = 255*k2 + k1;
+    dx += dx + 1;
+
+    he += dx << 16;
+    dx += he << 16;
+
+    m = d->nhs - 1;
+    while ((x = d->ht[he &= m]) < d->nsx
+	   && 0 != stricmp(s,&d->text[d->ndx[x]])) {
+      he += dx;
+    }
+    return he;
+  }
+
+
+  {
+    unsigned char k1 = 0, k2 = 0, c;
+    while((c = *sp++) != 0) {
+      k1 += c;
+      if (k1) k1 ^= (unsigned char) 0xff;
+      k2 += k1;
+      if (k2) k2 ^= (unsigned char) 0xff;
+    }
+    he = 255*k1 + k2;
+    dx = 255*k2 + k1;
+  }
+  dx += dx + 1;
+
+  he += dx << 16;
+  dx += he << 16;
+
+  m = d->nhs - 1;
+  nstr++;
+  while ((x = d->ht[he &= m]) < d->nsx && 0 != strcmp(s,&d->text[d->ndx[x]])) {
+    he += dx;
+    nstrx++;
+  }
+  return he;
+}
+
+static int find_lst_entry(const int *s, list_dict *d){
+  unsigned x,m;
+  int he,dx;
+  int n;
+  const char *sp;
+  unsigned char k1 = 0, k2 = 0;
+
+  //sp = (const char *) s;
+  n = m = *s;
+  m *= sizeof(int);
+  sp = (const char *) s;
+  while (m--) {
+    k1 += *sp++;
+    if (k1) k1 ^= (unsigned char) 0xff;
+    k2 += k1;
+    if (k2) k2 ^= (unsigned char) 0xff;
+  }
+  he = 255*k1 + k2;
+  dx = 255*k2 + k1;
+  dx += dx + 1;
+
+  he += dx << 16;
+  dx += he << 16;
+
+  m = d->nhs-1;
+  n *= sizeof(int);
+  nlst++;
+  while ((x = d->ht[he &= m]) < d->nsx &&
+	 0 != memcmp(s,&d->text[d->ndx[x]], n))  {
+    he += dx;
+    nlstx++;
+  }
+  return he;
+}
+
+static int find_tpl_entry(const int *s, tuple_dict *d){
+  unsigned x,m;
+  int he,dx;
+  int n,nb;
+  unsigned nsx;
+  //unsigned short *ht;
+  unsigned *ht;
+  const int *text;
+  const char *sp;
+  unsigned char k1 = 0, k2 = 0;
+
+  n = d->tw;
+  sp = (const char *) s;
+  m = n*sizeof(int);
+  while (m--) {
+    k1 += *sp++;
+    if (k1) k1 ^= (unsigned char) 0xff;
+    k2 += k1;
+    if (k2) k2 ^= (unsigned char) 0xff;
+  }
+  he = 255*k1 + k2;
+  dx = 255*k2 + k1;
+  dx += dx + 1;
+
+  he += dx << 16;
+  dx += he << 16;
+
+  nb = n*sizeof(int);
+  m = d->nhs-1;
+  ht = d->ht;
+  nsx = d->nsx;
+  text = d->text;
+  ntpl++;
+  while ((x = ht[he &= m]) < nsx &&
+	 0 != memcmp(s,&text[n*x],nb))  {
+    he += dx;
+    ntplx++;
+  }
+  return he;
+}
+
+#if 0 /* not used */
+string_dict *restore_string_dict(char *text, int ntc, int ne, int ic,
+				 int byte_swap_required) {
+  string_dict *d;
+  int i, k;
+  unsigned *ndx;
+
+  ZALLOCATE_ST(d, 1);
+  d->text = text;
+  d->ntc = ntc;
+  d->nts = (ntc+1) & ~1;
+  d->ignore_case = ic;
+  d->nxs = d-> nsx = ne;
+  ndx = (unsigned *) ALLOCATE_ST(d->ndx, ne);
+  for (i = k = 0; i < ne; i++) {
+    //unsigned short x = *((unsigned short *)(d->text + k));
+    unsigned short x;
+    memcpy(&x,d->text + k, sizeof(unsigned short));
+    if (byte_swap_required) {
+      x = (unsigned short) ((x << 8) | (unsigned char) (x >> 8));
+    }
+    k += 2;
+    ndx[x] = k;
+    k += 1 + strlen(d->text + k);
+  }
+  i = 1;
+  while (8*i < 10*ne) i *= 2;
+  ALLOCATE_ST(d->ht, i);
+  d->nhs = i;
+  hash_string_dict(d);
+  return d;
+}
+#endif /* 0 */
+
+string_dict *null_str_dict(void){
+  string_dict *d;
+
+  ZALLOCATE_ST(d, 1);
+  add_string_dict("", d);
+/*  d->ignore_case = ignore_case; */
+  return d;
+}
+
+list_dict *null_list_dict(void){
+  list_dict *d;
+
+  ZALLOCATE_ST(d, 1);
+  return d;
+}
+
+tuple_dict *null_tuple_dict(int tw){
+  tuple_dict *d;
+
+  ZALLOCATE_ST(d, 1);
+  d->tw = tw;
+  return d;
+}