view anagram/agcore/dict.cpp @ 21:1c9dac05d040

Add lint-style FALLTHROUGH annotations to fallthrough cases. (in the parse engine and thus the output code) Document this, because the old output causes warnings with gcc10.
author David A. Holland
date Mon, 13 Jun 2022 00:04:38 -0400
parents 13d2b8934445
children
line wrap: on
line source

/*
 * AnaGram, A System for Syntax Directed Programming
 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
 * See the file COPYING for license and usage terms.
 *
 * dict.cpp - old dictionary module
 */

#include <stdarg.h>
#include "port.h"

#include "arrays.h"
#include "assert.h"
#include "dict.h"
#include "minmax.h"
#include "myalloc.h"
#include "stacks.h"

//#define INCLUDE_LOGGING
#include "log.h"


#define HASH_TABLE_MAX 0x1000000

#define EVEN_UP(x) (x) /* + ((x)%2) */

static int find_str_entry(const char *s, string_dict *d);
static int find_lst_entry(const int *s, list_dict *d);
static int find_tpl_entry(const int *s, tuple_dict *d);


#if 0 /* unused */
void sort_string_dict(string_dict *d){
  unsigned i;
  //unsigned short k;
  unsigned k;

  if (d->nss >= d->nsx) return;
  REALLOCATE_ST(d->st,d->nsx);
{
  unsigned *dest = d->st + d->nss;
  unsigned *src = d->ndx + d->nss;
  //k = (unsigned short) (d->nsx - d->nss);
  k = (d->nsx - d->nss);
  while (k--) dest[k] = src[k];
}

  /* 10/1/06 note: these two funcs are/were defined in merge.cpp */
  sort_strings(d->text,d->st+d->nss,d->nsx-d->nss);
  merge_strings(d->text,d->st, d->nss, d->nsx-d->nss);
  REALLOCATE_ST(d->pt,d->nsx);
  d->nss = d->nsx;
  for (i = 0; i< d->nss; i++) {
    k = d->st[i]-2;
    //k = *((unsigned short *)(d->text+k));
		memcpy(&k, d->text+k, sizeof(unsigned short));
    d->pt[k] = i;
  }
}
#endif /* 0 */

list_dict *reset_list_dict(list_dict *d) {
  int k;
  ok_ptr(d);
  if (d->nxs) {
    ok_ptr(d->ndx);
  }
  if (d->nts) {
    ok_ptr(d->text);
  }
  if (d->nhs) {
    ok_ptr(d->ht);
  }
  d->nsx = 0;
  d->ntc = 0;
/*
  k = sizeof(short)*d->nhs;
  if (k) {
    size_ok(d->ht, k, __FILE__, __LINE__);
  }
  memset(d->ht, 0xff, k);
*/
  k = sizeof(unsigned)*d->nhs;
  if (k) {
    size_ok(d->ht, k, __FILE__, __LINE__);
  }
  memset(d->ht, 0xff, k);
  return NULL;
}

tuple_dict *reset_tuple_dict(tuple_dict *d) {
  unsigned k;
  ok_ptr(d);
  if (d->nxs) {
    ok_ptr(d->ndx);
  }
  if (d->nts) {
    ok_ptr(d->text);
  }
  if (d->nhs) {
    ok_ptr(d->ht);
  }
  d->nsx = 0;
  d->ntc = 0;
/*
  k = sizeof(short)*d->nhs;
  if (k) {
    size_ok(d->ht, k, __FILE__, __LINE__);
  }
  memset(d->ht, 0xff, k);
*/
  k = sizeof(unsigned)*d->nhs;
  if (k) {
    size_ok(d->ht, k, __FILE__, __LINE__);
  }
  memset(d->ht, 0xff, k);
  return NULL;
}

static void hash_string_dict(string_dict *d){
  unsigned i,x;
  const char *s;

  //memset(d->ht, 0xff, sizeof(short)*d->nhs);
  memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
  for (i = 0; i < d->nsx; i++) {
    s = &d->text[d->ndx[i]];
    x = find_str_entry(s,d);
    //d->ht[x] = (unsigned short) i;
    d->ht[x] = i;
  }
}

static void hash_list_dict(list_dict *d){
  unsigned i,x;
  const int *s;

  //memset(d->ht, 0xff, sizeof(short)*d->nhs);
  memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
  for (i = 0; i < d->nsx; i++) {
    s = &d->text[d->ndx[i]];
    x = find_lst_entry(s,d);
    //d->ht[x] = (unsigned short) i;
    d->ht[x] = i;
  }
}

static void hash_tpl_dict(tuple_dict *d){
  unsigned i,x;
  const int *s;
  int n;

  //memset(d->ht, 0xff, sizeof(short)*d->nhs);
  memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
  s = d->text;
  n = d->tw;
  for (i = 0; i < d->nsx; i++) {
    x = find_tpl_entry(s,d);
    //d->ht[x] = (unsigned short) i;
    d->ht[x] = i;
    s += n;
  }
}

list_dict *delete_list_dict(list_dict *da) {
  list_dict d;

  if (da == NULL) {
    return NULL;
  }
  d = *da;
  DEALLOCATE(da);
  if (d.ndx != NULL) DEALLOCATE(d.ndx);
  if (d.text != NULL) DEALLOCATE(d.text);
  if (d.ht != NULL) DEALLOCATE(d.ht);
  if (d.st != NULL) DEALLOCATE(d.st);
  if (d.pt != NULL) DEALLOCATE(d.pt);
  return NULL;
}

tuple_dict *delete_tuple_dict(tuple_dict *da) {
  tuple_dict d;

  if (da == NULL) {
    return NULL;
  }
  d = *da;
  DEALLOCATE(da);
  if (d.ndx != NULL) DEALLOCATE(d.ndx);
  if (d.text != NULL) DEALLOCATE(d.text);
  if (d.ht != NULL) DEALLOCATE(d.ht);
  if (d.st != NULL) DEALLOCATE(d.st);
  if (d.pt != NULL) DEALLOCATE(d.pt);
  return NULL;
}

void empty_tuple_dict(tuple_dict *da) {
  tuple_dict d;

  if (da == NULL) {
    return;
  }
  d = *da;
  if (d.ndx != NULL) DEALLOCATE(d.ndx);
  if (d.text != NULL) DEALLOCATE(d.text);
  if (d.ht != NULL) DEALLOCATE(d.ht);
  if (d.st != NULL) DEALLOCATE(d.st);
  if (d.pt != NULL) DEALLOCATE(d.pt);
  memset(&d, 0, sizeof(d));
  d.tw = da->tw;
  *da = d;
}

string_dict *delete_string_dict(string_dict *da) {
  string_dict d;

  if (da == NULL) {
    return NULL;
  }
  d = *da;
  DEALLOCATE(da);
  if (d.ndx != NULL) DEALLOCATE(d.ndx);
  if (d.text != NULL) DEALLOCATE(d.text);
  if (d.ht != NULL) DEALLOCATE(d.ht);
  if (d.st != NULL) DEALLOCATE(d.st);
  if (d.pt != NULL) DEALLOCATE(d.pt);
  return NULL;
}

int identify_string(const char *s, string_dict *d) {
  unsigned he, x;

  check_stack;
  he = find_str_entry(s, d);
  if ((x = d->ht[he]) >= d->nsx) {
    x = 0;
  }
  return x;
}


int add_string_dict(const char *s,string_dict *d){
  int len;
  int he;
  unsigned x;
  long nhs, nsx;

  check_stack;
  len = strlen(s)+3;
  if (d->ntc+len > d->nts) {
    unsigned long k = d->ntc+len;
    k += k/2;
    k = max(200UL, min((unsigned long) MAX_BYTES,k));
    LOGSECTION("add_string_dict::resize");
    LOGV(k) LCV(d->ntc + len);
    assert(k >= d->ntc + len);
    REALLOCATE_ST(d->text, (unsigned) k);
    d->nts = (unsigned) k;
  }
  if (d->nsx +1 > d->nxs) {
    unsigned long k = d->nsx+1;
    k += k/2;
    k = max(100UL, min((unsigned long) MAX_INTS, k));
    LOGSECTION("add_string_dict::resize");
    LOGV(k) LCV(d->nsx + 1);
    assert(k >= d->nsx + 1);
    REALLOCATE_ST(d->ndx, (unsigned) k);
    d->nxs = (unsigned) k;
  }
  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
    nhs = max(nhs, 256L);
    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
      nhs *= 2;
    }
    LOGSECTION("add_string_dict::resize");
    LOGV(nhs) LCV(d->nsx);
    assert((unsigned) nhs > d->nsx);
    REALLOCATE_ST(d->ht,(unsigned) nhs);
    d->nhs = (unsigned) nhs;
    hash_string_dict(d);
  }
  s = strcpy(d->text+d->ntc+2,s);
  he = find_str_entry(s, d);
  if ((x = d->ht[he]) >= d->nsx) {
    //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx;
    unsigned short length = d->nsx;
    //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short));
    memcpy(d->text+d->ntc, &length, sizeof(unsigned short));
    d->ntc += 2;
    x = d->nsx++;
    //d->ndx[d->ht[he] = (unsigned short) x] = d->ntc;
    d->ndx[d->ht[he] = x] = d->ntc;
    d->ntc += EVEN_UP(len-2);
  }
  return x;
}
/*
void close_list_dict(list_dict *d){
  unsigned len;

  if (d == NULL) return;
  len = 2*(d->ntc/d->nsx) + d->ntc;
  if (len < d->nts) {
    REALLOCATE_ST(d->text,len);
    d->nts = len;
  }
  if (d->nsx + 1< d->nxs) {
    unsigned k = d->nsx + 1;
    REALLOCATE_ST(d->ndx,k);
    d->nxs = k;
  }
}
*/
int list_in_dict(int *s, int n, list_dict *d){
  int k = d->nsx;
  check_stack;
  return k != add_list_dict(s, n, d);
}

int add_list_dict(const int *s,int n,list_dict *d){
  LOGSECTION("add_list_dict");
  unsigned len;
  int he;
  unsigned x;
  long nhs, nsx;
  int *sc;

  check_stack;
  len = n+3;
  if (d->ntc+len > d->nts) {
    unsigned long k = d->ntc + len;
    k += k/2;
    k = min((unsigned long)MAX_INTS, max(200UL, k));
    LOGSECTION("add_list_dict::resize");
    LOGV(k) LCV(d->ntc + len);
    assert(k >= d->ntc + len);
    REALLOCATE_ST(d->text,(unsigned) k);
    d->nts = (unsigned) k;
  }
  if (d->nsx + 1 > d->nxs) {
    unsigned long k = d->nsx + 1;
    k += k/2;
    k = min((unsigned long) MAX_INTS, max(200UL,k));
    LOGSECTION("add_list_dict::resize");
    LOGV(k) LCV(d->nsx + 1);
    assert(k >= d->nsx+1);
    REALLOCATE_ST(d->ndx, (unsigned) k);
    d->nxs = (unsigned) k;
  }
  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
    nhs = max(nhs,256L);
    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
      nhs *= 2;
    }
    LOGSECTION("add_list_dict::resize");
    LOGV(nhs) LCV(d->nsx);
    assert((unsigned)nhs > d->nsx);
    REALLOCATE_ST(d->ht, (unsigned) nhs);
    d->nhs = (unsigned) nhs;
    hash_list_dict(d);
  }
  sc = (int *) memcpy(d->text+d->ntc+2, s, n*sizeof(int));
  *--sc = n+1;
  he = find_lst_entry(sc,d);
  if ((x = d->ht[he]) >= d->nsx) {
    //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx;
    //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short));
    //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned));
    d->text[d->ntc] = d->nsx;
    //d->ndx[d->ht[he] = (unsigned short) (x = d->nsx++)] = ++d->ntc;
    d->ndx[d->ht[he] = (x = d->nsx++)] = ++d->ntc;
    d->ntc += n+1;
  }
  return x;
}

void extract_tuple_dict(tuple_dict *d, unsigned k, ...) {
  va_list ap;
  const int *p;
  unsigned n;

  assert(k < d->nsx);
  n = d->tw;
  p = d->text + n*k;
  va_start(ap,k);
  while (n--) {
    *va_arg(ap,int*) = *p++;
  }
  va_end(ap);
}

int insert_tuple_dict(tuple_dict *d,...){
  int he;
  va_list s;
  unsigned n;
  long nsx,nhs;
  //int *tpl;
  unsigned i;

  check_stack;
  va_start(s, d);
  n = d->tw;
  //tpl = local_array(n,int);
  LocalArray<int> tpl(n);
  for (i = 0; i < n; i++) tpl[i] = va_arg(s,int);
  va_end(s);
  if (d->ntc+n > d->nts) {
    unsigned long k = d->ntc + n;
    k += k/2;
    k = min((unsigned long) MAX_INTS, max(200UL,k));
    LOGSECTION("insert_tuple_dict::resize");
    LOGV(k) LCV(d->ntc + n);
    assert(k >= d->ntc + n);
    REALLOCATE_ST(d->text, (unsigned) k);
    d->nts = (unsigned) k;
  }
  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
    nhs = max(nhs, 256L);
    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
      nhs *= 2;
    }
    LOGSECTION("insert_tuple_dict::resize");
    LOGV(nhs) LCV(d->nsx);
    assert((unsigned)nhs > d->nsx);
    REALLOCATE_ST(d->ht, (unsigned) nhs);
    d->nhs = (unsigned) nhs;
    hash_tpl_dict(d);
  }
  he = find_tpl_entry(tpl, d);
  if ((unsigned)(d->ht[he]) >= d->nsx) {
    //d->ht[he] = (unsigned short) d->nsx++;
    d->ht[he] = d->nsx++;
    memcpy(d->text+d->ntc, tpl, n*sizeof(int));
    d->ntc += n;
    return 0;
  }
  return 1;
}


int add_tuple_dict_new(tuple_dict *d, ...) {
  int he;
  unsigned x;
  va_list s;
  unsigned n;
  long nsx, nhs;
  //int *tpl;
  unsigned i;

  check_stack;
  va_start(s, d);
  n = d->tw;
  //tpl = local_array(n, int);
  LocalArray<int> tpl(n);
  for (i = 0; i < n; i++) tpl[i] = va_arg(s, int);
  va_end(s);
  if (d->ntc+n > d->nts) {
    unsigned long k = d->ntc + n;

    k += k/2;
    k = min((unsigned long) MAX_INTS, max(200UL,k));
    LOGSECTION("add_tuple_dict_new::resize");
    LOGV(k) LCV(d->ntc + n);
    assert(k >= d->ntc + n);
    REALLOCATE_ST(d->text, (unsigned)k);
    d->nts = (unsigned) k;
  }
  if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
    nhs = max(nhs, 256L);
    while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
      nhs *= 2;
    }
    LOGSECTION("add_tuple_dict_new::resize");
    LOGV(nhs) LCV(d->nsx);
    assert((unsigned) nhs > d->nsx);
    REALLOCATE_ST(d->ht, (unsigned) nhs);
    d->nhs = (unsigned) nhs;
    hash_tpl_dict(d);
  }
  he = find_tpl_entry(tpl, d);
  if ((x = d->ht[he]) >= d->nsx) {
    //d->ht[he] = (unsigned short) (x = d->nsx++);
    d->ht[he] = (x = d->nsx++);
    memcpy(d->text+d->ntc, tpl, n*sizeof(int));
    d->ntc += n;
  }
  return x;
}

/*
int identify_tuple(tuple_dict *d, ...) {
  int he;
  unsigned x;
  va_list s;
  int n = d->tw;
  int *tpl, i;

  check_stack;
  va_start(s,d);
  tpl = local_array(n, int);
  for (i = 0; i < n; i++) {
    tpl[i] = va_arg(s, int);
  }
  va_end(s);
  he = find_tpl_entry(tpl,d);
  if ((x = d->ht[he]) >= d->nsx) {
    return 0;
  }
  return x;
}
*/

/*
 * Use a variant of Fletcher's checksum
 * See DDJ, May, 1992, p32.
 */

static int nstr = 0, nstrx = 0;
static int nlst = 0, nlstx = 0;
static int ntpl = 0, ntplx = 0;

static int find_str_entry(const char *s, string_dict *d) {
  unsigned x,m;
  int he,dx;
  const char *sp;

  sp = s;
  if (d->ignore_case) {
    unsigned char k1 = 0, k2 = 0, c;
    while ((c = *sp++) != 0) {
      c &= 0337;
      k1 += c;
      if (k1) k1 ^= (unsigned char) 0xff;
      k2 += k1;
      if (k2) k2 ^= (unsigned char) 0xff;
    }
    he = 255*k1 + k2;
    dx = 255*k2 + k1;
    dx += dx + 1;

    he += dx << 16;
    dx += he << 16;

    m = d->nhs - 1;
    while ((x = d->ht[he &= m]) < d->nsx
	   && 0 != stricmp(s,&d->text[d->ndx[x]])) {
      he += dx;
    }
    return he;
  }


  {
    unsigned char k1 = 0, k2 = 0, c;
    while((c = *sp++) != 0) {
      k1 += c;
      if (k1) k1 ^= (unsigned char) 0xff;
      k2 += k1;
      if (k2) k2 ^= (unsigned char) 0xff;
    }
    he = 255*k1 + k2;
    dx = 255*k2 + k1;
  }
  dx += dx + 1;

  he += dx << 16;
  dx += he << 16;

  m = d->nhs - 1;
  nstr++;
  while ((x = d->ht[he &= m]) < d->nsx && 0 != strcmp(s,&d->text[d->ndx[x]])) {
    he += dx;
    nstrx++;
  }
  return he;
}

static int find_lst_entry(const int *s, list_dict *d){
  unsigned x,m;
  int he,dx;
  int n;
  const char *sp;
  unsigned char k1 = 0, k2 = 0;

  //sp = (const char *) s;
  n = m = *s;
  m *= sizeof(int);
  sp = (const char *) s;
  while (m--) {
    k1 += *sp++;
    if (k1) k1 ^= (unsigned char) 0xff;
    k2 += k1;
    if (k2) k2 ^= (unsigned char) 0xff;
  }
  he = 255*k1 + k2;
  dx = 255*k2 + k1;
  dx += dx + 1;

  he += dx << 16;
  dx += he << 16;

  m = d->nhs-1;
  n *= sizeof(int);
  nlst++;
  while ((x = d->ht[he &= m]) < d->nsx &&
	 0 != memcmp(s,&d->text[d->ndx[x]], n))  {
    he += dx;
    nlstx++;
  }
  return he;
}

static int find_tpl_entry(const int *s, tuple_dict *d){
  unsigned x,m;
  int he,dx;
  int n,nb;
  unsigned nsx;
  //unsigned short *ht;
  unsigned *ht;
  const int *text;
  const char *sp;
  unsigned char k1 = 0, k2 = 0;

  n = d->tw;
  sp = (const char *) s;
  m = n*sizeof(int);
  while (m--) {
    k1 += *sp++;
    if (k1) k1 ^= (unsigned char) 0xff;
    k2 += k1;
    if (k2) k2 ^= (unsigned char) 0xff;
  }
  he = 255*k1 + k2;
  dx = 255*k2 + k1;
  dx += dx + 1;

  he += dx << 16;
  dx += he << 16;

  nb = n*sizeof(int);
  m = d->nhs-1;
  ht = d->ht;
  nsx = d->nsx;
  text = d->text;
  ntpl++;
  while ((x = ht[he &= m]) < nsx &&
	 0 != memcmp(s,&text[n*x],nb))  {
    he += dx;
    ntplx++;
  }
  return he;
}

#if 0 /* not used */
string_dict *restore_string_dict(char *text, int ntc, int ne, int ic,
				 int byte_swap_required) {
  string_dict *d;
  int i, k;
  unsigned *ndx;

  ZALLOCATE_ST(d, 1);
  d->text = text;
  d->ntc = ntc;
  d->nts = (ntc+1) & ~1;
  d->ignore_case = ic;
  d->nxs = d-> nsx = ne;
  ndx = (unsigned *) ALLOCATE_ST(d->ndx, ne);
  for (i = k = 0; i < ne; i++) {
    //unsigned short x = *((unsigned short *)(d->text + k));
    unsigned short x;
    memcpy(&x,d->text + k, sizeof(unsigned short));
    if (byte_swap_required) {
      x = (unsigned short) ((x << 8) | (unsigned char) (x >> 8));
    }
    k += 2;
    ndx[x] = k;
    k += 1 + strlen(d->text + k);
  }
  i = 1;
  while (8*i < 10*ne) i *= 2;
  ALLOCATE_ST(d->ht, i);
  d->nhs = i;
  hash_string_dict(d);
  return d;
}
#endif /* 0 */

string_dict *null_str_dict(void){
  string_dict *d;

  ZALLOCATE_ST(d, 1);
  add_string_dict("", d);
/*  d->ignore_case = ignore_case; */
  return d;
}

list_dict *null_list_dict(void){
  list_dict *d;

  ZALLOCATE_ST(d, 1);
  return d;
}

tuple_dict *null_tuple_dict(int tw){
  tuple_dict *d;

  ZALLOCATE_ST(d, 1);
  d->tw = tw;
  return d;
}