Mercurial > ~dholland > hg > ag > index.cgi
diff anagram/agcore/dict.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anagram/agcore/dict.cpp Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,710 @@ +/* + * AnaGram, A System for Syntax Directed Programming + * Copyright 1993-2000 Parsifal Software. All Rights Reserved. + * See the file COPYING for license and usage terms. + * + * dict.cpp - old dictionary module + */ + +#include <stdarg.h> +#include "port.h" + +#include "arrays.h" +#include "assert.h" +#include "dict.h" +#include "minmax.h" +#include "myalloc.h" +#include "stacks.h" + +//#define INCLUDE_LOGGING +#include "log.h" + + +#define HASH_TABLE_MAX 0x1000000 + +#define EVEN_UP(x) (x) /* + ((x)%2) */ + +static int find_str_entry(const char *s, string_dict *d); +static int find_lst_entry(const int *s, list_dict *d); +static int find_tpl_entry(const int *s, tuple_dict *d); + + +#if 0 /* unused */ +void sort_string_dict(string_dict *d){ + unsigned i; + //unsigned short k; + unsigned k; + + if (d->nss >= d->nsx) return; + REALLOCATE_ST(d->st,d->nsx); +{ + unsigned *dest = d->st + d->nss; + unsigned *src = d->ndx + d->nss; + //k = (unsigned short) (d->nsx - d->nss); + k = (d->nsx - d->nss); + while (k--) dest[k] = src[k]; +} + + /* 10/1/06 note: these two funcs are/were defined in merge.cpp */ + sort_strings(d->text,d->st+d->nss,d->nsx-d->nss); + merge_strings(d->text,d->st, d->nss, d->nsx-d->nss); + REALLOCATE_ST(d->pt,d->nsx); + d->nss = d->nsx; + for (i = 0; i< d->nss; i++) { + k = d->st[i]-2; + //k = *((unsigned short *)(d->text+k)); + memcpy(&k, d->text+k, sizeof(unsigned short)); + d->pt[k] = i; + } +} +#endif /* 0 */ + +list_dict *reset_list_dict(list_dict *d) { + int k; + ok_ptr(d); + if (d->nxs) { + ok_ptr(d->ndx); + } + if (d->nts) { + ok_ptr(d->text); + } + if (d->nhs) { + ok_ptr(d->ht); + } + d->nsx = 0; + d->ntc = 0; +/* + k = sizeof(short)*d->nhs; + if (k) { + size_ok(d->ht, k, __FILE__, __LINE__); + } + memset(d->ht, 0xff, k); +*/ + k = sizeof(unsigned)*d->nhs; + if (k) { + size_ok(d->ht, k, __FILE__, __LINE__); + } + memset(d->ht, 0xff, k); + return NULL; +} + +tuple_dict *reset_tuple_dict(tuple_dict *d) { + unsigned k; + ok_ptr(d); + if (d->nxs) { + ok_ptr(d->ndx); + } + if (d->nts) { + ok_ptr(d->text); + } + if (d->nhs) { + ok_ptr(d->ht); + } + d->nsx = 0; + d->ntc = 0; +/* + k = sizeof(short)*d->nhs; + if (k) { + size_ok(d->ht, k, __FILE__, __LINE__); + } + memset(d->ht, 0xff, k); +*/ + k = sizeof(unsigned)*d->nhs; + if (k) { + size_ok(d->ht, k, __FILE__, __LINE__); + } + memset(d->ht, 0xff, k); + return NULL; +} + +static void hash_string_dict(string_dict *d){ + unsigned i,x; + const char *s; + + //memset(d->ht, 0xff, sizeof(short)*d->nhs); + memset(d->ht, 0xff, sizeof(unsigned)*d->nhs); + for (i = 0; i < d->nsx; i++) { + s = &d->text[d->ndx[i]]; + x = find_str_entry(s,d); + //d->ht[x] = (unsigned short) i; + d->ht[x] = i; + } +} + +static void hash_list_dict(list_dict *d){ + unsigned i,x; + const int *s; + + //memset(d->ht, 0xff, sizeof(short)*d->nhs); + memset(d->ht, 0xff, sizeof(unsigned)*d->nhs); + for (i = 0; i < d->nsx; i++) { + s = &d->text[d->ndx[i]]; + x = find_lst_entry(s,d); + //d->ht[x] = (unsigned short) i; + d->ht[x] = i; + } +} + +static void hash_tpl_dict(tuple_dict *d){ + unsigned i,x; + const int *s; + int n; + + //memset(d->ht, 0xff, sizeof(short)*d->nhs); + memset(d->ht, 0xff, sizeof(unsigned)*d->nhs); + s = d->text; + n = d->tw; + for (i = 0; i < d->nsx; i++) { + x = find_tpl_entry(s,d); + //d->ht[x] = (unsigned short) i; + d->ht[x] = i; + s += n; + } +} + +list_dict *delete_list_dict(list_dict *da) { + list_dict d; + + if (da == NULL) { + return NULL; + } + d = *da; + DEALLOCATE(da); + if (d.ndx != NULL) DEALLOCATE(d.ndx); + if (d.text != NULL) DEALLOCATE(d.text); + if (d.ht != NULL) DEALLOCATE(d.ht); + if (d.st != NULL) DEALLOCATE(d.st); + if (d.pt != NULL) DEALLOCATE(d.pt); + return NULL; +} + +tuple_dict *delete_tuple_dict(tuple_dict *da) { + tuple_dict d; + + if (da == NULL) { + return NULL; + } + d = *da; + DEALLOCATE(da); + if (d.ndx != NULL) DEALLOCATE(d.ndx); + if (d.text != NULL) DEALLOCATE(d.text); + if (d.ht != NULL) DEALLOCATE(d.ht); + if (d.st != NULL) DEALLOCATE(d.st); + if (d.pt != NULL) DEALLOCATE(d.pt); + return NULL; +} + +void empty_tuple_dict(tuple_dict *da) { + tuple_dict d; + + if (da == NULL) { + return; + } + d = *da; + if (d.ndx != NULL) DEALLOCATE(d.ndx); + if (d.text != NULL) DEALLOCATE(d.text); + if (d.ht != NULL) DEALLOCATE(d.ht); + if (d.st != NULL) DEALLOCATE(d.st); + if (d.pt != NULL) DEALLOCATE(d.pt); + memset(&d, 0, sizeof(d)); + d.tw = da->tw; + *da = d; +} + +string_dict *delete_string_dict(string_dict *da) { + string_dict d; + + if (da == NULL) { + return NULL; + } + d = *da; + DEALLOCATE(da); + if (d.ndx != NULL) DEALLOCATE(d.ndx); + if (d.text != NULL) DEALLOCATE(d.text); + if (d.ht != NULL) DEALLOCATE(d.ht); + if (d.st != NULL) DEALLOCATE(d.st); + if (d.pt != NULL) DEALLOCATE(d.pt); + return NULL; +} + +int identify_string(const char *s, string_dict *d) { + unsigned he, x; + + check_stack; + he = find_str_entry(s, d); + if ((x = d->ht[he]) >= d->nsx) { + x = 0; + } + return x; +} + + +int add_string_dict(const char *s,string_dict *d){ + int len; + int he; + unsigned x; + long nhs, nsx; + + check_stack; + len = strlen(s)+3; + if (d->ntc+len > d->nts) { + unsigned long k = d->ntc+len; + k += k/2; + k = max(200UL, min((unsigned long) MAX_BYTES,k)); + LOGSECTION("add_string_dict::resize"); + LOGV(k) LCV(d->ntc + len); + assert(k >= d->ntc + len); + REALLOCATE_ST(d->text, (unsigned) k); + d->nts = (unsigned) k; + } + if (d->nsx +1 > d->nxs) { + unsigned long k = d->nsx+1; + k += k/2; + k = max(100UL, min((unsigned long) MAX_INTS, k)); + LOGSECTION("add_string_dict::resize"); + LOGV(k) LCV(d->nsx + 1); + assert(k >= d->nsx + 1); + REALLOCATE_ST(d->ndx, (unsigned) k); + d->nxs = (unsigned) k; + } + if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { + nhs = max(nhs, 256L); + while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { + nhs *= 2; + } + LOGSECTION("add_string_dict::resize"); + LOGV(nhs) LCV(d->nsx); + assert((unsigned) nhs > d->nsx); + REALLOCATE_ST(d->ht,(unsigned) nhs); + d->nhs = (unsigned) nhs; + hash_string_dict(d); + } + s = strcpy(d->text+d->ntc+2,s); + he = find_str_entry(s, d); + if ((x = d->ht[he]) >= d->nsx) { + //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx; + unsigned short length = d->nsx; + //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short)); + memcpy(d->text+d->ntc, &length, sizeof(unsigned short)); + d->ntc += 2; + x = d->nsx++; + //d->ndx[d->ht[he] = (unsigned short) x] = d->ntc; + d->ndx[d->ht[he] = x] = d->ntc; + d->ntc += EVEN_UP(len-2); + } + return x; +} +/* +void close_list_dict(list_dict *d){ + unsigned len; + + if (d == NULL) return; + len = 2*(d->ntc/d->nsx) + d->ntc; + if (len < d->nts) { + REALLOCATE_ST(d->text,len); + d->nts = len; + } + if (d->nsx + 1< d->nxs) { + unsigned k = d->nsx + 1; + REALLOCATE_ST(d->ndx,k); + d->nxs = k; + } +} +*/ +int list_in_dict(int *s, int n, list_dict *d){ + int k = d->nsx; + check_stack; + return k != add_list_dict(s, n, d); +} + +int add_list_dict(const int *s,int n,list_dict *d){ + LOGSECTION("add_list_dict"); + unsigned len; + int he; + unsigned x; + long nhs, nsx; + int *sc; + + check_stack; + len = n+3; + if (d->ntc+len > d->nts) { + unsigned long k = d->ntc + len; + k += k/2; + k = min((unsigned long)MAX_INTS, max(200UL, k)); + LOGSECTION("add_list_dict::resize"); + LOGV(k) LCV(d->ntc + len); + assert(k >= d->ntc + len); + REALLOCATE_ST(d->text,(unsigned) k); + d->nts = (unsigned) k; + } + if (d->nsx + 1 > d->nxs) { + unsigned long k = d->nsx + 1; + k += k/2; + k = min((unsigned long) MAX_INTS, max(200UL,k)); + LOGSECTION("add_list_dict::resize"); + LOGV(k) LCV(d->nsx + 1); + assert(k >= d->nsx+1); + REALLOCATE_ST(d->ndx, (unsigned) k); + d->nxs = (unsigned) k; + } + if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { + nhs = max(nhs,256L); + while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { + nhs *= 2; + } + LOGSECTION("add_list_dict::resize"); + LOGV(nhs) LCV(d->nsx); + assert((unsigned)nhs > d->nsx); + REALLOCATE_ST(d->ht, (unsigned) nhs); + d->nhs = (unsigned) nhs; + hash_list_dict(d); + } + sc = (int *) memcpy(d->text+d->ntc+2, s, n*sizeof(int)); + *--sc = n+1; + he = find_lst_entry(sc,d); + if ((x = d->ht[he]) >= d->nsx) { + //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx; + //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short)); + //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned)); + d->text[d->ntc] = d->nsx; + //d->ndx[d->ht[he] = (unsigned short) (x = d->nsx++)] = ++d->ntc; + d->ndx[d->ht[he] = (x = d->nsx++)] = ++d->ntc; + d->ntc += n+1; + } + return x; +} + +void extract_tuple_dict(tuple_dict *d, unsigned k, ...) { + va_list ap; + const int *p; + unsigned n; + + assert(k < d->nsx); + n = d->tw; + p = d->text + n*k; + va_start(ap,k); + while (n--) { + *va_arg(ap,int*) = *p++; + } + va_end(ap); +} + +int insert_tuple_dict(tuple_dict *d,...){ + int he; + va_list s; + unsigned n; + long nsx,nhs; + //int *tpl; + unsigned i; + + check_stack; + va_start(s, d); + n = d->tw; + //tpl = local_array(n,int); + LocalArray<int> tpl(n); + for (i = 0; i < n; i++) tpl[i] = va_arg(s,int); + va_end(s); + if (d->ntc+n > d->nts) { + unsigned long k = d->ntc + n; + k += k/2; + k = min((unsigned long) MAX_INTS, max(200UL,k)); + LOGSECTION("insert_tuple_dict::resize"); + LOGV(k) LCV(d->ntc + n); + assert(k >= d->ntc + n); + REALLOCATE_ST(d->text, (unsigned) k); + d->nts = (unsigned) k; + } + if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { + nhs = max(nhs, 256L); + while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { + nhs *= 2; + } + LOGSECTION("insert_tuple_dict::resize"); + LOGV(nhs) LCV(d->nsx); + assert((unsigned)nhs > d->nsx); + REALLOCATE_ST(d->ht, (unsigned) nhs); + d->nhs = (unsigned) nhs; + hash_tpl_dict(d); + } + he = find_tpl_entry(tpl, d); + if ((unsigned)(d->ht[he]) >= d->nsx) { + //d->ht[he] = (unsigned short) d->nsx++; + d->ht[he] = d->nsx++; + memcpy(d->text+d->ntc, tpl, n*sizeof(int)); + d->ntc += n; + return 0; + } + return 1; +} + + +int add_tuple_dict_new(tuple_dict *d, ...) { + int he; + unsigned x; + va_list s; + unsigned n; + long nsx, nhs; + //int *tpl; + unsigned i; + + check_stack; + va_start(s, d); + n = d->tw; + //tpl = local_array(n, int); + LocalArray<int> tpl(n); + for (i = 0; i < n; i++) tpl[i] = va_arg(s, int); + va_end(s); + if (d->ntc+n > d->nts) { + unsigned long k = d->ntc + n; + + k += k/2; + k = min((unsigned long) MAX_INTS, max(200UL,k)); + LOGSECTION("add_tuple_dict_new::resize"); + LOGV(k) LCV(d->ntc + n); + assert(k >= d->ntc + n); + REALLOCATE_ST(d->text, (unsigned)k); + d->nts = (unsigned) k; + } + if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { + nhs = max(nhs, 256L); + while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { + nhs *= 2; + } + LOGSECTION("add_tuple_dict_new::resize"); + LOGV(nhs) LCV(d->nsx); + assert((unsigned) nhs > d->nsx); + REALLOCATE_ST(d->ht, (unsigned) nhs); + d->nhs = (unsigned) nhs; + hash_tpl_dict(d); + } + he = find_tpl_entry(tpl, d); + if ((x = d->ht[he]) >= d->nsx) { + //d->ht[he] = (unsigned short) (x = d->nsx++); + d->ht[he] = (x = d->nsx++); + memcpy(d->text+d->ntc, tpl, n*sizeof(int)); + d->ntc += n; + } + return x; +} + +/* +int identify_tuple(tuple_dict *d, ...) { + int he; + unsigned x; + va_list s; + int n = d->tw; + int *tpl, i; + + check_stack; + va_start(s,d); + tpl = local_array(n, int); + for (i = 0; i < n; i++) { + tpl[i] = va_arg(s, int); + } + va_end(s); + he = find_tpl_entry(tpl,d); + if ((x = d->ht[he]) >= d->nsx) { + return 0; + } + return x; +} +*/ + +/* + * Use a variant of Fletcher's checksum + * See DDJ, May, 1992, p32. + */ + +static int nstr = 0, nstrx = 0; +static int nlst = 0, nlstx = 0; +static int ntpl = 0, ntplx = 0; + +static int find_str_entry(const char *s, string_dict *d) { + unsigned x,m; + int he,dx; + const char *sp; + + sp = s; + if (d->ignore_case) { + unsigned char k1 = 0, k2 = 0, c; + while ((c = *sp++) != 0) { + c &= 0337; + k1 += c; + if (k1) k1 ^= (unsigned char) 0xff; + k2 += k1; + if (k2) k2 ^= (unsigned char) 0xff; + } + he = 255*k1 + k2; + dx = 255*k2 + k1; + dx += dx + 1; + + he += dx << 16; + dx += he << 16; + + m = d->nhs - 1; + while ((x = d->ht[he &= m]) < d->nsx + && 0 != stricmp(s,&d->text[d->ndx[x]])) { + he += dx; + } + return he; + } + + + { + unsigned char k1 = 0, k2 = 0, c; + while((c = *sp++) != 0) { + k1 += c; + if (k1) k1 ^= (unsigned char) 0xff; + k2 += k1; + if (k2) k2 ^= (unsigned char) 0xff; + } + he = 255*k1 + k2; + dx = 255*k2 + k1; + } + dx += dx + 1; + + he += dx << 16; + dx += he << 16; + + m = d->nhs - 1; + nstr++; + while ((x = d->ht[he &= m]) < d->nsx && 0 != strcmp(s,&d->text[d->ndx[x]])) { + he += dx; + nstrx++; + } + return he; +} + +static int find_lst_entry(const int *s, list_dict *d){ + unsigned x,m; + int he,dx; + int n; + const char *sp; + unsigned char k1 = 0, k2 = 0; + + //sp = (const char *) s; + n = m = *s; + m *= sizeof(int); + sp = (const char *) s; + while (m--) { + k1 += *sp++; + if (k1) k1 ^= (unsigned char) 0xff; + k2 += k1; + if (k2) k2 ^= (unsigned char) 0xff; + } + he = 255*k1 + k2; + dx = 255*k2 + k1; + dx += dx + 1; + + he += dx << 16; + dx += he << 16; + + m = d->nhs-1; + n *= sizeof(int); + nlst++; + while ((x = d->ht[he &= m]) < d->nsx && + 0 != memcmp(s,&d->text[d->ndx[x]], n)) { + he += dx; + nlstx++; + } + return he; +} + +static int find_tpl_entry(const int *s, tuple_dict *d){ + unsigned x,m; + int he,dx; + int n,nb; + unsigned nsx; + //unsigned short *ht; + unsigned *ht; + const int *text; + const char *sp; + unsigned char k1 = 0, k2 = 0; + + n = d->tw; + sp = (const char *) s; + m = n*sizeof(int); + while (m--) { + k1 += *sp++; + if (k1) k1 ^= (unsigned char) 0xff; + k2 += k1; + if (k2) k2 ^= (unsigned char) 0xff; + } + he = 255*k1 + k2; + dx = 255*k2 + k1; + dx += dx + 1; + + he += dx << 16; + dx += he << 16; + + nb = n*sizeof(int); + m = d->nhs-1; + ht = d->ht; + nsx = d->nsx; + text = d->text; + ntpl++; + while ((x = ht[he &= m]) < nsx && + 0 != memcmp(s,&text[n*x],nb)) { + he += dx; + ntplx++; + } + return he; +} + +#if 0 /* not used */ +string_dict *restore_string_dict(char *text, int ntc, int ne, int ic, + int byte_swap_required) { + string_dict *d; + int i, k; + unsigned *ndx; + + ZALLOCATE_ST(d, 1); + d->text = text; + d->ntc = ntc; + d->nts = (ntc+1) & ~1; + d->ignore_case = ic; + d->nxs = d-> nsx = ne; + ndx = (unsigned *) ALLOCATE_ST(d->ndx, ne); + for (i = k = 0; i < ne; i++) { + //unsigned short x = *((unsigned short *)(d->text + k)); + unsigned short x; + memcpy(&x,d->text + k, sizeof(unsigned short)); + if (byte_swap_required) { + x = (unsigned short) ((x << 8) | (unsigned char) (x >> 8)); + } + k += 2; + ndx[x] = k; + k += 1 + strlen(d->text + k); + } + i = 1; + while (8*i < 10*ne) i *= 2; + ALLOCATE_ST(d->ht, i); + d->nhs = i; + hash_string_dict(d); + return d; +} +#endif /* 0 */ + +string_dict *null_str_dict(void){ + string_dict *d; + + ZALLOCATE_ST(d, 1); + add_string_dict("", d); +/* d->ignore_case = ignore_case; */ + return d; +} + +list_dict *null_list_dict(void){ + list_dict *d; + + ZALLOCATE_ST(d, 1); + return d; +} + +tuple_dict *null_tuple_dict(int tw){ + tuple_dict *d; + + ZALLOCATE_ST(d, 1); + d->tw = tw; + return d; +}