Mercurial > ~dholland > hg > ag > index.cgi
view anagram/agcore/dict.cpp @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line source
/* * AnaGram, A System for Syntax Directed Programming * Copyright 1993-2000 Parsifal Software. All Rights Reserved. * See the file COPYING for license and usage terms. * * dict.cpp - old dictionary module */ #include <stdarg.h> #include "port.h" #include "arrays.h" #include "assert.h" #include "dict.h" #include "minmax.h" #include "myalloc.h" #include "stacks.h" //#define INCLUDE_LOGGING #include "log.h" #define HASH_TABLE_MAX 0x1000000 #define EVEN_UP(x) (x) /* + ((x)%2) */ static int find_str_entry(const char *s, string_dict *d); static int find_lst_entry(const int *s, list_dict *d); static int find_tpl_entry(const int *s, tuple_dict *d); #if 0 /* unused */ void sort_string_dict(string_dict *d){ unsigned i; //unsigned short k; unsigned k; if (d->nss >= d->nsx) return; REALLOCATE_ST(d->st,d->nsx); { unsigned *dest = d->st + d->nss; unsigned *src = d->ndx + d->nss; //k = (unsigned short) (d->nsx - d->nss); k = (d->nsx - d->nss); while (k--) dest[k] = src[k]; } /* 10/1/06 note: these two funcs are/were defined in merge.cpp */ sort_strings(d->text,d->st+d->nss,d->nsx-d->nss); merge_strings(d->text,d->st, d->nss, d->nsx-d->nss); REALLOCATE_ST(d->pt,d->nsx); d->nss = d->nsx; for (i = 0; i< d->nss; i++) { k = d->st[i]-2; //k = *((unsigned short *)(d->text+k)); memcpy(&k, d->text+k, sizeof(unsigned short)); d->pt[k] = i; } } #endif /* 0 */ list_dict *reset_list_dict(list_dict *d) { int k; ok_ptr(d); if (d->nxs) { ok_ptr(d->ndx); } if (d->nts) { ok_ptr(d->text); } if (d->nhs) { ok_ptr(d->ht); } d->nsx = 0; d->ntc = 0; /* k = sizeof(short)*d->nhs; if (k) { size_ok(d->ht, k, __FILE__, __LINE__); } memset(d->ht, 0xff, k); */ k = sizeof(unsigned)*d->nhs; if (k) { size_ok(d->ht, k, __FILE__, __LINE__); } memset(d->ht, 0xff, k); return NULL; } tuple_dict *reset_tuple_dict(tuple_dict *d) { unsigned k; ok_ptr(d); if (d->nxs) { ok_ptr(d->ndx); } if (d->nts) { ok_ptr(d->text); } if (d->nhs) { ok_ptr(d->ht); } d->nsx = 0; d->ntc = 0; /* k = sizeof(short)*d->nhs; if (k) { size_ok(d->ht, k, __FILE__, __LINE__); } memset(d->ht, 0xff, k); */ k = sizeof(unsigned)*d->nhs; if (k) { size_ok(d->ht, k, __FILE__, __LINE__); } memset(d->ht, 0xff, k); return NULL; } static void hash_string_dict(string_dict *d){ unsigned i,x; const char *s; //memset(d->ht, 0xff, sizeof(short)*d->nhs); memset(d->ht, 0xff, sizeof(unsigned)*d->nhs); for (i = 0; i < d->nsx; i++) { s = &d->text[d->ndx[i]]; x = find_str_entry(s,d); //d->ht[x] = (unsigned short) i; d->ht[x] = i; } } static void hash_list_dict(list_dict *d){ unsigned i,x; const int *s; //memset(d->ht, 0xff, sizeof(short)*d->nhs); memset(d->ht, 0xff, sizeof(unsigned)*d->nhs); for (i = 0; i < d->nsx; i++) { s = &d->text[d->ndx[i]]; x = find_lst_entry(s,d); //d->ht[x] = (unsigned short) i; d->ht[x] = i; } } static void hash_tpl_dict(tuple_dict *d){ unsigned i,x; const int *s; int n; //memset(d->ht, 0xff, sizeof(short)*d->nhs); memset(d->ht, 0xff, sizeof(unsigned)*d->nhs); s = d->text; n = d->tw; for (i = 0; i < d->nsx; i++) { x = find_tpl_entry(s,d); //d->ht[x] = (unsigned short) i; d->ht[x] = i; s += n; } } list_dict *delete_list_dict(list_dict *da) { list_dict d; if (da == NULL) { return NULL; } d = *da; DEALLOCATE(da); if (d.ndx != NULL) DEALLOCATE(d.ndx); if (d.text != NULL) DEALLOCATE(d.text); if (d.ht != NULL) DEALLOCATE(d.ht); if (d.st != NULL) DEALLOCATE(d.st); if (d.pt != NULL) DEALLOCATE(d.pt); return NULL; } tuple_dict *delete_tuple_dict(tuple_dict *da) { tuple_dict d; if (da == NULL) { return NULL; } d = *da; DEALLOCATE(da); if (d.ndx != NULL) DEALLOCATE(d.ndx); if (d.text != NULL) DEALLOCATE(d.text); if (d.ht != NULL) DEALLOCATE(d.ht); if (d.st != NULL) DEALLOCATE(d.st); if (d.pt != NULL) DEALLOCATE(d.pt); return NULL; } void empty_tuple_dict(tuple_dict *da) { tuple_dict d; if (da == NULL) { return; } d = *da; if (d.ndx != NULL) DEALLOCATE(d.ndx); if (d.text != NULL) DEALLOCATE(d.text); if (d.ht != NULL) DEALLOCATE(d.ht); if (d.st != NULL) DEALLOCATE(d.st); if (d.pt != NULL) DEALLOCATE(d.pt); memset(&d, 0, sizeof(d)); d.tw = da->tw; *da = d; } string_dict *delete_string_dict(string_dict *da) { string_dict d; if (da == NULL) { return NULL; } d = *da; DEALLOCATE(da); if (d.ndx != NULL) DEALLOCATE(d.ndx); if (d.text != NULL) DEALLOCATE(d.text); if (d.ht != NULL) DEALLOCATE(d.ht); if (d.st != NULL) DEALLOCATE(d.st); if (d.pt != NULL) DEALLOCATE(d.pt); return NULL; } int identify_string(const char *s, string_dict *d) { unsigned he, x; check_stack; he = find_str_entry(s, d); if ((x = d->ht[he]) >= d->nsx) { x = 0; } return x; } int add_string_dict(const char *s,string_dict *d){ int len; int he; unsigned x; long nhs, nsx; check_stack; len = strlen(s)+3; if (d->ntc+len > d->nts) { unsigned long k = d->ntc+len; k += k/2; k = max(200UL, min((unsigned long) MAX_BYTES,k)); LOGSECTION("add_string_dict::resize"); LOGV(k) LCV(d->ntc + len); assert(k >= d->ntc + len); REALLOCATE_ST(d->text, (unsigned) k); d->nts = (unsigned) k; } if (d->nsx +1 > d->nxs) { unsigned long k = d->nsx+1; k += k/2; k = max(100UL, min((unsigned long) MAX_INTS, k)); LOGSECTION("add_string_dict::resize"); LOGV(k) LCV(d->nsx + 1); assert(k >= d->nsx + 1); REALLOCATE_ST(d->ndx, (unsigned) k); d->nxs = (unsigned) k; } if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { nhs = max(nhs, 256L); while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { nhs *= 2; } LOGSECTION("add_string_dict::resize"); LOGV(nhs) LCV(d->nsx); assert((unsigned) nhs > d->nsx); REALLOCATE_ST(d->ht,(unsigned) nhs); d->nhs = (unsigned) nhs; hash_string_dict(d); } s = strcpy(d->text+d->ntc+2,s); he = find_str_entry(s, d); if ((x = d->ht[he]) >= d->nsx) { //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx; unsigned short length = d->nsx; //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short)); memcpy(d->text+d->ntc, &length, sizeof(unsigned short)); d->ntc += 2; x = d->nsx++; //d->ndx[d->ht[he] = (unsigned short) x] = d->ntc; d->ndx[d->ht[he] = x] = d->ntc; d->ntc += EVEN_UP(len-2); } return x; } /* void close_list_dict(list_dict *d){ unsigned len; if (d == NULL) return; len = 2*(d->ntc/d->nsx) + d->ntc; if (len < d->nts) { REALLOCATE_ST(d->text,len); d->nts = len; } if (d->nsx + 1< d->nxs) { unsigned k = d->nsx + 1; REALLOCATE_ST(d->ndx,k); d->nxs = k; } } */ int list_in_dict(int *s, int n, list_dict *d){ int k = d->nsx; check_stack; return k != add_list_dict(s, n, d); } int add_list_dict(const int *s,int n,list_dict *d){ LOGSECTION("add_list_dict"); unsigned len; int he; unsigned x; long nhs, nsx; int *sc; check_stack; len = n+3; if (d->ntc+len > d->nts) { unsigned long k = d->ntc + len; k += k/2; k = min((unsigned long)MAX_INTS, max(200UL, k)); LOGSECTION("add_list_dict::resize"); LOGV(k) LCV(d->ntc + len); assert(k >= d->ntc + len); REALLOCATE_ST(d->text,(unsigned) k); d->nts = (unsigned) k; } if (d->nsx + 1 > d->nxs) { unsigned long k = d->nsx + 1; k += k/2; k = min((unsigned long) MAX_INTS, max(200UL,k)); LOGSECTION("add_list_dict::resize"); LOGV(k) LCV(d->nsx + 1); assert(k >= d->nsx+1); REALLOCATE_ST(d->ndx, (unsigned) k); d->nxs = (unsigned) k; } if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { nhs = max(nhs,256L); while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { nhs *= 2; } LOGSECTION("add_list_dict::resize"); LOGV(nhs) LCV(d->nsx); assert((unsigned)nhs > d->nsx); REALLOCATE_ST(d->ht, (unsigned) nhs); d->nhs = (unsigned) nhs; hash_list_dict(d); } sc = (int *) memcpy(d->text+d->ntc+2, s, n*sizeof(int)); *--sc = n+1; he = find_lst_entry(sc,d); if ((x = d->ht[he]) >= d->nsx) { //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx; //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short)); //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned)); d->text[d->ntc] = d->nsx; //d->ndx[d->ht[he] = (unsigned short) (x = d->nsx++)] = ++d->ntc; d->ndx[d->ht[he] = (x = d->nsx++)] = ++d->ntc; d->ntc += n+1; } return x; } void extract_tuple_dict(tuple_dict *d, unsigned k, ...) { va_list ap; const int *p; unsigned n; assert(k < d->nsx); n = d->tw; p = d->text + n*k; va_start(ap,k); while (n--) { *va_arg(ap,int*) = *p++; } va_end(ap); } int insert_tuple_dict(tuple_dict *d,...){ int he; va_list s; unsigned n; long nsx,nhs; //int *tpl; unsigned i; check_stack; va_start(s, d); n = d->tw; //tpl = local_array(n,int); LocalArray<int> tpl(n); for (i = 0; i < n; i++) tpl[i] = va_arg(s,int); va_end(s); if (d->ntc+n > d->nts) { unsigned long k = d->ntc + n; k += k/2; k = min((unsigned long) MAX_INTS, max(200UL,k)); LOGSECTION("insert_tuple_dict::resize"); LOGV(k) LCV(d->ntc + n); assert(k >= d->ntc + n); REALLOCATE_ST(d->text, (unsigned) k); d->nts = (unsigned) k; } if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { nhs = max(nhs, 256L); while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { nhs *= 2; } LOGSECTION("insert_tuple_dict::resize"); LOGV(nhs) LCV(d->nsx); assert((unsigned)nhs > d->nsx); REALLOCATE_ST(d->ht, (unsigned) nhs); d->nhs = (unsigned) nhs; hash_tpl_dict(d); } he = find_tpl_entry(tpl, d); if ((unsigned)(d->ht[he]) >= d->nsx) { //d->ht[he] = (unsigned short) d->nsx++; d->ht[he] = d->nsx++; memcpy(d->text+d->ntc, tpl, n*sizeof(int)); d->ntc += n; return 0; } return 1; } int add_tuple_dict_new(tuple_dict *d, ...) { int he; unsigned x; va_list s; unsigned n; long nsx, nhs; //int *tpl; unsigned i; check_stack; va_start(s, d); n = d->tw; //tpl = local_array(n, int); LocalArray<int> tpl(n); for (i = 0; i < n; i++) tpl[i] = va_arg(s, int); va_end(s); if (d->ntc+n > d->nts) { unsigned long k = d->ntc + n; k += k/2; k = min((unsigned long) MAX_INTS, max(200UL,k)); LOGSECTION("add_tuple_dict_new::resize"); LOGV(k) LCV(d->ntc + n); assert(k >= d->ntc + n); REALLOCATE_ST(d->text, (unsigned)k); d->nts = (unsigned) k; } if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) { nhs = max(nhs, 256L); while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) { nhs *= 2; } LOGSECTION("add_tuple_dict_new::resize"); LOGV(nhs) LCV(d->nsx); assert((unsigned) nhs > d->nsx); REALLOCATE_ST(d->ht, (unsigned) nhs); d->nhs = (unsigned) nhs; hash_tpl_dict(d); } he = find_tpl_entry(tpl, d); if ((x = d->ht[he]) >= d->nsx) { //d->ht[he] = (unsigned short) (x = d->nsx++); d->ht[he] = (x = d->nsx++); memcpy(d->text+d->ntc, tpl, n*sizeof(int)); d->ntc += n; } return x; } /* int identify_tuple(tuple_dict *d, ...) { int he; unsigned x; va_list s; int n = d->tw; int *tpl, i; check_stack; va_start(s,d); tpl = local_array(n, int); for (i = 0; i < n; i++) { tpl[i] = va_arg(s, int); } va_end(s); he = find_tpl_entry(tpl,d); if ((x = d->ht[he]) >= d->nsx) { return 0; } return x; } */ /* * Use a variant of Fletcher's checksum * See DDJ, May, 1992, p32. */ static int nstr = 0, nstrx = 0; static int nlst = 0, nlstx = 0; static int ntpl = 0, ntplx = 0; static int find_str_entry(const char *s, string_dict *d) { unsigned x,m; int he,dx; const char *sp; sp = s; if (d->ignore_case) { unsigned char k1 = 0, k2 = 0, c; while ((c = *sp++) != 0) { c &= 0337; k1 += c; if (k1) k1 ^= (unsigned char) 0xff; k2 += k1; if (k2) k2 ^= (unsigned char) 0xff; } he = 255*k1 + k2; dx = 255*k2 + k1; dx += dx + 1; he += dx << 16; dx += he << 16; m = d->nhs - 1; while ((x = d->ht[he &= m]) < d->nsx && 0 != stricmp(s,&d->text[d->ndx[x]])) { he += dx; } return he; } { unsigned char k1 = 0, k2 = 0, c; while((c = *sp++) != 0) { k1 += c; if (k1) k1 ^= (unsigned char) 0xff; k2 += k1; if (k2) k2 ^= (unsigned char) 0xff; } he = 255*k1 + k2; dx = 255*k2 + k1; } dx += dx + 1; he += dx << 16; dx += he << 16; m = d->nhs - 1; nstr++; while ((x = d->ht[he &= m]) < d->nsx && 0 != strcmp(s,&d->text[d->ndx[x]])) { he += dx; nstrx++; } return he; } static int find_lst_entry(const int *s, list_dict *d){ unsigned x,m; int he,dx; int n; const char *sp; unsigned char k1 = 0, k2 = 0; //sp = (const char *) s; n = m = *s; m *= sizeof(int); sp = (const char *) s; while (m--) { k1 += *sp++; if (k1) k1 ^= (unsigned char) 0xff; k2 += k1; if (k2) k2 ^= (unsigned char) 0xff; } he = 255*k1 + k2; dx = 255*k2 + k1; dx += dx + 1; he += dx << 16; dx += he << 16; m = d->nhs-1; n *= sizeof(int); nlst++; while ((x = d->ht[he &= m]) < d->nsx && 0 != memcmp(s,&d->text[d->ndx[x]], n)) { he += dx; nlstx++; } return he; } static int find_tpl_entry(const int *s, tuple_dict *d){ unsigned x,m; int he,dx; int n,nb; unsigned nsx; //unsigned short *ht; unsigned *ht; const int *text; const char *sp; unsigned char k1 = 0, k2 = 0; n = d->tw; sp = (const char *) s; m = n*sizeof(int); while (m--) { k1 += *sp++; if (k1) k1 ^= (unsigned char) 0xff; k2 += k1; if (k2) k2 ^= (unsigned char) 0xff; } he = 255*k1 + k2; dx = 255*k2 + k1; dx += dx + 1; he += dx << 16; dx += he << 16; nb = n*sizeof(int); m = d->nhs-1; ht = d->ht; nsx = d->nsx; text = d->text; ntpl++; while ((x = ht[he &= m]) < nsx && 0 != memcmp(s,&text[n*x],nb)) { he += dx; ntplx++; } return he; } #if 0 /* not used */ string_dict *restore_string_dict(char *text, int ntc, int ne, int ic, int byte_swap_required) { string_dict *d; int i, k; unsigned *ndx; ZALLOCATE_ST(d, 1); d->text = text; d->ntc = ntc; d->nts = (ntc+1) & ~1; d->ignore_case = ic; d->nxs = d-> nsx = ne; ndx = (unsigned *) ALLOCATE_ST(d->ndx, ne); for (i = k = 0; i < ne; i++) { //unsigned short x = *((unsigned short *)(d->text + k)); unsigned short x; memcpy(&x,d->text + k, sizeof(unsigned short)); if (byte_swap_required) { x = (unsigned short) ((x << 8) | (unsigned char) (x >> 8)); } k += 2; ndx[x] = k; k += 1 + strlen(d->text + k); } i = 1; while (8*i < 10*ne) i *= 2; ALLOCATE_ST(d->ht, i); d->nhs = i; hash_string_dict(d); return d; } #endif /* 0 */ string_dict *null_str_dict(void){ string_dict *d; ZALLOCATE_ST(d, 1); add_string_dict("", d); /* d->ignore_case = ignore_case; */ return d; } list_dict *null_list_dict(void){ list_dict *d; ZALLOCATE_ST(d, 1); return d; } tuple_dict *null_tuple_dict(int tw){ tuple_dict *d; ZALLOCATE_ST(d, 1); d->tw = tw; return d; }