comparison anagram/agcore/dict.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 /*
2 * AnaGram, A System for Syntax Directed Programming
3 * Copyright 1993-2000 Parsifal Software. All Rights Reserved.
4 * See the file COPYING for license and usage terms.
5 *
6 * dict.cpp - old dictionary module
7 */
8
9 #include <stdarg.h>
10 #include "port.h"
11
12 #include "arrays.h"
13 #include "assert.h"
14 #include "dict.h"
15 #include "minmax.h"
16 #include "myalloc.h"
17 #include "stacks.h"
18
19 //#define INCLUDE_LOGGING
20 #include "log.h"
21
22
23 #define HASH_TABLE_MAX 0x1000000
24
25 #define EVEN_UP(x) (x) /* + ((x)%2) */
26
27 static int find_str_entry(const char *s, string_dict *d);
28 static int find_lst_entry(const int *s, list_dict *d);
29 static int find_tpl_entry(const int *s, tuple_dict *d);
30
31
32 #if 0 /* unused */
33 void sort_string_dict(string_dict *d){
34 unsigned i;
35 //unsigned short k;
36 unsigned k;
37
38 if (d->nss >= d->nsx) return;
39 REALLOCATE_ST(d->st,d->nsx);
40 {
41 unsigned *dest = d->st + d->nss;
42 unsigned *src = d->ndx + d->nss;
43 //k = (unsigned short) (d->nsx - d->nss);
44 k = (d->nsx - d->nss);
45 while (k--) dest[k] = src[k];
46 }
47
48 /* 10/1/06 note: these two funcs are/were defined in merge.cpp */
49 sort_strings(d->text,d->st+d->nss,d->nsx-d->nss);
50 merge_strings(d->text,d->st, d->nss, d->nsx-d->nss);
51 REALLOCATE_ST(d->pt,d->nsx);
52 d->nss = d->nsx;
53 for (i = 0; i< d->nss; i++) {
54 k = d->st[i]-2;
55 //k = *((unsigned short *)(d->text+k));
56 memcpy(&k, d->text+k, sizeof(unsigned short));
57 d->pt[k] = i;
58 }
59 }
60 #endif /* 0 */
61
62 list_dict *reset_list_dict(list_dict *d) {
63 int k;
64 ok_ptr(d);
65 if (d->nxs) {
66 ok_ptr(d->ndx);
67 }
68 if (d->nts) {
69 ok_ptr(d->text);
70 }
71 if (d->nhs) {
72 ok_ptr(d->ht);
73 }
74 d->nsx = 0;
75 d->ntc = 0;
76 /*
77 k = sizeof(short)*d->nhs;
78 if (k) {
79 size_ok(d->ht, k, __FILE__, __LINE__);
80 }
81 memset(d->ht, 0xff, k);
82 */
83 k = sizeof(unsigned)*d->nhs;
84 if (k) {
85 size_ok(d->ht, k, __FILE__, __LINE__);
86 }
87 memset(d->ht, 0xff, k);
88 return NULL;
89 }
90
91 tuple_dict *reset_tuple_dict(tuple_dict *d) {
92 unsigned k;
93 ok_ptr(d);
94 if (d->nxs) {
95 ok_ptr(d->ndx);
96 }
97 if (d->nts) {
98 ok_ptr(d->text);
99 }
100 if (d->nhs) {
101 ok_ptr(d->ht);
102 }
103 d->nsx = 0;
104 d->ntc = 0;
105 /*
106 k = sizeof(short)*d->nhs;
107 if (k) {
108 size_ok(d->ht, k, __FILE__, __LINE__);
109 }
110 memset(d->ht, 0xff, k);
111 */
112 k = sizeof(unsigned)*d->nhs;
113 if (k) {
114 size_ok(d->ht, k, __FILE__, __LINE__);
115 }
116 memset(d->ht, 0xff, k);
117 return NULL;
118 }
119
120 static void hash_string_dict(string_dict *d){
121 unsigned i,x;
122 const char *s;
123
124 //memset(d->ht, 0xff, sizeof(short)*d->nhs);
125 memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
126 for (i = 0; i < d->nsx; i++) {
127 s = &d->text[d->ndx[i]];
128 x = find_str_entry(s,d);
129 //d->ht[x] = (unsigned short) i;
130 d->ht[x] = i;
131 }
132 }
133
134 static void hash_list_dict(list_dict *d){
135 unsigned i,x;
136 const int *s;
137
138 //memset(d->ht, 0xff, sizeof(short)*d->nhs);
139 memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
140 for (i = 0; i < d->nsx; i++) {
141 s = &d->text[d->ndx[i]];
142 x = find_lst_entry(s,d);
143 //d->ht[x] = (unsigned short) i;
144 d->ht[x] = i;
145 }
146 }
147
148 static void hash_tpl_dict(tuple_dict *d){
149 unsigned i,x;
150 const int *s;
151 int n;
152
153 //memset(d->ht, 0xff, sizeof(short)*d->nhs);
154 memset(d->ht, 0xff, sizeof(unsigned)*d->nhs);
155 s = d->text;
156 n = d->tw;
157 for (i = 0; i < d->nsx; i++) {
158 x = find_tpl_entry(s,d);
159 //d->ht[x] = (unsigned short) i;
160 d->ht[x] = i;
161 s += n;
162 }
163 }
164
165 list_dict *delete_list_dict(list_dict *da) {
166 list_dict d;
167
168 if (da == NULL) {
169 return NULL;
170 }
171 d = *da;
172 DEALLOCATE(da);
173 if (d.ndx != NULL) DEALLOCATE(d.ndx);
174 if (d.text != NULL) DEALLOCATE(d.text);
175 if (d.ht != NULL) DEALLOCATE(d.ht);
176 if (d.st != NULL) DEALLOCATE(d.st);
177 if (d.pt != NULL) DEALLOCATE(d.pt);
178 return NULL;
179 }
180
181 tuple_dict *delete_tuple_dict(tuple_dict *da) {
182 tuple_dict d;
183
184 if (da == NULL) {
185 return NULL;
186 }
187 d = *da;
188 DEALLOCATE(da);
189 if (d.ndx != NULL) DEALLOCATE(d.ndx);
190 if (d.text != NULL) DEALLOCATE(d.text);
191 if (d.ht != NULL) DEALLOCATE(d.ht);
192 if (d.st != NULL) DEALLOCATE(d.st);
193 if (d.pt != NULL) DEALLOCATE(d.pt);
194 return NULL;
195 }
196
197 void empty_tuple_dict(tuple_dict *da) {
198 tuple_dict d;
199
200 if (da == NULL) {
201 return;
202 }
203 d = *da;
204 if (d.ndx != NULL) DEALLOCATE(d.ndx);
205 if (d.text != NULL) DEALLOCATE(d.text);
206 if (d.ht != NULL) DEALLOCATE(d.ht);
207 if (d.st != NULL) DEALLOCATE(d.st);
208 if (d.pt != NULL) DEALLOCATE(d.pt);
209 memset(&d, 0, sizeof(d));
210 d.tw = da->tw;
211 *da = d;
212 }
213
214 string_dict *delete_string_dict(string_dict *da) {
215 string_dict d;
216
217 if (da == NULL) {
218 return NULL;
219 }
220 d = *da;
221 DEALLOCATE(da);
222 if (d.ndx != NULL) DEALLOCATE(d.ndx);
223 if (d.text != NULL) DEALLOCATE(d.text);
224 if (d.ht != NULL) DEALLOCATE(d.ht);
225 if (d.st != NULL) DEALLOCATE(d.st);
226 if (d.pt != NULL) DEALLOCATE(d.pt);
227 return NULL;
228 }
229
230 int identify_string(const char *s, string_dict *d) {
231 unsigned he, x;
232
233 check_stack;
234 he = find_str_entry(s, d);
235 if ((x = d->ht[he]) >= d->nsx) {
236 x = 0;
237 }
238 return x;
239 }
240
241
242 int add_string_dict(const char *s,string_dict *d){
243 int len;
244 int he;
245 unsigned x;
246 long nhs, nsx;
247
248 check_stack;
249 len = strlen(s)+3;
250 if (d->ntc+len > d->nts) {
251 unsigned long k = d->ntc+len;
252 k += k/2;
253 k = max(200UL, min((unsigned long) MAX_BYTES,k));
254 LOGSECTION("add_string_dict::resize");
255 LOGV(k) LCV(d->ntc + len);
256 assert(k >= d->ntc + len);
257 REALLOCATE_ST(d->text, (unsigned) k);
258 d->nts = (unsigned) k;
259 }
260 if (d->nsx +1 > d->nxs) {
261 unsigned long k = d->nsx+1;
262 k += k/2;
263 k = max(100UL, min((unsigned long) MAX_INTS, k));
264 LOGSECTION("add_string_dict::resize");
265 LOGV(k) LCV(d->nsx + 1);
266 assert(k >= d->nsx + 1);
267 REALLOCATE_ST(d->ndx, (unsigned) k);
268 d->nxs = (unsigned) k;
269 }
270 if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
271 nhs = max(nhs, 256L);
272 while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
273 nhs *= 2;
274 }
275 LOGSECTION("add_string_dict::resize");
276 LOGV(nhs) LCV(d->nsx);
277 assert((unsigned) nhs > d->nsx);
278 REALLOCATE_ST(d->ht,(unsigned) nhs);
279 d->nhs = (unsigned) nhs;
280 hash_string_dict(d);
281 }
282 s = strcpy(d->text+d->ntc+2,s);
283 he = find_str_entry(s, d);
284 if ((x = d->ht[he]) >= d->nsx) {
285 //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx;
286 unsigned short length = d->nsx;
287 //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short));
288 memcpy(d->text+d->ntc, &length, sizeof(unsigned short));
289 d->ntc += 2;
290 x = d->nsx++;
291 //d->ndx[d->ht[he] = (unsigned short) x] = d->ntc;
292 d->ndx[d->ht[he] = x] = d->ntc;
293 d->ntc += EVEN_UP(len-2);
294 }
295 return x;
296 }
297 /*
298 void close_list_dict(list_dict *d){
299 unsigned len;
300
301 if (d == NULL) return;
302 len = 2*(d->ntc/d->nsx) + d->ntc;
303 if (len < d->nts) {
304 REALLOCATE_ST(d->text,len);
305 d->nts = len;
306 }
307 if (d->nsx + 1< d->nxs) {
308 unsigned k = d->nsx + 1;
309 REALLOCATE_ST(d->ndx,k);
310 d->nxs = k;
311 }
312 }
313 */
314 int list_in_dict(int *s, int n, list_dict *d){
315 int k = d->nsx;
316 check_stack;
317 return k != add_list_dict(s, n, d);
318 }
319
320 int add_list_dict(const int *s,int n,list_dict *d){
321 LOGSECTION("add_list_dict");
322 unsigned len;
323 int he;
324 unsigned x;
325 long nhs, nsx;
326 int *sc;
327
328 check_stack;
329 len = n+3;
330 if (d->ntc+len > d->nts) {
331 unsigned long k = d->ntc + len;
332 k += k/2;
333 k = min((unsigned long)MAX_INTS, max(200UL, k));
334 LOGSECTION("add_list_dict::resize");
335 LOGV(k) LCV(d->ntc + len);
336 assert(k >= d->ntc + len);
337 REALLOCATE_ST(d->text,(unsigned) k);
338 d->nts = (unsigned) k;
339 }
340 if (d->nsx + 1 > d->nxs) {
341 unsigned long k = d->nsx + 1;
342 k += k/2;
343 k = min((unsigned long) MAX_INTS, max(200UL,k));
344 LOGSECTION("add_list_dict::resize");
345 LOGV(k) LCV(d->nsx + 1);
346 assert(k >= d->nsx+1);
347 REALLOCATE_ST(d->ndx, (unsigned) k);
348 d->nxs = (unsigned) k;
349 }
350 if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
351 nhs = max(nhs,256L);
352 while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
353 nhs *= 2;
354 }
355 LOGSECTION("add_list_dict::resize");
356 LOGV(nhs) LCV(d->nsx);
357 assert((unsigned)nhs > d->nsx);
358 REALLOCATE_ST(d->ht, (unsigned) nhs);
359 d->nhs = (unsigned) nhs;
360 hash_list_dict(d);
361 }
362 sc = (int *) memcpy(d->text+d->ntc+2, s, n*sizeof(int));
363 *--sc = n+1;
364 he = find_lst_entry(sc,d);
365 if ((x = d->ht[he]) >= d->nsx) {
366 //*((unsigned short *)(d->text+d->ntc)) = (unsigned short) d->nsx;
367 //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned short));
368 //memcpy(d->text+d->ntc, &d->nsx, sizeof(unsigned));
369 d->text[d->ntc] = d->nsx;
370 //d->ndx[d->ht[he] = (unsigned short) (x = d->nsx++)] = ++d->ntc;
371 d->ndx[d->ht[he] = (x = d->nsx++)] = ++d->ntc;
372 d->ntc += n+1;
373 }
374 return x;
375 }
376
377 void extract_tuple_dict(tuple_dict *d, unsigned k, ...) {
378 va_list ap;
379 const int *p;
380 unsigned n;
381
382 assert(k < d->nsx);
383 n = d->tw;
384 p = d->text + n*k;
385 va_start(ap,k);
386 while (n--) {
387 *va_arg(ap,int*) = *p++;
388 }
389 va_end(ap);
390 }
391
392 int insert_tuple_dict(tuple_dict *d,...){
393 int he;
394 va_list s;
395 unsigned n;
396 long nsx,nhs;
397 //int *tpl;
398 unsigned i;
399
400 check_stack;
401 va_start(s, d);
402 n = d->tw;
403 //tpl = local_array(n,int);
404 LocalArray<int> tpl(n);
405 for (i = 0; i < n; i++) tpl[i] = va_arg(s,int);
406 va_end(s);
407 if (d->ntc+n > d->nts) {
408 unsigned long k = d->ntc + n;
409 k += k/2;
410 k = min((unsigned long) MAX_INTS, max(200UL,k));
411 LOGSECTION("insert_tuple_dict::resize");
412 LOGV(k) LCV(d->ntc + n);
413 assert(k >= d->ntc + n);
414 REALLOCATE_ST(d->text, (unsigned) k);
415 d->nts = (unsigned) k;
416 }
417 if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
418 nhs = max(nhs, 256L);
419 while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
420 nhs *= 2;
421 }
422 LOGSECTION("insert_tuple_dict::resize");
423 LOGV(nhs) LCV(d->nsx);
424 assert((unsigned)nhs > d->nsx);
425 REALLOCATE_ST(d->ht, (unsigned) nhs);
426 d->nhs = (unsigned) nhs;
427 hash_tpl_dict(d);
428 }
429 he = find_tpl_entry(tpl, d);
430 if ((unsigned)(d->ht[he]) >= d->nsx) {
431 //d->ht[he] = (unsigned short) d->nsx++;
432 d->ht[he] = d->nsx++;
433 memcpy(d->text+d->ntc, tpl, n*sizeof(int));
434 d->ntc += n;
435 return 0;
436 }
437 return 1;
438 }
439
440
441 int add_tuple_dict_new(tuple_dict *d, ...) {
442 int he;
443 unsigned x;
444 va_list s;
445 unsigned n;
446 long nsx, nhs;
447 //int *tpl;
448 unsigned i;
449
450 check_stack;
451 va_start(s, d);
452 n = d->tw;
453 //tpl = local_array(n, int);
454 LocalArray<int> tpl(n);
455 for (i = 0; i < n; i++) tpl[i] = va_arg(s, int);
456 va_end(s);
457 if (d->ntc+n > d->nts) {
458 unsigned long k = d->ntc + n;
459
460 k += k/2;
461 k = min((unsigned long) MAX_INTS, max(200UL,k));
462 LOGSECTION("add_tuple_dict_new::resize");
463 LOGV(k) LCV(d->ntc + n);
464 assert(k >= d->ntc + n);
465 REALLOCATE_ST(d->text, (unsigned)k);
466 d->nts = (unsigned) k;
467 }
468 if (10*(nsx = d->nsx) >= 8*(nhs = d->nhs) && nhs < HASH_TABLE_MAX) {
469 nhs = max(nhs, 256L);
470 while (10*nsx > 8*nhs && nhs < HASH_TABLE_MAX) {
471 nhs *= 2;
472 }
473 LOGSECTION("add_tuple_dict_new::resize");
474 LOGV(nhs) LCV(d->nsx);
475 assert((unsigned) nhs > d->nsx);
476 REALLOCATE_ST(d->ht, (unsigned) nhs);
477 d->nhs = (unsigned) nhs;
478 hash_tpl_dict(d);
479 }
480 he = find_tpl_entry(tpl, d);
481 if ((x = d->ht[he]) >= d->nsx) {
482 //d->ht[he] = (unsigned short) (x = d->nsx++);
483 d->ht[he] = (x = d->nsx++);
484 memcpy(d->text+d->ntc, tpl, n*sizeof(int));
485 d->ntc += n;
486 }
487 return x;
488 }
489
490 /*
491 int identify_tuple(tuple_dict *d, ...) {
492 int he;
493 unsigned x;
494 va_list s;
495 int n = d->tw;
496 int *tpl, i;
497
498 check_stack;
499 va_start(s,d);
500 tpl = local_array(n, int);
501 for (i = 0; i < n; i++) {
502 tpl[i] = va_arg(s, int);
503 }
504 va_end(s);
505 he = find_tpl_entry(tpl,d);
506 if ((x = d->ht[he]) >= d->nsx) {
507 return 0;
508 }
509 return x;
510 }
511 */
512
513 /*
514 * Use a variant of Fletcher's checksum
515 * See DDJ, May, 1992, p32.
516 */
517
518 static int nstr = 0, nstrx = 0;
519 static int nlst = 0, nlstx = 0;
520 static int ntpl = 0, ntplx = 0;
521
522 static int find_str_entry(const char *s, string_dict *d) {
523 unsigned x,m;
524 int he,dx;
525 const char *sp;
526
527 sp = s;
528 if (d->ignore_case) {
529 unsigned char k1 = 0, k2 = 0, c;
530 while ((c = *sp++) != 0) {
531 c &= 0337;
532 k1 += c;
533 if (k1) k1 ^= (unsigned char) 0xff;
534 k2 += k1;
535 if (k2) k2 ^= (unsigned char) 0xff;
536 }
537 he = 255*k1 + k2;
538 dx = 255*k2 + k1;
539 dx += dx + 1;
540
541 he += dx << 16;
542 dx += he << 16;
543
544 m = d->nhs - 1;
545 while ((x = d->ht[he &= m]) < d->nsx
546 && 0 != stricmp(s,&d->text[d->ndx[x]])) {
547 he += dx;
548 }
549 return he;
550 }
551
552
553 {
554 unsigned char k1 = 0, k2 = 0, c;
555 while((c = *sp++) != 0) {
556 k1 += c;
557 if (k1) k1 ^= (unsigned char) 0xff;
558 k2 += k1;
559 if (k2) k2 ^= (unsigned char) 0xff;
560 }
561 he = 255*k1 + k2;
562 dx = 255*k2 + k1;
563 }
564 dx += dx + 1;
565
566 he += dx << 16;
567 dx += he << 16;
568
569 m = d->nhs - 1;
570 nstr++;
571 while ((x = d->ht[he &= m]) < d->nsx && 0 != strcmp(s,&d->text[d->ndx[x]])) {
572 he += dx;
573 nstrx++;
574 }
575 return he;
576 }
577
578 static int find_lst_entry(const int *s, list_dict *d){
579 unsigned x,m;
580 int he,dx;
581 int n;
582 const char *sp;
583 unsigned char k1 = 0, k2 = 0;
584
585 //sp = (const char *) s;
586 n = m = *s;
587 m *= sizeof(int);
588 sp = (const char *) s;
589 while (m--) {
590 k1 += *sp++;
591 if (k1) k1 ^= (unsigned char) 0xff;
592 k2 += k1;
593 if (k2) k2 ^= (unsigned char) 0xff;
594 }
595 he = 255*k1 + k2;
596 dx = 255*k2 + k1;
597 dx += dx + 1;
598
599 he += dx << 16;
600 dx += he << 16;
601
602 m = d->nhs-1;
603 n *= sizeof(int);
604 nlst++;
605 while ((x = d->ht[he &= m]) < d->nsx &&
606 0 != memcmp(s,&d->text[d->ndx[x]], n)) {
607 he += dx;
608 nlstx++;
609 }
610 return he;
611 }
612
613 static int find_tpl_entry(const int *s, tuple_dict *d){
614 unsigned x,m;
615 int he,dx;
616 int n,nb;
617 unsigned nsx;
618 //unsigned short *ht;
619 unsigned *ht;
620 const int *text;
621 const char *sp;
622 unsigned char k1 = 0, k2 = 0;
623
624 n = d->tw;
625 sp = (const char *) s;
626 m = n*sizeof(int);
627 while (m--) {
628 k1 += *sp++;
629 if (k1) k1 ^= (unsigned char) 0xff;
630 k2 += k1;
631 if (k2) k2 ^= (unsigned char) 0xff;
632 }
633 he = 255*k1 + k2;
634 dx = 255*k2 + k1;
635 dx += dx + 1;
636
637 he += dx << 16;
638 dx += he << 16;
639
640 nb = n*sizeof(int);
641 m = d->nhs-1;
642 ht = d->ht;
643 nsx = d->nsx;
644 text = d->text;
645 ntpl++;
646 while ((x = ht[he &= m]) < nsx &&
647 0 != memcmp(s,&text[n*x],nb)) {
648 he += dx;
649 ntplx++;
650 }
651 return he;
652 }
653
654 #if 0 /* not used */
655 string_dict *restore_string_dict(char *text, int ntc, int ne, int ic,
656 int byte_swap_required) {
657 string_dict *d;
658 int i, k;
659 unsigned *ndx;
660
661 ZALLOCATE_ST(d, 1);
662 d->text = text;
663 d->ntc = ntc;
664 d->nts = (ntc+1) & ~1;
665 d->ignore_case = ic;
666 d->nxs = d-> nsx = ne;
667 ndx = (unsigned *) ALLOCATE_ST(d->ndx, ne);
668 for (i = k = 0; i < ne; i++) {
669 //unsigned short x = *((unsigned short *)(d->text + k));
670 unsigned short x;
671 memcpy(&x,d->text + k, sizeof(unsigned short));
672 if (byte_swap_required) {
673 x = (unsigned short) ((x << 8) | (unsigned char) (x >> 8));
674 }
675 k += 2;
676 ndx[x] = k;
677 k += 1 + strlen(d->text + k);
678 }
679 i = 1;
680 while (8*i < 10*ne) i *= 2;
681 ALLOCATE_ST(d->ht, i);
682 d->nhs = i;
683 hash_string_dict(d);
684 return d;
685 }
686 #endif /* 0 */
687
688 string_dict *null_str_dict(void){
689 string_dict *d;
690
691 ZALLOCATE_ST(d, 1);
692 add_string_dict("", d);
693 /* d->ignore_case = ignore_case; */
694 return d;
695 }
696
697 list_dict *null_list_dict(void){
698 list_dict *d;
699
700 ZALLOCATE_ST(d, 1);
701 return d;
702 }
703
704 tuple_dict *null_tuple_dict(int tw){
705 tuple_dict *d;
706
707 ZALLOCATE_ST(d, 1);
708 d->tw = tw;
709 return d;
710 }