annotate oldclasslib/source/strdict.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children 607e3be6bad8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
1 /*
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
2 * AnaGram, a System for Syntax Directed Parsing
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
3 * String Dictionary Class Definition
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
4 *
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
5 * Copyright 1993 Parsifal Software. All Rights Reserved.
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
6 *
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
7 * This software is provided 'as-is', without any express or implied
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
8 * warranty. In no event will the authors be held liable for any damages
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
9 * arising from the use of this software.
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
10 *
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
11 * Permission is granted to anyone to use this software for any purpose,
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
12 * including commercial applications, and to alter it and redistribute it
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
13 * freely, subject to the following restrictions:
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
14 *
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
15 * 1. The origin of this software must not be misrepresented; you must not
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
16 * claim that you wrote the original software. If you use this software
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
17 * in a product, an acknowledgment in the product documentation would be
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
18 * appreciated but is not required.
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
19 * 2. Altered source versions must be plainly marked as such, and must not be
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
20 * misrepresented as being the original software.
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
21 * 3. This notice may not be removed or altered from any source distribution.
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
22 */
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
23
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
24 #include "strdict.h"
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
25 #include <assert.h>
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
26 #include <string.h>
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
27
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
28
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
29 // Constructor
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
30
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
31 string_dictionary::string_dictionary(unsigned size, unsigned wl) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
32 assert((size << 1) > size);
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
33 assert((long) size * wl < 65536L);
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
34
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
35 sxs = new unsigned[size]; // allocate index storage
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
36 csxmax = wl*size; // calculate char storage rqmt
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
37 cs = new char[csxmax]; // allocate character storage
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
38 csx = 1; // can't use byte zero
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
39 for (htl = 1; htl < size; htl <<= 1); // calculate size of hash table
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
40 htmask = htl - 1; // htl is a power of two
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
41 ht = new unsigned[htl]; // allocate storage for hash table
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
42 memset(ht, 0, htl*sizeof(unsigned)); // init to zeros
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
43 nsmax = size;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
44 ns = 1; // can't use zero
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
45 n_calls = n_collisions = 0; // clear statistics
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
46 copy_flag = 0; // live
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
47 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
48
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
49 string_dictionary::string_dictionary(string_dictionary &s) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
50 *this = s;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
51 copy_flag++; //memorex
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
52 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
53
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
54
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
55 // Reset Dictionary
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
56
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
57 string_dictionary &reset(string_dictionary &d) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
58 assert(d.copy_flag == 0);
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
59 d.csx = 1; // can't use byte zero
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
60 memset(d.ht, 0, d.htl*sizeof(unsigned)); // init to zeros
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
61 d.ns = 1; // can't use zero
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
62 d.n_calls = d.n_collisions = 0; // reset statistics
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
63 return d;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
64 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
65
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
66
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
67 // Destructor
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
68
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
69 string_dictionary::~string_dictionary() {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
70 if (copy_flag) return;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
71 delete [] sxs;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
72 delete [] cs;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
73 delete [] ht;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
74 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
75
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
76
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
77 // Hash Utility
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
78
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
79 unsigned string_dictionary::hash(const char *s) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
80 unsigned char k1 = 1, k2 = 1;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
81 unsigned char c;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
82 unsigned hx; // hash index
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
83 unsigned sx; // string index
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
84 int k = 0;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
85 while((c = s[k++]) != 0) { // accumulate hash code elements
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
86 if ((k1 += c) < c) k1++;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
87 if ((k2 += k1) < k1) k2++;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
88 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
89 k1--;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
90 k2--;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
91 hx = (255*k1 + k2) & htmask; // calculate hash code
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
92 sx = ht[hx];
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
93 n_calls++; // count number of calls
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
94 if (sx && strcmp(s, &cs[sxs[sx]])) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
95 unsigned dx = (2*(255*k2 + k1) + 1) & htmask;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
96 do { // no cycles, since dx and htl are relatively prime
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
97 n_collisions++; // count collisions
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
98 sx = ht[hx = htmask & (hx+dx)];
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
99 } while (sx && strcmp(s, &cs[sxs[sx]]));
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
100 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
101 return hx;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
102 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
103
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
104
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
105 // Recover String
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
106
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
107 unsigned string_dictionary::operator [] (const char *s) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
108 return ht[hash(s)];
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
109 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
110
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
111
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
112 // Enter String in Dictionary
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
113
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
114 unsigned string_dictionary::operator << (const char *s) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
115 unsigned hx = hash(s); // get hash code
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
116 unsigned k;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
117 if (ht[hx]) return ht[hx]; // return index if already there
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
118 assert(copy_flag == 0);
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
119 assert (ns < nsmax); // make sure table's not full
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
120 if (s) k = strlen(s) + 1; // get string length
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
121 else k = 1;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
122 assert(csx + k < csxmax); // make sure there's room
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
123 strcpy(&cs[csx],s); // save string
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
124 sxs[ns] = csx; // save string index
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
125 csx += k; // advance storage index
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
126 return ht[hx] = ns++; // return index
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
127 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
128
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
129
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
130 // Look up String in Dictionary
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
131
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
132 char *string_dictionary::operator [] (unsigned sx) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
133 if (sx && sx <= ns) return &cs[sxs[sx]];
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
134 return 0;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
135 }
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
136
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
137
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
138 // Find Number of Entries in Dictionary
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
139
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
140 unsigned size(string_dictionary &sd) {
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
141 return sd.ns - 1;
13d2b8934445 Import AnaGram (near-)release tree into Mercurial.
David A. Holland
parents:
diff changeset
142 }