comparison oldclasslib/source/strdict.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children 607e3be6bad8
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 /*
2 * AnaGram, a System for Syntax Directed Parsing
3 * String Dictionary Class Definition
4 *
5 * Copyright 1993 Parsifal Software. All Rights Reserved.
6 *
7 * This software is provided 'as-is', without any express or implied
8 * warranty. In no event will the authors be held liable for any damages
9 * arising from the use of this software.
10 *
11 * Permission is granted to anyone to use this software for any purpose,
12 * including commercial applications, and to alter it and redistribute it
13 * freely, subject to the following restrictions:
14 *
15 * 1. The origin of this software must not be misrepresented; you must not
16 * claim that you wrote the original software. If you use this software
17 * in a product, an acknowledgment in the product documentation would be
18 * appreciated but is not required.
19 * 2. Altered source versions must be plainly marked as such, and must not be
20 * misrepresented as being the original software.
21 * 3. This notice may not be removed or altered from any source distribution.
22 */
23
24 #include "strdict.h"
25 #include <assert.h>
26 #include <string.h>
27
28
29 // Constructor
30
31 string_dictionary::string_dictionary(unsigned size, unsigned wl) {
32 assert((size << 1) > size);
33 assert((long) size * wl < 65536L);
34
35 sxs = new unsigned[size]; // allocate index storage
36 csxmax = wl*size; // calculate char storage rqmt
37 cs = new char[csxmax]; // allocate character storage
38 csx = 1; // can't use byte zero
39 for (htl = 1; htl < size; htl <<= 1); // calculate size of hash table
40 htmask = htl - 1; // htl is a power of two
41 ht = new unsigned[htl]; // allocate storage for hash table
42 memset(ht, 0, htl*sizeof(unsigned)); // init to zeros
43 nsmax = size;
44 ns = 1; // can't use zero
45 n_calls = n_collisions = 0; // clear statistics
46 copy_flag = 0; // live
47 }
48
49 string_dictionary::string_dictionary(string_dictionary &s) {
50 *this = s;
51 copy_flag++; //memorex
52 }
53
54
55 // Reset Dictionary
56
57 string_dictionary &reset(string_dictionary &d) {
58 assert(d.copy_flag == 0);
59 d.csx = 1; // can't use byte zero
60 memset(d.ht, 0, d.htl*sizeof(unsigned)); // init to zeros
61 d.ns = 1; // can't use zero
62 d.n_calls = d.n_collisions = 0; // reset statistics
63 return d;
64 }
65
66
67 // Destructor
68
69 string_dictionary::~string_dictionary() {
70 if (copy_flag) return;
71 delete [] sxs;
72 delete [] cs;
73 delete [] ht;
74 }
75
76
77 // Hash Utility
78
79 unsigned string_dictionary::hash(const char *s) {
80 unsigned char k1 = 1, k2 = 1;
81 unsigned char c;
82 unsigned hx; // hash index
83 unsigned sx; // string index
84 int k = 0;
85 while((c = s[k++]) != 0) { // accumulate hash code elements
86 if ((k1 += c) < c) k1++;
87 if ((k2 += k1) < k1) k2++;
88 }
89 k1--;
90 k2--;
91 hx = (255*k1 + k2) & htmask; // calculate hash code
92 sx = ht[hx];
93 n_calls++; // count number of calls
94 if (sx && strcmp(s, &cs[sxs[sx]])) {
95 unsigned dx = (2*(255*k2 + k1) + 1) & htmask;
96 do { // no cycles, since dx and htl are relatively prime
97 n_collisions++; // count collisions
98 sx = ht[hx = htmask & (hx+dx)];
99 } while (sx && strcmp(s, &cs[sxs[sx]]));
100 }
101 return hx;
102 }
103
104
105 // Recover String
106
107 unsigned string_dictionary::operator [] (const char *s) {
108 return ht[hash(s)];
109 }
110
111
112 // Enter String in Dictionary
113
114 unsigned string_dictionary::operator << (const char *s) {
115 unsigned hx = hash(s); // get hash code
116 unsigned k;
117 if (ht[hx]) return ht[hx]; // return index if already there
118 assert(copy_flag == 0);
119 assert (ns < nsmax); // make sure table's not full
120 if (s) k = strlen(s) + 1; // get string length
121 else k = 1;
122 assert(csx + k < csxmax); // make sure there's room
123 strcpy(&cs[csx],s); // save string
124 sxs[ns] = csx; // save string index
125 csx += k; // advance storage index
126 return ht[hx] = ns++; // return index
127 }
128
129
130 // Look up String in Dictionary
131
132 char *string_dictionary::operator [] (unsigned sx) {
133 if (sx && sx <= ns) return &cs[sxs[sx]];
134 return 0;
135 }
136
137
138 // Find Number of Entries in Dictionary
139
140 unsigned size(string_dictionary &sd) {
141 return sd.ns - 1;
142 }