diff oldclasslib/source/strdict.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children 607e3be6bad8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/oldclasslib/source/strdict.cpp	Sat Dec 22 17:52:45 2007 -0500
@@ -0,0 +1,142 @@
+/*
+ * AnaGram, a System for Syntax Directed Parsing
+ * String Dictionary Class Definition
+ *
+ * Copyright 1993 Parsifal Software. All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include "strdict.h"
+#include <assert.h>
+#include <string.h>
+
+
+// Constructor
+
+string_dictionary::string_dictionary(unsigned size, unsigned wl) {
+  assert((size << 1) > size);
+  assert((long) size * wl < 65536L);
+
+  sxs = new unsigned[size];                // allocate index storage
+  csxmax = wl*size;                        // calculate char storage rqmt
+  cs = new char[csxmax];                   // allocate character storage
+  csx = 1;                                 // can't use byte zero
+  for (htl = 1; htl < size; htl <<= 1);    // calculate size of hash table
+  htmask = htl - 1;                        // htl is a power of two
+  ht = new unsigned[htl];                  // allocate storage for hash table
+  memset(ht, 0, htl*sizeof(unsigned));     // init to zeros
+  nsmax = size;
+  ns = 1;                                  // can't use zero
+  n_calls = n_collisions = 0;              // clear statistics
+  copy_flag = 0;                           // live
+}
+
+string_dictionary::string_dictionary(string_dictionary &s) {
+  *this = s;
+  copy_flag++;                             //memorex
+}
+
+
+// Reset Dictionary
+
+string_dictionary &reset(string_dictionary &d) {
+  assert(d.copy_flag == 0);
+  d.csx = 1;                               // can't use byte zero
+  memset(d.ht, 0, d.htl*sizeof(unsigned)); // init to zeros
+  d.ns = 1;                                // can't use zero
+  d.n_calls = d.n_collisions = 0;          // reset statistics
+  return d;
+}
+
+
+// Destructor
+
+string_dictionary::~string_dictionary() {
+  if (copy_flag) return;
+  delete [] sxs;
+  delete [] cs;
+  delete [] ht;
+}
+
+
+// Hash Utility
+
+unsigned string_dictionary::hash(const char *s) {
+  unsigned char k1 = 1, k2 = 1;
+  unsigned char c;
+  unsigned hx;                             // hash index
+  unsigned sx;                             // string index
+  int k = 0;
+  while((c = s[k++]) != 0) {               // accumulate hash code elements
+    if ((k1 += c) < c)   k1++;
+    if ((k2 += k1) < k1) k2++;
+  }
+  k1--;
+  k2--;
+  hx = (255*k1 + k2) & htmask;             // calculate hash code
+  sx = ht[hx];
+  n_calls++;                               // count number of calls
+  if (sx && strcmp(s, &cs[sxs[sx]])) {
+    unsigned dx = (2*(255*k2 + k1) + 1) & htmask;
+    do {          // no cycles, since dx and htl are relatively prime
+      n_collisions++;                      // count collisions
+      sx = ht[hx = htmask & (hx+dx)];
+    } while (sx && strcmp(s, &cs[sxs[sx]]));
+  }
+  return hx;
+}
+
+
+// Recover String
+
+unsigned string_dictionary::operator [] (const char *s) {
+  return ht[hash(s)];
+}
+
+
+// Enter String in Dictionary
+
+unsigned string_dictionary::operator << (const char *s) {
+  unsigned hx = hash(s);                // get hash code
+  unsigned k;
+  if (ht[hx]) return ht[hx];            // return index if already there
+  assert(copy_flag == 0);
+  assert (ns < nsmax);                  // make sure table's not full
+  if (s) k = strlen(s) + 1;             // get string length
+  else k = 1;
+  assert(csx + k < csxmax);             // make sure there's room
+  strcpy(&cs[csx],s);                   // save string
+  sxs[ns] = csx;                        // save string index
+  csx += k;                             // advance storage index
+  return ht[hx] = ns++;                 // return index
+}
+
+
+// Look up String in Dictionary
+
+char *string_dictionary::operator [] (unsigned sx) {
+  if (sx && sx <= ns) return &cs[sxs[sx]];
+  return 0;
+}
+
+
+// Find Number of Entries in Dictionary
+
+unsigned size(string_dictionary &sd) {
+  return sd.ns - 1;
+}