view oldclasslib/source/strdict.cpp @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children 607e3be6bad8
line wrap: on
line source

/*
 * AnaGram, a System for Syntax Directed Parsing
 * String Dictionary Class Definition
 *
 * Copyright 1993 Parsifal Software. All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

#include "strdict.h"
#include <assert.h>
#include <string.h>


// Constructor

string_dictionary::string_dictionary(unsigned size, unsigned wl) {
  assert((size << 1) > size);
  assert((long) size * wl < 65536L);

  sxs = new unsigned[size];                // allocate index storage
  csxmax = wl*size;                        // calculate char storage rqmt
  cs = new char[csxmax];                   // allocate character storage
  csx = 1;                                 // can't use byte zero
  for (htl = 1; htl < size; htl <<= 1);    // calculate size of hash table
  htmask = htl - 1;                        // htl is a power of two
  ht = new unsigned[htl];                  // allocate storage for hash table
  memset(ht, 0, htl*sizeof(unsigned));     // init to zeros
  nsmax = size;
  ns = 1;                                  // can't use zero
  n_calls = n_collisions = 0;              // clear statistics
  copy_flag = 0;                           // live
}

string_dictionary::string_dictionary(string_dictionary &s) {
  *this = s;
  copy_flag++;                             //memorex
}


// Reset Dictionary

string_dictionary &reset(string_dictionary &d) {
  assert(d.copy_flag == 0);
  d.csx = 1;                               // can't use byte zero
  memset(d.ht, 0, d.htl*sizeof(unsigned)); // init to zeros
  d.ns = 1;                                // can't use zero
  d.n_calls = d.n_collisions = 0;          // reset statistics
  return d;
}


// Destructor

string_dictionary::~string_dictionary() {
  if (copy_flag) return;
  delete [] sxs;
  delete [] cs;
  delete [] ht;
}


// Hash Utility

unsigned string_dictionary::hash(const char *s) {
  unsigned char k1 = 1, k2 = 1;
  unsigned char c;
  unsigned hx;                             // hash index
  unsigned sx;                             // string index
  int k = 0;
  while((c = s[k++]) != 0) {               // accumulate hash code elements
    if ((k1 += c) < c)   k1++;
    if ((k2 += k1) < k1) k2++;
  }
  k1--;
  k2--;
  hx = (255*k1 + k2) & htmask;             // calculate hash code
  sx = ht[hx];
  n_calls++;                               // count number of calls
  if (sx && strcmp(s, &cs[sxs[sx]])) {
    unsigned dx = (2*(255*k2 + k1) + 1) & htmask;
    do {          // no cycles, since dx and htl are relatively prime
      n_collisions++;                      // count collisions
      sx = ht[hx = htmask & (hx+dx)];
    } while (sx && strcmp(s, &cs[sxs[sx]]));
  }
  return hx;
}


// Recover String

unsigned string_dictionary::operator [] (const char *s) {
  return ht[hash(s)];
}


// Enter String in Dictionary

unsigned string_dictionary::operator << (const char *s) {
  unsigned hx = hash(s);                // get hash code
  unsigned k;
  if (ht[hx]) return ht[hx];            // return index if already there
  assert(copy_flag == 0);
  assert (ns < nsmax);                  // make sure table's not full
  if (s) k = strlen(s) + 1;             // get string length
  else k = 1;
  assert(csx + k < csxmax);             // make sure there's room
  strcpy(&cs[csx],s);                   // save string
  sxs[ns] = csx;                        // save string index
  csx += k;                             // advance storage index
  return ht[hx] = ns++;                 // return index
}


// Look up String in Dictionary

char *string_dictionary::operator [] (unsigned sx) {
  if (sx && sx <= ns) return &cs[sxs[sx]];
  return 0;
}


// Find Number of Entries in Dictionary

unsigned size(string_dictionary &sd) {
  return sd.ns - 1;
}