Logo Search packages:      
Sourcecode: hammerhead version File versions  Download package

dictionary.h

//
// $Id: dictionary.h,v 1.3 2003/03/03 23:18:48 dredd Exp $
//
// $Source: /cvsroot/hammerhead/hammerhead/src/dictionary.h,v $
// $Revision: 1.3 $
// $Date: 2003/03/03 23:18:48 $
// $State: Exp $
//
// Generic Dictionary Class
//
// Author: Geoff Wong
// Note: Generic Zikzak library code.
//
// NOTES: 
//   should extend to hash generic keys (needs a function to
//   extract the key from the elements being stored)
//
//   * should do something other than return '0' on a failed find
//     (need a special exception value)
//
//   Should fix copy constructor so Dict<Blah> X = Y; works nicely.
//   Should fix copy constructor so Dict<Blah>& X = Y; works nicely.
//
//   Use (example):
//   Dictionary<string *> dict;
//   dict["test"] = new string("fud");
//   x = dict["test"];
//
//   Uses a hash table internally for O(1) lookups.
//   Additions are normally O(1), O(n) when a resize happens - the table
//   starts off with 317 buckets and automatically resizes when it is about 70% full
//

#ifndef DICTIONARY_H
#define DICTIONARY_H

// #include <defalloc.h>
#include <assert.h>
#include <stdio.h>
// #include "istring.h"

#include <string>
using namespace std;

template <class T> class Dictionary;
template <class T> class dict_iterator;

// Two iterators are equal when they refer to the same bucket index
// of the same dictionary.
template <class T>
bool operator==(const dict_iterator<T>& it1,
               const dict_iterator<T>& it2)
{
    const bool same_slot  = (it1.where == it2.where);
    const bool same_table = (it1.ihash == it2.ihash);
    return same_slot && same_table;
}

// Inequality is defined purely as the negation of operator==.
template <class T>
bool operator!=(const dict_iterator<T>& it1,
               const dict_iterator<T>& it2)
{
    if (it1 == it2)
        return false;
    return true;
}

//
// Forward iterator over the live entries of a Dictionary<T>.
//
// The iterator is a (table pointer, bucket index) pair.  Advancing it
// skips buckets that are unused or marked deleted.  An index equal to
// ihash->hsize is the "one past the end" position.
//
// Both data members are public because the free comparison operators
// and Dictionary itself read them directly.
//
template <class T>
class dict_iterator 
{
public:
    Dictionary<T> * ihash;   // table being walked; NULL for a detached iterator
    unsigned int where;      // current bucket index within ihash->hashtable

    // Builds a detached iterator (no table, index 0).
    dict_iterator()
    {
        ihash = NULL;
        where = 0;
    }

    // Builds an iterator positioned on bucket `w` of table `t`.
    dict_iterator(Dictionary<T> * t,  int w)
    {
        ihash = t;
        where = w;
    }

    // Pre-increment: move to the next live bucket (or to the end position).
    dict_iterator& operator++()
    {
        advance();
        return *this;
    }

    // Post-increment.  NOTE: for backward compatibility this keeps its
    // historical behaviour of returning a reference to the *advanced*
    // iterator rather than a copy of the previous position.
    dict_iterator& operator++(int)
    {
        advance();
        return *this;
    }

    // Returns the element stored in the current bucket.
    // Must not be called on a detached or end iterator.
    T& element()
    {
        assert(ihash != NULL);
        assert(where < ihash->hsize);
        return ihash->hashtable[where].element;
    }

    // Same as element().
    T& operator*()
    {
        return element();
    }

    // Returns the key of the current bucket, or "" when the iterator is
    // detached or sits on the end position (there is no bucket to read).
    const char * key()
    {
        if (ihash == NULL) return "";
        if (where >= ihash->hsize) return "";
        return ihash->hashtable[where].key.c_str();
    }

    // Member comparison; const so it can be used on const iterators and
    // does not conflict with its own reversed form under C++20 rules.
    inline bool operator==(const dict_iterator &it1) const
    {
        return (it1.where == where) && (it1.ihash == ihash);
    }

    // It'd be nice if this worked - we wouldn't need
    // everything to be public ...
    //friend bool operator== <T> (const dict_iterator<T>& it1, 
    //                const dict_iterator<T>& it2);

private:
    // Steps `where` forward to the next bucket that is used and not
    // deleted, stopping at ihash->hsize.  Does nothing for a detached
    // iterator or one that is already at/after the end position.
    void advance()
    {
        if (ihash == NULL) return;
        if (where >= ihash->hsize) return;

        do {
            where++;
        } while (where < ihash->hsize &&
                 ((ihash->hashtable[where].used == false)
                  || (ihash->hashtable[where].deleted == true)));
    }
};

template <class T>
class Dictionary
{
    unsigned int gap;

    int cacheIndex;
    // the index of the last bucket accessed.

protected:
    //        int last_full;        // last full bucket

public:

    class Bucket 
    {
    public:
        string key;
        T element;
        bool used;
        bool deleted;
    };

    int deleted;
    Bucket * hashtable;
    unsigned int hsize;
    int elements;

    int find(const char * key)
        // returns the actual index in the hash table - internal use only!
        // returns -1 on failure to find
    {
        unsigned int h;

        if (key == NULL)
            return 0;
        h = hash(key);

        while (gap < hsize * 3) 
        {
            if ((hashtable[h].used == true) && 
                (hashtable[h].deleted == false) &&
                (hashtable[h].key == key))
            {
                /* function found */
                assert(h < hsize);
                cacheIndex = h;
                return h;
            }
            if (hashtable[h].used==false && hashtable[h].deleted==false) 
            {
                return -1;
            }
            h = (h + gap) % hsize;
        }
        return -1;
    }

    int add_element(const char * key, T ele)
    {
        int h;

        if (hashtable == NULL) return -1;
        if (key == NULL) return -1;

        if ((h = find(key)) != -1)
        {
            // no duplicate keys - don't add just return the index
            return h;
        }

        /* check to increase size here! */
        if (((elements + deleted) * 1000 / hsize) > 701)         
        {
            // if this happens we're out of memory probably!
            if (upsize() == false) return -1;        
        }

        h = hash(key);
        while ((hashtable[h]).used == true)
        {
            h = (h+gap) % hsize;
        }
        hashtable[h].key = key;
        hashtable[h].used = true;
        if (hashtable[h].deleted == true)
            deleted--;
        hashtable[h].deleted = false;
        hashtable[h].element = ele;
        elements++;

        assert((unsigned int)h < hsize);

        return h;        
    }


    typedef dict_iterator<T> iterator;

    const iterator nulliterator;

    // NAME: hash(str)
    // NOTES: computes a hash value from the first 8 chars of 
    //            a null terminated C string.
    int hash(const char * str)
    {
        int i = 0; 
        int h = 0; 
        while (str[i] && i < 8) {
            h = (h << 2) + str[i];
            i++;
        }
        //        fprintf(stderr, "hash(%s) = %d\n", str, h % hsize);
        return h % hsize;
    }

    iterator begin()
    {
        // find first full bucket and return
        unsigned int i = 0;
        if (elements == 0)
            return nulliterator;
        while (i < hsize) 
        {
            if (hashtable[i].used == true && 
                hashtable[i].deleted == false) 
            {
                return iterator(this, i);
            }
            i++;
        }
        return nulliterator;
    }

    iterator end()
    {
        // remember last full bucket and return
        if (elements == 0)
            return nulliterator;
        return iterator(this, hsize);
    }

    bool upsize()
    {
        // fprintf(stderr, "Upsizing: %d -> ", hsize);
        unsigned int hash_primes[] = {
            37, 93, 193, 317, 701,
            1279, 2557, 5119, 10223, 16001, 23001, 40939,  
            81919, 163819, 371001, 707001, 29000039
        };
        // more primes: 11, 19, 37, 79, 157, 317, 431, 631, 20479, 

        unsigned int old_size;
        unsigned int i;
        Bucket * old_table;
                        
        for (i = 0; i < sizeof(hash_primes); i++) 
        {
            if (hash_primes[i] > hsize) break;
        }

        if (i > sizeof(hash_primes)) 
        {        
            /* no more primes :-( */
            /* btw: that'd be a HUGE error I expect */
            // fprintf(stderr, "No more hash table space\n");
            return false;
        }
        old_table = hashtable;
        old_size = hsize;
        gap = old_size;

        hsize = hash_primes[i];
        // fprintf(stderr, "%d\n", hsize);

        hashtable = new Bucket[hsize];
#if 1
        // initialisation
        for (i = 0; i < hsize; i++) 
        {
            hashtable[i].used = false;
            hashtable[i].deleted = false;
            hashtable[i].key = "";
            hashtable[i].element = T();
        }
#endif
        elements = 0;
        deleted = 0;

        /* ugly time - go through each element an add to the new table */
        /* how can we do this when we don't know what the "keys" are? */
        for (i = 0; i < old_size; i++) 
        {
            if (old_table[i].used == true && old_table[i].deleted == false) 
            {
                add_element(old_table[i].key.c_str(), old_table[i].element);
            }
            old_table[i].key = "";
        }
        delete [] old_table;
        return true;
    }


    bool remove(const char * key)
    {
        int h;
        if (hashtable == NULL) return false;

        h = hash(key);

        while (gap < hsize * 3) 
        {
            if ((hashtable[h].used == true) &&
                (hashtable[h].deleted == false) &&
                (hashtable[h].key == key))
            {
                hashtable[h].key = "";
                hashtable[h].deleted = true;
                hashtable[h].used = false;
                hashtable[h].element = T();
                elements--;
                deleted++;
                return true;
            }
            if (hashtable[h].used==false && hashtable[h].deleted==false) 
            {
                return false;
            }
            h = (h + gap) % hsize;
        }
        return false;
    }

#if 0
    bool remove(string &key)
    {
        return remove(key.c_str());
    }
#endif

    void clear_all_elements()
    {
        for (unsigned int i = 0; i < hsize; i++) 
        {
#if 0
            if (hashtable[i].used == true && hashtable[i].deleted == false) 
            {
                hashtable[i].element = T();
                // destroy(&(hashtable[i].element));
                // delete hashtable[i].element;
            }
#endif
            hashtable[i].element = T();
            hashtable[i].used = false;
            hashtable[i].deleted = false;
            hashtable[i].key = "";
        }
        elements = 0;
        deleted = 0;
    }

    bool exists(const char * key)
    {
        if (find(key) == -1) 
            return false;
        return true;
    }
 
    T * element_array()
    {
        T * ret;
        unsigned int i, count = 0;
 
        if (elements == 0) return NULL;
 
        ret = new T[elements];
        for (i = 0; i < hsize; i++)
        {
            if (hashtable[i].used == true &&
                hashtable[i].deleted == false)
            {
                ret[count++] = hashtable[i].element;
            }  
        }
        return ret;
    }

    T& operator[](const char * str)
    {
        if (cacheIndex != -1 && hashtable[cacheIndex].key == str)
        {
            return hashtable[cacheIndex].element;
        }

        int h;
                
        // add it (with NULL value - risk with integers)
        h = add_element(str, T());
        assert(h != -1);
        assert((unsigned int)h < hsize);
        cacheIndex = h;

        return hashtable[h].element;
    }

    int size() { return hsize; }
    int num_elements() { return elements; }

    Dictionary()
    {
        cacheIndex = -1;
        gap = 157;
        hsize = 317;
        hashtable = new Bucket[hsize];
#if 1
        for (unsigned int i = 0; i < hsize; i++) 
        {
            hashtable[i].used = false;
            hashtable[i].deleted = false;
            hashtable[i].key = "";
            hashtable[i].element = T();
        }
#endif
        elements = 0;
        deleted = 0;
    }

    Dictionary(int size)
    {
        hsize = size;                // should make sure it's the next largest prime
        hashtable = new Bucket[hsize];
#if 1
        for (unsigned int i = 0; i < hsize; i++) 
        {
            hashtable[i].used = false;
            hashtable[i].deleted = false;
            hashtable[i].key = "";
            hashtable[i].element = T();
        }
#endif
        elements = 0;
        deleted = 0;
    }

    ~Dictionary()
    {
        // clear_all_elements();
        delete [] hashtable;
    }

};

#endif

Generated by  Doxygen 1.6.0   Back to index