• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDevelop Platform Libraries
  • Sitemap
  • Contact Us
 

util

densehashtable.h

00001 // Copyright (c) 2005, Google Inc.
00002 // All rights reserved.
00003 //
00004 // Redistribution and use in source and binary forms, with or without
00005 // modification, are permitted provided that the following conditions are
00006 // met:
00007 //
00008 //     * Redistributions of source code must retain the above copyright
00009 // notice, this list of conditions and the following disclaimer.
00010 //     * Redistributions in binary form must reproduce the above
00011 // copyright notice, this list of conditions and the following disclaimer
00012 // in the documentation and/or other materials provided with the
00013 // distribution.
00014 //     * Neither the name of Google Inc. nor the names of its
00015 // contributors may be used to endorse or promote products derived from
00016 // this software without specific prior written permission.
00017 //
00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 // ---
00031 // Author: Craig Silverstein
00032 //
00033 // A dense hashtable is a particular implementation of
00034 // a hashtable: one that is meant to minimize memory allocation.
00035 // It does this by using an array to store all the data.  We
00036 // steal a value from the key space to indicate "empty" array
00037 // elements (ie indices where no item lives) and another to indicate
00038 // "deleted" elements.
00039 //
00040 // (Note it is possible to change the value of the delete key
00041 // on the fly; you can even remove it, though after that point
00042 // the hashtable is insert_only until you set it again.  The empty
00043 // value however can't be changed.)
00044 //
00045 // To minimize allocation and pointer overhead, we use internal
00046 // probing, in which the hashtable is a single table, and collisions
00047 // are resolved by trying to insert again in another bucket.  The
00048 // most cache-efficient internal probing schemes are linear probing
00049 // (which suffers, alas, from clumping) and quadratic probing, which
00050 // is what we implement by default.
00051 //
00052 // Type requirements: value_type is required to be Copy Constructible
00053 // and Default Constructible. It is not required to be (and commonly
00054 // isn't) Assignable.
00055 //
00056 // You probably shouldn't use this code directly.  Use
00057 // <google/dense_hash_map> or <google/dense_hash_set> instead.
00058 
00059 // You can change the following below:
00060 // HT_OCCUPANCY_FLT      -- how full before we double size
00061 // HT_EMPTY_FLT          -- how empty before we halve size
00062 // HT_MIN_BUCKETS        -- default smallest bucket size
00063 //
00064 // How to decide what values to use?
00065 // HT_EMPTY_FLT's default of .4 * OCCUPANCY_FLT, is probably good.
00066 // HT_MIN_BUCKETS is probably unnecessary since you can specify
00067 // (indirectly) the starting number of buckets at construct-time.
00068 // For HT_OCCUPANCY_FLT, you can use this chart to try to trade-off
00069 // expected lookup time to the space taken up.  By default, this
00070 // code uses quadratic probing, though you can change it to linear
00071 // via _JUMP below if you really want to.
00072 //
00073 // From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
00074 // NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
00075 // Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
00076 // Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)2]/2
00077 //
00078 // -- HT_OCCUPANCY_FLT --         0.10  0.50  0.60  0.75  0.80  0.90  0.99
00079 // QUADRATIC COLLISION RES.
00080 //    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
00081 //    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
00082 // LINEAR COLLISION RES.
00083 //    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
00084 //    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
00085 
00086 #ifndef _DENSEHASHTABLE_H_
00087 #define _DENSEHASHTABLE_H_
00088 
00089 // The probing method
00090 // Linear probing
00091 // #define JUMP_(key, num_probes)    ( 1 )
00092 // Quadratic-ish probing
00093 #define JUMP_(key, num_probes)    ( num_probes )
00094 
00095 
00096 // Hashtable class, used to implement the hashed associative containers
00097 // hash_set and hash_map.
00098 
00099 #if defined(Q_OS_WIN)
00100 #include "sparseconfig_windows.h"
00101 #else
00102 #include "sparseconfig.h"
00103 #endif
00104 #include <assert.h>
00105 #include <stdlib.h>             // for abort()
00106 #include <algorithm>            // For swap(), eg
00107 #include <iostream>             // For cerr
00108 #include <memory>               // For uninitialized_fill, uninitialized_copy
00109 #include <utility>              // for pair<>
00110 #include <iterator>             // for facts about iterator tags
00111 #include <util/google/type_traits.h> // for true_type, integral_constant, etc.
00112 
00113 _START_GOOGLE_NAMESPACE_
00114 
00115 using STL_NAMESPACE::pair;
00116 
00117 template <class Value, class Key, class HashFcn,
00118           class ExtractKey, class EqualKey, class Alloc>
00119 class dense_hashtable;
00120 
00121 template <class V, class K, class HF, class ExK, class EqK, class A>
00122 struct dense_hashtable_iterator;
00123 
00124 template <class V, class K, class HF, class ExK, class EqK, class A>
00125 struct dense_hashtable_const_iterator;
00126 
00127 // We're just an array, but we need to skip over empty and deleted elements
00128 template <class V, class K, class HF, class ExK, class EqK, class A>
00129 struct dense_hashtable_iterator {
00130  public:
00131   typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
00132   typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
00133 
00134   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00135   typedef V value_type;
00136   typedef ptrdiff_t difference_type;
00137   typedef size_t size_type;
00138   typedef V& reference;                // Value
00139   typedef V* pointer;
00140 
00141   // "Real" constructor and default constructor
00142   dense_hashtable_iterator(const dense_hashtable<V,K,HF,ExK,EqK,A> *h,
00143                            pointer it, pointer it_end, bool advance)
00144     : ht(h), pos(it), end(it_end)   {
00145     if (advance)  advance_past_empty_and_deleted();
00146   }
00147   dense_hashtable_iterator() { }
00148   // The default destructor is fine; we don't define one
00149   // The default operator= is fine; we don't define one
00150 
00151   // Happy dereferencer
00152   reference operator*() const { return *pos; }
00153   pointer operator->() const { return &(operator*()); }
00154 
00155   // Arithmetic.  The only hard part is making sure that
00156   // we're not on an empty or marked-deleted array element
00157   void advance_past_empty_and_deleted() {
00158     while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
00159       ++pos;
00160   }
00161   iterator& operator++()   {
00162     assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
00163   }
00164   iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
00165 
00166   // Comparison.
00167   bool operator==(const iterator& it) const { return pos == it.pos; }
00168   bool operator!=(const iterator& it) const { return pos != it.pos; }
00169 
00170 
00171   // The actual data
00172   const dense_hashtable<V,K,HF,ExK,EqK,A> *ht;
00173   pointer pos, end;
00174 };
00175 
00176 
00177 // Now do it all again, but with const-ness!
00178 template <class V, class K, class HF, class ExK, class EqK, class A>
00179 struct dense_hashtable_const_iterator {
00180  public:
00181   typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
00182   typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
00183 
00184   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00185   typedef V value_type;
00186   typedef ptrdiff_t difference_type;
00187   typedef size_t size_type;
00188   typedef const V& reference;                // Value
00189   typedef const V* pointer;
00190 
00191   // "Real" constructor and default constructor
00192   dense_hashtable_const_iterator(const dense_hashtable<V,K,HF,ExK,EqK,A> *h,
00193                                  pointer it, pointer it_end, bool advance)
00194     : ht(h), pos(it), end(it_end)   {
00195     if (advance)  advance_past_empty_and_deleted();
00196   }
00197   dense_hashtable_const_iterator() { }
00198   // This lets us convert regular iterators to const iterators
00199   dense_hashtable_const_iterator(const iterator &it)
00200     : ht(it.ht), pos(it.pos), end(it.end) { }
00201   // The default destructor is fine; we don't define one
00202   // The default operator= is fine; we don't define one
00203 
00204   // Happy dereferencer
00205   reference operator*() const { return *pos; }
00206   pointer operator->() const { return &(operator*()); }
00207 
00208   // Arithmetic.  The only hard part is making sure that
00209   // we're not on an empty or marked-deleted array element
00210   void advance_past_empty_and_deleted() {
00211     while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
00212       ++pos;
00213   }
00214   const_iterator& operator++()   {
00215     assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
00216   }
00217   const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }
00218 
00219   // Comparison.
00220   bool operator==(const const_iterator& it) const { return pos == it.pos; }
00221   bool operator!=(const const_iterator& it) const { return pos != it.pos; }
00222 
00223 
00224   // The actual data
00225   const dense_hashtable<V,K,HF,ExK,EqK,A> *ht;
00226   pointer pos, end;
00227 };
00228 
00229 template <class Value, class Key, class HashFcn,
00230           class ExtractKey, class EqualKey, class Alloc>
00231 class dense_hashtable {
00232  public:
00233   typedef Key key_type;
00234   typedef Value value_type;
00235   typedef HashFcn hasher;
00236   typedef EqualKey key_equal;
00237 
00238   typedef size_t            size_type;
00239   typedef ptrdiff_t         difference_type;
00240   typedef value_type*       pointer;
00241   typedef const value_type* const_pointer;
00242   typedef value_type&       reference;
00243   typedef const value_type& const_reference;
00244   typedef dense_hashtable_iterator<Value, Key, HashFcn,
00245                                    ExtractKey, EqualKey, Alloc>
00246   iterator;
00247 
00248   typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
00249                                           ExtractKey, EqualKey, Alloc>
00250   const_iterator;
00251 
00252   // How full we let the table get before we resize.  Knuth says .8 is
00253   // good -- higher causes us to probe too much, though saves memory
00254   static const float HT_OCCUPANCY_FLT; // = 0.8;
00255 
00256   // How empty we let the table get before we resize lower.
00257   // (0.0 means never resize lower.)
00258   // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
00259   static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT
00260 
00261   // Minimum size we're willing to let hashtables be.
00262   // Must be a power of two, and at least 4.
00263   // Note, however, that for a given hashtable, the initial size is a
00264   // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
00265   static const size_t HT_MIN_BUCKETS  = 32;
00266 
00267 
00268   // ITERATOR FUNCTIONS
00269   iterator begin()             { return iterator(this, table,
00270                                                  table + num_buckets, true); }
00271   iterator end()               { return iterator(this, table + num_buckets,
00272                                                  table + num_buckets, true); }
00273   const_iterator begin() const { return const_iterator(this, table,
00274                                                        table+num_buckets,true);}
00275   const_iterator end() const   { return const_iterator(this, table + num_buckets,
00276                                                        table+num_buckets,true);}
00277 
00278   // ACCESSOR FUNCTIONS for the things we templatize on, basically
00279   hasher hash_funct() const { return hash; }
00280   key_equal key_eq() const  { return equals; }
00281 
00282   // Annoyingly, we can't copy values around, because they might have
00283   // const components (they're probably pair<const X, Y>).  We use
00284   // explicit destructor invocation and placement new to get around
00285   // this.  Arg.
00286  private:
00287   void set_value(value_type* dst, const value_type& src) {
00288     dst->~value_type();
00289     new(dst) value_type(src);
00290   }
00291 
00292   void destroy_buckets(size_type first, size_type last) {
00293     for ( ; first != last; ++first)
00294       table[first].~value_type();
00295   }
00296 
00297   // DELETE HELPER FUNCTIONS
00298   // This lets the user describe a key that will indicate deleted
00299   // table entries.  This key should be an "impossible" entry --
00300   // if you try to insert it for real, you won't be able to retrieve it!
00301   // (NB: while you pass in an entire value, only the key part is looked
00302   // at.  This is just because I don't know how to assign just a key.)
00303  private:
00304   void squash_deleted() {           // gets rid of any deleted entries we have
00305     if ( num_deleted ) {            // get rid of deleted before writing
00306       dense_hashtable tmp(*this);   // copying will get rid of deleted
00307       swap(tmp);                    // now we are tmp
00308     }
00309     assert(num_deleted == 0);
00310   }
00311 
00312  public:
00313   void set_deleted_key(const value_type &val) {
00314     // the empty indicator (if specified) and the deleted indicator
00315     // must be different
00316     assert(!use_empty || !equals(get_key(val), get_key(emptyval)));
00317     // It's only safe to change what "deleted" means if we purge deleted guys
00318     squash_deleted();
00319     use_deleted = true;
00320     set_value(&delval, val);
00321   }
00322   void clear_deleted_key() {
00323     squash_deleted();
00324     use_deleted = false;
00325   }
00326 
00327   // These are public so the iterators can use them
00328   // True if the item at position bucknum is "deleted" marker
00329   bool test_deleted(size_type bucknum) const {
00330     // The num_deleted test is crucial for read(): after read(), the ht values
00331     // are garbage, and we don't want to think some of them are deleted.
00332     return (use_deleted && num_deleted > 0 &&
00333             equals(get_key(delval), get_key(table[bucknum])));
00334   }
00335   bool test_deleted(const iterator &it) const {
00336     return (use_deleted && num_deleted > 0 &&
00337             equals(get_key(delval), get_key(*it)));
00338   }
00339   bool test_deleted(const const_iterator &it) const {
00340     return (use_deleted && num_deleted > 0 &&
00341             equals(get_key(delval), get_key(*it)));
00342   }
00343   // Set it so test_deleted is true.  true if object didn't used to be deleted
00344   // See below (at erase()) to explain why we allow const_iterators
00345   bool set_deleted(const_iterator &it) {
00346     assert(use_deleted);             // bad if set_deleted_key() wasn't called
00347     bool retval = !test_deleted(it);
00348     // &* converts from iterator to value-type
00349     set_value(const_cast<value_type*>(&(*it)), delval);
00350     return retval;
00351   }
00352   // Set it so test_deleted is false.  true if object used to be deleted
00353   bool clear_deleted(const_iterator &it) {
00354     assert(use_deleted);             // bad if set_deleted_key() wasn't called
00355     // happens automatically when we assign something else in its place
00356     return test_deleted(it);
00357   }
00358 
00359   // EMPTY HELPER FUNCTIONS
00360   // This lets the user describe a key that will indicate empty (unused)
00361   // table entries.  This key should be an "impossible" entry --
00362   // if you try to insert it for real, you won't be able to retrieve it!
00363   // (NB: while you pass in an entire value, only the key part is looked
00364   // at.  This is just because I don't know how to assign just a key.)
00365  public:
00366   // These are public so the iterators can use them
00367   // True if the item at position bucknum is "empty" marker
00368   bool test_empty(size_type bucknum) const {
00369     assert(use_empty);              // we always need to know what's empty!
00370     return equals(get_key(emptyval), get_key(table[bucknum]));
00371   }
00372   bool test_empty(const iterator &it) const {
00373     assert(use_empty);              // we always need to know what's empty!
00374     return equals(get_key(emptyval), get_key(*it));
00375   }
00376   bool test_empty(const const_iterator &it) const {
00377     assert(use_empty);              // we always need to know what's empty!
00378     return equals(get_key(emptyval), get_key(*it));
00379   }
00380 
00381  private:
00382   // You can either set a range empty or an individual element
00383   void set_empty(size_type bucknum) {
00384     assert(use_empty);
00385     set_value(&table[bucknum], emptyval);
00386   }
00387   void fill_range_with_empty(value_type* table_start, value_type* table_end) {
00388     // Like set_empty(range), but doesn't destroy previous contents
00389     STL_NAMESPACE::uninitialized_fill(table_start, table_end, emptyval);
00390   }
00391   void set_empty(size_type buckstart, size_type buckend) {
00392     assert(use_empty);
00393     destroy_buckets(buckstart, buckend);
00394     fill_range_with_empty(table + buckstart, table + buckend);
00395   }
00396 
00397  public:
00398   // TODO(csilvers): change all callers of this to pass in a key instead,
00399   //                 and take a const key_type instead of const value_type.
00400   void set_empty_key(const value_type &val) {
00401     // Once you set the empty key, you can't change it
00402     assert(!use_empty);
00403     // The deleted indicator (if specified) and the empty indicator
00404     // must be different.
00405     assert(!use_deleted || !equals(get_key(val), get_key(delval)));
00406     use_empty = true;
00407     set_value(&emptyval, val);
00408 
00409     assert(!table);                  // must set before first use
00410     // num_buckets was set in constructor even though table was NULL
00411     table = (value_type *) malloc(num_buckets * sizeof(*table));
00412     assert(table);
00413     fill_range_with_empty(table, table + num_buckets);
00414   }
00415 
00416   // FUNCTIONS CONCERNING SIZE
00417  public:
00418   size_type size() const      { return num_elements - num_deleted; }
00419   // Buckets are always a power of 2
00420   size_type max_size() const  { return (size_type(-1) >> 1U) + 1; }
00421   bool empty() const          { return size() == 0; }
00422   size_type bucket_count() const      { return num_buckets; }
00423   size_type max_bucket_count() const  { return max_size(); }
00424   size_type nonempty_bucket_count() const { return num_elements; }
00425 
00426  private:
00427   // Because of the above, size_type(-1) is never legal; use it for errors
00428   static const size_type ILLEGAL_BUCKET = size_type(-1);
00429 
00430  private:
00431   // This is the smallest size a hashtable can be without being too crowded
00432   // If you like, you can give a min #buckets as well as a min #elts
00433   size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
00434     size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
00435     while ( sz < min_buckets_wanted || num_elts >= sz * HT_OCCUPANCY_FLT )
00436       sz *= 2;
00437     return sz;
00438   }
00439 
00440   // Used after a string of deletes
00441   void maybe_shrink() {
00442     assert(num_elements >= num_deleted);
00443     assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
00444     assert(bucket_count() >= HT_MIN_BUCKETS);
00445 
00446     if ( (num_elements-num_deleted) < shrink_threshold &&
00447          bucket_count() > HT_MIN_BUCKETS ) {
00448       size_type sz = bucket_count() / 2;    // find how much we should shrink
00449       while ( sz > HT_MIN_BUCKETS &&
00450               (num_elements - num_deleted) < sz * HT_EMPTY_FLT )
00451         sz /= 2;                            // stay a power of 2
00452       dense_hashtable tmp(*this, sz);       // Do the actual resizing
00453       swap(tmp);                            // now we are tmp
00454     }
00455     consider_shrink = false;                // because we just considered it
00456   }
00457 
00458   // We'll let you resize a hashtable -- though this makes us copy all!
00459   // When you resize, you say, "make it big enough for this many more elements"
00460   void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
00461     if ( consider_shrink )                   // see if lots of deletes happened
00462       maybe_shrink();
00463     if ( bucket_count() > min_buckets_wanted &&
00464          (num_elements + delta) <= enlarge_threshold )
00465       return;                                // we're ok as we are
00466 
00467     // Sometimes, we need to resize just to get rid of all the
00468     // "deleted" buckets that are clogging up the hashtable.  So when
00469     // deciding whether to resize, count the deleted buckets (which
00470     // are currently taking up room).  But later, when we decide what
00471     // size to resize to, *don't* count deleted buckets, since they
00472     // get discarded during the resize.
00473     const size_type needed_size = min_size(num_elements + delta,
00474                                            min_buckets_wanted);
00475     if ( needed_size > bucket_count() ) {      // we don't have enough buckets
00476       const size_type resize_to = min_size(num_elements - num_deleted + delta,
00477                                            min_buckets_wanted);
00478       dense_hashtable tmp(*this, resize_to);
00479       swap(tmp);                             // now we are tmp
00480     }
00481   }
00482 
00483   // Increase number of buckets, assuming value_type has trivial copy
00484   // constructor and destructor.  (Really, we want it to have "trivial
00485   // move", because that's what realloc does.  But there's no way to
00486   // capture that using type_traits, so we pretend that move(x, y) is
00487   // equivalent to "x.~T(); new(x) T(y);" which is pretty much
00488   // correct, if a bit conservative.)
00489   void expand_array(size_t resize_to, true_type) {
00490     table = (value_type *) realloc(table, resize_to * sizeof(value_type));
00491     assert(table);
00492     fill_range_with_empty(table + num_buckets, table + resize_to);
00493   }
00494 
00495   // Increase number of buckets, without special assumptions about value_type.
00496   // TODO(austern): make this exception safe. Handle exceptions from
00497   // value_type's copy constructor.
00498   void expand_array(size_t resize_to, false_type) {
00499     value_type* new_table =
00500       (value_type *) malloc(resize_to * sizeof(value_type));
00501     assert(new_table);
00502     STL_NAMESPACE::uninitialized_copy(table, table + num_buckets, new_table);
00503     fill_range_with_empty(new_table + num_buckets, new_table + resize_to);
00504     destroy_buckets(0, num_buckets);
00505     free(table);
00506     table = new_table;
00507   }
00508 
00509   // Used to actually do the rehashing when we grow/shrink a hashtable
00510   void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted = 0) {
00511     clear();            // clear table, set num_deleted to 0
00512 
00513     // If we need to change the size of our table, do it now
00514     const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
00515     if ( resize_to > bucket_count() ) { // we don't have enough buckets
00516       typedef integral_constant<bool,
00517           (has_trivial_copy<value_type>::value &&
00518            has_trivial_destructor<value_type>::value)>
00519           realloc_ok; // we pretend mv(x,y) == "x.~T(); new(x) T(y)"
00520       expand_array(resize_to, realloc_ok());
00521       num_buckets = resize_to;
00522       reset_thresholds();
00523     }
00524 
00525     // We use a normal iterator to get non-deleted bcks from ht
00526     // We could use insert() here, but since we know there are
00527     // no duplicates and no deleted items, we can be more efficient
00528     assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
00529     for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
00530       size_type num_probes = 0;              // how many times we've probed
00531       size_type bucknum;
00532       const size_type bucket_count_minus_one = bucket_count() - 1;
00533       for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
00534            !test_empty(bucknum);                               // not empty
00535            bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
00536         ++num_probes;
00537         assert(num_probes < bucket_count()); // or else the hashtable is full
00538       }
00539       set_value(&table[bucknum], *it);       // copies the value to here
00540       num_elements++;
00541     }
00542   }
00543 
00544   // Required by the spec for hashed associative container
00545  public:
00546   // Though the docs say this should be num_buckets, I think it's much
00547   // more useful as req_elements.  As a special feature, calling with
00548   // req_elements==0 will cause us to shrink if we can, saving space.
00549   void resize(size_type req_elements) {       // resize to this or larger
00550     if ( consider_shrink || req_elements == 0 )
00551       maybe_shrink();
00552     if ( req_elements > num_elements )
00553       return resize_delta(req_elements - num_elements, 0);
00554   }
00555 
00556 
00557   // CONSTRUCTORS -- as required by the specs, we take a size,
00558   // but also let you specify a hashfunction, key comparator,
00559   // and key extractor.  We also define a copy constructor and =.
00560   // DESTRUCTOR -- needs to free the table
00561   explicit dense_hashtable(size_type n = 0,
00562                            const HashFcn& hf = HashFcn(),
00563                            const EqualKey& eql = EqualKey(),
00564                            const ExtractKey& ext = ExtractKey())
00565     : hash(hf), equals(eql), get_key(ext), num_deleted(0),
00566       use_deleted(false), use_empty(false),
00567       delval(), emptyval(),
00568       table(NULL), num_buckets(min_size(0, n)), num_elements(0) {
00569     // table is NULL until emptyval is set.  However, we set num_buckets
00570     // here so we know how much space to allocate once emptyval is set
00571     reset_thresholds();
00572   }
00573 
00574   // As a convenience for resize(), we allow an optional second argument
00575   // which lets you make this new hashtable a different size than ht
00576   dense_hashtable(const dense_hashtable& ht, size_type min_buckets_wanted = 0)
00577     : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0),
00578       use_deleted(ht.use_deleted), use_empty(ht.use_empty),
00579       delval(ht.delval), emptyval(ht.emptyval),
00580       table(NULL), num_buckets(0),
00581       num_elements(0) {
00582     reset_thresholds();
00583     copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
00584   }
00585 
00586   dense_hashtable& operator= (const dense_hashtable& ht) {
00587     if (&ht == this)  return *this;        // don't copy onto ourselves
00588     clear();
00589     hash = ht.hash;
00590     equals = ht.equals;
00591     get_key = ht.get_key;
00592     use_deleted = ht.use_deleted;
00593     use_empty = ht.use_empty;
00594     set_value(&delval, ht.delval);
00595     set_value(&emptyval, ht.emptyval);
00596     copy_from(ht);                         // sets num_deleted to 0 too
00597     return *this;
00598   }
00599 
00600   ~dense_hashtable() {
00601     if (table) {
00602       destroy_buckets(0, num_buckets);
00603       free(table);
00604     }
00605   }
00606 
00607   // Many STL algorithms use swap instead of copy constructors
00608   void swap(dense_hashtable& ht) {
00609     STL_NAMESPACE::swap(hash, ht.hash);
00610     STL_NAMESPACE::swap(equals, ht.equals);
00611     STL_NAMESPACE::swap(get_key, ht.get_key);
00612     STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
00613     STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
00614     STL_NAMESPACE::swap(use_empty, ht.use_empty);
00615     { value_type tmp;     // for annoying reasons, swap() doesn't work
00616       set_value(&tmp, delval);
00617       set_value(&delval, ht.delval);
00618       set_value(&ht.delval, tmp);
00619     }
00620     { value_type tmp;     // for annoying reasons, swap() doesn't work
00621       set_value(&tmp, emptyval);
00622       set_value(&emptyval, ht.emptyval);
00623       set_value(&ht.emptyval, tmp);
00624     }
00625     STL_NAMESPACE::swap(table, ht.table);
00626     STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
00627     STL_NAMESPACE::swap(num_elements, ht.num_elements);
00628     reset_thresholds();
00629     ht.reset_thresholds();
00630   }
00631 
00632   // It's always nice to be able to clear a table without deallocating it
00633   void clear() {
00634     if (table)
00635       destroy_buckets(0, num_buckets);
00636     num_buckets = min_size(0,0);          // our new size
00637     reset_thresholds();
00638     table = (value_type *) realloc(table, num_buckets * sizeof(*table));
00639     assert(table);
00640     fill_range_with_empty(table, table + num_buckets);
00641     num_elements = 0;
00642     num_deleted = 0;
00643   }
00644 
00645   // Clear the table without resizing it.
00646   // Mimicks the stl_hashtable's behaviour when clear()-ing in that it
00647   // does not modify the bucket count
00648   void clear_no_resize() {
00649     if (table) {
00650       set_empty(0, num_buckets);
00651     }
00652     // don't consider to shrink before another erase()
00653     reset_thresholds();
00654     num_elements = 0;
00655     num_deleted = 0;
00656   }
00657 
00658   // LOOKUP ROUTINES
00659  private:
00660   // Returns a pair of positions: 1st where the object is, 2nd where
00661   // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
00662   // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
00663   // Note: because of deletions where-to-insert is not trivial: it's the
00664   // first deleted bucket we see, as long as we don't find the key later
00665   pair<size_type, size_type> find_position(const key_type &key) const {
00666     size_type num_probes = 0;              // how many times we've probed
00667     const size_type bucket_count_minus_one = bucket_count() - 1;
00668     size_type bucknum = hash(key) & bucket_count_minus_one;
00669     size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
00670     while ( 1 ) {                          // probe until something happens
00671       if ( test_empty(bucknum) ) {         // bucket is empty
00672         if ( insert_pos == ILLEGAL_BUCKET )   // found no prior place to insert
00673           return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
00674         else
00675           return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
00676 
00677       } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
00678         if ( insert_pos == ILLEGAL_BUCKET )
00679           insert_pos = bucknum;
00680 
00681       } else if ( equals(key, get_key(table[bucknum])) ) {
00682         return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
00683       }
00684       ++num_probes;                        // we're doing another probe
00685       bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
00686       assert(num_probes < bucket_count()); // don't probe too many times!
00687     }
00688   }
00689 
00690  public:
00691   iterator find(const key_type& key) {
00692     if ( size() == 0 ) return end();
00693     pair<size_type, size_type> pos = find_position(key);
00694     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
00695       return end();
00696     else
00697       return iterator(this, table + pos.first, table + num_buckets, false);
00698   }
00699 
00700   const_iterator find(const key_type& key) const {
00701     if ( size() == 0 ) return end();
00702     pair<size_type, size_type> pos = find_position(key);
00703     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
00704       return end();
00705     else
00706       return const_iterator(this, table + pos.first, table+num_buckets, false);
00707   }
00708 
00709   // Counts how many elements have key key.  For maps, it's either 0 or 1.
00710   size_type count(const key_type &key) const {
00711     pair<size_type, size_type> pos = find_position(key);
00712     return pos.first == ILLEGAL_BUCKET ? 0 : 1;
00713   }
00714 
00715   // Likewise, equal_range doesn't really make sense for us.  Oh well.
00716   pair<iterator,iterator> equal_range(const key_type& key) {
00717     const iterator pos = find(key);      // either an iterator or end
00718     return pair<iterator,iterator>(pos, pos);
00719   }
00720   pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
00721     const const_iterator pos = find(key);      // either an iterator or end
00722     return pair<iterator,iterator>(pos, pos);
00723   }
00724 
00725 
00726   // INSERTION ROUTINES
00727  private:
00728   // If you know *this is big enough to hold obj, use this routine
00729   pair<iterator, bool> insert_noresize(const value_type& obj) {
00730     const pair<size_type,size_type> pos = find_position(get_key(obj));
00731     if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
00732       return pair<iterator,bool>(iterator(this, table + pos.first,
00733                                           table + num_buckets, false),
00734                                  false);          // false: we didn't insert
00735     } else {                                 // pos.second says where to put it
00736       if ( test_deleted(pos.second) ) {      // just replace if it's been del.
00737         const_iterator delpos(this, table + pos.second,              // shrug:
00738                               table + num_buckets, false);// shouldn't need const
00739         clear_deleted(delpos);
00740         assert( num_deleted > 0);
00741         --num_deleted;                       // used to be, now it isn't
00742       } else {
00743         ++num_elements;                      // replacing an empty bucket
00744       }
00745       set_value(&table[pos.second], obj);
00746       return pair<iterator,bool>(iterator(this, table + pos.second,
00747                                           table + num_buckets, false),
00748                                  true);           // true: we did insert
00749     }
00750   }
00751 
00752  public:
00753   // This is the normal insert routine, used by the outside world
00754   pair<iterator, bool> insert(const value_type& obj) {
00755     resize_delta(1);                      // adding an object, grow if need be
00756     return insert_noresize(obj);
00757   }
00758 
00759   // When inserting a lot at a time, we specialize on the type of iterator
00760   template <class InputIterator>
00761   void insert(InputIterator f, InputIterator l) {
00762     // specializes on iterator type
00763     insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
00764   }
00765 
00766   // Iterator supports operator-, resize before inserting
00767   template <class ForwardIterator>
00768   void insert(ForwardIterator f, ForwardIterator l,
00769               STL_NAMESPACE::forward_iterator_tag) {
00770     size_type n = STL_NAMESPACE::distance(f, l);   // TODO(csilvers): standard?
00771     resize_delta(n);
00772     for ( ; n > 0; --n, ++f)
00773       insert_noresize(*f);
00774   }
00775 
00776   // Arbitrary iterator, can't tell how much to resize
00777   template <class InputIterator>
00778   void insert(InputIterator f, InputIterator l,
00779               STL_NAMESPACE::input_iterator_tag) {
00780     for ( ; f != l; ++f)
00781       insert(*f);
00782   }
00783 
00784 
00785   // DELETION ROUTINES
00786   size_type erase(const key_type& key) {
00787     const_iterator pos = find(key);   // shrug: shouldn't need to be const
00788     if ( pos != end() ) {
00789       assert(!test_deleted(pos));  // or find() shouldn't have returned it
00790       set_deleted(pos);
00791       ++num_deleted;
00792       consider_shrink = true;      // will think about shrink after next insert
00793       return 1;                    // because we deleted one thing
00794     } else {
00795       return 0;                    // because we deleted nothing
00796     }
00797   }
00798 
00799   // This is really evil: really it should be iterator, not const_iterator.
00800   // But...the only reason keys are const is to allow lookup.
00801   // Since that's a moot issue for deleted keys, we allow const_iterators
00802   void erase(const_iterator pos) {
00803     if ( pos == end() ) return;    // sanity check
00804     if ( set_deleted(pos) ) {      // true if object has been newly deleted
00805       ++num_deleted;
00806       consider_shrink = true;      // will think about shrink after next insert
00807     }
00808   }
00809 
00810   void erase(const_iterator f, const_iterator l) {
00811     for ( ; f != l; ++f) {
00812       if ( set_deleted(f)  )       // should always be true
00813         ++num_deleted;
00814     }
00815     consider_shrink = true;        // will think about shrink after next insert
00816   }
00817 
00818 
00819   // COMPARISON
00820   bool operator==(const dense_hashtable& ht) const {
00821     if (size() != ht.size()) {
00822       return false;
00823     } else if (this == &ht) {
00824       return true;
00825     } else {
00826       // Iterate through the elements in "this" and see if the
00827       // corresponding element is in ht
00828       for ( const_iterator it = begin(); it != end(); ++it ) {
00829         const_iterator it2 = ht.find(get_key(*it));
00830         if ((it2 == ht.end()) || (*it != *it2)) {
00831           return false;
00832         }
00833       }
00834       return true;
00835     }
00836   }
00837   bool operator!=(const dense_hashtable& ht) const {
00838     return !(*this == ht);
00839   }
00840 
00841 
00842   // I/O
00843   // We support reading and writing hashtables to disk.  Alas, since
00844   // I don't know how to write a hasher or key_equal, you have to make
00845   // sure everything but the table is the same.  We compact before writing
00846   //
00847   // NOTE: These functions are currently TODO.  They've not been implemented.
00848   bool write_metadata(FILE *fp) {
00849     squash_deleted();           // so we don't have to worry about delval
00850     return false;               // TODO
00851   }
00852 
00853   bool read_metadata(FILE *fp) {
00854     num_deleted = 0;            // since we got rid before writing
00855     assert(use_empty);          // have to set this before calling us
00856     if (table)  free(table);    // we'll make our own
00857     // TODO: read magic number
00858     // TODO: read num_buckets
00859     reset_thresholds();
00860     table = (value_type *) malloc(num_buckets * sizeof(*table));
00861     assert(table);
00862     fill_range_with_empty(table, table + num_buckets);
00863     // TODO: read num_elements
00864     for ( size_type i = 0; i < num_elements; ++i ) {
00865       // TODO: read bucket_num
00866       // TODO: set with non-empty, non-deleted value
00867     }
00868     return false;               // TODO
00869   }
00870 
00871   // If your keys and values are simple enough, we can write them to
00872   // disk for you.  "simple enough" means value_type is a POD type
00873   // that contains no pointers.  However, we don't try to normalize
00874   // endianness
00875   bool write_nopointer_data(FILE *fp) const {
00876     for ( const_iterator it = begin(); it != end(); ++it ) {
00877       // TODO: skip empty/deleted values
00878       if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
00879     }
00880     return false;
00881   }
00882 
00883   // When reading, we have to override the potential const-ness of *it
00884   bool read_nopointer_data(FILE *fp) {
00885     for ( iterator it = begin(); it != end(); ++it ) {
00886       // TODO: skip empty/deleted values
00887       if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
00888         return false;
00889     }
00890     return false;
00891   }
00892 
00893  private:
00894   // The actual data
00895   hasher hash;                      // required by hashed_associative_container
00896   key_equal equals;
00897   ExtractKey get_key;
00898   size_type num_deleted;        // how many occupied buckets are marked deleted
00899   bool use_deleted;                          // false until delval has been set
00900   bool use_empty;                          // you must do this before you start
00901   value_type delval;                         // which key marks deleted entries
00902   value_type emptyval;                        // which key marks unused entries
00903   value_type *table;
00904   size_type num_buckets;
00905   size_type num_elements;
00906   size_type shrink_threshold;                     // num_buckets * HT_EMPTY_FLT
00907   size_type enlarge_threshold;                // num_buckets * HT_OCCUPANCY_FLT
00908   bool consider_shrink;   // true if we should try to shrink before next insert
00909 
00910   void reset_thresholds() {
00911     enlarge_threshold = static_cast<size_type>(num_buckets*HT_OCCUPANCY_FLT);
00912     shrink_threshold = static_cast<size_type>(num_buckets*HT_EMPTY_FLT);
00913     consider_shrink = false;   // whatever caused us to reset already considered
00914   }
00915 };
00916 
00917 // We need a global swap as well
00918 template <class V, class K, class HF, class ExK, class EqK, class A>
00919 inline void swap(dense_hashtable<V,K,HF,ExK,EqK,A> &x,
00920                  dense_hashtable<V,K,HF,ExK,EqK,A> &y) {
00921   x.swap(y);
00922 }
00923 
00924 #undef JUMP_
00925 
00926 template <class V, class K, class HF, class ExK, class EqK, class A>
00927 const typename dense_hashtable<V,K,HF,ExK,EqK,A>::size_type
00928   dense_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
00929 
00930 // How full we let the table get before we resize.  Knuth says .8 is
00931 // good -- higher causes us to probe too much, though saves memory
00932 template <class V, class K, class HF, class ExK, class EqK, class A>
00933 const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.5f;
00934 
00935 // How empty we let the table get before we resize lower.
00936 // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
00937 template <class V, class K, class HF, class ExK, class EqK, class A>
00938 const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4f *
00939 dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;
00940 
00941 _END_GOOGLE_NAMESPACE_
00942 
00943 #endif /* _DENSEHASHTABLE_H_ */

util

Skip menu "util"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDevelop Platform Libraries

Skip menu "KDevelop Platform Libraries"
  • interfaces
  • language
  •   codegen
  •   duchain
  •   editor
  • outputview
  • project
  • shell
  • sublime
  • util
  • vcs
Generated for KDevelop Platform Libraries by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal