• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDevelop Platform Libraries
  • Sitemap
  • Contact Us
 

util

sparsehashtable.h

00001 // Copyright (c) 2005, Google Inc.
00002 // All rights reserved.
00003 //
00004 // Redistribution and use in source and binary forms, with or without
00005 // modification, are permitted provided that the following conditions are
00006 // met:
00007 //
00008 //     * Redistributions of source code must retain the above copyright
00009 // notice, this list of conditions and the following disclaimer.
00010 //     * Redistributions in binary form must reproduce the above
00011 // copyright notice, this list of conditions and the following disclaimer
00012 // in the documentation and/or other materials provided with the
00013 // distribution.
00014 //     * Neither the name of Google Inc. nor the names of its
00015 // contributors may be used to endorse or promote products derived from
00016 // this software without specific prior written permission.
00017 //
00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 // ---
00031 // Author: Craig Silverstein
00032 //
00033 // A sparse hashtable is a particular implementation of
00034 // a hashtable: one that is meant to minimize memory use.
00035 // It does this by using a *sparse table* (cf sparsetable.h),
00036 // which uses between 1 and 2 bits to store empty buckets
00037 // (we may need another bit for hashtables that support deletion).
00038 //
00039 // When empty buckets are so cheap, an appealing hashtable
00040 // implementation is internal probing, in which the hashtable
00041 // is a single table, and collisions are resolved by trying
00042 // to insert again in another bucket.  The most cache-efficient
00043 // internal probing schemes are linear probing (which suffers,
00044 // alas, from clumping) and quadratic probing, which is what
00045 // we implement by default.
00046 //
00047 // Deleted buckets are a bit of a pain.  We have to somehow mark
00048 // deleted buckets (the probing must distinguish them from empty
00049 // buckets).  The most principled way is to have another bitmap,
00050 // but that's annoying and takes up space.  Instead we let the
00051 // user specify an "impossible" key.  We set deleted buckets
00052 // to have the impossible key.
00053 //
00054 // Note it is possible to change the value of the delete key
00055 // on the fly; you can even remove it, though after that point
00056 // the hashtable is insert_only until you set it again.
00057 //
00058 // You probably shouldn't use this code directly.  Use
00059 // <google/sparse_hash_table> or <google/sparse_hash_set> instead.
00060 
00061 // You can change the following below:
00062 // HT_OCCUPANCY_FLT      -- how full before we double size
00063 // HT_EMPTY_FLT          -- how empty before  we halve size
00064 // HT_MIN_BUCKETS        -- smallest bucket size
00065 //
00066 // How to decide what values to use?
00067 // HT_EMPTY_FLT's default of .4 * OCCUPANCY_FLT, is probably good.
00068 // HT_MIN_BUCKETS is probably unnecessary since you can specify
00069 // (indirectly) the starting number of buckets at construct-time.
00070 // For HT_OCCUPANCY_FLT, you can use this chart to try to trade-off
00071 // expected lookup time to the space taken up.  By default, this
00072 // code uses quadratic probing, though you can change it to linear
00073 // via _JUMP below if you really want to.
00074 //
00075 // From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
00076 // NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
00077 // Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
00078 // Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)2]/2
00079 //
00080 // -- HT_OCCUPANCY_FLT --         0.10  0.50  0.60  0.75  0.80  0.90  0.99
00081 // QUADRATIC COLLISION RES.
00082 //    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
00083 //    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
00084 // LINEAR COLLISION RES.
00085 //    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
00086 //    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
00087 //
00088 // The value type is required to be copy constructible and default
00089 // constructible, but it need not be (and commonly isn't) assignable.
00090 
00091 #ifndef _SPARSEHASHTABLE_H_
00092 #define _SPARSEHASHTABLE_H_
00093 
00094 #ifndef SPARSEHASH_STAT_UPDATE
00095 #define SPARSEHASH_STAT_UPDATE(x) ((void) 0)
00096 #endif
00097 
00098 // The probing method
00099 // Linear probing
00100 // #define JUMP_(key, num_probes)    ( 1 )
00101 // Quadratic-ish probing
00102 #define JUMP_(key, num_probes)    ( num_probes )
00103 
00104 
00105 // Hashtable class, used to implement the hashed associative containers
00106 // hash_set and hash_map.
00107 
00108 #if defined(Q_OS_WIN)
00109 #include "sparseconfig_windows.h"
00110 #else
00111 #include "sparseconfig.h"
00112 #endif
00113 #include <assert.h>
00114 #include <algorithm>              // For swap(), eg
00115 #include <iterator>               // for facts about iterator tags
00116 #include <utility>                // for pair<>
00117 #include <google/sparsetable>     // Since that's basically what we are
00118 
00119 _START_GOOGLE_NAMESPACE_
00120 
00121 using STL_NAMESPACE::pair;
00122 
00123 // Alloc is completely ignored.  It is present as a template parameter only
00124 // for the sake of being compatible with the old SGI hashtable interface.
00125 // TODO(csilvers): is that the right thing to do?
00126 
00127 template <class Value, class Key, class HashFcn,
00128           class ExtractKey, class EqualKey, class Alloc>
00129 class sparse_hashtable;
00130 
00131 template <class V, class K, class HF, class ExK, class EqK, class A>
00132 struct sparse_hashtable_iterator;
00133 
00134 template <class V, class K, class HF, class ExK, class EqK, class A>
00135 struct sparse_hashtable_const_iterator;
00136 
00137 // As far as iterating, we're basically just a sparsetable
00138 // that skips over deleted elements.
00139 template <class V, class K, class HF, class ExK, class EqK, class A>
00140 struct sparse_hashtable_iterator {
00141  public:
00142   typedef sparse_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
00143   typedef sparse_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
00144   typedef typename sparsetable<V>::nonempty_iterator        st_iterator;
00145 
00146   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00147   typedef V value_type;
00148   typedef ptrdiff_t difference_type;
00149   typedef size_t size_type;
00150   typedef V& reference;                // Value
00151   typedef V* pointer;
00152 
00153   // "Real" constructor and default constructor
00154   sparse_hashtable_iterator(const sparse_hashtable<V,K,HF,ExK,EqK,A> *h,
00155                             st_iterator it, st_iterator it_end)
00156     : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
00157   sparse_hashtable_iterator() { }      // not ever used internally
00158   // The default destructor is fine; we don't define one
00159   // The default operator= is fine; we don't define one
00160 
00161   // Happy dereferencer
00162   reference operator*() const { return *pos; }
00163   pointer operator->() const { return &(operator*()); }
00164 
00165   // Arithmetic.  The only hard part is making sure that
00166   // we're not on a marked-deleted array element
00167   void advance_past_deleted() {
00168     while ( pos != end && ht->test_deleted(*this) )
00169       ++pos;
00170   }
00171   iterator& operator++()   {
00172     assert(pos != end); ++pos; advance_past_deleted(); return *this;
00173   }
00174   iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
00175 
00176   // Comparison.
00177   bool operator==(const iterator& it) const { return pos == it.pos; }
00178   bool operator!=(const iterator& it) const { return pos != it.pos; }
00179 
00180 
00181   // The actual data
00182   const sparse_hashtable<V,K,HF,ExK,EqK,A> *ht;
00183   st_iterator pos, end;
00184 };
00185 
00186 // Now do it all again, but with const-ness!
00187 template <class V, class K, class HF, class ExK, class EqK, class A>
00188 struct sparse_hashtable_const_iterator {
00189  public:
00190   typedef sparse_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
00191   typedef sparse_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
00192   typedef typename sparsetable<V>::const_nonempty_iterator  st_iterator;
00193 
00194   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00195   typedef V value_type;
00196   typedef ptrdiff_t difference_type;
00197   typedef size_t size_type;
00198   typedef const V& reference;                // Value
00199   typedef const V* pointer;
00200 
00201   // "Real" constructor and default constructor
00202   sparse_hashtable_const_iterator(const sparse_hashtable<V,K,HF,ExK,EqK,A> *h,
00203                                   st_iterator it, st_iterator it_end)
00204     : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
00205   // This lets us convert regular iterators to const iterators
00206   sparse_hashtable_const_iterator() { }      // never used internally
00207   sparse_hashtable_const_iterator(const iterator &it)
00208     : ht(it.ht), pos(it.pos), end(it.end) { }
00209   // The default destructor is fine; we don't define one
00210   // The default operator= is fine; we don't define one
00211 
00212   // Happy dereferencer
00213   reference operator*() const { return *pos; }
00214   pointer operator->() const { return &(operator*()); }
00215 
00216   // Arithmetic.  The only hard part is making sure that
00217   // we're not on a marked-deleted array element
00218   void advance_past_deleted() {
00219     while ( pos != end && ht->test_deleted(*this) )
00220       ++pos;
00221   }
00222   const_iterator& operator++() {
00223     assert(pos != end); ++pos; advance_past_deleted(); return *this;
00224   }
00225   const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }
00226 
00227   // Comparison.
00228   bool operator==(const const_iterator& it) const { return pos == it.pos; }
00229   bool operator!=(const const_iterator& it) const { return pos != it.pos; }
00230 
00231 
00232   // The actual data
00233   const sparse_hashtable<V,K,HF,ExK,EqK,A> *ht;
00234   st_iterator pos, end;
00235 };
00236 
00237 // And once again, but this time freeing up memory as we iterate
00238 template <class V, class K, class HF, class ExK, class EqK, class A>
00239 struct sparse_hashtable_destructive_iterator {
00240  public:
00241   typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,EqK,A> iterator;
00242   typedef typename sparsetable<V>::destructive_iterator     st_iterator;
00243 
00244   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
00245   typedef V value_type;
00246   typedef ptrdiff_t difference_type;
00247   typedef size_t size_type;
00248   typedef V& reference;                // Value
00249   typedef V* pointer;
00250 
00251   // "Real" constructor and default constructor
00252   sparse_hashtable_destructive_iterator(const
00253                                         sparse_hashtable<V,K,HF,ExK,EqK,A> *h,
00254                                         st_iterator it, st_iterator it_end)
00255     : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
00256   sparse_hashtable_destructive_iterator() { }          // never used internally
00257   // The default destructor is fine; we don't define one
00258   // The default operator= is fine; we don't define one
00259 
00260   // Happy dereferencer
00261   reference operator*() const { return *pos; }
00262   pointer operator->() const { return &(operator*()); }
00263 
00264   // Arithmetic.  The only hard part is making sure that
00265   // we're not on a marked-deleted array element
00266   void advance_past_deleted() {
00267     while ( pos != end && ht->test_deleted(*this) )
00268       ++pos;
00269   }
00270   iterator& operator++()   {
00271     assert(pos != end); ++pos; advance_past_deleted(); return *this;
00272   }
00273   iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
00274 
00275   // Comparison.
00276   bool operator==(const iterator& it) const { return pos == it.pos; }
00277   bool operator!=(const iterator& it) const { return pos != it.pos; }
00278 
00279 
00280   // The actual data
00281   const sparse_hashtable<V,K,HF,ExK,EqK,A> *ht;
00282   st_iterator pos, end;
00283 };
00284 
00285 
00286 template <class Value, class Key, class HashFcn,
00287           class ExtractKey, class EqualKey, class Alloc>
00288 class sparse_hashtable {
00289  public:
00290   typedef Key key_type;
00291   typedef Value value_type;
00292   typedef HashFcn hasher;
00293   typedef EqualKey key_equal;
00294 
00295   typedef size_t            size_type;
00296   typedef ptrdiff_t         difference_type;
00297   typedef value_type*       pointer;
00298   typedef const value_type* const_pointer;
00299   typedef value_type&       reference;
00300   typedef const value_type& const_reference;
00301   typedef sparse_hashtable_iterator<Value, Key, HashFcn,
00302                                     ExtractKey, EqualKey, Alloc>
00303   iterator;
00304 
00305   typedef sparse_hashtable_const_iterator<Value, Key, HashFcn,
00306                                           ExtractKey, EqualKey, Alloc>
00307   const_iterator;
00308 
00309   typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn,
00310                                                 ExtractKey, EqualKey, Alloc>
00311   destructive_iterator;
00312 
00313   // How full we let the table get before we resize.  Knuth says .8 is
00314   // good -- higher causes us to probe too much, though saves memory
00315   static const float HT_OCCUPANCY_FLT; // = 0.8f;
00316 
00317   // How empty we let the table get before we resize lower.
00318   // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
00319   static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT;
00320 
00321   // Minimum size we're willing to let hashtables be.
00322   // Must be a power of two, and at least 4.
00323   // Note, however, that for a given hashtable, the minimum size is
00324   // determined by the first constructor arg, and may be >HT_MIN_BUCKETS.
00325   static const size_t HT_MIN_BUCKETS = 32;
00326 
00327 
00328   // ITERATOR FUNCTIONS
00329   iterator begin()             { return iterator(this, table.nonempty_begin(),
00330                                                  table.nonempty_end()); }
00331   iterator end()               { return iterator(this, table.nonempty_end(),
00332                                                  table.nonempty_end()); }
00333   const_iterator begin() const { return const_iterator(this,
00334                                                        table.nonempty_begin(),
00335                                                        table.nonempty_end()); }
00336   const_iterator end() const   { return const_iterator(this,
00337                                                        table.nonempty_end(),
00338                                                        table.nonempty_end()); }
00339 
00340   // This is used when resizing
00341   destructive_iterator destructive_begin() {
00342     return destructive_iterator(this, table.destructive_begin(),
00343                                 table.destructive_end());
00344   }
00345   destructive_iterator destructive_end() {
00346     return destructive_iterator(this, table.destructive_end(),
00347                                 table.destructive_end());
00348   }
00349 
00350 
00351   // ACCESSOR FUNCTIONS for the things we templatize on, basically
00352   hasher hash_funct() const { return hash; }
00353   key_equal key_eq() const  { return equals; }
00354 
00355   // We need to copy values when we set the special marker for deleted
00356   // elements, but, annoyingly, we can't just use the copy assignment
00357   // operator because value_type might not be assignable (it's often
00358   // pair<const X, Y>).  We use explicit destructor invocation and
00359   // placement new to get around this.  Arg.
00360  private:
00361   void set_value(value_type* dst, const value_type src) {
00362     dst->~value_type();   // delete the old value, if any
00363     new(dst) value_type(src);
00364   }
00365 
00366   // This is used as a tag for the copy constructor, saying to destroy its
00367   // arg We have two ways of destructively copying: with potentially growing
00368   // the hashtable as we copy, and without.  To make sure the outside world
00369   // can't do a destructive copy, we make the typename private.
00370   enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};
00371 
00372 
00373   // DELETE HELPER FUNCTIONS
00374   // This lets the user describe a key that will indicate deleted
00375   // table entries.  This key should be an "impossible" entry --
00376   // if you try to insert it for real, you won't be able to retrieve it!
00377   // (NB: while you pass in an entire value, only the key part is looked
00378   // at.  This is just because I don't know how to assign just a key.)
00379  private:
00380   void squash_deleted() {           // gets rid of any deleted entries we have
00381     if ( num_deleted ) {            // get rid of deleted before writing
00382       sparse_hashtable tmp(MoveDontGrow, *this);
00383       swap(tmp);                    // now we are tmp
00384     }
00385     assert(num_deleted == 0);
00386   }
00387 
00388  public:
00389   void set_deleted_key(const value_type &val) {
00390     // It's only safe to change what "deleted" means if we purge deleted guys
00391     squash_deleted();
00392     use_deleted = true;
00393     set_value(&delval, val);        // save the key (and rest of val too)
00394   }
00395   void clear_deleted_key() {
00396     squash_deleted();
00397     use_deleted = false;
00398   }
00399 
00400   // These are public so the iterators can use them
00401   // True if the item at position bucknum is "deleted" marker
00402   bool test_deleted(size_type bucknum) const {
00403     // The num_deleted test is crucial for read(): after read(), the ht values
00404     // are garbage, and we don't want to think some of them are deleted.
00405     return (use_deleted && num_deleted > 0 && table.test(bucknum) &&
00406             equals(get_key(delval), get_key(table.get(bucknum))));
00407   }
00408   bool test_deleted(const iterator &it) const {
00409     return (use_deleted && num_deleted > 0 &&
00410             equals(get_key(delval), get_key(*it)));
00411   }
00412   bool test_deleted(const const_iterator &it) const {
00413     return (use_deleted && num_deleted > 0 &&
00414             equals(get_key(delval), get_key(*it)));
00415   }
00416   bool test_deleted(const destructive_iterator &it) const {
00417     return (use_deleted && num_deleted > 0 &&
00418             equals(get_key(delval), get_key(*it)));
00419   }
00420   // Set it so test_deleted is true.  true if object didn't used to be deleted
00421   // See below (at erase()) to explain why we allow const_iterators
00422   bool set_deleted(const_iterator &it) {
00423     assert(use_deleted);             // bad if set_deleted_key() wasn't called
00424     bool retval = !test_deleted(it);
00425     // &* converts from iterator to value-type
00426     set_value(const_cast<value_type*>(&(*it)), delval);
00427     return retval;
00428   }
00429   // Set it so test_deleted is false.  true if object used to be deleted
00430   bool clear_deleted(const_iterator &it) {
00431     assert(use_deleted);             // bad if set_deleted_key() wasn't called
00432     // happens automatically when we assign something else in its place
00433     return test_deleted(it);
00434   }
00435 
00436 
00437   // FUNCTIONS CONCERNING SIZE
00438   size_type size() const      { return table.num_nonempty() - num_deleted; }
00439   // Buckets are always a power of 2
00440   size_type max_size() const  { return (size_type(-1) >> 1U) + 1; }
00441   bool empty() const          { return size() == 0; }
00442   size_type bucket_count() const      { return table.size(); }
00443   size_type max_bucket_count() const  { return max_size(); }
00444 
00445  private:
00446   // Because of the above, size_type(-1) is never legal; use it for errors
00447   static const size_type ILLEGAL_BUCKET = size_type(-1);
00448 
00449  private:
00450   // This is the smallest size a hashtable can be without being too crowded
00451   // If you like, you can give a min #buckets as well as a min #elts
00452   size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
00453     size_type sz = HT_MIN_BUCKETS;
00454     while ( sz < min_buckets_wanted || num_elts >= sz * HT_OCCUPANCY_FLT )
00455       sz *= 2;
00456     return sz;
00457   }
00458 
00459   // Used after a string of deletes
00460   void maybe_shrink() {
00461     assert(table.num_nonempty() >= num_deleted);
00462     assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
00463     assert(bucket_count() >= HT_MIN_BUCKETS);
00464 
00465     if ( (table.num_nonempty()-num_deleted) <= shrink_threshold &&
00466          bucket_count() > HT_MIN_BUCKETS ) {
00467       size_type sz = bucket_count() / 2;    // find how much we should shrink
00468       while ( sz > HT_MIN_BUCKETS &&
00469               (table.num_nonempty() - num_deleted) <= sz * HT_EMPTY_FLT )
00470         sz /= 2;                            // stay a power of 2
00471       sparse_hashtable tmp(MoveDontCopy, *this, sz);
00472       swap(tmp);                            // now we are tmp
00473     }
00474     consider_shrink = false;                // because we just considered it
00475   }
00476 
00477   // We'll let you resize a hashtable -- though this makes us copy all!
00478   // When you resize, you say, "make it big enough for this many more elements"
00479   void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
00480     if ( consider_shrink )                   // see if lots of deletes happened
00481       maybe_shrink();
00482     if ( bucket_count() > min_buckets_wanted &&
00483          (table.num_nonempty() + delta) <= enlarge_threshold )
00484       return;                                // we're ok as we are
00485 
00486     // Sometimes, we need to resize just to get rid of all the
00487     // "deleted" buckets that are clogging up the hashtable.  So when
00488     // deciding whether to resize, count the deleted buckets (which
00489     // are currently taking up room).  But later, when we decide what
00490     // size to resize to, *don't* count deleted buckets, since they
00491     // get discarded during the resize.
00492     const size_type needed_size = min_size(table.num_nonempty() + delta,
00493                                            min_buckets_wanted);
00494     if ( needed_size > bucket_count() ) {      // we don't have enough buckets
00495       const size_type resize_to = min_size(table.num_nonempty() - num_deleted
00496                                            + delta, min_buckets_wanted);
00497       sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
00498       swap(tmp);                             // now we are tmp
00499     }
00500   }
00501 
00502   // Used to actually do the rehashing when we grow/shrink a hashtable
00503   void copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted = 0) {
00504     clear();            // clear table, set num_deleted to 0
00505 
00506     // If we need to change the size of our table, do it now
00507     const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
00508     if ( resize_to > bucket_count() ) {      // we don't have enough buckets
00509       table.resize(resize_to);               // sets the number of buckets
00510       reset_thresholds();
00511     }
00512 
00513     // We use a normal iterator to get non-deleted bcks from ht
00514     // We could use insert() here, but since we know there are
00515     // no duplicates and no deleted items, we can be more efficient
00516     assert( (bucket_count() & (bucket_count()-1)) == 0);      // a power of two
00517     for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
00518       size_type num_probes = 0;              // how many times we've probed
00519       size_type bucknum;
00520       const size_type bucket_count_minus_one = bucket_count() - 1;
00521       for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
00522            table.test(bucknum);                          // not empty
00523            bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
00524         ++num_probes;
00525         assert(num_probes < bucket_count()); // or else the hashtable is full
00526       }
00527       table.set(bucknum, *it);               // copies the value to here
00528     }
00529   }
00530 
00531   // Implementation is like copy_from, but it destroys the table of the
00532   // "from" guy by freeing sparsetable memory as we iterate.  This is
00533   // useful in resizing, since we're throwing away the "from" guy anyway.
00534   void move_from(MoveDontCopyT mover, sparse_hashtable &ht,
00535                  size_type min_buckets_wanted = 0) {
00536     clear();            // clear table, set num_deleted to 0
00537 
00538     // If we need to change the size of our table, do it now
00539     size_t resize_to;
00540     if ( mover == MoveDontGrow )
00541       resize_to = ht.bucket_count();         // keep same size as old ht
00542     else                                     // MoveDontCopy
00543       resize_to = min_size(ht.size(), min_buckets_wanted);
00544     if ( resize_to > bucket_count() ) {      // we don't have enough buckets
00545       table.resize(resize_to);               // sets the number of buckets
00546       reset_thresholds();
00547     }
00548 
00549     // We use a normal iterator to get non-deleted bcks from ht
00550     // We could use insert() here, but since we know there are
00551     // no duplicates and no deleted items, we can be more efficient
00552     assert( (bucket_count() & (bucket_count()-1)) == 0);      // a power of two
00553     // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
00554     for ( destructive_iterator it = ht.destructive_begin();
00555           it != ht.destructive_end(); ++it ) {
00556       size_type num_probes = 0;              // how many times we've probed
00557       size_type bucknum;
00558       for ( bucknum = hash(get_key(*it)) & (bucket_count()-1);  // h % buck_cnt
00559             table.test(bucknum);                          // not empty
00560             bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1) ) {
00561         ++num_probes;
00562         assert(num_probes < bucket_count()); // or else the hashtable is full
00563       }
00564       table.set(bucknum, *it);               // copies the value to here
00565     }
00566   }
00567 
00568 
00569   // Required by the spec for hashed associative container
00570  public:
00571   // Though the docs say this should be num_buckets, I think it's much
00572   // more useful as num_elements.  As a special feature, calling with
00573   // req_elements==0 will cause us to shrink if we can, saving space.
00574   void resize(size_type req_elements) {       // resize to this or larger
00575     if ( consider_shrink || req_elements == 0 )
00576       maybe_shrink();
00577     if ( req_elements > table.num_nonempty() )    // we only grow
00578       resize_delta(req_elements - table.num_nonempty(), 0);
00579   }
00580 
00581 
00582   // CONSTRUCTORS -- as required by the specs, we take a size,
00583   // but also let you specify a hashfunction, key comparator,
00584   // and key extractor.  We also define a copy constructor and =.
00585   // DESTRUCTOR -- the default is fine, surprisingly.
00586   explicit sparse_hashtable(size_type n = 0,
00587                             const HashFcn& hf = HashFcn(),
00588                             const EqualKey& eql = EqualKey(),
00589                             const ExtractKey& ext = ExtractKey())
00590     : hash(hf), equals(eql), get_key(ext), num_deleted(0),
00591       use_deleted(false), delval(), table(min_size(0, n)) {    // start small
00592     reset_thresholds();
00593   }
00594 
00595   // As a convenience for resize(), we allow an optional second argument
00596   // which lets you make this new hashtable a different size than ht.
00597   // We also provide a mechanism of saying you want to "move" the ht argument
00598   // into us instead of copying.
00599   sparse_hashtable(const sparse_hashtable& ht, size_type min_buckets_wanted = 0)
00600     : hash(ht.hash), equals(ht.equals), get_key(ht.get_key),
00601       num_deleted(0), use_deleted(ht.use_deleted), delval(ht.delval), table() {
00602     reset_thresholds();
00603     copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
00604   }
00605   sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
00606                    size_type min_buckets_wanted=0)
00607     : hash(ht.hash), equals(ht.equals), get_key(ht.get_key),
00608       num_deleted(0), use_deleted(ht.use_deleted), delval(ht.delval), table() {
00609     reset_thresholds();
00610     move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
00611   }
00612 
00613   sparse_hashtable& operator= (const sparse_hashtable& ht) {
00614     if (&ht == this)  return *this;        // don't copy onto ourselves
00615     clear();
00616     hash = ht.hash;
00617     equals = ht.equals;
00618     get_key = ht.get_key;
00619     use_deleted = ht.use_deleted;
00620     set_value(&delval, ht.delval);
00621     copy_from(ht);                         // sets num_deleted to 0 too
00622     return *this;
00623   }
00624 
00625   // Many STL algorithms use swap instead of copy constructors
00626   void swap(sparse_hashtable& ht) {
00627     STL_NAMESPACE::swap(hash, ht.hash);
00628     STL_NAMESPACE::swap(equals, ht.equals);
00629     STL_NAMESPACE::swap(get_key, ht.get_key);
00630     STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
00631     STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
00632     { value_type tmp;     // for annoying reasons, swap() doesn't work
00633       set_value(&tmp, delval);
00634       set_value(&delval, ht.delval);
00635       set_value(&ht.delval, tmp);
00636     }
00637     table.swap(ht.table);
00638     reset_thresholds();
00639     ht.reset_thresholds();
00640   }
00641 
00642   // It's always nice to be able to clear a table without deallocating it
00643   void clear() {
00644     table.clear();
00645     reset_thresholds();
00646     num_deleted = 0;
00647   }
00648 
00649 
00650   // LOOKUP ROUTINES
00651  private:
00652   // Returns a pair of positions: 1st where the object is, 2nd where
00653   // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
00654   // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
00655   // Note: because of deletions where-to-insert is not trivial: it's the
00656   // first deleted bucket we see, as long as we don't find the key later
00657   pair<size_type, size_type> find_position(const key_type &key) const {
00658     size_type num_probes = 0;              // how many times we've probed
00659     const size_type bucket_count_minus_one = bucket_count() - 1;
00660     size_type bucknum = hash(key) & bucket_count_minus_one;
00661     size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
00662     SPARSEHASH_STAT_UPDATE(total_lookups += 1);
00663     while ( 1 ) {                          // probe until something happens
00664       if ( !table.test(bucknum) ) {        // bucket is empty
00665         SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
00666         if ( insert_pos == ILLEGAL_BUCKET )  // found no prior place to insert
00667           return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
00668         else
00669           return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
00670 
00671       } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
00672         if ( insert_pos == ILLEGAL_BUCKET )
00673           insert_pos = bucknum;
00674 
00675       } else if ( equals(key, get_key(table.get(bucknum))) ) {
00676         SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
00677         return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
00678       }
00679       ++num_probes;                        // we're doing another probe
00680       bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
00681       assert(num_probes < bucket_count()); // don't probe too many times!
00682     }
00683   }
00684 
00685  public:
00686   iterator find(const key_type& key) {
00687     if ( size() == 0 ) return end();
00688     pair<size_type, size_type> pos = find_position(key);
00689     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
00690       return end();
00691     else
00692       return iterator(this, table.get_iter(pos.first), table.nonempty_end());
00693   }
00694 
00695   const_iterator find(const key_type& key) const {
00696     if ( size() == 0 ) return end();
00697     pair<size_type, size_type> pos = find_position(key);
00698     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
00699       return end();
00700     else
00701       return const_iterator(this,
00702                             table.get_iter(pos.first), table.nonempty_end());
00703   }
00704 
00705   // Counts how many elements have key key.  For maps, it's either 0 or 1.
00706   size_type count(const key_type &key) const {
00707     pair<size_type, size_type> pos = find_position(key);
00708     return pos.first == ILLEGAL_BUCKET ? 0 : 1;
00709   }
00710 
00711   // Likewise, equal_range doesn't really make sense for us.  Oh well.
00712   pair<iterator,iterator> equal_range(const key_type& key) {
00713     const iterator pos = find(key);      // either an iterator or end
00714     return pair<iterator,iterator>(pos, pos);
00715   }
00716   pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
00717     const const_iterator pos = find(key);      // either an iterator or end
00718     return pair<iterator,iterator>(pos, pos);
00719   }
00720 
00721 
00722   // INSERTION ROUTINES
00723  private:
00724   // If you know *this is big enough to hold obj, use this routine
00725   pair<iterator, bool> insert_noresize(const value_type& obj) {
00726     const pair<size_type,size_type> pos = find_position(get_key(obj));
00727     if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
00728       return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
00729                                           table.nonempty_end()),
00730                                  false);          // false: we didn't insert
00731     } else {                                 // pos.second says where to put it
00732       if ( test_deleted(pos.second) ) {      // just replace if it's been del.
00733         // The set() below will undelete this object.  We just worry about stats
00734         assert(num_deleted > 0);
00735         --num_deleted;                       // used to be, now it isn't
00736       }
00737       table.set(pos.second, obj);
00738       return pair<iterator,bool>(iterator(this, table.get_iter(pos.second),
00739                                           table.nonempty_end()),
00740                                  true);           // true: we did insert
00741     }
00742   }
00743 
00744  public:
00745   // This is the normal insert routine, used by the outside world
00746   pair<iterator, bool> insert(const value_type& obj) {
00747     resize_delta(1);                      // adding an object, grow if need be
00748     return insert_noresize(obj);
00749   }
00750 
00751   // When inserting a lot at a time, we specialize on the type of iterator
00752   template <class InputIterator>
00753   void insert(InputIterator f, InputIterator l) {
00754     // specializes on iterator type
00755     insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
00756   }
00757 
00758   // Iterator supports operator-, resize before inserting
00759   template <class ForwardIterator>
00760   void insert(ForwardIterator f, ForwardIterator l,
00761               STL_NAMESPACE::forward_iterator_tag) {
00762     size_type n = STL_NAMESPACE::distance(f, l);   // TODO(csilvers): standard?
00763     resize_delta(n);
00764     for ( ; n > 0; --n, ++f)
00765       insert_noresize(*f);
00766   }
00767 
00768   // Arbitrary iterator, can't tell how much to resize
00769   template <class InputIterator>
00770   void insert(InputIterator f, InputIterator l,
00771               STL_NAMESPACE::input_iterator_tag) {
00772     for ( ; f != l; ++f)
00773       insert(*f);
00774   }
00775 
00776 
00777   // DELETION ROUTINES
00778   size_type erase(const key_type& key) {
00779     const_iterator pos = find(key);   // shrug: shouldn't need to be const
00780     if ( pos != end() ) {
00781       assert(!test_deleted(pos));  // or find() shouldn't have returned it
00782       set_deleted(pos);
00783       ++num_deleted;
00784       consider_shrink = true;      // will think about shrink after next insert
00785       return 1;                    // because we deleted one thing
00786     } else {
00787       return 0;                    // because we deleted nothing
00788     }
00789   }
00790 
00791   // This is really evil: really it should be iterator, not const_iterator.
00792   // But...the only reason keys are const is to allow lookup.
00793   // Since that's a moot issue for deleted keys, we allow const_iterators
00794   void erase(const_iterator pos) {
00795     if ( pos == end() ) return;    // sanity check
00796     if ( set_deleted(pos) ) {      // true if object has been newly deleted
00797       ++num_deleted;
00798       consider_shrink = true;      // will think about shrink after next insert
00799     }
00800   }
00801 
00802   void erase(const_iterator f, const_iterator l) {
00803     for ( ; f != l; ++f) {
00804       if ( set_deleted(f)  )       // should always be true
00805         ++num_deleted;
00806     }
00807     consider_shrink = true;        // will think about shrink after next insert
00808   }
00809 
00810 
00811   // COMPARISON
00812   bool operator==(const sparse_hashtable& ht) const {
00813     // We really want to check that the hash functions are the same
00814     // but alas there's no way to do this.  We just hope.
00815     return ( num_deleted == ht.num_deleted && table == ht.table );
00816   }
00817   bool operator!=(const sparse_hashtable& ht) const {
00818     return !(*this == ht);
00819   }
00820 
00821 
00822   // I/O
00823   // We support reading and writing hashtables to disk.  NOTE that
00824   // this only stores the hashtable metadata, not the stuff you've
00825   // actually put in the hashtable!  Alas, since I don't know how to
00826   // write a hasher or key_equal, you have to make sure everything
00827   // but the table is the same.  We compact before writing.
00828   bool write_metadata(FILE *fp) {
00829     squash_deleted();           // so we don't have to worry about delkey
00830     return table.write_metadata(fp);
00831   }
00832 
00833   bool read_metadata(FILE *fp) {
00834     num_deleted = 0;            // since we got rid before writing
00835     bool result = table.read_metadata(fp);
00836     reset_thresholds();
00837     return result;
00838   }
00839 
00840   // Only meaningful if value_type is a POD.
00841   bool write_nopointer_data(FILE *fp) {
00842     return table.write_nopointer_data(fp);
00843   }
00844 
00845   // Only meaningful if value_type is a POD.
00846   bool read_nopointer_data(FILE *fp) {
00847     return table.read_nopointer_data(fp);
00848   }
00849 
00850  private:
00851   // The actual data
00852   hasher hash;                      // required by hashed_associative_container
00853   key_equal equals;
00854   ExtractKey get_key;
00855   size_type num_deleted;        // how many occupied buckets are marked deleted
00856   bool use_deleted;                          // false until delval has been set
00857   value_type delval;                         // which key marks deleted entries
00858   sparsetable<value_type> table;      // holds num_buckets and num_elements too
00859   size_type shrink_threshold;                    // table.size() * HT_EMPTY_FLT
00860   size_type enlarge_threshold;               // table.size() * HT_OCCUPANCY_FLT
00861   bool consider_shrink;   // true if we should try to shrink before next insert
00862 
00863   void reset_thresholds() {
00864     enlarge_threshold = static_cast<size_type>(table.size()*HT_OCCUPANCY_FLT);
00865     shrink_threshold = static_cast<size_type>(table.size()*HT_EMPTY_FLT);
00866     consider_shrink = false;   // whatever caused us to reset already considered
00867   }
00868 };
00869 
00870 // We need a global swap as well
00871 template <class V, class K, class HF, class ExK, class EqK, class A>
00872 inline void swap(sparse_hashtable<V,K,HF,ExK,EqK,A> &x,
00873                  sparse_hashtable<V,K,HF,ExK,EqK,A> &y) {
00874   x.swap(y);
00875 }
00876 
00877 #undef JUMP_
00878 
00879 template <class V, class K, class HF, class ExK, class EqK, class A>
00880 const typename sparse_hashtable<V,K,HF,ExK,EqK,A>::size_type
00881   sparse_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
00882 
00883 // How full we let the table get before we resize.  Knuth says .8 is
00884 // good -- higher causes us to probe too much, though saves memory
00885 template <class V, class K, class HF, class ExK, class EqK, class A>
00886 const float sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.8f;
00887 
00888 // How empty we let the table get before we resize lower.
00889 // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
00890 template <class V, class K, class HF, class ExK, class EqK, class A>
00891 const float sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4f *
00892 sparse_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;
00893 
00894 _END_GOOGLE_NAMESPACE_
00895 
00896 #endif /* _SPARSEHASHTABLE_H_ */

util

Skip menu "util"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDevelop Platform Libraries

Skip menu "KDevelop Platform Libraries"
  • interfaces
  • language
  •   codegen
  •   duchain
  •   editor
  • outputview
  • project
  • shell
  • sublime
  • util
  • vcs
Generated for KDevelop Platform Libraries by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal