// Copyright (c) 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // --- // Author: Craig Silverstein // // This is just a very thin wrapper over sparsehashtable.h, just // like sgi stl's stl_hash_set is a very thin wrapper over // stl_hashtable. The major thing we define is operator[], because // we have a concept of a data_type which stl_hashtable doesn't // (it only has a key and a value). // // This is more different from sparse_hash_map than you might think, // because all iterators for sets are const (you obviously can't // change the key, and for sets there is no value). // // We adhere mostly to the STL semantics for hash-map. One important // exception is that insert() may invalidate iterators entirely -- STL // semantics are that insert() may reorder iterators, but they all // still refer to something valid in the hashtable. Not so for us. // Likewise, insert() may invalidate pointers into the hashtable. // (Whether insert invalidates iterators and pointers depends on // whether it results in a hashtable resize). On the plus side, // delete() doesn't invalidate iterators or pointers at all, or even // change the ordering of elements. // // Here are a few "power user" tips: // // 1) set_deleted_key(): // Unlike STL's hash_map, if you want to use erase() you // *must* call set_deleted_key() after construction. // // 2) resize(0): // When an item is deleted, its memory isn't freed right // away. This allows you to iterate over a hashtable, // and call erase(), without invalidating the iterator. // To force the memory to be freed, call resize(0). // For tr1 compatibility, this can also be called as rehash(0). // // 3) min_load_factor(0.0) // Setting the minimum load factor to 0.0 guarantees that // the hash table will never shrink. // // Roughly speaking: // (1) dense_hash_set: fastest, uses the most memory unless entries are small // (2) sparse_hash_set: slowest, uses the least memory // (3) hash_set / unordered_set (STL): in the middle // // Typically I use sparse_hash_set when I care about space and/or when // I need to save the hashtable on disk. I use hash_set otherwise. I // don't personally use dense_hash_set ever; some people use it for // small sets with lots of lookups. // // - dense_hash_set has, typically, about 78% memory overhead (if your // data takes up X bytes, the hash_set uses .78X more bytes in overhead). // - sparse_hash_set has about 4 bits overhead per entry. // - sparse_hash_set can be 3-7 times slower than the others for lookup and, // especially, inserts. See time_hash_map.cc for details. // // See /usr/(local/)?doc/sparsehash-*/sparse_hash_set.html // for information about how to use this class. #ifndef _SPARSE_HASH_SET_H_ #define _SPARSE_HASH_SET_H_ #include #include // for FILE * in read()/write() #include // for the default template args #include // for equal_to #include // for alloc<> #include // for pair<> #include HASH_FUN_H // defined in config.h #include #include _START_GOOGLE_NAMESPACE_ using STL_NAMESPACE::pair; template , // defined in sparseconfig.h class EqualKey = STL_NAMESPACE::equal_to, class Alloc = libc_allocator_with_realloc > class sparse_hash_set { private: // Apparently identity is not stl-standard, so we define our own struct Identity { Value& operator()(Value& v) const { return v; } const Value& operator()(const Value& v) const { return v; } }; struct SetKey { void operator()(Value* value, const Value& new_key) const { *value = new_key; } }; typedef sparse_hashtable ht; ht rep; public: typedef typename ht::key_type key_type; typedef typename ht::value_type value_type; typedef typename ht::hasher hasher; typedef typename ht::key_equal key_equal; typedef Alloc allocator_type; typedef typename ht::size_type size_type; typedef typename ht::difference_type difference_type; typedef typename ht::const_pointer pointer; typedef typename ht::const_pointer const_pointer; typedef typename ht::const_reference reference; typedef typename ht::const_reference const_reference; typedef typename ht::const_iterator iterator; typedef typename ht::const_iterator const_iterator; typedef typename ht::const_local_iterator local_iterator; typedef typename ht::const_local_iterator const_local_iterator; // Iterator functions -- recall all iterators are const iterator begin() const { return rep.begin(); } iterator end() const { return rep.end(); } // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. local_iterator begin(size_type i) const { return rep.begin(i); } local_iterator end(size_type i) const { return rep.end(i); } // Accessor functions allocator_type get_allocator() const { return rep.get_allocator(); } hasher hash_funct() const { return rep.hash_funct(); } hasher hash_function() const { return hash_funct(); } // tr1 name key_equal key_eq() const { return rep.key_eq(); } // Constructors explicit sparse_hash_set(size_type expected_max_items_in_table = 0, const hasher& hf = hasher(), const key_equal& eql = key_equal(), const allocator_type& alloc = allocator_type()) : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { } template sparse_hash_set(InputIterator f, InputIterator l, size_type expected_max_items_in_table = 0, const hasher& hf = hasher(), const key_equal& eql = key_equal(), const allocator_type& alloc = allocator_type()) : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { rep.insert(f, l); } // We use the default copy constructor // We use the default operator=() // We use the default destructor void clear() { rep.clear(); } void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } // Functions concerning size size_type size() const { return rep.size(); } size_type max_size() const { return rep.max_size(); } bool empty() const { return rep.empty(); } size_type bucket_count() const { return rep.bucket_count(); } size_type max_bucket_count() const { return rep.max_bucket_count(); } // These are tr1 methods. bucket() is the bucket the key is or would be in. size_type bucket_size(size_type i) const { return rep.bucket_size(i); } size_type bucket(const key_type& key) const { return rep.bucket(key); } float load_factor() const { return size() * 1.0f / bucket_count(); } float max_load_factor() const { float shrink, grow; rep.get_resizing_parameters(&shrink, &grow); return grow; } void max_load_factor(float new_grow) { float shrink, grow; rep.get_resizing_parameters(&shrink, &grow); rep.set_resizing_parameters(shrink, new_grow); } // These aren't tr1 methods but perhaps ought to be. float min_load_factor() const { float shrink, grow; rep.get_resizing_parameters(&shrink, &grow); return shrink; } void min_load_factor(float new_shrink) { float shrink, grow; rep.get_resizing_parameters(&shrink, &grow); rep.set_resizing_parameters(new_shrink, grow); } // Deprecated; use min_load_factor() or max_load_factor() instead. void set_resizing_parameters(float shrink, float grow) { rep.set_resizing_parameters(shrink, grow); } void resize(size_type hint) { rep.resize(hint); } void rehash(size_type hint) { resize(hint); } // the tr1 name // Lookup routines iterator find(const key_type& key) const { return rep.find(key); } size_type count(const key_type& key) const { return rep.count(key); } pair equal_range(const key_type& key) const { return rep.equal_range(key); } // Insertion routines pair insert(const value_type& obj) { pair p = rep.insert(obj); return pair(p.first, p.second); // const to non-const } template void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } // required for std::insert_iterator; the passed-in iterator is ignored iterator insert(iterator, const value_type& obj) { return insert(obj).first; } // Deletion routines // THESE ARE NON-STANDARD! I make you specify an "impossible" key // value to identify deleted buckets. You can change the key as // time goes on, or get rid of it entirely to be insert-only. void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } void clear_deleted_key() { rep.clear_deleted_key(); } key_type deleted_key() const { return rep.deleted_key(); } // These are standard size_type erase(const key_type& key) { return rep.erase(key); } void erase(iterator it) { rep.erase(it); } void erase(iterator f, iterator l) { rep.erase(f, l); } // Comparison bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } // I/O -- this is an add-on for writing metainformation to disk bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } }; template inline void swap(sparse_hash_set& hs1, sparse_hash_set& hs2) { hs1.swap(hs2); } _END_GOOGLE_NAMESPACE_ #endif /* _SPARSE_HASH_SET_H_ */