Ticket #8915: hashmap-py4.patch

File hashmap-py4.patch, 21.0 KB (added by fingolfin, 16 years ago)

4th revision

  • common/memorypool.h

     
    3232namespace Common {
    3333
    3434class MemoryPool {
    35 private:
     35protected:
    3636        MemoryPool(const MemoryPool&);
    3737        MemoryPool& operator=(const MemoryPool&);
     38       
     39        struct Page {
     40                void *start;
     41                size_t numChunks;
     42        };
    3843
    3944        size_t                  _chunkSize;
    40         Array<void*>    _pages;
    41         void*                   _next;
     45        Array<Page>             _pages;
     46        void                    *_next;
     47        size_t                  _chunksPerPage;
    4248
    43         void*   allocPage();
    44         bool    isPointerInPage(void* ptr, void* page);
     49        void    allocPage();
     50        void    addPageToPool(const Page &page);
     51        bool    isPointerInPage(void *ptr, const Page &page);
     52
    4553public:
    4654        MemoryPool(size_t chunkSize);
    4755        ~MemoryPool();
    4856
    49         void*   malloc();
    50         void    free(void* ptr);
     57        void    *malloc();
     58        void    free(void *ptr);
    5159
    5260        void    freeUnusedPages();
    5361};
    5462
     63template<size_t CHUNK_SIZE, size_t NUM_INTERNAL_CHUNKS = 32>
     64class FixedSizeMemoryPool : public MemoryPool {
     65private:
     66        enum {
     67                REAL_CHUNK_SIZE = (CHUNK_SIZE + sizeof(void*) - 1) & (~(sizeof(void*) - 1))
     68        };
     69
     70        byte    _storage[NUM_INTERNAL_CHUNKS * REAL_CHUNK_SIZE];
     71public:
     72        FixedSizeMemoryPool() : MemoryPool(CHUNK_SIZE) {
     73                assert(REAL_CHUNK_SIZE == _chunkSize);
     74                // Insert some static storage
     75                Page internalPage = { _storage, NUM_INTERNAL_CHUNKS };
     76                addPageToPool(internalPage);
     77        }
     78};
     79
    5580}       // End of namespace Common
    5681
    5782#endif
  • common/hashmap.cpp

     
    2424 */
    2525
    2626// The hash map (associative array) implementation in this file is
    27 // based on code by Andrew Y. Ng, 1996:
     27// based on the PyDict implementation of CPython. The erase() method
     28// is based on example code in the Wikipedia article on Hash tables.
    2829
    29 /*
    30  * Copyright (c) 1998-2003 Massachusetts Institute of Technology.
    31  * This code was developed as part of the Haystack research project
    32  * (http://haystack.lcs.mit.edu/). Permission is hereby granted,
    33  * free of charge, to any person obtaining a copy of this software
    34  * and associated documentation files (the "Software"), to deal in
    35  * the Software without restriction, including without limitation
    36  * the rights to use, copy, modify, merge, publish, distribute,
    37  * sublicense, and/or sell copies of the Software, and to permit
    38  * persons to whom the Software is furnished to do so, subject to
    39  * the following conditions:
    40  *
    41  * The above copyright notice and this permission notice shall be
    42  * included in all copies or substantial portions of the Software.
    43  *
    44  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    45  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    46  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    47  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    48  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    49  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    50  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    51  * OTHER DEALINGS IN THE SOFTWARE.
    52  */
    53 
    5430#include "common/hashmap.h"
    5531
    5632namespace Common {
    5733
    58 // const char *:
     34// Hash function for strings, taken from CPython.
    5935uint hashit(const char *p) {
    60         uint hash = 0;
     36        uint hash = *p << 7;
    6137        byte c;
    62         while ((c = *p++))
    63                 hash = (hash * 31 + c);
    64         return hash;
     38        int size = 0;
     39        while ((c = *p++)) {
     40                hash = (1000003 * hash) ^ c;
     41                size++;
     42        }
     43        return hash ^ size;
    6544}
    6645
     46// Like hashit, but converts every char to lowercase before hashing.
    6747uint hashit_lower(const char *p) {
    68         uint hash = 0;
     48        uint hash = tolower(*p) << 7;
    6949        byte c;
    70         while ((c = *p++))
    71                 hash = (hash * 31 + tolower(c));
    72         return hash;
     50        int size = 0;
     51        while ((c = *p++)) {
     52                hash = (1000003 * hash) ^ tolower(c);
     53                size++;
     54        }
     55        return hash ^ size;
    7356}
    7457
    75 // The following table is taken from the GNU ISO C++ Library's hashtable.h file.
    76 static const uint primes[] = {
    77         53ul,         97ul,         193ul,       389ul,       769ul,
    78         1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
    79         49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
    80         1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
    81         50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
    82         1610612741ul, 3221225473ul, 4294967291ul
    83 };
    84 
    85 uint nextTableSize(uint x) {
    86         int i = 0;
    87         while (x >= primes[i])
    88                 i++;
    89         return primes[i];
    90 }
    91 
    9258#ifdef DEBUG_HASH_COLLISIONS
    9359static double
    9460        g_collisions = 0,
     
    9864        g_size = 0;
    9965static int g_max_capacity = 0, g_max_size = 0;
    10066static int g_totalHashmaps = 0;
     67static int g_stats[4] = {0,0,0,0};
    10168
    10269void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele) {
    10370        g_collisions += collisions;
     
    10875        g_size += nele;
    10976        g_totalHashmaps++;
    11077       
     78        if (3*nele <= 2*8)
     79                g_stats[0]++;
     80        if (3*nele <= 2*16)
     81                g_stats[1]++;
     82        if (3*nele <= 2*32)
     83                g_stats[2]++;
     84        if (3*nele <= 2*64)
     85                g_stats[3]++;
     86       
    11187        g_max_capacity = MAX(g_max_capacity, arrsize);
    11288        g_max_size = MAX(g_max_size, nele);
    11389
     
    11894                100 * g_collPerLook / g_totalHashmaps,
    11995                g_size / g_totalHashmaps, g_max_size,
    12096                g_capacity / g_totalHashmaps, g_max_capacity);
     97        fprintf(stdout, "  %d less than %d; %d less than %d; %d less than %d; %d less than %d\n",
     98                        g_stats[0], 2*8/3,
     99                        g_stats[1],2*16/3,
     100                        g_stats[2],2*32/3,
     101                        g_stats[3],2*64/3);
     102
     103        // TODO:
     104        // * Should record the maximal size of the map during its lifetime, not that at its death
     105        // * Should do some statistics: how many maps are less than 2/3*8, 2/3*16, 2/3*32, ...
    121106}
    122107#endif
    123108
  • common/memorypool.cpp

     
    2828
    2929namespace Common {
    3030
    31 static const size_t CHUNK_PAGE_SIZE = 32;
    32 
    33 void* MemoryPool::allocPage() {
    34         void* result = ::malloc(CHUNK_PAGE_SIZE * _chunkSize);
    35         _pages.push_back(result);
    36         void* current = result;
    37         for (size_t i = 1; i < CHUNK_PAGE_SIZE; ++i) {
    38                 void* next    = ((char*)current + _chunkSize);
    39                 *(void**)current = next;
    40 
    41                 current = next;
    42         }
    43         *(void**)current = NULL;
    44         return result;
    45 }
    46 
    4731MemoryPool::MemoryPool(size_t chunkSize) {
    4832        // You must at least fit the pointer in the node (technically unneeded considering the next rounding statement)
    4933        _chunkSize = MAX(chunkSize, sizeof(void*));
     
    5236        _chunkSize = (_chunkSize + sizeof(void*) - 1) & (~(sizeof(void*) - 1));
    5337
    5438        _next = NULL;
     39       
     40        _chunksPerPage = 32;
    5541}
    5642
    5743MemoryPool::~MemoryPool() {
    58         for (size_t i = 0; i<_pages.size(); ++i)
    59                 ::free(_pages[i]);
     44        for (size_t i = 0; i < _pages.size(); ++i)
     45                ::free(_pages[i].start);
    6046}
    6147
    62 void* MemoryPool::malloc() {
    63 #if 1
    64         if (!_next)
    65                 _next = allocPage();
     48void MemoryPool::allocPage() {
     49        Page page;     
     50       
     51        // Allocate a new page
     52        page.numChunks = _chunksPerPage;
     53        page.start = ::malloc(page.numChunks * _chunkSize);
     54        assert(page.start);
     55        _pages.push_back(page);
     56       
     57        // Next time, we'll allocate a page twice as big as this one.
     58        _chunksPerPage *= 2;
     59       
     60        // Add the page to the pool of free chunks
     61        addPageToPool(page);
     62}
    6663
    67         void* result = _next;
     64void MemoryPool::addPageToPool(const Page &page) {
     65        assert(_next == 0);
     66
     67        // Add all chunks of the new page to the linked list (pool) of free chunks
     68        void *current = page.start;
     69        for (size_t i = 1; i < page.numChunks; ++i) {
     70                void *next    = ((char*)current + _chunkSize);
     71                *(void **)current = next;
     72
     73                current = next;
     74        }
     75       
     76        // Last chunk points to the old _next
     77        *(void**)current = _next;
     78
     79        // From now on, the first free chunk is the first chunk of the new page
     80        _next = page.start;
     81}
     82
     83void *MemoryPool::malloc() {
     84        if (!_next)     // No free chunks left? Allocate a new page
     85                allocPage();
     86
     87        assert(_next);
     88        void *result = _next;
    6889        _next = *(void**)result;
    6990        return result;
    70 #else
    71         return ::malloc(_chunkSize);
    72 #endif
    7391}
    7492
    7593void MemoryPool::free(void* ptr) {
    76 #if 1
     94        // Add the chunk back to (the start of) the list of free chunks
    7795        *(void**)ptr = _next;
    7896        _next = ptr;
    79 #else
    80         ::free(ptr);
    81 #endif
    8297}
    8398
    8499// Technically not compliant C++ to compare unrelated pointers. In practice...
    85 bool MemoryPool::isPointerInPage(void* ptr, void* page) {
    86         return (ptr >= page) && (ptr < (char*)page + CHUNK_PAGE_SIZE * _chunkSize);
     100bool MemoryPool::isPointerInPage(void *ptr, const Page &page) {
     101        return (ptr >= page.start) && (ptr < (char*)page.start + page.numChunks * _chunkSize);
    87102}
    88103
    89104void MemoryPool::freeUnusedPages() {
     
    94109                numberOfFreeChunksPerPage[i] = 0;
    95110        }
    96111
    97         void* iterator = _next;
     112        // Compute for each page how many chunks in it are currently free.
     113        void *iterator = _next;
    98114        while (iterator) {
    99                 // This should be a binary search
     115                // TODO: This should be a binary search (requires us to keep _pages sorted)
    100116                for (size_t i = 0; i < _pages.size(); ++i) {
    101117                        if (isPointerInPage(iterator, _pages[i])) {
    102118                                ++numberOfFreeChunksPerPage[i];
     
    106122                iterator = *(void**)iterator;
    107123        }
    108124
     125        // Free all pages which are not in use.
     126        // TODO: Might want to reset _chunksPerPage here (e.g. to the largest
     127        //      _pages[i].numChunks value still in use).
    109128        size_t freedPagesCount = 0;
    110         for (size_t i = 0; i < _pages.size(); ++i) {
    111                 if (numberOfFreeChunksPerPage[i] == CHUNK_PAGE_SIZE) {
    112                         ::free(_pages[i]);
    113                         _pages[i] = NULL; // TODO : Remove NULL values
     129        for (size_t i = 0; i < _pages.size(); )  {
     130                if (numberOfFreeChunksPerPage[i] == _pages[i].numChunks) {
     131                        ::free(_pages[i].start);
    114132                        ++freedPagesCount;
     133                        _pages.remove_at(i);
     134                        // We just removed an entry, so we do not advance "i"
     135                } else {
     136                        ++i;
    115137                }
    116138        }
    117139
  • common/hashmap.h

     
    2424 */
    2525
    2626// The hash map (associative array) implementation in this file is
    27 // based on code by Andrew Y. Ng, 1996:
     27// based on the PyDict implementation of CPython. The erase() method
     28// is based on example code in the Wikipedia article on Hash tables.
    2829
    29 /*
    30  * Copyright (c) 1998-2003 Massachusetts Institute of Technology.
    31  * This code was developed as part of the Haystack research project
    32  * (http://haystack.lcs.mit.edu/). Permission is hereby granted,
    33  * free of charge, to any person obtaining a copy of this software
    34  * and associated documentation files (the "Software"), to deal in
    35  * the Software without restriction, including without limitation
    36  * the rights to use, copy, modify, merge, publish, distribute,
    37  * sublicense, and/or sell copies of the Software, and to permit
    38  * persons to whom the Software is furnished to do so, subject to
    39  * the following conditions:
    40  *
    41  * The above copyright notice and this permission notice shall be
    42  * included in all copies or substantial portions of the Software.
    43  *
    44  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    45  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    46  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    47  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    48  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    49  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    50  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    51  * OTHER DEALINGS IN THE SOFTWARE.
    52  */
    53 
    5430#ifndef COMMON_HASHMAP_H
    5531#define COMMON_HASHMAP_H
    5632
     
    7450
    7551namespace Common {
    7652
    77 // The table sizes ideally are primes. We use a helper function to find
    78 // suitable table sizes.
    79 uint nextTableSize(uint x);
    80 
    81 
    8253// Enable the following #define if you want to check how many collisions the
    8354// code produces (many collisions indicate either a bad hash function, or a
    8455// hash table that is too small).
     
    11384                Node(const Key &key) : _key(key), _value() {}
    11485        };
    11586
     87        enum {
     88                HASHMAP_PERTURB_SHIFT = 5,
     89                HASHMAP_MIN_CAPACITY = 16,
     90               
     91                // The quotient of the next two constants controls how much the
     92                // internal storage of the hashmap may fill up before being
     93                // increased automatically.
     94                // Note: the quotient of these two must lie strictly between
     95                // 0 and 1.
     96                HASHMAP_LOADFACTOR_NUMERATOR = 2,
     97                HASHMAP_LOADFACTOR_DENOMINATOR = 3
     98        };
    11699
     100
    117101#ifdef USE_HASHMAP_MEMORY_POOL
    118         MemoryPool _nodePool;
     102        FixedSizeMemoryPool<sizeof(Node), HASHMAP_MIN_CAPACITY> _nodePool;
    119103
    120104        Node *allocNode(const Key &key) {
    121105                void* mem = _nodePool.malloc();
     
    137121#endif
    138122
    139123        Node **_storage;        // hashtable of size arrsize.
    140         uint _capacity;
     124        uint _mask;             /**< Capacity of the HashMap minus one; must be a power of two minus one */
    141125        uint _size;
    142126
    143127        HashFunc _hash;
     
    153137        void assign(const HM_t &map);
    154138        int lookup(const Key &key) const;
    155139        int lookupAndCreateIfMissing(const Key &key);
    156         void expand_array(uint newsize);
     140        void expandStorage(uint newCapacity);
    157141
    158142        template<class T> friend class IteratorImpl;
    159143
     
    175159
    176160                NodeType *deref() const {
    177161                        assert(_hashmap != 0);
    178                         assert(_idx < _hashmap->_capacity);
     162                        assert(_idx <= _hashmap->_mask);
    179163                        Node *node = _hashmap->_storage[_idx];
    180164                        assert(node != 0);
    181165                        return node;
     
    196180                        assert(_hashmap);
    197181                        do {
    198182                                _idx++;
    199                         } while (_idx < _hashmap->_capacity && _hashmap->_storage[_idx] == 0);
    200                         if (_idx >= _hashmap->_capacity)
     183                        } while (_idx <= _hashmap->_mask && _hashmap->_storage[_idx] == 0);
     184                        if (_idx > _hashmap->_mask)
    201185                                _idx = (uint)-1;
    202186
    203187                        return *this;
     
    247231
    248232        iterator        begin() {
    249233                // Find and return the _key non-empty entry
    250                 for (uint ctr = 0; ctr < _capacity; ++ctr) {
     234                for (uint ctr = 0; ctr <= _mask; ++ctr) {
    251235                        if (_storage[ctr])
    252236                                return iterator(ctr, this);
    253237                }
     
    259243
    260244        const_iterator  begin() const {
    261245                // Find and return the first non-empty entry
    262                 for (uint ctr = 0; ctr < _capacity; ++ctr) {
     246                for (uint ctr = 0; ctr <= _mask; ++ctr) {
    263247                        if (_storage[ctr])
    264248                                return const_iterator(ctr, this);
    265249                }
     
    298282 */
    299283template<class Key, class Val, class HashFunc, class EqualFunc>
    300284HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() :
    301 #ifdef USE_HASHMAP_MEMORY_POOL
    302         _nodePool(sizeof(Node)),
    303 #endif
    304285        _defaultVal() {
    305         _capacity = nextTableSize(0);
    306         _storage = new Node *[_capacity];
     286        _mask = HASHMAP_MIN_CAPACITY - 1;
     287        _storage = new Node *[HASHMAP_MIN_CAPACITY];
    307288        assert(_storage != NULL);
    308         memset(_storage, 0, _capacity * sizeof(Node *));
     289        memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
    309290
    310291        _size = 0;
    311292
     
    322303 */
    323304template<class Key, class Val, class HashFunc, class EqualFunc>
    324305HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) :
    325 #ifdef USE_HASHMAP_MEMORY_POOL
    326         _nodePool(sizeof(Node)),
    327 #endif
    328306        _defaultVal()  {
    329307        assign(map);
    330308}
     
    334312 */
    335313template<class Key, class Val, class HashFunc, class EqualFunc>
    336314HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() {
    337         for (uint ctr = 0; ctr < _capacity; ++ctr)
     315        for (uint ctr = 0; ctr <= _mask; ++ctr)
    338316                if (_storage[ctr] != NULL)
    339317                  freeNode(_storage[ctr]);
    340318
    341319        delete[] _storage;
    342320#ifdef DEBUG_HASH_COLLISIONS
    343321        extern void updateHashCollisionStats(int, int, int, int);
    344         updateHashCollisionStats(_collisions, _lookups, _capacity, _size);
     322        updateHashCollisionStats(_collisions, _lookups, _mask+1, _size);
    345323#endif
    346324}
    347325
     
    354332 */
    355333template<class Key, class Val, class HashFunc, class EqualFunc>
    356334void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) {
    357         _capacity = map._capacity;
    358         _storage = new Node *[_capacity];
     335        _mask = map._mask;
     336        _storage = new Node *[_mask+1];
    359337        assert(_storage != NULL);
    360         memset(_storage, 0, _capacity * sizeof(Node *));
     338        memset(_storage, 0, (_mask+1) * sizeof(Node *));
    361339
    362340        // Simply clone the map given to us, one by one.
    363341        _size = 0;
    364         for (uint ctr = 0; ctr < _capacity; ++ctr) {
     342        for (uint ctr = 0; ctr <= _mask; ++ctr) {
    365343                if (map._storage[ctr] != NULL) {
    366344                        _storage[ctr] = allocNode(map._storage[ctr]->_key);
    367345                        _storage[ctr]->_value = map._storage[ctr]->_value;
     
    375353
    376354template<class Key, class Val, class HashFunc, class EqualFunc>
    377355void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) {
    378         for (uint ctr = 0; ctr < _capacity; ++ctr) {
     356        for (uint ctr = 0; ctr <= _mask; ++ctr) {
    379357                if (_storage[ctr] != NULL) {
    380358                        freeNode(_storage[ctr]);
    381359                        _storage[ctr] = NULL;
    382360                }
    383361        }
    384362
    385         if (shrinkArray && _capacity > nextTableSize(0)) {
     363#ifdef USE_HASHMAP_MEMORY_POOL
     364        _nodePool.freeUnusedPages();
     365#endif
     366
     367        if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
    386368                delete[] _storage;
    387369
    388                 _capacity = nextTableSize(0);
    389                 _storage = new Node *[_capacity];
     370                _mask = HASHMAP_MIN_CAPACITY;
     371                _storage = new Node *[HASHMAP_MIN_CAPACITY];
    390372                assert(_storage != NULL);
    391                 memset(_storage, 0, _capacity * sizeof(Node *));
     373                memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
    392374        }
    393375
    394376        _size = 0;
    395377}
    396378
    397379template<class Key, class Val, class HashFunc, class EqualFunc>
    398 void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) {
    399         assert(newsize > _capacity);
    400         uint ctr, dex;
     380void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) {
     381        assert(newCapacity > _mask+1);
    401382
    402383        const uint old_size = _size;
    403         const uint old_capacity = _capacity;
     384        const uint old_mask = _mask;
    404385        Node **old_storage = _storage;
    405386
    406387        // allocate a new array
    407388        _size = 0;
    408         _capacity = newsize;
    409         _storage = new Node *[_capacity];
     389        _mask = newCapacity - 1;
     390        _storage = new Node *[newCapacity];
    410391        assert(_storage != NULL);
    411         memset(_storage, 0, _capacity * sizeof(Node *));
     392        memset(_storage, 0, newCapacity * sizeof(Node *));
    412393
    413394        // rehash all the old elements
    414         for (ctr = 0; ctr < old_capacity; ++ctr) {
     395        for (uint ctr = 0; ctr <= old_mask; ++ctr) {
    415396                if (old_storage[ctr] == NULL)
    416397                        continue;
    417398
     
    419400                // Since we know that no key exists twice in the old table, we
    420401                // can do this slightly better than by calling lookup, since we
    421402                // don't have to call _equal().
    422                 dex = _hash(old_storage[ctr]->_key) % _capacity;
    423                 while (_storage[dex] != NULL) {
    424                         dex = (dex + 1) % _capacity;
     403                const uint hash = _hash(old_storage[ctr]->_key);
     404                uint idx = hash & _mask;
     405                for (uint perturb = hash; _storage[idx] != NULL; perturb >>= HASHMAP_PERTURB_SHIFT) {
     406                        idx = (5 * idx + perturb + 1) & _mask;
    425407                }
    426408
    427                 _storage[dex] = old_storage[ctr];
     409                _storage[idx] = old_storage[ctr];
    428410                _size++;
    429411        }
    430412
     
    439421
    440422template<class Key, class Val, class HashFunc, class EqualFunc>
    441423int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
    442         uint ctr = _hash(key) % _capacity;
     424        const uint hash = _hash(key);
     425        uint ctr = hash & _mask;
     426        for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
     427                if (_storage[ctr] == NULL || _equal(_storage[ctr]->_key, key))
     428                        break;
    443429
    444         while (_storage[ctr] != NULL && !_equal(_storage[ctr]->_key, key)) {
    445                 ctr = (ctr + 1) % _capacity;
     430                ctr = (5 * ctr + perturb + 1) & _mask;
    446431
    447432#ifdef DEBUG_HASH_COLLISIONS
    448433                _collisions++;
     
    453438        _lookups++;
    454439        fprintf(stderr, "collisions %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n",
    455440                _collisions, _lookups, ((double) _collisions / (double)_lookups),
    456                 (const void *)this, _capacity, _size);
     441                (const void *)this, _mask+1, _size);
    457442#endif
    458443
    459444        return ctr;
     
    467452                _storage[ctr] = allocNode(key);
    468453                _size++;
    469454
    470                 // Keep the load factor below 75%.
    471                 if (_size > _capacity * 75 / 100) {
    472                         expand_array(nextTableSize(_capacity));
     455                // Keep the load factor below a certain threshold.
     456                uint capacity = _mask + 1;
     457                if (_size * HASHMAP_LOADFACTOR_DENOMINATOR > capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
     458                        capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
     459                        expandStorage(capacity);
    473460                        ctr = lookup(key);
    474461                }
    475462        }
     
    520507template<class Key, class Val, class HashFunc, class EqualFunc>
    521508void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) {
    522509        // This is based on code in the Wikipedia article on Hash tables.
    523         uint i = lookup(key);
     510
     511        const uint hash = _hash(key);
     512        uint i = hash & _mask;
     513        uint perturb;
     514
     515        for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
     516                if (_storage[i] == NULL || _equal(_storage[i]->_key, key))
     517                        break;
     518
     519                i = (5 * i + perturb + 1) & _mask;
     520        }
     521
    524522        if (_storage[i] == NULL)
    525523                return; // key wasn't present, so no work has to be done
     524
    526525        // If we remove a key, we must check all subsequent keys and possibly
    527526        // reinsert them.
    528527        uint j = i;
    529528        freeNode(_storage[i]);
    530529        _storage[i] = NULL;
    531         while (true) {
     530        for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
    532531                // Look at the next table slot
    533                 j = (j + 1) % _capacity;
     532                j = (5 * j + perturb + 1) & _mask;
    534533                // If the next slot is empty, we are done
    535534                if (_storage[j] == NULL)
    536535                        break;
    537536                // Compute the slot where the content of the next slot should normally be,
    538537                // assuming an empty table, and check whether we have to move it.
    539                 uint k = _hash(_storage[j]->_key) % _capacity;
     538                uint k = _hash(_storage[j]->_key) & _mask;
    540539                if ((j > i && (k <= i || k > j)) ||
    541540                    (j < i && (k <= i && k > j)) ) {
    542541                        _storage[i] = _storage[j];