/home/mdboom/Work/builds/cpython/Python/hashtable.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 | /* The implementation of the hash table (_Py_hashtable_t) is based on the | 
| 2 |    cfuhash project: | 
| 3 |    http://sourceforge.net/projects/libcfu/ | 
| 4 |  | 
| 5 |    Copyright of cfuhash: | 
| 6 |    ---------------------------------- | 
| 7 |    Creation date: 2005-06-24 21:22:40 | 
| 8 |    Authors: Don | 
| 9 |    Change log: | 
| 10 |  | 
| 11 |    Copyright (c) 2005 Don Owens | 
| 12 |    All rights reserved. | 
| 13 |  | 
| 14 |    This code is released under the BSD license: | 
| 15 |  | 
| 16 |    Redistribution and use in source and binary forms, with or without | 
| 17 |    modification, are permitted provided that the following conditions | 
| 18 |    are met: | 
| 19 |  | 
| 20 |      * Redistributions of source code must retain the above copyright | 
| 21 |        notice, this list of conditions and the following disclaimer. | 
| 22 |  | 
| 23 |      * Redistributions in binary form must reproduce the above | 
| 24 |        copyright notice, this list of conditions and the following | 
| 25 |        disclaimer in the documentation and/or other materials provided | 
| 26 |        with the distribution. | 
| 27 |  | 
| 28 |      * Neither the name of the author nor the names of its | 
| 29 |        contributors may be used to endorse or promote products derived | 
| 30 |        from this software without specific prior written permission. | 
| 31 |  | 
| 32 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
| 33 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
| 34 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | 
| 35 |    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | 
| 36 |    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
| 37 |    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
| 38 |    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | 
| 39 |    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
| 40 |    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | 
| 41 |    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
| 42 |    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | 
| 43 |    OF THE POSSIBILITY OF SUCH DAMAGE. | 
| 44 |    ---------------------------------- | 
| 45 | */ | 
| 46 |  | 
| 47 | #include "Python.h" | 
| 48 | #include "pycore_hashtable.h" | 
| 49 |  | 
| 50 | #define HASHTABLE_MIN_SIZE 16 | 
| 51 | #define HASHTABLE_HIGH 0.50 | 
| 52 | #define HASHTABLE_LOW 0.10 | 
| 53 | #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) | 
| 54 |  | 
| 55 | #define BUCKETS_HEAD(SLIST) \ | 
| 56 |         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) | 
| 57 | #define TABLE_HEAD(HT, BUCKET) \ | 
| 58 |         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) | 
| 59 | #define ENTRY_NEXT(ENTRY) \ | 
| 60 |         ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) | 
| 61 |  | 
| 62 | /* Forward declaration */ | 
| 63 | static int hashtable_rehash(_Py_hashtable_t *ht); | 
| 64 |  | 
| 65 | static void | 
| 66 | _Py_slist_init(_Py_slist_t *list) | 
| 67 | { | 
| 68 |     list->head = NULL; | 
| 69 | } | 
| 70 |  | 
| 71 |  | 
| 72 | static void | 
| 73 | _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) | 
| 74 | { | 
| 75 |     item->next = list->head; | 
| 76 |     list->head = item; | 
| 77 | } | 
| 78 |  | 
| 79 |  | 
| 80 | static void | 
| 81 | _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, | 
| 82 |                  _Py_slist_item_t *item) | 
| 83 | { | 
| 84 |     if (previous != NULL)   Branch (84:9): [True: 35.1k, False: 943k]
 | 
| 85 |         previous->next = item->next; | 
| 86 |     else | 
| 87 |         list->head = item->next; | 
| 88 | } | 
| 89 |  | 
| 90 |  | 
| 91 | Py_uhash_t | 
| 92 | _Py_hashtable_hash_ptr(const void *key) | 
| 93 | { | 
| 94 |     return (Py_uhash_t)_Py_HashPointerRaw(key); | 
| 95 | } | 
| 96 |  | 
| 97 |  | 
| 98 | int | 
| 99 | _Py_hashtable_compare_direct(const void *key1, const void *key2) | 
| 100 | { | 
| 101 |     return (key1 == key2); | 
| 102 | } | 
| 103 |  | 
| 104 |  | 
| 105 | /* makes sure the real size of the buckets array is a power of 2 */ | 
| 106 | static size_t | 
| 107 | round_size(size_t s) | 
| 108 | { | 
| 109 |     size_t i; | 
| 110 |     if (s < HASHTABLE_MIN_SIZE)   Branch (110:9): [True: 239, False: 562]
 | 
| 111 |         return HASHTABLE_MIN_SIZE; | 
| 112 |     i = 1; | 
| 113 |     while (i < s)   Branch (113:12): [True: 3.86k, False: 562]
 | 
| 114 |         i <<= 1; | 
| 115 |     return i; | 
| 116 | } | 
| 117 |  | 
| 118 |  | 
| 119 | size_t | 
| 120 | _Py_hashtable_size(const _Py_hashtable_t *ht) | 
| 121 | { | 
| 122 |     size_t size = sizeof(_Py_hashtable_t); | 
| 123 |     /* buckets */ | 
| 124 |     size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *); | 
| 125 |     /* entries */ | 
| 126 |     size += ht->nentries * sizeof(_Py_hashtable_entry_t); | 
| 127 |     return size; | 
| 128 | } | 
| 129 |  | 
| 130 |  | 
| 131 | _Py_hashtable_entry_t * | 
| 132 | _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key) | 
| 133 | { | 
| 134 |     Py_uhash_t key_hash = ht->hash_func(key); | 
| 135 |     size_t index = key_hash & (ht->nbuckets - 1); | 
| 136 |     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); | 
| 137 |     while (1) {  Branch (137:12): [Folded - Ignored]
 | 
| 138 |         if (entry == NULL) {  Branch (138:13): [True: 12.6k, False: 4.45M]
 | 
| 139 |             return NULL; | 
| 140 |         } | 
| 141 |         if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)4.41M) {   Branch (141:13): [True: 4.41M, False: 48.4k]
  Branch (141:44): [True: 4.41M, False: 0]
 | 
| 142 |             break; | 
| 143 |         } | 
| 144 |         entry = ENTRY_NEXT(entry); | 
| 145 |     } | 
| 146 |     return entry; | 
| 147 | } | 
| 148 |  | 
| 149 |  | 
| 150 | // Specialized for: | 
| 151 | // hash_func == _Py_hashtable_hash_ptr | 
| 152 | // compare_func == _Py_hashtable_compare_direct | 
| 153 | static _Py_hashtable_entry_t * | 
| 154 | _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key) | 
| 155 | { | 
| 156 |     Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key); | 
| 157 |     size_t index = key_hash & (ht->nbuckets - 1); | 
| 158 |     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); | 
| 159 |     while (1) {  Branch (159:12): [Folded - Ignored]
 | 
| 160 |         if (entry == NULL) {  Branch (160:13): [True: 1.00M, False: 930k]
 | 
| 161 |             return NULL; | 
| 162 |         } | 
| 163 |         // Compare directly keys (ignore entry->key_hash) | 
| 164 |         if (entry->key == key) {  Branch (164:13): [True: 653k, False: 277k]
 | 
| 165 |             break; | 
| 166 |         } | 
| 167 |         entry = ENTRY_NEXT(entry); | 
| 168 |     } | 
| 169 |     return entry; | 
| 170 | } | 
| 171 |  | 
| 172 |  | 
| 173 | void* | 
| 174 | _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key) | 
| 175 | { | 
| 176 |     Py_uhash_t key_hash = ht->hash_func(key); | 
| 177 |     size_t index = key_hash & (ht->nbuckets - 1); | 
| 178 |  | 
| 179 |     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); | 
| 180 |     _Py_hashtable_entry_t *previous = NULL; | 
| 181 |     while (1) {  Branch (181:12): [Folded - Ignored]
 | 
| 182 |         if (entry == NULL) {  Branch (182:13): [True: 15.2k, False: 1.01M]
 | 
| 183 |             // not found | 
| 184 |             return NULL; | 
| 185 |         } | 
| 186 |         if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)978k) {   Branch (186:13): [True: 978k, False: 38.7k]
  Branch (186:44): [True: 978k, False: 0]
 | 
| 187 |             break; | 
| 188 |         } | 
| 189 |         previous = entry; | 
| 190 |         entry = ENTRY_NEXT(entry); | 
| 191 |     } | 
| 192 |  | 
| 193 |     _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, | 
| 194 |                      (_Py_slist_item_t *)entry); | 
| 195 |     ht->nentries--; | 
| 196 |  | 
| 197 |     void *value = entry->value; | 
| 198 |     ht->alloc.free(entry); | 
| 199 |  | 
| 200 |     if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {  Branch (200:9): [True: 141, False: 978k]
 | 
| 201 |         // Ignore failure: error cannot be reported to the caller | 
| 202 |         hashtable_rehash(ht); | 
| 203 |     } | 
| 204 |     return value; | 
| 205 | } | 
| 206 |  | 
| 207 |  | 
| 208 | int | 
| 209 | _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value) | 
| 210 | { | 
| 211 |     _Py_hashtable_entry_t *entry; | 
| 212 |  | 
| 213 | #ifndef NDEBUG | 
| 214 |     /* Don't write the assertion on a single line because it is interesting | 
| 215 |        to know the duplicated entry if the assertion failed. The entry can | 
| 216 |        be read using a debugger. */ | 
| 217 |     entry = ht->get_entry_func(ht, key); | 
| 218 |     assert(entry == NULL); | 
| 219 | #endif | 
| 220 |  | 
| 221 |  | 
| 222 |     entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t)); | 
| 223 |     if (entry == NULL) {  Branch (223:9): [True: 0, False: 1.01M]
 | 
| 224 |         /* memory allocation failed */ | 
| 225 |         return -1; | 
| 226 |     } | 
| 227 |  | 
| 228 |     entry->key_hash = ht->hash_func(key); | 
| 229 |     entry->key = (void *)key; | 
| 230 |     entry->value = value; | 
| 231 |  | 
| 232 |     ht->nentries++; | 
| 233 |     if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {  Branch (233:9): [True: 523, False: 1.01M]
 | 
| 234 |         if (hashtable_rehash(ht) < 0) {  Branch (234:13): [True: 0, False: 523]
 | 
| 235 |             ht->nentries--; | 
| 236 |             ht->alloc.free(entry); | 
| 237 |             return -1; | 
| 238 |         } | 
| 239 |     } | 
| 240 |  | 
| 241 |     size_t index = entry->key_hash & (ht->nbuckets - 1); | 
| 242 |     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); | 
| 243 |     return 0; | 
| 244 | } | 
| 245 |  | 
| 246 |  | 
| 247 | void* | 
| 248 | _Py_hashtable_get(_Py_hashtable_t *ht, const void *key) | 
| 249 | { | 
| 250 |     _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key); | 
| 251 |     if (entry != NULL) {  Branch (251:9): [True: 577k, False: 994k]
 | 
| 252 |         return entry->value; | 
| 253 |     } | 
| 254 |     else { | 
| 255 |         return NULL; | 
| 256 |     } | 
| 257 | } | 
| 258 |  | 
| 259 |  | 
| 260 | int | 
| 261 | _Py_hashtable_foreach(_Py_hashtable_t *ht, | 
| 262 |                       _Py_hashtable_foreach_func func, | 
| 263 |                       void *user_data) | 
| 264 | { | 
| 265 |     for (size_t hv = 0; hv < ht->nbuckets; hv++912) {   Branch (265:25): [True: 912, False: 43]
 | 
| 266 |         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv); | 
| 267 |         while (entry != NULL) {  Branch (267:16): [True: 143, False: 912]
 | 
| 268 |             int res = func(ht, entry->key, entry->value, user_data); | 
| 269 |             if (res) {  Branch (269:17): [True: 0, False: 143]
 | 
| 270 |                 return res; | 
| 271 |             } | 
| 272 |             entry = ENTRY_NEXT(entry); | 
| 273 |         } | 
| 274 |     } | 
| 275 |     return 0; | 
| 276 | } | 
| 277 |  | 
| 278 |  | 
| 279 | static int | 
| 280 | hashtable_rehash(_Py_hashtable_t *ht) | 
| 281 | { | 
| 282 |     size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR)); | 
| 283 |     if (new_size == ht->nbuckets) {  Branch (283:9): [True: 194, False: 607]
 | 
| 284 |         return 0; | 
| 285 |     } | 
| 286 |  | 
| 287 |     size_t buckets_size = new_size * sizeof(ht->buckets[0]); | 
| 288 |     _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size); | 
| 289 |     if (new_buckets == NULL) {  Branch (289:9): [True: 0, False: 607]
 | 
| 290 |         /* memory allocation failed */ | 
| 291 |         return -1; | 
| 292 |     } | 
| 293 |     memset(new_buckets, 0, buckets_size); | 
| 294 |  | 
| 295 |     for (size_t bucket = 0; bucket < ht->nbuckets; bucket++2.01M) {   Branch (295:29): [True: 2.01M, False: 607]
 | 
| 296 |         _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]); | 
| 297 |         while (entry != NULL) {  Branch (297:16): [True: 475k, False: 2.01M]
 | 
| 298 |             assert(ht->hash_func(entry->key) == entry->key_hash); | 
| 299 |             _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); | 
| 300 |             size_t entry_index = entry->key_hash & (new_size - 1); | 
| 301 |  | 
| 302 |             _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry); | 
| 303 |  | 
| 304 |             entry = next; | 
| 305 |         } | 
| 306 |     } | 
| 307 |  | 
| 308 |     ht->alloc.free(ht->buckets); | 
| 309 |     ht->nbuckets = new_size; | 
| 310 |     ht->buckets = new_buckets; | 
| 311 |     return 0; | 
| 312 | } | 
| 313 |  | 
| 314 |  | 
| 315 | _Py_hashtable_t * | 
| 316 | _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func, | 
| 317 |                        _Py_hashtable_compare_func compare_func, | 
| 318 |                        _Py_hashtable_destroy_func key_destroy_func, | 
| 319 |                        _Py_hashtable_destroy_func value_destroy_func, | 
| 320 |                        _Py_hashtable_allocator_t *allocator) | 
| 321 | { | 
| 322 |     _Py_hashtable_allocator_t alloc; | 
| 323 |     if (allocator == NULL) {  Branch (323:9): [True: 2.72k, False: 49]
 | 
| 324 |         alloc.malloc = PyMem_Malloc; | 
| 325 |         alloc.free = PyMem_Free; | 
| 326 |     } | 
| 327 |     else { | 
| 328 |         alloc = *allocator; | 
| 329 |     } | 
| 330 |  | 
| 331 |     _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); | 
| 332 |     if (ht == NULL) {  Branch (332:9): [True: 0, False: 2.77k]
 | 
| 333 |         return ht; | 
| 334 |     } | 
| 335 |  | 
| 336 |     ht->nbuckets = HASHTABLE_MIN_SIZE; | 
| 337 |     ht->nentries = 0; | 
| 338 |  | 
| 339 |     size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]); | 
| 340 |     ht->buckets = alloc.malloc(buckets_size); | 
| 341 |     if (ht->buckets == NULL) {  Branch (341:9): [True: 0, False: 2.77k]
 | 
| 342 |         alloc.free(ht); | 
| 343 |         return NULL; | 
| 344 |     } | 
| 345 |     memset(ht->buckets, 0, buckets_size); | 
| 346 |  | 
| 347 |     ht->get_entry_func = _Py_hashtable_get_entry_generic; | 
| 348 |     ht->hash_func = hash_func; | 
| 349 |     ht->compare_func = compare_func; | 
| 350 |     ht->key_destroy_func = key_destroy_func; | 
| 351 |     ht->value_destroy_func = value_destroy_func; | 
| 352 |     ht->alloc = alloc; | 
| 353 |     if (ht->hash_func == _Py_hashtable_hash_ptr   Branch (353:9): [True: 2.75k, False: 23]
 | 
| 354 |         && ht->compare_func == _Py_hashtable_compare_direct2.75k)   Branch (354:12): [True: 2.75k, False: 0]
 | 
| 355 |     { | 
| 356 |         ht->get_entry_func = _Py_hashtable_get_entry_ptr; | 
| 357 |     } | 
| 358 |     return ht; | 
| 359 | } | 
| 360 |  | 
| 361 |  | 
| 362 | _Py_hashtable_t * | 
| 363 | _Py_hashtable_new(_Py_hashtable_hash_func hash_func, | 
| 364 |                   _Py_hashtable_compare_func compare_func) | 
| 365 | { | 
| 366 |     return _Py_hashtable_new_full(hash_func, compare_func, | 
| 367 |                                   NULL, NULL, NULL); | 
| 368 | } | 
| 369 |  | 
| 370 |  | 
| 371 | static void | 
| 372 | _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry) | 
| 373 | { | 
| 374 |     if (ht->key_destroy_func) {  Branch (374:9): [True: 13.6k, False: 20.1k]
 | 
| 375 |         ht->key_destroy_func(entry->key); | 
| 376 |     } | 
| 377 |     if (ht->value_destroy_func) {  Branch (377:9): [True: 20.1k, False: 13.6k]
 | 
| 378 |         ht->value_destroy_func(entry->value); | 
| 379 |     } | 
| 380 |     ht->alloc.free(entry); | 
| 381 | } | 
| 382 |  | 
| 383 |  | 
| 384 | void | 
| 385 | _Py_hashtable_clear(_Py_hashtable_t *ht) | 
| 386 | { | 
| 387 |     for (size_t i=0; i < ht->nbuckets; i++73.2k) {   Branch (387:22): [True: 73.2k, False: 137]
 | 
| 388 |         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); | 
| 389 |         while (entry != NULL) {  Branch (389:16): [True: 20.1k, False: 73.2k]
 | 
| 390 |             _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); | 
| 391 |             _Py_hashtable_destroy_entry(ht, entry); | 
| 392 |             entry = next; | 
| 393 |         } | 
| 394 |         _Py_slist_init(&ht->buckets[i]); | 
| 395 |     } | 
| 396 |     ht->nentries = 0; | 
| 397 |     // Ignore failure: clear function is not expected to fail | 
| 398 |     // because of a memory allocation failure. | 
| 399 |     (void)hashtable_rehash(ht); | 
| 400 | } | 
| 401 |  | 
| 402 |  | 
| 403 | void | 
| 404 | _Py_hashtable_destroy(_Py_hashtable_t *ht) | 
| 405 | { | 
| 406 |     for (size_t i = 0; i < ht->nbuckets; i++57.2k) {   Branch (406:24): [True: 57.2k, False: 2.77k]
 | 
| 407 |         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); | 
| 408 |         while (entry) {  Branch (408:16): [True: 13.6k, False: 57.2k]
 | 
| 409 |             _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry); | 
| 410 |             _Py_hashtable_destroy_entry(ht, entry); | 
| 411 |             entry = entry_next; | 
| 412 |         } | 
| 413 |     } | 
| 414 |  | 
| 415 |     ht->alloc.free(ht->buckets); | 
| 416 |     ht->alloc.free(ht); | 
| 417 | } |