/home/mdboom/Work/builds/cpython/Python/hashtable.c
Line | Count | Source (jump to first uncovered line) |
1 | /* The implementation of the hash table (_Py_hashtable_t) is based on the |
2 | cfuhash project: |
3 | http://sourceforge.net/projects/libcfu/ |
4 | |
5 | Copyright of cfuhash: |
6 | ---------------------------------- |
7 | Creation date: 2005-06-24 21:22:40 |
8 | Authors: Don |
9 | Change log: |
10 | |
11 | Copyright (c) 2005 Don Owens |
12 | All rights reserved. |
13 | |
14 | This code is released under the BSD license: |
15 | |
16 | Redistribution and use in source and binary forms, with or without |
17 | modification, are permitted provided that the following conditions |
18 | are met: |
19 | |
20 | * Redistributions of source code must retain the above copyright |
21 | notice, this list of conditions and the following disclaimer. |
22 | |
23 | * Redistributions in binary form must reproduce the above |
24 | copyright notice, this list of conditions and the following |
25 | disclaimer in the documentation and/or other materials provided |
26 | with the distribution. |
27 | |
28 | * Neither the name of the author nor the names of its |
29 | contributors may be used to endorse or promote products derived |
30 | from this software without specific prior written permission. |
31 | |
32 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
33 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
34 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
35 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
38 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
39 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
40 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
41 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
42 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
43 | OF THE POSSIBILITY OF SUCH DAMAGE. |
44 | ---------------------------------- |
45 | */ |
46 | |
47 | #include "Python.h" |
48 | #include "pycore_hashtable.h" |
49 | |
50 | #define HASHTABLE_MIN_SIZE 16 |
51 | #define HASHTABLE_HIGH 0.50 |
52 | #define HASHTABLE_LOW 0.10 |
53 | #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) |
54 | |
55 | #define BUCKETS_HEAD(SLIST) \ |
56 | ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) |
57 | #define TABLE_HEAD(HT, BUCKET) \ |
58 | ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) |
59 | #define ENTRY_NEXT(ENTRY) \ |
60 | ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) |
61 | |
62 | /* Forward declaration */ |
63 | static int hashtable_rehash(_Py_hashtable_t *ht); |
64 | |
65 | static void |
66 | _Py_slist_init(_Py_slist_t *list) |
67 | { |
68 | list->head = NULL; |
69 | } |
70 | |
71 | |
72 | static void |
73 | _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) |
74 | { |
75 | item->next = list->head; |
76 | list->head = item; |
77 | } |
78 | |
79 | |
80 | static void |
81 | _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, |
82 | _Py_slist_item_t *item) |
83 | { |
84 | if (previous != NULL) Branch (84:9): [True: 35.1k, False: 943k]
|
85 | previous->next = item->next; |
86 | else |
87 | list->head = item->next; |
88 | } |
89 | |
90 | |
91 | Py_uhash_t |
92 | _Py_hashtable_hash_ptr(const void *key) |
93 | { |
94 | return (Py_uhash_t)_Py_HashPointerRaw(key); |
95 | } |
96 | |
97 | |
98 | int |
99 | _Py_hashtable_compare_direct(const void *key1, const void *key2) |
100 | { |
101 | return (key1 == key2); |
102 | } |
103 | |
104 | |
105 | /* makes sure the real size of the buckets array is a power of 2 */ |
106 | static size_t |
107 | round_size(size_t s) |
108 | { |
109 | size_t i; |
110 | if (s < HASHTABLE_MIN_SIZE) Branch (110:9): [True: 239, False: 562]
|
111 | return HASHTABLE_MIN_SIZE; |
112 | i = 1; |
113 | while (i < s) Branch (113:12): [True: 3.86k, False: 562]
|
114 | i <<= 1; |
115 | return i; |
116 | } |
117 | |
118 | |
119 | size_t |
120 | _Py_hashtable_size(const _Py_hashtable_t *ht) |
121 | { |
122 | size_t size = sizeof(_Py_hashtable_t); |
123 | /* buckets */ |
124 | size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *); |
125 | /* entries */ |
126 | size += ht->nentries * sizeof(_Py_hashtable_entry_t); |
127 | return size; |
128 | } |
129 | |
130 | |
131 | _Py_hashtable_entry_t * |
132 | _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key) |
133 | { |
134 | Py_uhash_t key_hash = ht->hash_func(key); |
135 | size_t index = key_hash & (ht->nbuckets - 1); |
136 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); |
137 | while (1) { Branch (137:12): [Folded - Ignored]
|
138 | if (entry == NULL) { Branch (138:13): [True: 12.6k, False: 4.45M]
|
139 | return NULL; |
140 | } |
141 | if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)4.41M ) { Branch (141:13): [True: 4.41M, False: 48.4k]
Branch (141:44): [True: 4.41M, False: 0]
|
142 | break; |
143 | } |
144 | entry = ENTRY_NEXT(entry); |
145 | } |
146 | return entry; |
147 | } |
148 | |
149 | |
150 | // Specialized for: |
151 | // hash_func == _Py_hashtable_hash_ptr |
152 | // compare_func == _Py_hashtable_compare_direct |
153 | static _Py_hashtable_entry_t * |
154 | _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key) |
155 | { |
156 | Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key); |
157 | size_t index = key_hash & (ht->nbuckets - 1); |
158 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); |
159 | while (1) { Branch (159:12): [Folded - Ignored]
|
160 | if (entry == NULL) { Branch (160:13): [True: 1.00M, False: 930k]
|
161 | return NULL; |
162 | } |
163 | // Compare directly keys (ignore entry->key_hash) |
164 | if (entry->key == key) { Branch (164:13): [True: 653k, False: 277k]
|
165 | break; |
166 | } |
167 | entry = ENTRY_NEXT(entry); |
168 | } |
169 | return entry; |
170 | } |
171 | |
172 | |
173 | void* |
174 | _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key) |
175 | { |
176 | Py_uhash_t key_hash = ht->hash_func(key); |
177 | size_t index = key_hash & (ht->nbuckets - 1); |
178 | |
179 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); |
180 | _Py_hashtable_entry_t *previous = NULL; |
181 | while (1) { Branch (181:12): [Folded - Ignored]
|
182 | if (entry == NULL) { Branch (182:13): [True: 15.2k, False: 1.01M]
|
183 | // not found |
184 | return NULL; |
185 | } |
186 | if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)978k ) { Branch (186:13): [True: 978k, False: 38.7k]
Branch (186:44): [True: 978k, False: 0]
|
187 | break; |
188 | } |
189 | previous = entry; |
190 | entry = ENTRY_NEXT(entry); |
191 | } |
192 | |
193 | _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, |
194 | (_Py_slist_item_t *)entry); |
195 | ht->nentries--; |
196 | |
197 | void *value = entry->value; |
198 | ht->alloc.free(entry); |
199 | |
200 | if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) { Branch (200:9): [True: 141, False: 978k]
|
201 | // Ignore failure: error cannot be reported to the caller |
202 | hashtable_rehash(ht); |
203 | } |
204 | return value; |
205 | } |
206 | |
207 | |
208 | int |
209 | _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value) |
210 | { |
211 | _Py_hashtable_entry_t *entry; |
212 | |
213 | #ifndef NDEBUG |
214 | /* Don't write the assertion on a single line because it is interesting |
215 | to know the duplicated entry if the assertion failed. The entry can |
216 | be read using a debugger. */ |
217 | entry = ht->get_entry_func(ht, key); |
218 | assert(entry == NULL); |
219 | #endif |
220 | |
221 | |
222 | entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t)); |
223 | if (entry == NULL) { Branch (223:9): [True: 0, False: 1.01M]
|
224 | /* memory allocation failed */ |
225 | return -1; |
226 | } |
227 | |
228 | entry->key_hash = ht->hash_func(key); |
229 | entry->key = (void *)key; |
230 | entry->value = value; |
231 | |
232 | ht->nentries++; |
233 | if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) { Branch (233:9): [True: 523, False: 1.01M]
|
234 | if (hashtable_rehash(ht) < 0) { Branch (234:13): [True: 0, False: 523]
|
235 | ht->nentries--; |
236 | ht->alloc.free(entry); |
237 | return -1; |
238 | } |
239 | } |
240 | |
241 | size_t index = entry->key_hash & (ht->nbuckets - 1); |
242 | _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); |
243 | return 0; |
244 | } |
245 | |
246 | |
247 | void* |
248 | _Py_hashtable_get(_Py_hashtable_t *ht, const void *key) |
249 | { |
250 | _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key); |
251 | if (entry != NULL) { Branch (251:9): [True: 577k, False: 994k]
|
252 | return entry->value; |
253 | } |
254 | else { |
255 | return NULL; |
256 | } |
257 | } |
258 | |
259 | |
260 | int |
261 | _Py_hashtable_foreach(_Py_hashtable_t *ht, |
262 | _Py_hashtable_foreach_func func, |
263 | void *user_data) |
264 | { |
265 | for (size_t hv = 0; hv < ht->nbuckets; hv++912 ) { Branch (265:25): [True: 912, False: 43]
|
266 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv); |
267 | while (entry != NULL) { Branch (267:16): [True: 143, False: 912]
|
268 | int res = func(ht, entry->key, entry->value, user_data); |
269 | if (res) { Branch (269:17): [True: 0, False: 143]
|
270 | return res; |
271 | } |
272 | entry = ENTRY_NEXT(entry); |
273 | } |
274 | } |
275 | return 0; |
276 | } |
277 | |
278 | |
279 | static int |
280 | hashtable_rehash(_Py_hashtable_t *ht) |
281 | { |
282 | size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR)); |
283 | if (new_size == ht->nbuckets) { Branch (283:9): [True: 194, False: 607]
|
284 | return 0; |
285 | } |
286 | |
287 | size_t buckets_size = new_size * sizeof(ht->buckets[0]); |
288 | _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size); |
289 | if (new_buckets == NULL) { Branch (289:9): [True: 0, False: 607]
|
290 | /* memory allocation failed */ |
291 | return -1; |
292 | } |
293 | memset(new_buckets, 0, buckets_size); |
294 | |
295 | for (size_t bucket = 0; bucket < ht->nbuckets; bucket++2.01M ) { Branch (295:29): [True: 2.01M, False: 607]
|
296 | _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]); |
297 | while (entry != NULL) { Branch (297:16): [True: 475k, False: 2.01M]
|
298 | assert(ht->hash_func(entry->key) == entry->key_hash); |
299 | _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); |
300 | size_t entry_index = entry->key_hash & (new_size - 1); |
301 | |
302 | _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry); |
303 | |
304 | entry = next; |
305 | } |
306 | } |
307 | |
308 | ht->alloc.free(ht->buckets); |
309 | ht->nbuckets = new_size; |
310 | ht->buckets = new_buckets; |
311 | return 0; |
312 | } |
313 | |
314 | |
315 | _Py_hashtable_t * |
316 | _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func, |
317 | _Py_hashtable_compare_func compare_func, |
318 | _Py_hashtable_destroy_func key_destroy_func, |
319 | _Py_hashtable_destroy_func value_destroy_func, |
320 | _Py_hashtable_allocator_t *allocator) |
321 | { |
322 | _Py_hashtable_allocator_t alloc; |
323 | if (allocator == NULL) { Branch (323:9): [True: 2.72k, False: 49]
|
324 | alloc.malloc = PyMem_Malloc; |
325 | alloc.free = PyMem_Free; |
326 | } |
327 | else { |
328 | alloc = *allocator; |
329 | } |
330 | |
331 | _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); |
332 | if (ht == NULL) { Branch (332:9): [True: 0, False: 2.77k]
|
333 | return ht; |
334 | } |
335 | |
336 | ht->nbuckets = HASHTABLE_MIN_SIZE; |
337 | ht->nentries = 0; |
338 | |
339 | size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]); |
340 | ht->buckets = alloc.malloc(buckets_size); |
341 | if (ht->buckets == NULL) { Branch (341:9): [True: 0, False: 2.77k]
|
342 | alloc.free(ht); |
343 | return NULL; |
344 | } |
345 | memset(ht->buckets, 0, buckets_size); |
346 | |
347 | ht->get_entry_func = _Py_hashtable_get_entry_generic; |
348 | ht->hash_func = hash_func; |
349 | ht->compare_func = compare_func; |
350 | ht->key_destroy_func = key_destroy_func; |
351 | ht->value_destroy_func = value_destroy_func; |
352 | ht->alloc = alloc; |
353 | if (ht->hash_func == _Py_hashtable_hash_ptr Branch (353:9): [True: 2.75k, False: 23]
|
354 | && ht->compare_func == _Py_hashtable_compare_direct2.75k ) Branch (354:12): [True: 2.75k, False: 0]
|
355 | { |
356 | ht->get_entry_func = _Py_hashtable_get_entry_ptr; |
357 | } |
358 | return ht; |
359 | } |
360 | |
361 | |
362 | _Py_hashtable_t * |
363 | _Py_hashtable_new(_Py_hashtable_hash_func hash_func, |
364 | _Py_hashtable_compare_func compare_func) |
365 | { |
366 | return _Py_hashtable_new_full(hash_func, compare_func, |
367 | NULL, NULL, NULL); |
368 | } |
369 | |
370 | |
371 | static void |
372 | _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry) |
373 | { |
374 | if (ht->key_destroy_func) { Branch (374:9): [True: 13.6k, False: 20.1k]
|
375 | ht->key_destroy_func(entry->key); |
376 | } |
377 | if (ht->value_destroy_func) { Branch (377:9): [True: 20.1k, False: 13.6k]
|
378 | ht->value_destroy_func(entry->value); |
379 | } |
380 | ht->alloc.free(entry); |
381 | } |
382 | |
383 | |
384 | void |
385 | _Py_hashtable_clear(_Py_hashtable_t *ht) |
386 | { |
387 | for (size_t i=0; i < ht->nbuckets; i++73.2k ) { Branch (387:22): [True: 73.2k, False: 137]
|
388 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); |
389 | while (entry != NULL) { Branch (389:16): [True: 20.1k, False: 73.2k]
|
390 | _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); |
391 | _Py_hashtable_destroy_entry(ht, entry); |
392 | entry = next; |
393 | } |
394 | _Py_slist_init(&ht->buckets[i]); |
395 | } |
396 | ht->nentries = 0; |
397 | // Ignore failure: clear function is not expected to fail |
398 | // because of a memory allocation failure. |
399 | (void)hashtable_rehash(ht); |
400 | } |
401 | |
402 | |
403 | void |
404 | _Py_hashtable_destroy(_Py_hashtable_t *ht) |
405 | { |
406 | for (size_t i = 0; i < ht->nbuckets; i++57.2k ) { Branch (406:24): [True: 57.2k, False: 2.77k]
|
407 | _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); |
408 | while (entry) { Branch (408:16): [True: 13.6k, False: 57.2k]
|
409 | _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry); |
410 | _Py_hashtable_destroy_entry(ht, entry); |
411 | entry = entry_next; |
412 | } |
413 | } |
414 | |
415 | ht->alloc.free(ht->buckets); |
416 | ht->alloc.free(ht); |
417 | } |