Line data Source code
1 : /* The implementation of the hash table (_Py_hashtable_t) is based on the
2 : cfuhash project:
3 : http://sourceforge.net/projects/libcfu/
4 :
5 : Copyright of cfuhash:
6 : ----------------------------------
7 : Creation date: 2005-06-24 21:22:40
8 : Authors: Don
9 : Change log:
10 :
11 : Copyright (c) 2005 Don Owens
12 : All rights reserved.
13 :
14 : This code is released under the BSD license:
15 :
16 : Redistribution and use in source and binary forms, with or without
17 : modification, are permitted provided that the following conditions
18 : are met:
19 :
20 : * Redistributions of source code must retain the above copyright
21 : notice, this list of conditions and the following disclaimer.
22 :
23 : * Redistributions in binary form must reproduce the above
24 : copyright notice, this list of conditions and the following
25 : disclaimer in the documentation and/or other materials provided
26 : with the distribution.
27 :
28 : * Neither the name of the author nor the names of its
29 : contributors may be used to endorse or promote products derived
30 : from this software without specific prior written permission.
31 :
32 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 : "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 : LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 : FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 : COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 : INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 : (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 : SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 : HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 : STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 : OF THE POSSIBILITY OF SUCH DAMAGE.
44 : ----------------------------------
45 : */
46 :
47 : #include "Python.h"
48 : #include "pycore_hashtable.h"
49 :
50 : #define HASHTABLE_MIN_SIZE 16
51 : #define HASHTABLE_HIGH 0.50
52 : #define HASHTABLE_LOW 0.10
53 : #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54 :
55 : #define BUCKETS_HEAD(SLIST) \
56 : ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57 : #define TABLE_HEAD(HT, BUCKET) \
58 : ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59 : #define ENTRY_NEXT(ENTRY) \
60 : ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61 :
62 : /* Forward declaration */
63 : static int hashtable_rehash(_Py_hashtable_t *ht);
64 :
65 : static void
66 386096 : _Py_slist_init(_Py_slist_t *list)
67 : {
68 386096 : list->head = NULL;
69 386096 : }
70 :
71 :
72 : static void
73 5989680 : _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
74 : {
75 5989680 : item->next = list->head;
76 5989680 : list->head = item;
77 5989680 : }
78 :
79 :
80 : static void
81 1632200 : _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82 : _Py_slist_item_t *item)
83 : {
84 1632200 : if (previous != NULL)
85 60458 : previous->next = item->next;
86 : else
87 1571740 : list->head = item->next;
88 1632200 : }
89 :
90 :
91 : Py_uhash_t
92 17563800 : _Py_hashtable_hash_ptr(const void *key)
93 : {
94 17563800 : return (Py_uhash_t)_Py_HashPointerRaw(key);
95 : }
96 :
97 :
98 : int
99 1632260 : _Py_hashtable_compare_direct(const void *key1, const void *key2)
100 : {
101 1632260 : return (key1 == key2);
102 : }
103 :
104 :
105 : /* makes sure the real size of the buckets array is a power of 2 */
106 : static size_t
107 30433 : round_size(size_t s)
108 : {
109 : size_t i;
110 30433 : if (s < HASHTABLE_MIN_SIZE)
111 311 : return HASHTABLE_MIN_SIZE;
112 30122 : i = 1;
113 240798 : while (i < s)
114 210676 : i <<= 1;
115 30122 : return i;
116 : }
117 :
118 :
119 : size_t
120 6 : _Py_hashtable_size(const _Py_hashtable_t *ht)
121 : {
122 6 : size_t size = sizeof(_Py_hashtable_t);
123 : /* buckets */
124 6 : size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
125 : /* entries */
126 6 : size += ht->nentries * sizeof(_Py_hashtable_entry_t);
127 6 : return size;
128 : }
129 :
130 :
131 : _Py_hashtable_entry_t *
132 9566550 : _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
133 : {
134 9566550 : Py_uhash_t key_hash = ht->hash_func(key);
135 9566550 : size_t index = key_hash & (ht->nbuckets - 1);
136 9566550 : _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
137 : while (1) {
138 10890800 : if (entry == NULL) {
139 136480 : return NULL;
140 : }
141 10754300 : if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
142 9430070 : break;
143 : }
144 1324280 : entry = ENTRY_NEXT(entry);
145 : }
146 9430070 : return entry;
147 : }
148 :
149 :
150 : // Specialized for:
151 : // hash_func == _Py_hashtable_hash_ptr
152 : // compare_func == _Py_hashtable_compare_direct
153 : static _Py_hashtable_entry_t *
154 10047100 : _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
155 : {
156 10047100 : Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
157 10047100 : size_t index = key_hash & (ht->nbuckets - 1);
158 10047100 : _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
159 : while (1) {
160 13459600 : if (entry == NULL) {
161 6145240 : return NULL;
162 : }
163 : // Compare directly keys (ignore entry->key_hash)
164 7314370 : if (entry->key == key) {
165 3901910 : break;
166 : }
167 3412460 : entry = ENTRY_NEXT(entry);
168 : }
169 3901910 : return entry;
170 : }
171 :
172 :
173 : void*
174 1695770 : _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
175 : {
176 1695770 : Py_uhash_t key_hash = ht->hash_func(key);
177 1695770 : size_t index = key_hash & (ht->nbuckets - 1);
178 :
179 1695770 : _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
180 1695770 : _Py_hashtable_entry_t *previous = NULL;
181 : while (1) {
182 1781120 : if (entry == NULL) {
183 : // not found
184 63576 : return NULL;
185 : }
186 1717540 : if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
187 1632200 : break;
188 : }
189 85341 : previous = entry;
190 85341 : entry = ENTRY_NEXT(entry);
191 : }
192 :
193 1632200 : _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
194 : (_Py_slist_item_t *)entry);
195 1632200 : ht->nentries--;
196 :
197 1632200 : void *value = entry->value;
198 1632200 : ht->alloc.free(entry);
199 :
200 1632200 : if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
201 : // Ignore failure: error cannot be reported to the caller
202 219 : hashtable_rehash(ht);
203 : }
204 1632200 : return value;
205 : }
206 :
207 :
208 : int
209 3126810 : _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
210 : {
211 : _Py_hashtable_entry_t *entry;
212 :
213 : #ifndef NDEBUG
214 : /* Don't write the assertion on a single line because it is interesting
215 : to know the duplicated entry if the assertion failed. The entry can
216 : be read using a debugger. */
217 3126810 : entry = ht->get_entry_func(ht, key);
218 3126810 : assert(entry == NULL);
219 : #endif
220 :
221 :
222 3126810 : entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
223 3126810 : if (entry == NULL) {
224 : /* memory allocation failed */
225 0 : return -1;
226 : }
227 :
228 3126810 : entry->key_hash = ht->hash_func(key);
229 3126810 : entry->key = (void *)key;
230 3126810 : entry->value = value;
231 :
232 3126810 : ht->nentries++;
233 3126810 : if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
234 30041 : if (hashtable_rehash(ht) < 0) {
235 0 : ht->nentries--;
236 0 : ht->alloc.free(entry);
237 0 : return -1;
238 : }
239 : }
240 :
241 3126810 : size_t index = entry->key_hash & (ht->nbuckets - 1);
242 3126810 : _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
243 3126810 : return 0;
244 : }
245 :
246 :
247 : void*
248 2888120 : _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
249 : {
250 2888120 : _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
251 2888120 : if (entry != NULL) {
252 1185300 : return entry->value;
253 : }
254 : else {
255 1702820 : return NULL;
256 : }
257 : }
258 :
259 :
260 : int
261 43 : _Py_hashtable_foreach(_Py_hashtable_t *ht,
262 : _Py_hashtable_foreach_func func,
263 : void *user_data)
264 : {
265 955 : for (size_t hv = 0; hv < ht->nbuckets; hv++) {
266 912 : _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
267 1049 : while (entry != NULL) {
268 137 : int res = func(ht, entry->key, entry->value, user_data);
269 137 : if (res) {
270 0 : return res;
271 : }
272 137 : entry = ENTRY_NEXT(entry);
273 : }
274 : }
275 43 : return 0;
276 : }
277 :
278 :
279 : static int
280 30433 : hashtable_rehash(_Py_hashtable_t *ht)
281 : {
282 30433 : size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
283 30433 : if (new_size == ht->nbuckets) {
284 239 : return 0;
285 : }
286 :
287 30194 : size_t buckets_size = new_size * sizeof(ht->buckets[0]);
288 30194 : _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
289 30194 : if (new_buckets == NULL) {
290 : /* memory allocation failed */
291 0 : return -1;
292 : }
293 30194 : memset(new_buckets, 0, buckets_size);
294 :
295 7822260 : for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
296 7792060 : _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
297 10654900 : while (entry != NULL) {
298 2862870 : assert(ht->hash_func(entry->key) == entry->key_hash);
299 2862870 : _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
300 2862870 : size_t entry_index = entry->key_hash & (new_size - 1);
301 :
302 2862870 : _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
303 :
304 2862870 : entry = next;
305 : }
306 : }
307 :
308 30194 : ht->alloc.free(ht->buckets);
309 30194 : ht->nbuckets = new_size;
310 30194 : ht->buckets = new_buckets;
311 30194 : return 0;
312 : }
313 :
314 :
315 : _Py_hashtable_t *
316 15580 : _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
317 : _Py_hashtable_compare_func compare_func,
318 : _Py_hashtable_destroy_func key_destroy_func,
319 : _Py_hashtable_destroy_func value_destroy_func,
320 : _Py_hashtable_allocator_t *allocator)
321 : {
322 : _Py_hashtable_allocator_t alloc;
323 15580 : if (allocator == NULL) {
324 15455 : alloc.malloc = PyMem_Malloc;
325 15455 : alloc.free = PyMem_Free;
326 : }
327 : else {
328 125 : alloc = *allocator;
329 : }
330 :
331 15580 : _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
332 15580 : if (ht == NULL) {
333 0 : return ht;
334 : }
335 :
336 15580 : ht->nbuckets = HASHTABLE_MIN_SIZE;
337 15580 : ht->nentries = 0;
338 :
339 15580 : size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
340 15580 : ht->buckets = alloc.malloc(buckets_size);
341 15580 : if (ht->buckets == NULL) {
342 0 : alloc.free(ht);
343 0 : return NULL;
344 : }
345 15580 : memset(ht->buckets, 0, buckets_size);
346 :
347 15580 : ht->get_entry_func = _Py_hashtable_get_entry_generic;
348 15580 : ht->hash_func = hash_func;
349 15580 : ht->compare_func = compare_func;
350 15580 : ht->key_destroy_func = key_destroy_func;
351 15580 : ht->value_destroy_func = value_destroy_func;
352 15580 : ht->alloc = alloc;
353 15580 : if (ht->hash_func == _Py_hashtable_hash_ptr
354 15081 : && ht->compare_func == _Py_hashtable_compare_direct)
355 : {
356 15081 : ht->get_entry_func = _Py_hashtable_get_entry_ptr;
357 : }
358 15580 : return ht;
359 : }
360 :
361 :
362 : _Py_hashtable_t *
363 1 : _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
364 : _Py_hashtable_compare_func compare_func)
365 : {
366 1 : return _Py_hashtable_new_full(hash_func, compare_func,
367 : NULL, NULL, NULL);
368 : }
369 :
370 :
371 : static void
372 1494610 : _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
373 : {
374 1494610 : if (ht->key_destroy_func) {
375 1389930 : ht->key_destroy_func(entry->key);
376 : }
377 1494610 : if (ht->value_destroy_func) {
378 104655 : ht->value_destroy_func(entry->value);
379 : }
380 1494610 : ht->alloc.free(entry);
381 1494610 : }
382 :
383 :
384 : void
385 173 : _Py_hashtable_clear(_Py_hashtable_t *ht)
386 : {
387 386269 : for (size_t i=0; i < ht->nbuckets; i++) {
388 386096 : _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
389 480327 : while (entry != NULL) {
390 94231 : _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
391 94231 : _Py_hashtable_destroy_entry(ht, entry);
392 94231 : entry = next;
393 : }
394 386096 : _Py_slist_init(&ht->buckets[i]);
395 : }
396 173 : ht->nentries = 0;
397 : // Ignore failure: clear function is not expected to fail
398 : // because of a memory allocation failure.
399 173 : (void)hashtable_rehash(ht);
400 173 : }
401 :
402 :
403 : void
404 15580 : _Py_hashtable_destroy(_Py_hashtable_t *ht)
405 : {
406 4135680 : for (size_t i = 0; i < ht->nbuckets; i++) {
407 4120100 : _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
408 5520470 : while (entry) {
409 1400380 : _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
410 1400380 : _Py_hashtable_destroy_entry(ht, entry);
411 1400380 : entry = entry_next;
412 : }
413 : }
414 :
415 15580 : ht->alloc.free(ht->buckets);
416 15580 : ht->alloc.free(ht);
417 15580 : }
|