Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Objects/obmalloc.c
Line | Count | Source
1
#include "Python.h"
2
#include "pycore_pymem.h"         // _PyTraceMalloc_Config
3
#include "pycore_code.h"         // stats
4
5
#include <stdbool.h>
6
#include <stdlib.h>               // malloc()
7
8
9
/* Defined in tracemalloc.c */
10
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
11
12
13
/* Python's malloc wrappers (see pymem.h) */
14
15
#undef  uint
16
#define uint    unsigned int    /* assuming >= 16 bits */
17
18
/* Forward declaration */
19
static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
20
static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
21
static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
22
static void _PyMem_DebugRawFree(void *ctx, void *ptr);
23
24
static void* _PyMem_DebugMalloc(void *ctx, size_t size);
25
static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
26
static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
27
static void _PyMem_DebugFree(void *ctx, void *p);
28
29
static void _PyObject_DebugDumpAddress(const void *p);
30
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
31
32
static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
33
34
#if defined(__has_feature)  /* Clang */
35
#  if __has_feature(address_sanitizer) /* is ASAN enabled? */
36
#    define _Py_NO_SANITIZE_ADDRESS \
37
        __attribute__((no_sanitize("address")))
38
#  endif
39
#  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
40
#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
41
#  endif
42
#  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
43
#    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
44
#  endif
45
#elif defined(__GNUC__)
46
#  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
47
#    define _Py_NO_SANITIZE_ADDRESS \
48
        __attribute__((no_sanitize_address))
49
#  endif
50
   // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
51
   // is provided only since GCC 7.
52
#  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
53
#    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
54
#  endif
55
#endif
56
57
#ifndef _Py_NO_SANITIZE_ADDRESS
58
#  define _Py_NO_SANITIZE_ADDRESS
59
#endif
60
#ifndef _Py_NO_SANITIZE_THREAD
61
#  define _Py_NO_SANITIZE_THREAD
62
#endif
63
#ifndef _Py_NO_SANITIZE_MEMORY
64
#  define _Py_NO_SANITIZE_MEMORY
65
#endif
66
67
#ifdef WITH_PYMALLOC
68
69
#ifdef MS_WINDOWS
70
#  include <windows.h>
71
#elif defined(HAVE_MMAP)
72
#  include <sys/mman.h>
73
#  ifdef MAP_ANONYMOUS
74
#    define ARENAS_USE_MMAP
75
#  endif
76
#endif
77
78
/* Forward declaration */
79
static void* _PyObject_Malloc(void *ctx, size_t size);
80
static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
81
static void _PyObject_Free(void *ctx, void *p);
82
static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
83
#endif
84
85
86
/* bpo-35053: Declare tracemalloc configuration here rather than
87
   Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
88
   library, whereas _Py_NewReference() requires it. */
89
struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
90
91
92
static void *
93
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
94
{
95
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
96
       for malloc(0), which would be treated as an error. Some platforms would
97
       return a pointer with no memory behind it, which would break pymalloc.
98
       To solve these problems, allocate an extra byte. */
99
    if (size == 0)
  Branch (99:9): [True: 787k, False: 8.66M]
100
        size = 1;
101
    return malloc(size);
102
}
103
104
static void *
105
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
106
{
107
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
108
       for calloc(0, 0), which would be treated as an error. Some platforms
109
       would return a pointer with no memory behind it, which would break
110
       pymalloc.  To solve these problems, allocate an extra byte. */
111
    if (nelem == 0 || elsize == 0) {
  Branch (111:9): [True: 109, False: 957k]
  Branch (111:23): [True: 12, False: 957k]
112
        nelem = 1;
113
        elsize = 1;
114
    }
115
    return calloc(nelem, elsize);
116
}
117
118
static void *
119
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
120
{
121
    if (size == 0)
  Branch (121:9): [True: 42.2k, False: 1.38M]
122
        size = 1;
123
    return realloc(ptr, size);
124
}
125
126
static void
127
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
128
{
129
    free(ptr);
130
}
131
132
133
#ifdef MS_WINDOWS
134
static void *
135
_PyObject_ArenaVirtualAlloc(void *Py_UNUSED(ctx), size_t size)
136
{
137
    return VirtualAlloc(NULL, size,
138
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
139
}
140
141
static void
142
_PyObject_ArenaVirtualFree(void *Py_UNUSED(ctx), void *ptr,
143
    size_t Py_UNUSED(size))
144
{
145
    VirtualFree(ptr, 0, MEM_RELEASE);
146
}
147
148
#elif defined(ARENAS_USE_MMAP)
149
static void *
150
_PyObject_ArenaMmap(void *Py_UNUSED(ctx), size_t size)
151
{
152
    void *ptr;
153
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
154
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
155
    if (ptr == MAP_FAILED)
  Branch (155:9): [True: 0, False: 14.8k]
156
        return NULL;
157
    assert(ptr != NULL);
158
    return ptr;
159
}
160
161
static void
162
_PyObject_ArenaMunmap(void *Py_UNUSED(ctx), void *ptr, size_t size)
163
{
164
    munmap(ptr, size);
165
}
166
167
#else
168
static void *
169
_PyObject_ArenaMalloc(void *Py_UNUSED(ctx), size_t size)
170
{
171
    return malloc(size);
172
}
173
174
static void
175
_PyObject_ArenaFree(void *Py_UNUSED(ctx), void *ptr, size_t Py_UNUSED(size))
176
{
177
    free(ptr);
178
}
179
#endif
180
181
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
182
#ifdef WITH_PYMALLOC
183
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
184
#endif
185
186
#define PYRAW_ALLOC MALLOC_ALLOC
187
#ifdef WITH_PYMALLOC
188
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
189
#else
190
#  define PYOBJ_ALLOC MALLOC_ALLOC
191
#endif
192
#define PYMEM_ALLOC PYOBJ_ALLOC
193
194
typedef struct {
195
    /* We tag each block with an API ID in order to tag API violations */
196
    char api_id;
197
    PyMemAllocatorEx alloc;
198
} debug_alloc_api_t;
199
static struct {
200
    debug_alloc_api_t raw;
201
    debug_alloc_api_t mem;
202
    debug_alloc_api_t obj;
203
} _PyMem_Debug = {
204
    {'r', PYRAW_ALLOC},
205
    {'m', PYMEM_ALLOC},
206
    {'o', PYOBJ_ALLOC}
207
    };
208
209
#define PYDBGRAW_ALLOC \
210
    {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
211
#define PYDBGMEM_ALLOC \
212
    {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
213
#define PYDBGOBJ_ALLOC \
214
    {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
215
216
#ifdef Py_DEBUG
217
static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
218
static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
219
static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
220
#else
221
static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
222
static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
223
static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
224
#endif
225
226
227
static int
228
pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
229
                            PyMemAllocatorEx *old_alloc)
230
{
231
    if (old_alloc != NULL) {
  Branch (231:9): [True: 1.09k, False: 12]
232
        PyMem_GetAllocator(domain, old_alloc);
233
    }
234
235
236
    PyMemAllocatorEx new_alloc;
237
    switch(domain)
238
    {
239
    case PYMEM_DOMAIN_RAW:
  Branch (239:5): [True: 1.09k, False: 8]
240
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
241
        break;
242
    case PYMEM_DOMAIN_MEM:
  Branch (242:5): [True: 4, False: 1.09k]
243
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
244
        break;
245
    case PYMEM_DOMAIN_OBJ:
  Branch (245:5): [True: 4, False: 1.09k]
246
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
247
        break;
248
    default:
  Branch (248:5): [True: 0, False: 1.10k]
249
        /* unknown domain */
250
        return -1;
251
    }
252
    PyMem_SetAllocator(domain, &new_alloc);
253
    if (debug) {
  Branch (253:9): [True: 12, False: 1.09k]
254
        _PyMem_SetupDebugHooksDomain(domain);
255
    }
256
    return 0;
257
}
258
259
260
int
261
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
262
                           PyMemAllocatorEx *old_alloc)
263
{
264
#ifdef Py_DEBUG
265
    const int debug = 1;
266
#else
267
    const int debug = 0;
268
#endif
269
    return pymem_set_default_allocator(domain, debug, old_alloc);
270
}
271
272
273
int
274
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
275
{
276
    if (name == NULL || *name == '\0') {
  Branch (276:9): [True: 0, False: 4]
  Branch (276:25): [True: 0, False: 4]
277
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
278
           options): use default memory allocators */
279
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
280
    }
281
    else if (strcmp(name, "default") == 0) {
  Branch (281:14): [True: 0, False: 4]
282
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
283
    }
284
    else if (strcmp(name, "debug") == 0) {
  Branch (284:14): [True: 0, False: 4]
285
        *allocator = PYMEM_ALLOCATOR_DEBUG;
286
    }
287
#ifdef WITH_PYMALLOC
288
    else if (strcmp(name, "pymalloc") == 0) {
  Branch (288:14): [True: 0, False: 4]
289
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
290
    }
291
    else if (strcmp(name, "pymalloc_debug") == 0) {
  Branch (291:14): [True: 0, False: 4]
292
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
293
    }
294
#endif
295
    else if (strcmp(name, "malloc") == 0) {
  Branch (295:14): [True: 4, False: 0]
296
        *allocator = PYMEM_ALLOCATOR_MALLOC;
297
    }
298
    else if (strcmp(name, "malloc_debug") == 0) {
  Branch (298:14): [True: 0, False: 0]
299
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
300
    }
301
    else {
302
        /* unknown allocator */
303
        return -1;
304
    }
305
    return 0;
306
}
307
308
309
int
310
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
311
{
312
    switch (allocator) {
313
    case PYMEM_ALLOCATOR_NOT_SET:
  Branch (313:5): [True: 0, False: 8]
314
        /* do nothing */
315
        break;
316
317
    case PYMEM_ALLOCATOR_DEFAULT:
  Branch (317:5): [True: 0, False: 8]
318
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
319
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
320
        (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
321
        break;
322
323
    case PYMEM_ALLOCATOR_DEBUG:
  Branch (323:5): [True: 4, False: 4]
324
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
325
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
326
        (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
327
        break;
328
329
#ifdef WITH_PYMALLOC
330
    case PYMEM_ALLOCATOR_PYMALLOC:
  Branch (330:5): [True: 0, False: 8]
331
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
  Branch (331:5): [True: 0, False: 8]
332
    {
333
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
334
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
335
336
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
337
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
338
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
339
340
        if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
  Branch (340:13): [True: 0, False: 0]
341
            PyMem_SetupDebugHooks();
342
        }
343
        break;
344
    }
345
#endif
346
347
    case PYMEM_ALLOCATOR_MALLOC:
  Branch (347:5): [True: 4, False: 4]
348
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
  Branch (348:5): [True: 0, False: 8]
349
    {
350
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
351
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
352
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
353
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
354
355
        if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
  Branch (355:13): [True: 0, False: 4]
356
            PyMem_SetupDebugHooks();
357
        }
358
        break;
359
    }
360
361
    default:
  Branch (361:5): [True: 0, False: 8]
362
        /* unknown allocator */
363
        return -1;
364
    }
365
    return 0;
366
}
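
A minimal sketch (not code from this file; the helper name is invented) of how the two functions above are typically chained when honoring a PYTHONMALLOC-style allocator name:

/* Illustrative only: chain _PyMem_GetAllocatorName() and
 * _PyMem_SetupAllocators() for a PYTHONMALLOC-style name. */
static int
set_allocators_from_name(const char *name)
{
    PyMemAllocatorName allocator;
    if (_PyMem_GetAllocatorName(name, &allocator) < 0) {
        return -1;   /* unrecognized allocator name */
    }
    return _PyMem_SetupAllocators(allocator);
}
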
367
368
369
static int
370
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
371
{
372
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
373
}
374
375
376
const char*
377
_PyMem_GetCurrentAllocatorName(void)
378
{
379
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
380
#ifdef WITH_PYMALLOC
381
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
382
#endif
383
384
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
  Branch (384:9): [True: 1, False: 0]
385
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
  Branch (385:9): [True: 0, False: 1]
386
        pymemallocator_eq(&_PyObject, &malloc_alloc))
  Branch (386:9): [True: 0, False: 0]
387
    {
388
        return "malloc";
389
    }
390
#ifdef WITH_PYMALLOC
391
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
  Branch (391:9): [True: 1, False: 0]
392
        pymemallocator_eq(&_PyMem, &pymalloc) &&
  Branch (392:9): [True: 1, False: 0]
393
        pymemallocator_eq(&_PyObject, &pymalloc))
  Branch (393:9): [True: 1, False: 0]
394
    {
395
        return "pymalloc";
396
    }
397
#endif
398
399
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
400
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
401
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
402
403
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
  Branch (403:9): [True: 0, False: 0]
404
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
  Branch (404:9): [True: 0, False: 0]
405
        pymemallocator_eq(&_PyObject, &dbg_obj))
  Branch (405:9): [True: 0, False: 0]
406
    {
407
        /* Debug hooks installed */
408
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
  Branch (408:13): [True: 0, False: 0]
409
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
  Branch (409:13): [True: 0, False: 0]
410
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
  Branch (410:13): [True: 0, False: 0]
411
        {
412
            return "malloc_debug";
413
        }
414
#ifdef WITH_PYMALLOC
415
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
  Branch (415:13): [True: 0, False: 0]
416
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
  Branch (416:13): [True: 0, False: 0]
417
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
  Branch (417:13): [True: 0, False: 0]
418
        {
419
            return "pymalloc_debug";
420
        }
421
#endif
422
    }
423
    return NULL;
424
}
425
426
427
#undef MALLOC_ALLOC
428
#undef PYMALLOC_ALLOC
429
#undef PYRAW_ALLOC
430
#undef PYMEM_ALLOC
431
#undef PYOBJ_ALLOC
432
#undef PYDBGRAW_ALLOC
433
#undef PYDBGMEM_ALLOC
434
#undef PYDBGOBJ_ALLOC
435
436
437
static PyObjectArenaAllocator _PyObject_Arena = {NULL,
438
#ifdef MS_WINDOWS
439
    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
440
#elif defined(ARENAS_USE_MMAP)
441
    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
442
#else
443
    _PyObject_ArenaMalloc, _PyObject_ArenaFree
444
#endif
445
    };
446
447
#ifdef WITH_PYMALLOC
448
static int
449
_PyMem_DebugEnabled(void)
450
{
451
    return (_PyObject.malloc == _PyMem_DebugMalloc);
452
}
453
454
static int
455
_PyMem_PymallocEnabled(void)
456
{
457
    if (_PyMem_DebugEnabled()) {
  Branch (457:9): [True: 0, False: 3]
458
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
459
    }
460
    else {
461
        return (_PyObject.malloc == _PyObject_Malloc);
462
    }
463
}
464
#endif
465
466
467
static void
468
_PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
469
{
470
    PyMemAllocatorEx alloc;
471
472
    if (domain == PYMEM_DOMAIN_RAW) {
  Branch (472:9): [True: 4, False: 8]
473
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
  Branch (473:13): [True: 0, False: 4]
474
            return;
475
        }
476
477
        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
478
        alloc.ctx = &_PyMem_Debug.raw;
479
        alloc.malloc = _PyMem_DebugRawMalloc;
480
        alloc.calloc = _PyMem_DebugRawCalloc;
481
        alloc.realloc = _PyMem_DebugRawRealloc;
482
        alloc.free = _PyMem_DebugRawFree;
483
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
484
    }
485
    else if (domain == PYMEM_DOMAIN_MEM) {
  Branch (485:14): [True: 4, False: 4]
486
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
  Branch (486:13): [True: 0, False: 4]
487
            return;
488
        }
489
490
        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
491
        alloc.ctx = &_PyMem_Debug.mem;
492
        alloc.malloc = _PyMem_DebugMalloc;
493
        alloc.calloc = _PyMem_DebugCalloc;
494
        alloc.realloc = _PyMem_DebugRealloc;
495
        alloc.free = _PyMem_DebugFree;
496
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
497
    }
498
    else if (domain == PYMEM_DOMAIN_OBJ)  {
  Branch (498:14): [True: 4, False: 0]
499
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
  Branch (499:13): [True: 0, False: 4]
500
            return;
501
        }
502
503
        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
504
        alloc.ctx = &_PyMem_Debug.obj;
505
        alloc.malloc = _PyMem_DebugMalloc;
506
        alloc.calloc = _PyMem_DebugCalloc;
507
        alloc.realloc = _PyMem_DebugRealloc;
508
        alloc.free = _PyMem_DebugFree;
509
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
510
    }
511
}
512
513
514
void
515
PyMem_SetupDebugHooks(void)
516
{
517
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
518
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
519
    _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
520
}
521
522
void
523
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
524
{
525
    switch(domain)
526
    {
527
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
  Branch (527:5): [True: 1.12k, False: 62]
528
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
  Branch (528:5): [True: 31, False: 1.15k]
529
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
  Branch (529:5): [True: 31, False: 1.15k]
530
    default:
  Branch (530:5): [True: 0, False: 1.18k]
531
        /* unknown domain: set all attributes to NULL */
532
        allocator->ctx = NULL;
533
        allocator->malloc = NULL;
534
        allocator->calloc = NULL;
535
        allocator->realloc = NULL;
536
        allocator->free = NULL;
537
    }
538
}
539
540
void
541
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
542
{
543
    switch(domain)
  Branch (543:12): [True: 0, False: 2.38k]
544
    {
545
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
  Branch (545:5): [True: 2.24k, False: 132]
546
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
  Branch (546:5): [True: 66, False: 2.31k]
547
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
  Branch (547:5): [True: 66, False: 2.31k]
548
    /* ignore unknown domain */
549
    }
550
}
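
To illustrate the PyMem_GetAllocator()/PyMem_SetAllocator() API above, here is a hedged sketch of wrapping the raw domain with a counting hook. The hook_* functions, install_hook() and alloc_count are invented for this example; the struct initializer order matches the MALLOC_ALLOC macro earlier in this file.

/* Illustrative only: count raw-domain allocations by delegating to the
 * previously installed allocator (saved in orig_raw and passed as ctx). */
static PyMemAllocatorEx orig_raw;
static size_t alloc_count = 0;

static void *
hook_malloc(void *ctx, size_t size)
{
    PyMemAllocatorEx *orig = (PyMemAllocatorEx *)ctx;
    alloc_count++;
    return orig->malloc(orig->ctx, size);
}

static void *
hook_calloc(void *ctx, size_t nelem, size_t elsize)
{
    PyMemAllocatorEx *orig = (PyMemAllocatorEx *)ctx;
    alloc_count++;
    return orig->calloc(orig->ctx, nelem, elsize);
}

static void *
hook_realloc(void *ctx, void *ptr, size_t size)
{
    PyMemAllocatorEx *orig = (PyMemAllocatorEx *)ctx;
    return orig->realloc(orig->ctx, ptr, size);
}

static void
hook_free(void *ctx, void *ptr)
{
    PyMemAllocatorEx *orig = (PyMemAllocatorEx *)ctx;
    orig->free(orig->ctx, ptr);
}

static void
install_hook(void)
{
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &orig_raw);
    PyMemAllocatorEx alloc = {&orig_raw, hook_malloc, hook_calloc,
                              hook_realloc, hook_free};
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
}
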
551
552
void
553
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
554
{
555
    *allocator = _PyObject_Arena;
556
}
557
558
void *
559
_PyObject_VirtualAlloc(size_t size)
560
{
561
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
562
}
563
564
void
565
_PyObject_VirtualFree(void *obj, size_t size)
566
{
567
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
568
}
569
570
void
571
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
572
{
573
    _PyObject_Arena = *allocator;
574
}
575
576
void *
577
PyMem_RawMalloc(size_t size)
578
{
579
    /*
580
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
581
     * Most python internals blindly use a signed Py_ssize_t to track
582
     * things without checking for overflows or negatives.
583
     * As size_t is unsigned, checking for size < 0 is not required.
584
     */
585
    if (size > (size_t)PY_SSIZE_T_MAX)
  Branch (585:9): [True: 0, False: 7.78M]
586
        return NULL;
587
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
588
}
589
590
void *
591
PyMem_RawCalloc(size_t nelem, size_t elsize)
592
{
593
    /* see PyMem_RawMalloc() */
594
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
  Branch (594:9): [True: 658k, False: 15]
  Branch (594:24): [True: 0, False: 658k]
595
        return NULL;
596
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
597
}
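
The division in the guard above avoids computing nelem * elsize, which could overflow; a standalone illustration of the same check (the helper name is invented, not code from this file):

/* Illustrative only: reject any nelem*elsize product that would exceed
 * PY_SSIZE_T_MAX, without performing the possibly-overflowing multiply. */
static int
calloc_request_too_big(size_t nelem, size_t elsize)
{
    return elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize;
}
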
598
599
void*
600
PyMem_RawRealloc(void *ptr, size_t new_size)
601
{
602
    /* see PyMem_RawMalloc() */
603
    if (new_size > (size_t)PY_SSIZE_T_MAX)
  Branch (603:9): [True: 0, False: 1.37M]
604
        return NULL;
605
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
606
}
607
608
void PyMem_RawFree(void *ptr)
609
{
610
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
611
}
612
613
614
void *
615
PyMem_Malloc(size_t size)
616
{
617
    /* see PyMem_RawMalloc() */
618
    if (size > (size_t)PY_SSIZE_T_MAX)
  Branch (618:9): [True: 0, False: 43.0M]
619
        return NULL;
620
    OBJECT_STAT_INC_COND(allocations512, size < 512);
621
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
622
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
623
    OBJECT_STAT_INC(allocations);
624
    return _PyMem.malloc(_PyMem.ctx, size);
625
}
626
627
void *
628
PyMem_Calloc(size_t nelem, size_t elsize)
629
{
630
    /* see PyMem_RawMalloc() */
631
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
  Branch (631:9): [True: 18.8M, False: 13]
  Branch (631:24): [True: 0, False: 18.8M]
632
        return NULL;
633
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
634
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
635
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
636
    OBJECT_STAT_INC(allocations);
637
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
638
}
639
640
void *
641
PyMem_Realloc(void *ptr, size_t new_size)
642
{
643
    /* see PyMem_RawMalloc() */
644
    if (new_size > (size_t)PY_SSIZE_T_MAX)
  Branch (644:9): [True: 0, False: 13.2M]
645
        return NULL;
646
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
647
}
648
649
void
650
PyMem_Free(void *ptr)
651
{
652
    OBJECT_STAT_INC(frees);
653
    _PyMem.free(_PyMem.ctx, ptr);
654
}
655
656
657
wchar_t*
658
_PyMem_RawWcsdup(const wchar_t *str)
659
{
660
    assert(str != NULL);
661
662
    size_t len = wcslen(str);
663
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
  Branch (663:9): [True: 0, False: 17.0k]
664
        return NULL;
665
    }
666
667
    size_t size = (len + 1) * sizeof(wchar_t);
668
    wchar_t *str2 = PyMem_RawMalloc(size);
669
    if (str2 == NULL) {
  Branch (669:9): [True: 0, False: 17.0k]
670
        return NULL;
671
    }
672
673
    memcpy(str2, str, size);
674
    return str2;
675
}
676
677
char *
678
_PyMem_RawStrdup(const char *str)
679
{
680
    assert(str != NULL);
681
    size_t size = strlen(str) + 1;
682
    char *copy = PyMem_RawMalloc(size);
683
    if (copy == NULL) {
  Branch (683:9): [True: 0, False: 691]
684
        return NULL;
685
    }
686
    memcpy(copy, str, size);
687
    return copy;
688
}
689
690
char *
691
_PyMem_Strdup(const char *str)
692
{
693
    assert(str != NULL);
694
    size_t size = strlen(str) + 1;
695
    char *copy = PyMem_Malloc(size);
696
    if (copy == NULL) {
  Branch (696:9): [True: 0, False: 157k]
697
        return NULL;
698
    }
699
    memcpy(copy, str, size);
700
    return copy;
701
}
702
703
void *
704
PyObject_Malloc(size_t size)
705
{
706
    /* see PyMem_RawMalloc() */
707
    if (size > (size_t)PY_SSIZE_T_MAX)
  Branch (707:9): [True: 0, False: 409M]
708
        return NULL;
709
    OBJECT_STAT_INC_COND(allocations512, size < 512);
710
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
711
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
712
    OBJECT_STAT_INC(allocations);
713
    return _PyObject.malloc(_PyObject.ctx, size);
714
}
715
716
void *
717
PyObject_Calloc(size_t nelem, size_t elsize)
718
{
719
    /* see PyMem_RawMalloc() */
720
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
  Branch (720:9): [True: 1.23M, False: 1]
  Branch (720:24): [True: 0, False: 1.23M]
721
        return NULL;
722
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
723
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
724
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
725
    OBJECT_STAT_INC(allocations);
726
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
727
}
728
729
void *
730
PyObject_Realloc(void *ptr, size_t new_size)
731
{
732
    /* see PyMem_RawMalloc() */
733
    if (new_size > (size_t)PY_SSIZE_T_MAX)
  Branch (733:9): [True: 0, False: 15.3M]
734
        return NULL;
735
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
736
}
737
738
void
739
PyObject_Free(void *ptr)
740
{
741
    OBJECT_STAT_INC(frees);
742
    _PyObject.free(_PyObject.ctx, ptr);
743
}
744
745
746
/* If we're using GCC, use __builtin_expect() to reduce overhead of
747
   the valgrind checks */
748
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
749
#  define UNLIKELY(value) __builtin_expect((value), 0)
750
#  define LIKELY(value) __builtin_expect((value), 1)
751
#else
752
#  define UNLIKELY(value) (value)
753
#  define LIKELY(value) (value)
754
#endif
755
756
#ifdef WITH_PYMALLOC
757
758
#ifdef WITH_VALGRIND
759
#include <valgrind/valgrind.h>
760
761
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
762
static int running_on_valgrind = -1;
763
#endif
764
765
766
/* An object allocator for Python.
767
768
   Here is an introduction to the layers of the Python memory architecture,
769
   showing where the object allocator is actually used (layer +2). It is
770
   called for every object allocation and deallocation (PyObject_New/Del),
771
   unless the object-specific allocators implement a proprietary allocation
772
   scheme (ex.: ints use a simple free list). This is also the place where
773
   the cyclic garbage collector operates selectively on container objects.
774
775
776
    Object-specific allocators
777
    _____   ______   ______       ________
778
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
779
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
780
    _______________________________       |                           |
781
   [   Python's object allocator   ]      |                           |
782
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
783
    ______________________________________________________________    |
784
   [          Python's raw memory allocator (PyMem_ API)          ]   |
785
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
786
    __________________________________________________________________
787
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
788
 0 | <------ Virtual memory allocated for the python process -------> |
789
790
   =========================================================================
791
    _______________________________________________________________________
792
   [                OS-specific Virtual Memory Manager (VMM)               ]
793
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
794
    __________________________________   __________________________________
795
   [                                  ] [                                  ]
796
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
797
798
*/
799
/*==========================================================================*/
800
801
/* A fast, special-purpose memory allocator for small blocks, to be used
802
   on top of a general-purpose malloc -- heavily based on previous art. */
803
804
/* Vladimir Marangozov -- August 2000 */
805
806
/*
807
 * "Memory management is where the rubber meets the road -- if we do the wrong
808
 * thing at any level, the results will not be good. And if we don't make the
809
 * levels work well together, we are in serious trouble." (1)
810
 *
811
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
812
 *    "Dynamic Storage Allocation: A Survey and Critical Review",
813
 *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
814
 */
815
816
/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
817
818
/*==========================================================================*/
819
820
/*
821
 * Allocation strategy abstract:
822
 *
823
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
824
 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
825
 * system's allocator.
826
 *
827
 * Small requests are grouped in size classes spaced 8 bytes apart, due
828
 * to the required valid alignment of the returned address. Requests of
829
 * a particular size are serviced from memory pools of 4K (one VMM page).
830
 * Pools are fragmented on demand and contain free lists of blocks of one
831
 * particular size class. In other words, there is a fixed-size allocator
832
 * for each size class. Free pools are shared by the different allocators
833
 * thus minimizing the space reserved for a particular size class.
834
 *
835
 * This allocation strategy is a variant of what is known as "simple
836
 * segregated storage based on array of free lists". The main drawback of
837
 * simple segregated storage is that we might end up with a lot of reserved
838
 * memory for the different free lists, which degenerate in time. To avoid
839
 * this, we partition each free list in pools and we share dynamically the
840
 * reserved space between all free lists. This technique is quite efficient
841
 * for memory intensive programs which allocate mainly small-sized blocks.
842
 *
843
 * For small requests we have the following table:
844
 *
845
 * Request in bytes     Size of allocated block      Size class idx
846
 * ----------------------------------------------------------------
847
 *        1-8                     8                       0
848
 *        9-16                   16                       1
849
 *       17-24                   24                       2
850
 *       25-32                   32                       3
851
 *       33-40                   40                       4
852
 *       41-48                   48                       5
853
 *       49-56                   56                       6
854
 *       57-64                   64                       7
855
 *       65-72                   72                       8
856
 *        ...                   ...                     ...
857
 *      497-504                 504                      62
858
 *      505-512                 512                      63
859
 *
860
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
861
 *      allocator.
862
 */
863
864
/*==========================================================================*/
865
866
/*
867
 * -- Main tunable settings section --
868
 */
869
870
/*
871
 * Alignment of addresses returned to the user. 8-bytes alignment works
872
 * on most current architectures (with 32-bit or 64-bit address buses).
873
 * The alignment value is also used for grouping small requests in size
874
 * classes spaced ALIGNMENT bytes apart.
875
 *
876
 * You shouldn't change this unless you know what you are doing.
877
 */
878
879
#if SIZEOF_VOID_P > 4
880
#define ALIGNMENT              16               /* must be 2^N */
881
#define ALIGNMENT_SHIFT         4
882
#else
883
#define ALIGNMENT               8               /* must be 2^N */
884
#define ALIGNMENT_SHIFT         3
885
#endif
886
887
/* Return the number of bytes in size class I, as a uint. */
888
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
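
A worked example of the size-class arithmetic, assuming the 8-byte spacing used in the table above (ALIGNMENT 8, ALIGNMENT_SHIFT 3); the helper name is invented and this is not code from this file:

/* Illustrative only: map a request size to its class index and back to the
 * rounded block size.  E.g. nbytes == 20 gives idx == (20-1)>>3 == 2 and
 * INDEX2SIZE(2) == (2+1)<<3 == 24, matching the 17-24 row of the table. */
static uint
request_to_block_size(size_t nbytes)
{
    uint idx = (uint)((nbytes - 1) >> ALIGNMENT_SHIFT);
    return INDEX2SIZE(idx);
}
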
889
890
/*
891
 * Max size threshold below which malloc requests are considered to be
892
 * small enough in order to use preallocated memory pools. You can tune
893
 * this value according to your application behaviour and memory needs.
894
 *
895
 * Note: a size threshold of 512 guarantees that newly created dictionaries
896
 * will be allocated from preallocated memory pools on 64-bit.
897
 *
898
 * The following invariants must hold:
899
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
900
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
901
 *
902
 * Although not required, for better performance and space efficiency,
903
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
904
 */
905
#define SMALL_REQUEST_THRESHOLD 512
906
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
907
908
/*
909
 * The system's VMM page size can be obtained on most unices with a
910
 * getpagesize() call or deduced from various header files. To make
911
 * things simpler, we assume that it is 4K, which is OK for most systems.
912
 * It is probably better if this is the native page size, but it doesn't
913
 * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
914
 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
915
 * violation fault.  4K is apparently OK for all the platforms that python
916
 * currently targets.
917
 */
918
#define SYSTEM_PAGE_SIZE        (4 * 1024)
919
920
/*
921
 * Maximum amount of memory managed by the allocator for small requests.
922
 */
923
#ifdef WITH_MEMORY_LIMITS
924
#ifndef SMALL_MEMORY_LIMIT
925
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
926
#endif
927
#endif
928
929
#if !defined(WITH_PYMALLOC_RADIX_TREE)
930
/* Use radix-tree to track arena memory regions, for address_in_range().
931
 * Enable by default since it allows larger pool sizes.  Can be disabled
932
 * using -DWITH_PYMALLOC_RADIX_TREE=0 */
933
#define WITH_PYMALLOC_RADIX_TREE 1
934
#endif
935
936
#if SIZEOF_VOID_P > 4
937
/* on 64-bit platforms use larger pools and arenas if we can */
938
#define USE_LARGE_ARENAS
939
#if WITH_PYMALLOC_RADIX_TREE
940
/* large pools only supported if radix-tree is enabled */
941
#define USE_LARGE_POOLS
942
#endif
943
#endif
944
945
/*
946
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
947
 * on a page boundary. This is a reserved virtual address space for the
948
 * current process (obtained through a malloc()/mmap() call). In no way does
949
 * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
950
 * usually an address range reservation for <Big> bytes, unless all pages within
951
 * this space are referenced subsequently. So malloc'ing big blocks and not
952
 * using them does not mean "wasting memory". It's an addressable range
953
 * wastage...
954
 *
955
 * Arenas are allocated with mmap() on systems supporting anonymous memory
956
 * mappings to reduce heap fragmentation.
957
 */
958
#ifdef USE_LARGE_ARENAS
959
#define ARENA_BITS              20                    /* 1 MiB */
960
#else
961
#define ARENA_BITS              18                    /* 256 KiB */
962
#endif
963
#define ARENA_SIZE              (1 << ARENA_BITS)
964
#define ARENA_SIZE_MASK         (ARENA_SIZE - 1)
965
966
#ifdef WITH_MEMORY_LIMITS
967
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
968
#endif
969
970
/*
971
 * Size of the pools used for small blocks.  Must be a power of 2.
972
 */
973
#ifdef USE_LARGE_POOLS
974
#define POOL_BITS               14                  /* 16 KiB */
975
#else
976
#define POOL_BITS               12                  /* 4 KiB */
977
#endif
978
#define POOL_SIZE               (1 << POOL_BITS)
979
#define POOL_SIZE_MASK          (POOL_SIZE - 1)
980
981
#if !WITH_PYMALLOC_RADIX_TREE
982
#if POOL_SIZE != SYSTEM_PAGE_SIZE
983
#   error "pool size must be equal to system page size"
984
#endif
985
#endif
986
987
#define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
988
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
989
#   error "arena size not an exact multiple of pool size"
990
#endif
991
992
/*
993
 * -- End of tunable settings section --
994
 */
995
996
/*==========================================================================*/
997
998
/* When you say memory, my mind reasons in terms of (pointers to) blocks */
999
typedef uint8_t block;
1000
1001
/* Pool for small blocks. */
1002
struct pool_header {
1003
    union { block *_padding;
1004
            uint count; } ref;          /* number of allocated blocks    */
1005
    block *freeblock;                   /* pool's free list head         */
1006
    struct pool_header *nextpool;       /* next pool of this size class  */
1007
    struct pool_header *prevpool;       /* previous pool       ""        */
1008
    uint arenaindex;                    /* index into arenas of base adr */
1009
    uint szidx;                         /* block size class index        */
1010
    uint nextoffset;                    /* bytes to virgin block         */
1011
    uint maxnextoffset;                 /* largest valid nextoffset      */
1012
};
1013
1014
typedef struct pool_header *poolp;
1015
1016
/* Record keeping for arenas. */
1017
struct arena_object {
1018
    /* The address of the arena, as returned by malloc.  Note that 0
1019
     * will never be returned by a successful malloc, and is used
1020
     * here to mark an arena_object that doesn't correspond to an
1021
     * allocated arena.
1022
     */
1023
    uintptr_t address;
1024
1025
    /* Pool-aligned pointer to the next pool to be carved off. */
1026
    block* pool_address;
1027
1028
    /* The number of available pools in the arena:  free pools + never-
1029
     * allocated pools.
1030
     */
1031
    uint nfreepools;
1032
1033
    /* The total number of pools in the arena, whether or not available. */
1034
    uint ntotalpools;
1035
1036
    /* Singly-linked list of available pools. */
1037
    struct pool_header* freepools;
1038
1039
    /* Whenever this arena_object is not associated with an allocated
1040
     * arena, the nextarena member is used to link all unassociated
1041
     * arena_objects in the singly-linked `unused_arena_objects` list.
1042
     * The prevarena member is unused in this case.
1043
     *
1044
     * When this arena_object is associated with an allocated arena
1045
     * with at least one available pool, both members are used in the
1046
     * doubly-linked `usable_arenas` list, which is maintained in
1047
     * increasing order of `nfreepools` values.
1048
     *
1049
     * Else this arena_object is associated with an allocated arena
1050
     * all of whose pools are in use.  `nextarena` and `prevarena`
1051
     * are both meaningless in this case.
1052
     */
1053
    struct arena_object* nextarena;
1054
    struct arena_object* prevarena;
1055
};
1056
1057
#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1058
1059
#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
1060
1061
/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1062
#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1063
1064
/* Return total number of blocks in pool of size index I, as a uint. */
1065
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
1066
1067
/*==========================================================================*/
1068
1069
/*
1070
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1071
1072
This is involved.  For an index i, usedpools[i+i] is the header for a list of
1073
all partially used pools holding small blocks with "size class idx" i. So
1074
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1075
16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1076
1077
Pools are carved off an arena's highwater mark (an arena_object's pool_address
1078
member) as needed.  Once carved off, a pool is in one of three states forever
1079
after:
1080
1081
used == partially used, neither empty nor full
1082
    At least one block in the pool is currently allocated, and at least one
1083
    block in the pool is not currently allocated (note this implies a pool
1084
    has room for at least two blocks).
1085
    This is a pool's initial state, as a pool is created only when malloc
1086
    needs space.
1087
    The pool holds blocks of a fixed size, and is in the circular list headed
1088
    at usedpools[i] (see above).  It's linked to the other used pools of the
1089
    same size class via the pool_header's nextpool and prevpool members.
1090
    If all but one block is currently allocated, a malloc can cause a
1091
    transition to the full state.  If all but one block is not currently
1092
    allocated, a free can cause a transition to the empty state.
1093
1094
full == all the pool's blocks are currently allocated
1095
    On transition to full, a pool is unlinked from its usedpools[] list.
1096
    It's not linked to from anything then anymore, and its nextpool and
1097
    prevpool members are meaningless until it transitions back to used.
1098
    A free of a block in a full pool puts the pool back in the used state.
1099
    Then it's linked in at the front of the appropriate usedpools[] list, so
1100
    that the next allocation for its size class will reuse the freed block.
1101
1102
empty == all the pool's blocks are currently available for allocation
1103
    On transition to empty, a pool is unlinked from its usedpools[] list,
1104
    and linked to the front of its arena_object's singly-linked freepools list,
1105
    via its nextpool member.  The prevpool member has no meaning in this case.
1106
    Empty pools have no inherent size class:  the next time a malloc finds
1107
    an empty list in usedpools[], it takes the first pool off of freepools.
1108
    If the size class needed happens to be the same as the size class the pool
1109
    last had, some pool initialization can be skipped.
1110
1111
1112
Block Management
1113
1114
Blocks within pools are again carved out as needed.  pool->freeblock points to
1115
the start of a singly-linked list of free blocks within the pool.  When a
1116
block is freed, it's inserted at the front of its pool's freeblock list.  Note
1117
that the available blocks in a pool are *not* linked all together when a pool
1118
is initialized.  Instead only "the first two" (lowest addresses) blocks are
1119
set up, returning the first such block, and setting pool->freeblock to a
1120
one-block list holding the second such block.  This is consistent with that
1121
pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1122
of memory until it's actually needed.
1123
1124
So long as a pool is in the used state, we're certain there *is* a block
1125
available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1126
points to the end of the free list before we've carved the entire pool into
1127
blocks, that means we simply haven't yet gotten to one of the higher-address
1128
blocks.  The offset from the pool_header to the start of "the next" virgin
1129
block is stored in the pool_header nextoffset member, and the largest value
1130
of nextoffset that makes sense is stored in the maxnextoffset member when a
1131
pool is initialized.  All the blocks in a pool have been passed out at least
1132
once when and only when nextoffset > maxnextoffset.
1133
1134
1135
Major obscurity:  While the usedpools vector is declared to have poolp
1136
entries, it doesn't really.  It really contains two pointers per (conceptual)
1137
poolp entry, the nextpool and prevpool members of a pool_header.  The
1138
excruciating initialization code below fools C so that
1139
1140
    usedpool[i+i]
1141
1142
"acts like" a genuine poolp, but only so long as you only reference its
1143
nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1144
compensating for that a pool_header's nextpool and prevpool members
1145
immediately follow a pool_header's first two members:
1146
1147
    union { block *_padding;
1148
            uint count; } ref;
1149
    block *freeblock;
1150
1151
each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
1152
contains is a fudged-up pointer p such that *if* C believes it's a poolp
1153
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1154
circular list is empty).
1155
1156
It's unclear why the usedpools setup is so convoluted.  It could be to
1157
minimize the amount of cache required to hold this heavily-referenced table
1158
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
1159
referencing code has to remember to "double the index" and doing so isn't
1160
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1161
on the fact that C doesn't insert any padding anywhere in a pool_header at or before
1162
the prevpool member.
1163
**************************************************************************** */
1164
1165
#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1166
#define PT(x)   PTA(x), PTA(x)
1167
1168
static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1169
    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1170
#if NB_SMALL_SIZE_CLASSES > 8
1171
    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1172
#if NB_SMALL_SIZE_CLASSES > 16
1173
    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1174
#if NB_SMALL_SIZE_CLASSES > 24
1175
    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1176
#if NB_SMALL_SIZE_CLASSES > 32
1177
    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1178
#if NB_SMALL_SIZE_CLASSES > 40
1179
    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1180
#if NB_SMALL_SIZE_CLASSES > 48
1181
    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1182
#if NB_SMALL_SIZE_CLASSES > 56
1183
    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1184
#if NB_SMALL_SIZE_CLASSES > 64
1185
#error "NB_SMALL_SIZE_CLASSES should be less than 64"
1186
#endif /* NB_SMALL_SIZE_CLASSES > 64 */
1187
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
1188
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
1189
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
1190
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
1191
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
1192
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
1193
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
1194
};
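
A hedged sketch (invented helper, not the allocator's own code) of how these fudged headers are consulted: thanks to the self-referencing initialization, an empty list is one whose header points back at itself.

/* Illustrative only: a size class has a partially used pool available
 * exactly when its circular usedpools list is non-empty. */
static int
size_class_has_used_pool(uint szidx)
{
    poolp pool = usedpools[szidx + szidx];   /* list header for this class  */
    return pool != pool->nextpool;           /* header != self => non-empty */
}
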
1195
1196
/*==========================================================================
1197
Arena management.
1198
1199
`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1200
which may not be currently used (== they're arena_objects that aren't
1201
currently associated with an allocated arena).  Note that arenas proper are
1202
separately malloc'ed.
1203
1204
Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1205
we do try to free() arenas, and use some mild heuristic strategies to increase
1206
the likelihood that arenas eventually can be freed.
1207
1208
unused_arena_objects
1209
1210
    This is a singly-linked list of the arena_objects that are currently not
1211
    being used (no arena is associated with them).  Objects are taken off the
1212
    head of the list in new_arena(), and are pushed on the head of the list in
1213
    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1214
    is on this list if and only if its .address member is 0.
1215
1216
usable_arenas
1217
1218
    This is a doubly-linked list of the arena_objects associated with arenas
1219
    that have pools available.  These pools are either waiting to be reused,
1220
    or have not been used before.  The list is sorted to have the most-
1221
    allocated arenas first (ascending order based on the nfreepools member).
1222
    This means that the next allocation will come from a heavily used arena,
1223
    which gives the nearly empty arenas a chance to be returned to the system.
1224
    In my unscientific tests this dramatically improved the number of arenas
1225
    that could be freed.
1226
1227
Note that an arena_object associated with an arena all of whose pools are
1228
currently in use isn't on either list.
1229
1230
Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1231
used to be done by one-at-a-time linear search when an arena's number of
1232
free pools changed.  That could, overall, consume time quadratic in the
1233
number of arenas.  That didn't really matter when there were only a few
1234
hundred arenas (typical!), but could be a timing disaster when there were
1235
hundreds of thousands.  See bpo-37029.
1236
1237
Now we have a vector of "search fingers" to eliminate the need to search:
1238
nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1239
with nfp free pools.  This is NULL if and only if there is no arena with
1240
nfp free pools in usable_arenas.
1241
*/
1242
1243
/* Array of objects used to track chunks of memory (arenas). */
1244
static struct arena_object* arenas = NULL;
1245
/* Number of slots currently allocated in the `arenas` vector. */
1246
static uint maxarenas = 0;
1247
1248
/* The head of the singly-linked, NULL-terminated list of available
1249
 * arena_objects.
1250
 */
1251
static struct arena_object* unused_arena_objects = NULL;
1252
1253
/* The head of the doubly-linked, NULL-terminated at each end, list of
1254
 * arena_objects associated with arenas that have pools available.
1255
 */
1256
static struct arena_object* usable_arenas = NULL;
1257
1258
/* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1259
static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1260
1261
/* How many arena_objects do we initially allocate?
1262
 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1263
 * `arenas` vector.
1264
 */
1265
#define INITIAL_ARENA_OBJECTS 16
1266
1267
/* Number of arenas allocated that haven't been free()'d. */
1268
static size_t narenas_currently_allocated = 0;
1269
1270
/* Total number of times malloc() called to allocate an arena. */
1271
static size_t ntimes_arena_allocated = 0;
1272
/* High water mark (max value ever seen) for narenas_currently_allocated. */
1273
static size_t narenas_highwater = 0;
1274
1275
static Py_ssize_t raw_allocated_blocks;
1276
1277
Py_ssize_t
1278
_Py_GetAllocatedBlocks(void)
1279
{
1280
    Py_ssize_t n = raw_allocated_blocks;
1281
    /* add up allocated blocks for used pools */
1282
    for (uint i = 0; i < maxarenas; ++i) {
  Branch (1282:22): [True: 1.53k, False: 3]
1283
        /* Skip arenas which are not allocated. */
1284
        if (arenas[i].address == 0) {
  Branch (1284:13): [True: 1.23k, False: 297]
1285
            continue;
1286
        }
1287
1288
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1289
1290
        /* visit every pool in the arena */
1291
        assert(base <= (uintptr_t) arenas[i].pool_address);
1292
        for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
  Branch (1292:16): [True: 18.8k, False: 297]
1293
            poolp p = (poolp)base;
1294
            n += p->ref.count;
1295
        }
1296
    }
1297
    return n;
1298
}
1299
1300
#if WITH_PYMALLOC_RADIX_TREE
1301
/*==========================================================================*/
1302
/* radix tree for tracking arena usage.  If enabled, used to implement
1303
   address_in_range().
1304
1305
   memory address bit allocation for keys
1306
1307
   64-bit pointers, IGNORE_BITS=0 and 2^20 arena size:
1308
     15 -> MAP_TOP_BITS
1309
     15 -> MAP_MID_BITS
1310
     14 -> MAP_BOT_BITS
1311
     20 -> ideal aligned arena
1312
   ----
1313
     64
1314
1315
   64-bit pointers, IGNORE_BITS=16, and 2^20 arena size:
1316
     16 -> IGNORE_BITS
1317
     10 -> MAP_TOP_BITS
1318
     10 -> MAP_MID_BITS
1319
      8 -> MAP_BOT_BITS
1320
     20 -> ideal aligned arena
1321
   ----
1322
     64
1323
1324
   32-bit pointers and 2^18 arena size:
1325
     14 -> MAP_BOT_BITS
1326
     18 -> ideal aligned arena
1327
   ----
1328
     32
1329
1330
*/
1331
1332
#if SIZEOF_VOID_P == 8
1333
1334
/* number of bits in a pointer */
1335
#define POINTER_BITS 64
1336
1337
/* High bits of memory addresses that will be ignored when indexing into the
1338
 * radix tree.  Setting this to zero is the safe default.  For most 64-bit
1339
 * machines, setting this to 16 would be safe.  The kernel would not give
1340
 * user-space virtual memory addresses that have significant information in
1341
 * those high bits.  The main advantage to setting IGNORE_BITS > 0 is that less
1342
 * virtual memory will be used for the top and middle radix tree arrays.  Those
1343
 * arrays are allocated in the BSS segment and so will typically consume real
1344
 * memory only if actually accessed.
1345
 */
1346
#define IGNORE_BITS 0
1347
1348
/* use the top and mid layers of the radix tree */
1349
#define USE_INTERIOR_NODES
1350
1351
#elif SIZEOF_VOID_P == 4
1352
1353
#define POINTER_BITS 32
1354
#define IGNORE_BITS 0
1355
1356
#else
1357
1358
 /* Currently this code works for 64-bit or 32-bit pointers only.  */
1359
#error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1360
1361
#endif /* SIZEOF_VOID_P */
1362
1363
/* arena_coverage_t members require this to be true  */
1364
#if ARENA_BITS >= 32
1365
#   error "arena size must be < 2^32"
1366
#endif
1367
1368
/* the lower bits of the address that are not ignored */
1369
#define ADDRESS_BITS (POINTER_BITS - IGNORE_BITS)
1370
1371
#ifdef USE_INTERIOR_NODES
1372
/* number of bits used for MAP_TOP and MAP_MID nodes */
1373
#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1374
#else
1375
#define INTERIOR_BITS 0
1376
#endif
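
As a quick check of the bit budget under the 64-bit defaults above (POINTER_BITS 64, IGNORE_BITS 0, ARENA_BITS 20), the integer arithmetic reproduces the 15/15/14/20 split listed in the key-allocation comment earlier (MAP_*_BITS are defined just below):

/* ADDRESS_BITS  = 64 - 0          = 64
 * INTERIOR_BITS = (64 - 20 + 2)/3 = 15   (integer division)
 * MAP_TOP_BITS  = MAP_MID_BITS    = 15
 * MAP_BOT_BITS  = 64 - 20 - 2*15  = 14
 * total:          15+15+14+20     = 64 address bits accounted for */
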
1377
1378
#define MAP_TOP_BITS INTERIOR_BITS
1379
#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1380
#define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1381
1382
#define MAP_MID_BITS INTERIOR_BITS
1383
#define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1384
#define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1385
1386
#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1387
#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1388
#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1389
1390
#define MAP_BOT_SHIFT ARENA_BITS
1391
#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1392
#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1393
1394
#define AS_UINT(p) ((uintptr_t)(p))
1395
#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1396
#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1397
#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
1398
1399
#if IGNORE_BITS > 0
1400
/* Return the ignored part of the pointer address.  Those bits should be same
1401
 * for all valid pointers if IGNORE_BITS is set correctly.
1402
 */
1403
#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1404
#else
1405
#define HIGH_BITS(p) 0
1406
#endif
1407
1408
1409
/* This is the leaf of the radix tree.  See arena_map_mark_used() for the
1410
 * meaning of these members. */
1411
typedef struct {
1412
    int32_t tail_hi;
1413
    int32_t tail_lo;
1414
} arena_coverage_t;
1415
1416
typedef struct arena_map_bot {
1417
    /* The members tail_hi and tail_lo are accessed together.  So, it
1418
     * better to have them as an array of structs, rather than two
1419
     * arrays.
1420
     */
1421
    arena_coverage_t arenas[MAP_BOT_LENGTH];
1422
} arena_map_bot_t;
1423
1424
#ifdef USE_INTERIOR_NODES
1425
typedef struct arena_map_mid {
1426
    struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1427
} arena_map_mid_t;
1428
1429
typedef struct arena_map_top {
1430
    struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1431
} arena_map_top_t;
1432
#endif
1433
1434
/* The root of radix tree.  Note that by initializing like this, the memory
1435
 * should be in the BSS.  The OS will only memory map pages as the MAP_MID
1436
 * nodes get used (OS pages are demand loaded as needed).
1437
 */
1438
#ifdef USE_INTERIOR_NODES
1439
static arena_map_top_t arena_map_root;
1440
/* accounting for number of used interior nodes */
1441
static int arena_map_mid_count;
1442
static int arena_map_bot_count;
1443
#else
1444
static arena_map_bot_t arena_map_root;
1445
#endif
1446
1447
/* Return a pointer to a bottom tree node; return NULL if it doesn't exist or
1448
 * it cannot be created */
1449
static arena_map_bot_t *
1450
arena_map_get(block *p, int create)
1451
{
1452
#ifdef USE_INTERIOR_NODES
1453
    /* sanity check that IGNORE_BITS is correct */
1454
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1455
    int i1 = MAP_TOP_INDEX(p);
1456
    if (arena_map_root.ptrs[i1] == NULL) {
  Branch (1456:9): [True: 69, False: 516M]
1457
        if (!create) {
  Branch (1457:13): [True: 0, False: 69]
1458
            return NULL;
1459
        }
1460
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1461
        if (n == NULL) {
  Branch (1461:13): [True: 0, False: 69]
1462
            return NULL;
1463
        }
1464
        arena_map_root.ptrs[i1] = n;
1465
        arena_map_mid_count++;
1466
    }
1467
    int i2 = MAP_MID_INDEX(p);
1468
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
  Branch (1468:9): [True: 2.01M, False: 514M]
1469
        if (!create) {
  Branch (1469:13): [True: 2.01M, False: 69]
1470
            return NULL;
1471
        }
1472
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1473
        if (n == NULL) {
  Branch (1473:13): [True: 0, False: 69]
1474
            return NULL;
1475
        }
1476
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
1477
        arena_map_bot_count++;
1478
    }
1479
    return arena_map_root.ptrs[i1]->ptrs[i2];
1480
#else
1481
    return &arena_map_root;
1482
#endif
1483
}
1484
1485
1486
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1487
 * away 24 bits of the address.  That reduces the space requirement of
1488
 * the tree compared to similar radix tree page-map schemes.  In
1489
 * exchange for slashing the space requirement, it needs more
1490
 * computation to check an address.
1491
 *
1492
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1493
 * explain in decimal so let's say that the arena size is 100 bytes.
1494
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1495
 * pointer address is inside an actual arena, we have to check two ideal
1496
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1497
 * 300.  In the rare case that an arena is aligned in the ideal way
1498
 * (e.g. base address of arena is 200) then we only have to check one
1499
 * ideal address.
1500
 *
1501
 * The tree nodes for 200 and 300 both store the address of arena.
1502
 * There are two cases: the arena starts at a lower ideal arena and
1503
 * extends to this one, or the arena starts in this arena and extends to
1504
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1505
 * these two cases.
1506
 */
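The decimal example above translates directly into a toy model. The sketch below is standalone (the toy_* names are invented, not obmalloc APIs): it mirrors the tail_hi/tail_lo bookkeeping for a 100-byte "arena" and the membership test that arena_map_is_used() performs further down.

#include <stdio.h>
#include <stdbool.h>

#define TOY_ARENA 100                       /* toy "arena size" */
#define TOY_NODES 16                        /* toy_map[i] covers [i*100, (i+1)*100) */

typedef struct { int tail_hi; int tail_lo; } toy_cov;
static toy_cov toy_map[TOY_NODES];

/* Mirror arena_map_mark_used(): the node containing the arena base records
 * the base offset in tail_hi; the next ideal node records it in tail_lo. */
static void toy_mark(int base)
{
    int tail = base % TOY_ARENA;
    if (tail == 0) {
        toy_map[base / TOY_ARENA].tail_hi = -1;     /* ideally aligned arena */
    }
    else {
        toy_map[base / TOY_ARENA].tail_hi = tail;
        toy_map[base / TOY_ARENA + 1].tail_lo = tail;
    }
}

/* Mirror arena_map_is_used(): p lies inside an arena iff it is below the
 * low tail, or at/after a non-zero high tail, of its ideal node. */
static bool toy_is_used(int p)
{
    toy_cov c = toy_map[p / TOY_ARENA];
    int tail = p % TOY_ARENA;
    return (tail < c.tail_lo) || (tail >= c.tail_hi && c.tail_hi != 0);
}

int main(void)
{
    toy_mark(257);                          /* toy arena covers [257, 357) */
    printf("250:%d 300:%d 360:%d\n",        /* prints 250:0 300:1 360:0 */
           (int)toy_is_used(250), (int)toy_is_used(300), (int)toy_is_used(360));
    return 0;
}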
1507
1508
1509
/* mark or unmark addresses covered by arena */
1510
static int
1511
arena_map_mark_used(uintptr_t arena_base, int is_used)
1512
{
1513
    /* sanity check that IGNORE_BITS is correct */
1514
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1515
    arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1516
    if (n_hi == NULL) {
  Branch (1516:9): [True: 0, False: 3.22k]
1517
        assert(is_used); /* otherwise node should already exist */
1518
        return 0; /* failed to allocate space for node */
1519
    }
1520
    int i3 = MAP_BOT_INDEX((block *)arena_base);
1521
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1522
    if (tail == 0) {
  Branch (1522:9): [True: 1.05k, False: 2.17k]
1523
        /* is ideal arena address */
1524
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
  Branch (1524:36): [True: 532, False: 522]
1525
    }
1526
    else {
1527
        /* arena_base address is not ideal (aligned to arena size) and
1528
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1529
         * for the next arena.  Note that it might be in different MAP_TOP
1530
         * and MAP_MID nodes as well so we need to call arena_map_get()
1531
         * again (do the full tree traversal).
1532
         */
1533
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
  Branch (1533:36): [True: 1.18k, False: 986]
1534
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1535
        /* If arena_base is a legit arena address, so is arena_base_next - 1
1536
         * (last address in arena).  If arena_base_next overflows then it
1537
         * must overflow to 0.  However, that would mean arena_base was
1538
         * "ideal" and we should not be in this case. */
1539
        assert(arena_base < arena_base_next);
1540
        arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1541
        if (n_lo == NULL) {
  Branch (1541:13): [True: 0, False: 2.17k]
1542
            assert(is_used); /* otherwise should already exist */
1543
            n_hi->arenas[i3].tail_hi = 0;
1544
            return 0; /* failed to allocate space for node */
1545
        }
1546
        int i3_next = MAP_BOT_INDEX(arena_base_next);
1547
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
  Branch (1547:41): [True: 1.18k, False: 986]
1548
    }
1549
    return 1;
1550
}
1551
1552
/* Return true if 'p' is a pointer inside an obmalloc arena.
1553
 * _PyObject_Free() calls this so it needs to be very fast. */
1554
static int
1555
arena_map_is_used(block *p)
1556
{
1557
    arena_map_bot_t *n = arena_map_get(p, 0);
1558
    if (n == NULL) {
  Branch (1558:9): [True: 2.01M, False: 514M]
1559
        return 0;
1560
    }
1561
    int i3 = MAP_BOT_INDEX(p);
1562
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1563
    int32_t hi = n->arenas[i3].tail_hi;
1564
    int32_t lo = n->arenas[i3].tail_lo;
1565
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1566
    return (tail < lo) || (tail >= hi && hi != 0);
  Branch (1566:12): [True: 391M, False: 122M]
  Branch (1566:28): [True: 122M, False: 6]
  Branch (1566:42): [True: 115M, False: 6.62M]
1567
}
1568
1569
/* end of radix tree logic */
1570
/*==========================================================================*/
1571
#endif /* WITH_PYMALLOC_RADIX_TREE */
1572
1573
1574
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
1575
 * allocate a new arena, and return the address of an arena_object
1576
 * describing the new arena.  It's expected that the caller will set
1577
 * `usable_arenas` to the return value.
1578
 */
1579
static struct arena_object*
1580
new_arena(void)
1581
{
1582
    struct arena_object* arenaobj;
1583
    uint excess;        /* number of bytes above pool alignment */
1584
    void *address;
1585
    static int debug_stats = -1;
1586
1587
    if (debug_stats == -1) {
  Branch (1587:9): [True: 69, False: 1.64k]
1588
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1589
        debug_stats = (opt != NULL && *opt != '\0');
  Branch (1589:24): [True: 0, False: 69]
  Branch (1589:39): [True: 0, False: 0]
1590
    }
1591
    if (debug_stats) {
  Branch (1591:9): [True: 0, False: 1.71k]
1592
        _PyObject_DebugMallocStats(stderr);
1593
    }
1594
1595
    if (unused_arena_objects == NULL) {
  Branch (1595:9): [True: 74, False: 1.64k]
1596
        uint i;
1597
        uint numarenas;
1598
        size_t nbytes;
1599
1600
        /* Double the number of arena objects on each allocation.
1601
         * Note that it's possible for `numarenas` to overflow.
1602
         */
1603
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
  Branch (1603:21): [True: 5, False: 69]
1604
        if (numarenas <= maxarenas)
  Branch (1604:13): [True: 0, False: 74]
1605
            return NULL;                /* overflow */
1606
#if SIZEOF_SIZE_T <= SIZEOF_INT
1607
        if (numarenas > SIZE_MAX / sizeof(*arenas))
1608
            return NULL;                /* overflow */
1609
#endif
1610
        nbytes = numarenas * sizeof(*arenas);
1611
        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1612
        if (arenaobj == NULL)
  Branch (1612:13): [True: 0, False: 74]
1613
            return NULL;
1614
        arenas = arenaobj;
1615
1616
        /* We might need to fix pointers that were copied.  However,
1617
         * new_arena only gets called when all the pages in the
1618
         * previous arenas are full.  Thus, there are *no* pointers
1619
         * into the old array. Thus, we don't have to worry about
1620
         * invalid pointers.  Just to be sure, some asserts:
1621
         */
1622
        assert(usable_arenas == NULL);
1623
        assert(unused_arena_objects == NULL);
1624
1625
        /* Put the new arenas on the unused_arena_objects list. */
1626
        for (i = maxarenas; i < numarenas; ++i) {
  Branch (1626:29): [True: 1.60k, False: 74]
1627
            arenas[i].address = 0;              /* mark as unassociated */
1628
            arenas[i].nextarena = i < numarenas - 1 ?
  Branch (1628:35): [True: 1.52k, False: 74]
1629
                                   &arenas[i+1] : NULL;
1630
        }
1631
1632
        /* Update globals. */
1633
        unused_arena_objects = &arenas[maxarenas];
1634
        maxarenas = numarenas;
1635
    }
1636
1637
    /* Take the next available arena object off the head of the list. */
1638
    assert(unused_arena_objects != NULL);
1639
    arenaobj = unused_arena_objects;
1640
    unused_arena_objects = arenaobj->nextarena;
1641
    assert(arenaobj->address == 0);
1642
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1643
#if WITH_PYMALLOC_RADIX_TREE
1644
    if (address != NULL) {
  Branch (1644:9): [True: 1.71k, False: 0]
1645
        if (!arena_map_mark_used((uintptr_t)address, 1)) {
  Branch (1645:13): [True: 0, False: 1.71k]
1646
            /* marking arena in radix tree failed, abort */
1647
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1648
            address = NULL;
1649
        }
1650
    }
1651
#endif
1652
    if (address == NULL) {
  Branch (1652:9): [True: 0, False: 1.71k]
1653
        /* The allocation failed: return NULL after putting the
1654
         * arenaobj back.
1655
         */
1656
        arenaobj->nextarena = unused_arena_objects;
1657
        unused_arena_objects = arenaobj;
1658
        return NULL;
1659
    }
1660
    arenaobj->address = (uintptr_t)address;
1661
1662
    ++narenas_currently_allocated;
1663
    ++ntimes_arena_allocated;
1664
    if (narenas_currently_allocated > narenas_highwater)
  Branch (1664:9): [True: 509, False: 1.20k]
1665
        narenas_highwater = narenas_currently_allocated;
1666
    arenaobj->freepools = NULL;
1667
    /* pool_address <- first pool-aligned address in the arena
1668
       nfreepools <- number of whole pools that fit after alignment */
1669
    arenaobj->pool_address = (block*)arenaobj->address;
1670
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1671
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1672
    if (excess != 0) {
  Branch (1672:9): [True: 914, False: 803]
1673
        --arenaobj->nfreepools;
1674
        arenaobj->pool_address += POOL_SIZE - excess;
1675
    }
1676
    arenaobj->ntotalpools = arenaobj->nfreepools;
1677
1678
    return arenaobj;
1679
}
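The excess/POOL_SIZE_MASK fixup just above rounds the first pool up to a pool boundary, sacrificing one pool whenever the arena base is not pool-aligned. A standalone sketch of that arithmetic, assuming a 4 KiB pool size and a made-up arena base (the real POOL_SIZE and addresses come from the allocator):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    const uintptr_t POOL = 4096;                          /* assumed pool size */
    uintptr_t arena_base = (uintptr_t)0x7f0000001700ULL;  /* hypothetical, unaligned */
    uintptr_t excess = arena_base & (POOL - 1);
    uintptr_t first_pool = excess ? arena_base + (POOL - excess) : arena_base;
    printf("arena base 0x%llx -> first pool 0x%llx (%llu bytes skipped)\n",
           (unsigned long long)arena_base, (unsigned long long)first_pool,
           (unsigned long long)(first_pool - arena_base));
    return 0;
}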
1680
1681
1682
1683
#if WITH_PYMALLOC_RADIX_TREE
1684
/* Return true if and only if P is an address that was allocated by
1685
   pymalloc.  When the radix tree is used, 'poolp' is unused.
1686
 */
1687
static bool
1688
address_in_range(void *p, poolp Py_UNUSED(pool))
1689
{
1690
    return arena_map_is_used(p);
1691
}
1692
#else
1693
/*
1694
address_in_range(P, POOL)
1695
1696
Return true if and only if P is an address that was allocated by pymalloc.
1697
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1698
(the caller is asked to compute this because the macro expands POOL more than
1699
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1700
variable and pass the latter to the macro; because address_in_range is
1701
called on every alloc/realloc/free, micro-efficiency is important here).
1702
1703
Tricky:  Let B be the arena base address associated with the pool, B =
1704
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1705
1706
    B <= P < B + ARENA_SIZE
1707
1708
Subtracting B throughout, this is true iff
1709
1710
    0 <= P-B < ARENA_SIZE
1711
1712
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1713
1714
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1715
before the first arena has been allocated.  `arenas` is still NULL in that
1716
case.  We're relying on that maxarenas is also 0 in that case, so that
1717
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1718
into a NULL arenas.
1719
1720
Details:  given P and POOL, the arena_object corresponding to P is AO =
1721
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1722
stores, etc), POOL is the correct address of P's pool, AO.address is the
1723
correct base address of the pool's arena, and P must be within ARENA_SIZE of
1724
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1725
(NULL)).  Therefore address_in_range correctly reports that obmalloc
1726
controls P.
1727
1728
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1729
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1730
in this case -- it may even be uninitialized trash.  If the trash arenaindex
1731
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1732
control P.
1733
1734
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
1735
allocated arena, obmalloc controls all the memory in slice AO.address :
1736
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1737
so P doesn't lie in that slice, so the macro correctly reports that P is not
1738
controlled by obmalloc.
1739
1740
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1741
arena_object (one not currently associated with an allocated arena),
1742
AO.address is 0, and the second test in the macro reduces to:
1743
1744
    P < ARENA_SIZE
1745
1746
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1747
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1748
of the test still passes, and the third clause (AO.address != 0) is necessary
1749
to get the correct result:  AO.address is 0 in this case, so the macro
1750
correctly reports that P is not controlled by obmalloc (despite that P lies in
1751
slice AO.address : AO.address + ARENA_SIZE).
1752
1753
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1754
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
1755
corresponded to a currently-allocated arena, so the "P is not controlled by
1756
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1757
was impossible.
1758
1759
Note that the logic is excruciating, and reading up possibly uninitialized
1760
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1761
creates problems for some memory debuggers.  The overwhelming advantage is
1762
that this test determines whether an arbitrary address is controlled by
1763
obmalloc in a small constant time, independent of the number of arenas
1764
obmalloc controls.  Since this test is needed at every entry point, it's
1765
extremely desirable that it be this fast.
1766
*/
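The "By using unsigned arithmetic, the '0 <=' half of the test can be skipped" remark relies on unsigned wraparound. A small standalone check of that idiom; the base/size values are arbitrary stand-ins, not real obmalloc constants:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    const uintptr_t base = 0x1000;          /* stand-in for AO.address */
    const uintptr_t size = 0x100;           /* stand-in for ARENA_SIZE */
    const uintptr_t samples[] = { 0x0fff, 0x1000, 0x10ff, 0x1100 };
    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        /* If samples[i] < base, the unsigned subtraction wraps to a huge
         * value, so this single comparison also rejects addresses below
         * base; no separate "0 <=" test is needed. */
        int in_range = (samples[i] - base) < size;
        printf("p=0x%04lx -> %d\n", (unsigned long)samples[i], in_range);
    }
    return 0;
}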
1767
1768
static bool _Py_NO_SANITIZE_ADDRESS
1769
            _Py_NO_SANITIZE_THREAD
1770
            _Py_NO_SANITIZE_MEMORY
1771
address_in_range(void *p, poolp pool)
1772
{
1773
    // Since address_in_range may be reading from memory which was not allocated
1774
    // by Python, it is important that pool->arenaindex is read only once, as
1775
    // another thread may be concurrently modifying the value without holding
1776
    // the GIL. The following dance forces the compiler to read pool->arenaindex
1777
    // only once.
1778
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
1779
    return arenaindex < maxarenas &&
1780
        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1781
        arenas[arenaindex].address != 0;
1782
}
1783
1784
#endif /* !WITH_PYMALLOC_RADIX_TREE */
1785
1786
/*==========================================================================*/
1787
1788
// Called when freelist is exhausted.  Extend the freelist if there is
1789
// space for a block.  Otherwise, remove this pool from usedpools.
1790
static void
1791
pymalloc_pool_extend(poolp pool, uint size)
1792
{
1793
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1794
        /* There is room for another block. */
1795
        pool->freeblock = (block*)pool + pool->nextoffset;
1796
        pool->nextoffset += INDEX2SIZE(size);
1797
        *(block **)(pool->freeblock) = NULL;
1798
        return;
1799
    }
1800
1801
    /* Pool is full, unlink from used pools. */
1802
    poolp next;
1803
    next = pool->nextpool;
1804
    pool = pool->prevpool;
1805
    next->prevpool = pool;
1806
    pool->nextpool = next;
1807
}
1808
1809
/* Called when pymalloc_alloc cannot allocate a block from usedpools.
1810
 * This function takes a new pool and allocates a block from it.
1811
 */
1812
static void*
1813
allocate_from_new_pool(uint size)
1814
{
1815
    /* There isn't a pool of the right size class immediately
1816
     * available:  use a free pool.
1817
     */
1818
    if (UNLIKELY(usable_arenas == NULL)) {
1819
        /* No arena has a free pool:  allocate a new arena. */
1820
#ifdef WITH_MEMORY_LIMITS
1821
        if (narenas_currently_allocated >= MAX_ARENAS) {
1822
            return NULL;
1823
        }
1824
#endif
1825
        usable_arenas = new_arena();
1826
        if (usable_arenas == NULL) {
  Branch (1826:13): [True: 0, False: 1.71k]
1827
            return NULL;
1828
        }
1829
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1830
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1831
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1832
    }
1833
    assert(usable_arenas->address != 0);
1834
1835
    /* This arena already had the smallest nfreepools value, so decreasing
1836
     * nfreepools doesn't change that, and we don't need to rearrange the
1837
     * usable_arenas list.  However, if the arena becomes wholly allocated,
1838
     * we need to remove its arena_object from usable_arenas.
1839
     */
1840
    assert(usable_arenas->nfreepools > 0);
1841
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
  Branch (1841:9): [True: 424k, False: 389]
1842
        /* It's the last of this size, so there won't be any. */
1843
        nfp2lasta[usable_arenas->nfreepools] = NULL;
1844
    }
1845
    /* If any free pools will remain, it will be the new smallest. */
1846
    if (usable_arenas->nfreepools > 1) {
  Branch (1846:9): [True: 414k, False: 10.9k]
1847
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1848
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1849
    }
1850
1851
    /* Try to get a cached free pool. */
1852
    poolp pool = usable_arenas->freepools;
1853
    if (LIKELY(pool != NULL)) {
1854
        /* Unlink from cached pools. */
1855
        usable_arenas->freepools = pool->nextpool;
1856
        usable_arenas->nfreepools--;
1857
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1858
            /* Wholly allocated:  remove. */
1859
            assert(usable_arenas->freepools == NULL);
1860
            assert(usable_arenas->nextarena == NULL ||
1861
                   usable_arenas->nextarena->prevarena ==
1862
                   usable_arenas);
1863
            usable_arenas = usable_arenas->nextarena;
1864
            if (usable_arenas != NULL) {
  Branch (1864:17): [True: 6.99k, False: 2.36k]
1865
                usable_arenas->prevarena = NULL;
1866
                assert(usable_arenas->address != 0);
1867
            }
1868
        }
1869
        else {
1870
            /* nfreepools > 0:  it must be that freepools
1871
             * isn't NULL, or that we haven't yet carved
1872
             * off all the arena's pools for the first
1873
             * time.
1874
             */
1875
            assert(usable_arenas->freepools != NULL ||
1876
                   usable_arenas->pool_address <=
1877
                   (block*)usable_arenas->address +
1878
                       ARENA_SIZE - POOL_SIZE);
1879
        }
1880
    }
1881
    else {
1882
        /* Carve off a new pool. */
1883
        assert(usable_arenas->nfreepools > 0);
1884
        assert(usable_arenas->freepools == NULL);
1885
        pool = (poolp)usable_arenas->pool_address;
1886
        assert((block*)pool <= (block*)usable_arenas->address +
1887
                                 ARENA_SIZE - POOL_SIZE);
1888
        pool->arenaindex = (uint)(usable_arenas - arenas);
1889
        assert(&arenas[pool->arenaindex] == usable_arenas);
1890
        pool->szidx = DUMMY_SIZE_IDX;
1891
        usable_arenas->pool_address += POOL_SIZE;
1892
        --usable_arenas->nfreepools;
1893
1894
        if (usable_arenas->nfreepools == 0) {
  Branch (1894:13): [True: 1.60k, False: 103k]
1895
            assert(usable_arenas->nextarena == NULL ||
1896
                   usable_arenas->nextarena->prevarena ==
1897
                   usable_arenas);
1898
            /* Unlink the arena:  it is completely allocated. */
1899
            usable_arenas = usable_arenas->nextarena;
1900
            if (usable_arenas != NULL) {
  Branch (1900:17): [True: 29, False: 1.57k]
1901
                usable_arenas->prevarena = NULL;
1902
                assert(usable_arenas->address != 0);
1903
            }
1904
        }
1905
    }
1906
1907
    /* Frontlink to used pools. */
1908
    block *bp;
1909
    poolp next = usedpools[size + size]; /* == prev */
1910
    pool->nextpool = next;
1911
    pool->prevpool = next;
1912
    next->nextpool = pool;
1913
    next->prevpool = pool;
1914
    pool->ref.count = 1;
1915
    if (pool->szidx == size) {
  Branch (1915:9): [True: 256k, False: 168k]
1916
        /* Luckily, this pool last contained blocks
1917
         * of the same size class, so its header
1918
         * and free list are already initialized.
1919
         */
1920
        bp = pool->freeblock;
1921
        assert(bp != NULL);
1922
        pool->freeblock = *(block **)bp;
1923
        return bp;
1924
    }
1925
    /*
1926
     * Initialize the pool header, set up the free list to
1927
     * contain just the second block, and return the first
1928
     * block.
1929
     */
1930
    pool->szidx = size;
1931
    size = INDEX2SIZE(size);
1932
    bp = (block *)pool + POOL_OVERHEAD;
1933
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
1934
    pool->maxnextoffset = POOL_SIZE - size;
1935
    pool->freeblock = bp + size;
1936
    *(block **)(pool->freeblock) = NULL;
1937
    return bp;
1938
}
1939
1940
/* pymalloc allocator
1941
1942
   Return a pointer to newly allocated memory if pymalloc allocated memory.
1943
1944
   Return NULL if pymalloc failed to allocate the memory block: on bigger
1945
   requests, on error in the code below (as a last chance to serve the request)
1946
   or when the max memory limit has been reached.
1947
*/
1948
static inline void*
1949
pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
1950
{
1951
#ifdef WITH_VALGRIND
1952
    if (UNLIKELY(running_on_valgrind == -1)) {
1953
        running_on_valgrind = RUNNING_ON_VALGRIND;
1954
    }
1955
    if (UNLIKELY(running_on_valgrind)) {
1956
        return NULL;
1957
    }
1958
#endif
1959
1960
    if (UNLIKELY(nbytes == 0)) {
1961
        return NULL;
1962
    }
1963
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1964
        return NULL;
1965
    }
1966
1967
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1968
    poolp pool = usedpools[size + size];
1969
    block *bp;
1970
1971
    if (LIKELY(pool != pool->nextpool)) {
1972
        /*
1973
         * There is a used pool for this size class.
1974
         * Pick up the head block of its free list.
1975
         */
1976
        ++pool->ref.count;
1977
        bp = pool->freeblock;
1978
        assert(bp != NULL);
1979
1980
        if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1981
            // Reached the end of the free list, try to extend it.
1982
            pymalloc_pool_extend(pool, size);
1983
        }
1984
    }
1985
    else {
1986
        /* There isn't a pool of the right size class immediately
1987
         * available:  use a free pool.
1988
         */
1989
        bp = allocate_from_new_pool(size);
1990
    }
1991
1992
    return (void *)bp;
1993
}
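The size-class computation in pymalloc_alloc() above maps a request to index (nbytes - 1) >> ALIGNMENT_SHIFT, so each class spans one alignment step and class i serves blocks of (i + 1) << ALIGNMENT_SHIFT bytes (what INDEX2SIZE computes). A standalone sketch, assuming 16-byte alignment (ALIGNMENT_SHIFT == 4); the real constants are defined elsewhere in obmalloc:

#include <stdio.h>

int main(void)
{
    const unsigned shift = 4;               /* assumed ALIGNMENT_SHIFT */
    const size_t requests[] = { 1, 16, 17, 32, 33, 512 };
    for (size_t i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
        unsigned size = (unsigned)(requests[i] - 1) >> shift;
        printf("nbytes=%3zu -> size class %2u (block size %u)\n",
               requests[i], size, (size + 1) << shift);
    }
    return 0;
}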
1994
1995
1996
static void *
1997
_PyObject_Malloc(void *ctx, size_t nbytes)
1998
{
1999
    void* ptr = pymalloc_alloc(ctx, nbytes);
2000
    if (LIKELY(ptr != NULL)) {
2001
        return ptr;
2002
    }
2003
2004
    ptr = PyMem_RawMalloc(nbytes);
2005
    if (ptr != NULL) {
  Branch (2005:9): [True: 6.63M, False: 7]
2006
        raw_allocated_blocks++;
2007
    }
2008
    return ptr;
2009
}
2010
2011
2012
static void *
2013
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2014
{
2015
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2016
    size_t nbytes = nelem * elsize;
2017
2018
    void* ptr = pymalloc_alloc(ctx, nbytes);
2019
    if (LIKELY(ptr != NULL)) {
2020
        memset(ptr, 0, nbytes);
2021
        return ptr;
2022
    }
2023
2024
    ptr = PyMem_RawCalloc(nelem, elsize);
2025
    if (ptr != NULL) {
  Branch (2025:9): [True: 651k, False: 0]
2026
        raw_allocated_blocks++;
2027
    }
2028
    return ptr;
2029
}
2030
2031
2032
static void
2033
insert_to_usedpool(poolp pool)
2034
{
2035
    assert(pool->ref.count > 0);            /* else the pool is empty */
2036
2037
    uint size = pool->szidx;
2038
    poolp next = usedpools[size + size];
2039
    poolp prev = next->prevpool;
2040
2041
    /* insert pool before next:   prev <-> pool <-> next */
2042
    pool->nextpool = next;
2043
    pool->prevpool = prev;
2044
    next->prevpool = pool;
2045
    prev->nextpool = pool;
2046
}
2047
2048
static void
2049
insert_to_freepool(poolp pool)
2050
{
2051
    poolp next = pool->nextpool;
2052
    poolp prev = pool->prevpool;
2053
    next->prevpool = prev;
2054
    prev->nextpool = next;
2055
2056
    /* Link the pool to freepools.  This is a singly-linked
2057
     * list, and pool->prevpool isn't used there.
2058
     */
2059
    struct arena_object *ao = &arenas[pool->arenaindex];
2060
    pool->nextpool = ao->freepools;
2061
    ao->freepools = pool;
2062
    uint nf = ao->nfreepools;
2063
    /* If this is the rightmost arena with this number of free pools,
2064
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2065
     * are no arenas in usable_arenas with that value.
2066
     */
2067
    struct arena_object* lastnf = nfp2lasta[nf];
2068
    assert((nf == 0 && lastnf == NULL) ||
2069
           (nf > 0 &&
2070
            lastnf != NULL &&
2071
            lastnf->nfreepools == nf &&
2072
            (lastnf->nextarena == NULL ||
2073
             nf < lastnf->nextarena->nfreepools)));
2074
    if (lastnf == ao) {  /* it is the rightmost */
  Branch (2074:9): [True: 381k, False: 40.9k]
2075
        struct arena_object* p = ao->prevarena;
2076
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
  Branch (2076:26): [True: 81.0k, False: 300k]
  Branch (2076:39): [True: 2.50k, False: 78.5k]
2077
    }
2078
    ao->nfreepools = ++nf;
2079
2080
    /* All the rest is arena management.  We just freed
2081
     * a pool, and there are 4 cases for arena mgmt:
2082
     * 1. If all the pools are free, return the arena to
2083
     *    the system free().  Except if this is the last
2084
     *    arena in the list, keep it to avoid thrashing:
2085
     *    keeping one wholly free arena in the list avoids
2086
     *    pathological cases where a simple loop would
2087
     *    otherwise provoke needing to allocate and free an
2088
     *    arena on every iteration.  See bpo-37257.
2089
     * 2. If this is the only free pool in the arena,
2090
     *    add the arena back to the `usable_arenas` list.
2091
     * 3. If the "next" arena has a smaller count of free
2092
     *    pools, we have to "slide this arena right" to
2093
     *    restore that usable_arenas is sorted in order of
2094
     *    nfreepools.
2095
     * 4. Else there's nothing more to do.
2096
     */
2097
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
  Branch (2097:9): [True: 2.81k, False: 419k]
  Branch (2097:34): [True: 1.50k, False: 1.30k]
2098
        /* Case 1.  First unlink ao from usable_arenas.
2099
         */
2100
        assert(ao->prevarena == NULL ||
2101
               ao->prevarena->address != 0);
2102
        assert(ao ->nextarena == NULL ||
2103
               ao->nextarena->address != 0);
2104
2105
        /* Fix the pointer in the prevarena, or the
2106
         * usable_arenas pointer.
2107
         */
2108
        if (ao->prevarena == NULL) {
  Branch (2108:13): [True: 236, False: 1.27k]
2109
            usable_arenas = ao->nextarena;
2110
            assert(usable_arenas == NULL ||
2111
                   usable_arenas->address != 0);
2112
        }
2113
        else {
2114
            assert(ao->prevarena->nextarena == ao);
2115
            ao->prevarena->nextarena =
2116
                ao->nextarena;
2117
        }
2118
        /* Fix the pointer in the nextarena. */
2119
        if (ao->nextarena != NULL) {
  Branch (2119:13): [True: 1.50k, False: 0]
2120
            assert(ao->nextarena->prevarena == ao);
2121
            ao->nextarena->prevarena =
2122
                ao->prevarena;
2123
        }
2124
        /* Record that this arena_object slot is
2125
         * available to be reused.
2126
         */
2127
        ao->nextarena = unused_arena_objects;
2128
        unused_arena_objects = ao;
2129
2130
#if WITH_PYMALLOC_RADIX_TREE
2131
        /* mark arena region as not under control of obmalloc */
2132
        arena_map_mark_used(ao->address, 0);
2133
#endif
2134
2135
        /* Free the entire arena. */
2136
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2137
                             (void *)ao->address, ARENA_SIZE);
2138
        ao->address = 0;                        /* mark unassociated */
2139
        --narenas_currently_allocated;
2140
2141
        return;
2142
    }
2143
2144
    if (nf == 1) {
  Branch (2144:9): [True: 10.9k, False: 410k]
2145
        /* Case 2.  Put ao at the head of
2146
         * usable_arenas.  Note that because
2147
         * ao->nfreepools was 0 before, ao isn't
2148
         * currently on the usable_arenas list.
2149
         */
2150
        ao->nextarena = usable_arenas;
2151
        ao->prevarena = NULL;
2152
        if (usable_arenas)
  Branch (2152:13): [True: 8.66k, False: 2.29k]
2153
            usable_arenas->prevarena = ao;
2154
        usable_arenas = ao;
2155
        assert(usable_arenas->address != 0);
2156
        if (nfp2lasta[1] == NULL) {
  Branch (2156:13): [True: 10.1k, False: 828]
2157
            nfp2lasta[1] = ao;
2158
        }
2159
2160
        return;
2161
    }
2162
2163
    /* If this arena is now out of order, we need to keep
2164
     * the list sorted.  The list is kept sorted so that
2165
     * the "most full" arenas are used first, which allows
2166
     * the nearly empty arenas to be completely freed.  In
2167
     * a few un-scientific tests, it seems like this
2168
     * approach allowed a lot more memory to be freed.
2169
     */
2170
    /* If this is the only arena with nf, record that. */
2171
    if (nfp2lasta[nf] == NULL) {
  Branch (2171:9): [True: 378k, False: 32.0k]
2172
        nfp2lasta[nf] = ao;
2173
    } /* else the rightmost with nf doesn't change */
2174
    /* If this was the rightmost of the old size, it remains in place. */
2175
    if (ao == lastnf) {
  Branch (2175:9): [True: 380k, False: 29.3k]
2176
        /* Case 4.  Nothing to do. */
2177
        return;
2178
    }
2179
    /* If ao were the only arena in the list, the last block would have
2180
     * gotten us out.
2181
     */
2182
    assert(ao->nextarena != NULL);
2183
2184
    /* Case 3:  We have to move the arena towards the end of the list,
2185
     * because it has more free pools than the arena to its right.  It needs
2186
     * to move to follow lastnf.
2187
     * First unlink ao from usable_arenas.
2188
     */
2189
    if (ao->prevarena != NULL) {
  Branch (2189:9): [True: 26.9k, False: 2.43k]
2190
        /* ao isn't at the head of the list */
2191
        assert(ao->prevarena->nextarena == ao);
2192
        ao->prevarena->nextarena = ao->nextarena;
2193
    }
2194
    else {
2195
        /* ao is at the head of the list */
2196
        assert(usable_arenas == ao);
2197
        usable_arenas = ao->nextarena;
2198
    }
2199
    ao->nextarena->prevarena = ao->prevarena;
2200
    /* And insert after lastnf. */
2201
    ao->prevarena = lastnf;
2202
    ao->nextarena = lastnf->nextarena;
2203
    if (ao->nextarena != NULL) {
  Branch (2203:9): [True: 29.0k, False: 308]
2204
        ao->nextarena->prevarena = ao;
2205
    }
2206
    lastnf->nextarena = ao;
2207
    /* Verify that the swaps worked. */
2208
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2209
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2210
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2211
    assert((usable_arenas == ao && ao->prevarena == NULL)
2212
           || ao->prevarena->nextarena == ao);
2213
}
2214
2215
/* Free a memory block allocated by pymalloc_alloc().
2216
   Return 1 if it was freed.
2217
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2218
static inline int
2219
pymalloc_free(void *Py_UNUSED(ctx), void *p)
2220
{
2221
    assert(p != NULL);
2222
2223
#ifdef WITH_VALGRIND
2224
    if (UNLIKELY(running_on_valgrind > 0)) {
2225
        return 0;
2226
    }
2227
#endif
2228
2229
    poolp pool = POOL_ADDR(p);
2230
    if (UNLIKELY(!address_in_range(p, pool))) {
2231
        return 0;
2232
    }
2233
    /* We allocated this address. */
2234
2235
    /* Link p to the start of the pool's freeblock list.  Since
2236
     * the pool had at least the p block outstanding, the pool
2237
     * wasn't empty (so it's already in a usedpools[] list, or
2238
     * was full and is in no list -- it's not in the freeblocks
2239
     * list in any case).
2240
     */
2241
    assert(pool->ref.count > 0);            /* else it was empty */
2242
    block *lastfree = pool->freeblock;
2243
    *(block **)p = lastfree;
2244
    pool->freeblock = (block *)p;
2245
    pool->ref.count--;
2246
2247
    if (UNLIKELY(lastfree == NULL)) {
2248
        /* Pool was full, so doesn't currently live in any list:
2249
         * link it to the front of the appropriate usedpools[] list.
2250
         * This mimics LRU pool usage for new allocations and
2251
         * targets optimal filling when several pools contain
2252
         * blocks of the same size class.
2253
         */
2254
        insert_to_usedpool(pool);
2255
        return 1;
2256
    }
2257
2258
    /* freeblock wasn't NULL, so the pool wasn't full,
2259
     * and the pool is in a usedpools[] list.
2260
     */
2261
    if (LIKELY(pool->ref.count != 0)) {
2262
        /* pool isn't empty:  leave it in usedpools */
2263
        return 1;
2264
    }
2265
2266
    /* Pool is now empty:  unlink from usedpools, and
2267
     * link to the front of freepools.  This ensures that
2268
     * previously freed pools will be allocated later
2269
     * (being not referenced, they are perhaps paged out).
2270
     */
2271
    insert_to_freepool(pool);
2272
    return 1;
2273
}
2274
2275
2276
static void
2277
_PyObject_Free(void *ctx, void *p)
2278
{
2279
    /* PyObject_Free(NULL) has no effect */
2280
    if (p == NULL) {
  Branch (2280:9): [True: 1.41M, False: 496M]
2281
        return;
2282
    }
2283
2284
    if (UNLIKELY(!pymalloc_free(ctx, p))) {
2285
        /* pymalloc didn't allocate this address */
2286
        PyMem_RawFree(p);
2287
        raw_allocated_blocks--;
2288
    }
2289
}
2290
2291
2292
/* pymalloc realloc.
2293
2294
   If nbytes==0, then as the Python docs promise, we do not treat this like
2295
   free(p), and return a non-NULL result.
2296
2297
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2298
   newptr_p.
2299
2300
   Return 0 if pymalloc didn't allocate p. */
2301
static int
2302
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2303
{
2304
    void *bp;
2305
    poolp pool;
2306
    size_t size;
2307
2308
    assert(p != NULL);
2309
2310
#ifdef WITH_VALGRIND
2311
    /* Treat running_on_valgrind == -1 the same as 0 */
2312
    if (UNLIKELY(running_on_valgrind > 0)) {
2313
        return 0;
2314
    }
2315
#endif
2316
2317
    pool = POOL_ADDR(p);
2318
    if (!address_in_range(p, pool)) {
  Branch (2318:9): [True: 1.36M, False: 18.5M]
2319
        /* pymalloc is not managing this block.
2320
2321
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2322
           over this block.  However, if we do, we need to copy the valid data
2323
           from the C-managed block to one of our blocks, and there's no
2324
           portable way to know how much of the memory space starting at p is
2325
           valid.
2326
2327
           As bug 1185883 pointed out the hard way, it's possible that the
2328
           C-managed block is "at the end" of allocated VM space, so that a
2329
           memory fault can occur if we try to copy nbytes bytes starting at p.
2330
           Instead we punt: let C continue to manage this block. */
2331
        return 0;
2332
    }
2333
2334
    /* pymalloc is in charge of this block */
2335
    size = INDEX2SIZE(pool->szidx);
2336
    if (nbytes <= size) {
  Branch (2336:9): [True: 14.6M, False: 3.96M]
2337
        /* The block is staying the same or shrinking.
2338
2339
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2340
           block to a smaller size class, but it wastes memory not to copy it.
2341
2342
           The compromise here is to copy on shrink only if at least 25% of
2343
           size can be shaved off. */
2344
        if (4 * nbytes > 3 * size) {
  Branch (2344:13): [True: 1.77M, False: 12.8M]
2345
            /* It's the same, or shrinking and new/old > 3/4. */
2346
            *newptr_p = p;
2347
            return 1;
2348
        }
2349
        size = nbytes;
2350
    }
2351
2352
    bp = _PyObject_Malloc(ctx, nbytes);
2353
    if (bp != NULL) {
  Branch (2353:9): [True: 16.7M, False: 0]
2354
        memcpy(bp, p, size);
2355
        _PyObject_Free(ctx, p);
2356
    }
2357
    *newptr_p = bp;
2358
    return 1;
2359
}
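The shrink test in pymalloc_realloc() above (4 * nbytes > 3 * size) keeps the block in place unless shrinking would recover at least 25% of it. A quick standalone illustration with made-up sizes:

#include <stdio.h>

int main(void)
{
    const size_t size = 64;                 /* made-up current block size */
    const size_t requests[] = { 64, 50, 48, 40 };
    for (size_t i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
        printf("nbytes=%2zu -> %s\n", requests[i],
               4 * requests[i] > 3 * size ? "reuse in place"
                                          : "copy to a smaller block");
    }
    return 0;
}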
2360
2361
2362
static void *
2363
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2364
{
2365
    void *ptr2;
2366
2367
    if (ptr == NULL) {
  Branch (2367:9): [True: 8.54M, False: 19.9M]
2368
        return _PyObject_Malloc(ctx, nbytes);
2369
    }
2370
2371
    if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
  Branch (2371:9): [True: 18.5M, False: 1.36M]
2372
        return ptr2;
2373
    }
2374
2375
    return PyMem_RawRealloc(ptr, nbytes);
2376
}
2377
2378
#else   /* ! WITH_PYMALLOC */
2379
2380
/*==========================================================================*/
2381
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2382
 * only be used by extensions that are compiled with pymalloc enabled. */
2383
2384
Py_ssize_t
2385
_Py_GetAllocatedBlocks(void)
2386
{
2387
    return 0;
2388
}
2389
2390
#endif /* WITH_PYMALLOC */
2391
2392
2393
/*==========================================================================*/
2394
/* An x-platform debugging allocator.  This doesn't manage memory directly,
2395
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2396
 */
2397
2398
/* Uncomment this define to add the "serialno" field */
2399
/* #define PYMEM_DEBUG_SERIALNO */
2400
2401
#ifdef PYMEM_DEBUG_SERIALNO
2402
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2403
2404
/* serialno is always incremented via calling this routine.  The point is
2405
 * to supply a single place to set a breakpoint.
2406
 */
2407
static void
2408
bumpserialno(void)
2409
{
2410
    ++serialno;
2411
}
2412
#endif
2413
2414
#define SST SIZEOF_SIZE_T
2415
2416
#ifdef PYMEM_DEBUG_SERIALNO
2417
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2418
#else
2419
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2420
#endif
2421
2422
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2423
static size_t
2424
read_size_t(const void *p)
2425
{
2426
    const uint8_t *q = (const uint8_t *)p;
2427
    size_t result = *q++;
2428
    int i;
2429
2430
    for (i = SST; --i > 0; ++q)
  Branch (2430:19): [True: 2.33M, False: 333k]
2431
        result = (result << 8) | *q;
2432
    return result;
2433
}
2434
2435
/* Write n as a big-endian size_t, MSB at address p, LSB at
2436
 * p + sizeof(size_t) - 1.
2437
 */
2438
static void
2439
write_size_t(void *p, size_t n)
2440
{
2441
    uint8_t *q = (uint8_t *)p + SST - 1;
2442
    int i;
2443
2444
    for (i = SST; --i >= 0; --q) {
  Branch (2444:19): [True: 1.34M, False: 168k]
2445
        *q = (uint8_t)(n & 0xff);
2446
        n >>= 8;
2447
    }
2448
}
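read_size_t() and write_size_t() above store the size MSB-first so that it reads naturally in a hex dump. The standalone round trip below re-derives the same byte order for illustration (it does not call the functions above):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint8_t buf[sizeof(size_t)];
    size_t n = (size_t)0x0102030405060708ULL;   /* sample value */

    /* encode MSB-first: the LSB ends up at buf[sizeof(size_t) - 1] */
    size_t tmp = n;
    for (int i = (int)sizeof(size_t) - 1; i >= 0; --i) {
        buf[i] = (uint8_t)(tmp & 0xff);
        tmp >>= 8;
    }
    /* decode, as read_size_t() does */
    size_t result = buf[0];
    for (size_t i = 1; i < sizeof(size_t); ++i) {
        result = (result << 8) | buf[i];
    }
    printf("round trip ok: %d\n", result == n);
    return 0;
}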
2449
2450
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2451
   fills them with useful stuff, here calling the underlying malloc's result p:
2452
2453
p[0: S]
2454
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2455
    to read in a memory dump).
2456
p[S]
2457
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2458
p[S+1: 2*S]
2459
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and reads.
2460
p[2*S: 2*S+n]
2461
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2462
    Used to catch reference to uninitialized memory.
2463
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2464
    handled the request itself.
2465
p[2*S+n: 2*S+n+S]
2466
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and reads.
2467
p[2*S+n+S: 2*S+n+2*S]
2468
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2469
    and _PyMem_DebugRealloc.
2470
    This is a big-endian size_t.
2471
    If "bad memory" is detected later, the serial number gives an
2472
    excellent way to set a breakpoint on the next run, to capture the
2473
    instant at which this block was passed out.
2474
2475
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2476
for 3 * S extra bytes, and omits the last serialno field.
2477
*/
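For concreteness, the sketch below prints the byte offsets of the regions described above for one hypothetical request, assuming S == 8 and PYMEM_DEBUG_SERIALNO enabled (4*S extra bytes); the request size n = 24 is made up:

#include <stdio.h>

int main(void)
{
    const int S = 8;                        /* assumed sizeof(size_t) */
    const int n = 24;                       /* hypothetical requested size */
    printf("p[%d..%d)   requested size, big-endian\n", 0, S);
    printf("p[%d]       API id\n", S);
    printf("p[%d..%d)   leading FORBIDDENBYTEs\n", S + 1, 2*S);
    printf("p[%d..%d)  user data (pointer returned to the caller)\n", 2*S, 2*S + n);
    printf("p[%d..%d)  trailing FORBIDDENBYTEs\n", 2*S + n, 3*S + n);
    printf("p[%d..%d)  serial number\n", 3*S + n, 4*S + n);
    return 0;
}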
2478
2479
static void *
2480
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2481
{
2482
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2483
    uint8_t *p;           /* base address of malloc'ed pad block */
2484
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2485
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2486
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2487
2488
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
  Branch (2488:9): [True: 0, False: 162k]
2489
        /* integer overflow: can't represent total as a Py_ssize_t */
2490
        return NULL;
2491
    }
2492
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2493
2494
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2495
                ^--- p    ^--- data   ^--- tail
2496
       S: nbytes stored as size_t
2497
       I: API identifier (1 byte)
2498
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2499
       C: Clean bytes used later to store actual data
2500
       N: Serial number stored as size_t
2501
2502
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2503
       is omitted. */
2504
2505
    if (use_calloc) {
  Branch (2505:9): [True: 1.19k, False: 161k]
2506
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2507
    }
2508
    else {
2509
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2510
    }
2511
    if (p == NULL) {
  Branch (2511:9): [True: 0, False: 162k]
2512
        return NULL;
2513
    }
2514
    data = p + 2*SST;
2515
2516
#ifdef PYMEM_DEBUG_SERIALNO
2517
    bumpserialno();
2518
#endif
2519
2520
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2521
    write_size_t(p, nbytes);
2522
    p[SST] = (uint8_t)api->api_id;
2523
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2524
2525
    if (nbytes > 0 && !use_calloc) {
  Branch (2525:9): [True: 162k, False: 56]
  Branch (2525:23): [True: 161k, False: 1.19k]
2526
        memset(data, PYMEM_CLEANBYTE, nbytes);
2527
    }
2528
2529
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2530
    tail = data + nbytes;
2531
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
2532
#ifdef PYMEM_DEBUG_SERIALNO
2533
    write_size_t(tail + SST, serialno);
2534
#endif
2535
2536
    return data;
2537
}
2538
2539
static void *
2540
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2541
{
2542
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2543
}
2544
2545
static void *
2546
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2547
{
2548
    size_t nbytes;
2549
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2550
    nbytes = nelem * elsize;
2551
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2552
}
2553
2554
2555
/* The debug free first checks the 2*SST bytes on each end for sanity (in
2556
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
2557
   Then fills the original bytes with PYMEM_DEADBYTE.
2558
   Then calls the underlying free.
2559
*/
2560
static void
2561
_PyMem_DebugRawFree(void *ctx, void *p)
2562
{
2563
    /* PyMem_Free(NULL) has no effect */
2564
    if (p == NULL) {
  Branch (2564:9): [True: 5.45k, False: 160k]
2565
        return;
2566
    }
2567
2568
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2569
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2570
    size_t nbytes;
2571
2572
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2573
    nbytes = read_size_t(q);
2574
    nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2575
    memset(q, PYMEM_DEADBYTE, nbytes);
2576
    api->alloc.free(api->alloc.ctx, q);
2577
}
2578
2579
2580
static void *
2581
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2582
{
2583
    if (p == NULL) {
  Branch (2583:9): [True: 1.48k, False: 6.15k]
2584
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2585
    }
2586
2587
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2588
    uint8_t *head;        /* base address of malloc'ed pad block */
2589
    uint8_t *data;        /* pointer to data bytes */
2590
    uint8_t *r;
2591
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2592
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
2593
    size_t original_nbytes;
2594
#define ERASED_SIZE 64
2595
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2596
2597
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2598
2599
    data = (uint8_t *)p;
2600
    head = data - 2*SST;
2601
    original_nbytes = read_size_t(head);
2602
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
  Branch (2602:9): [True: 0, False: 6.15k]
2603
        /* integer overflow: can't represent total as a Py_ssize_t */
2604
        return NULL;
2605
    }
2606
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2607
2608
    tail = data + original_nbytes;
2609
#ifdef PYMEM_DEBUG_SERIALNO
2610
    size_t block_serialno = read_size_t(tail + SST);
2611
#endif
2612
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
2613
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
2614
     */
2615
    if (original_nbytes <= sizeof(save)) {
  Branch (2615:9): [True: 1.25k, False: 4.90k]
2616
        memcpy(save, data, original_nbytes);
2617
        memset(data - 2 * SST, PYMEM_DEADBYTE,
2618
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2619
    }
2620
    else {
2621
        memcpy(save, data, ERASED_SIZE);
2622
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2623
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2624
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2625
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2626
    }
2627
2628
    /* Resize and add decorations. */
2629
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2630
    if (r == NULL) {
  Branch (2630:9): [True: 0, False: 6.15k]
2631
        /* if realloc() failed: rewrite header and footer which have
2632
           just been erased */
2633
        nbytes = original_nbytes;
2634
    }
2635
    else {
2636
        head = r;
2637
#ifdef PYMEM_DEBUG_SERIALNO
2638
        bumpserialno();
2639
        block_serialno = serialno;
2640
#endif
2641
    }
2642
    data = head + 2*SST;
2643
2644
    write_size_t(head, nbytes);
2645
    head[SST] = (uint8_t)api->api_id;
2646
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2647
2648
    tail = data + nbytes;
2649
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
2650
#ifdef PYMEM_DEBUG_SERIALNO
2651
    write_size_t(tail + SST, block_serialno);
2652
#endif
2653
2654
    /* Restore saved bytes. */
2655
    if (original_nbytes <= sizeof(save)) {
  Branch (2655:9): [True: 1.25k, False: 4.90k]
2656
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2657
    }
2658
    else {
2659
        size_t i = original_nbytes - ERASED_SIZE;
2660
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2661
        if (nbytes > i) {
  Branch (2661:13): [True: 2.31k, False: 2.59k]
2662
            memcpy(data + i, &save[ERASED_SIZE],
2663
                   Py_MIN(nbytes - i, ERASED_SIZE));
2664
        }
2665
    }
2666
2667
    if (r == NULL) {
  Branch (2667:9): [True: 0, False: 6.15k]
2668
        return NULL;
2669
    }
2670
2671
    if (nbytes > original_nbytes) {
  Branch (2671:9): [True: 3.22k, False: 2.93k]
2672
        /* growing: mark new extra memory clean */
2673
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
2674
               nbytes - original_nbytes);
2675
    }
2676
2677
    return data;
2678
}
2679
2680
static inline void
2681
_PyMem_DebugCheckGIL(const char *func)
2682
{
2683
    if (!PyGILState_Check()) {
  Branch (2683:9): [True: 0, False: 320k]
2684
        _Py_FatalErrorFunc(func,
2685
                           "Python memory allocator called "
2686
                           "without holding the GIL");
2687
    }
2688
}
2689
2690
static void *
2691
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
2692
{
2693
    _PyMem_DebugCheckGIL(__func__);
2694
    return _PyMem_DebugRawMalloc(ctx, nbytes);
2695
}
2696
2697
static void *
2698
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2699
{
2700
    _PyMem_DebugCheckGIL(__func__);
2701
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2702
}
2703
2704
2705
static void
2706
_PyMem_DebugFree(void *ctx, void *ptr)
2707
{
2708
    _PyMem_DebugCheckGIL(__func__);
2709
    _PyMem_DebugRawFree(ctx, ptr);
2710
}
2711
2712
2713
static void *
2714
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2715
{
2716
    _PyMem_DebugCheckGIL(__func__);
2717
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2718
}
2719
2720
/* Check the forbidden bytes on both ends of the memory allocated for p.
2721
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2722
 * and call Py_FatalError to kill the program.
2723
 * The API id is also checked.
2724
 */
2725
static void
2726
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2727
{
2728
    assert(p != NULL);
2729
2730
    const uint8_t *q = (const uint8_t *)p;
2731
    size_t nbytes;
2732
    const uint8_t *tail;
2733
    int i;
2734
    char id;
2735
2736
    /* Check the API id */
2737
    id = (char)q[-SST];
2738
    if (id != api) {
  Branch (2738:9): [True: 0, False: 166k]
2739
        _PyObject_DebugDumpAddress(p);
2740
        _Py_FatalErrorFormat(func,
2741
                             "bad ID: Allocated using API '%c', "
2742
                             "verified using API '%c'",
2743
                             id, api);
2744
    }
2745
2746
    /* Check the stuff at the start of p first:  if there's underwrite
2747
     * corruption, the number-of-bytes field may be nuts, and checking
2748
     * the tail could lead to a segfault then.
2749
     */
2750
    for (i = SST-1; i >= 1; --i) {
  Branch (2750:21): [True: 1.16M, False: 166k]
2751
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
  Branch (2751:13): [True: 0, False: 1.16M]
2752
            _PyObject_DebugDumpAddress(p);
2753
            _Py_FatalErrorFunc(func, "bad leading pad byte");
2754
        }
2755
    }
2756
2757
    nbytes = read_size_t(q - 2*SST);
2758
    tail = q + nbytes;
2759
    for (i = 0; i < SST; ++i) {
  Branch (2759:17): [True: 1.33M, False: 166k]
2760
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
  Branch (2760:13): [True: 0, False: 1.33M]
2761
            _PyObject_DebugDumpAddress(p);
2762
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
2763
        }
2764
    }
2765
}
2766
2767
/* Display info to stderr about the memory block at p. */
2768
static void
2769
_PyObject_DebugDumpAddress(const void *p)
2770
{
2771
    const uint8_t *q = (const uint8_t *)p;
2772
    const uint8_t *tail;
2773
    size_t nbytes;
2774
    int i;
2775
    int ok;
2776
    char id;
2777
2778
    fprintf(stderr, "Debug memory block at address p=%p:", p);
2779
    if (p == NULL) {
  Branch (2779:9): [True: 0, False: 0]
2780
        fprintf(stderr, "\n");
2781
        return;
2782
    }
2783
    id = (char)q[-SST];
2784
    fprintf(stderr, " API '%c'\n", id);
2785
2786
    nbytes = read_size_t(q - 2*SST);
2787
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
2788
2789
    /* In case this is nuts, check the leading pad bytes first. */
2790
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2791
    ok = 1;
2792
    for (i = 1; i <= SST-1; ++i) {
  Branch (2792:17): [True: 0, False: 0]
2793
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
  Branch (2793:13): [True: 0, False: 0]
2794
            ok = 0;
2795
            break;
2796
        }
2797
    }
2798
    if (ok)
  Branch (2798:9): [True: 0, False: 0]
2799
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
2800
    else {
2801
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2802
            PYMEM_FORBIDDENBYTE);
2803
        for (i = SST-1; i >= 1; --i) {
  Branch (2803:25): [True: 0, False: 0]
2804
            const uint8_t byte = *(q-i);
2805
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2806
            if (byte != PYMEM_FORBIDDENBYTE)
  Branch (2806:17): [True: 0, False: 0]
2807
                fputs(" *** OUCH", stderr);
2808
            fputc('\n', stderr);
2809
        }
2810
2811
        fputs("    Because memory is corrupted at the start, the "
2812
              "count of bytes requested\n"
2813
              "       may be bogus, and checking the trailing pad "
2814
              "bytes may segfault.\n", stderr);
2815
    }
2816
2817
    tail = q + nbytes;
2818
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2819
    ok = 1;
2820
    for (i = 0; i < SST; ++i) {
  Branch (2820:17): [True: 0, False: 0]
2821
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
  Branch (2821:13): [True: 0, False: 0]
2822
            ok = 0;
2823
            break;
2824
        }
2825
    }
2826
    if (ok)
  Branch (2826:9): [True: 0, False: 0]
2827
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
2828
    else {
2829
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2830
                PYMEM_FORBIDDENBYTE);
2831
        for (i = 0; i < SST; ++i) {
  Branch (2831:21): [True: 0, False: 0]
2832
            const uint8_t byte = tail[i];
2833
            fprintf(stderr, "        at tail+%d: 0x%02x",
2834
                    i, byte);
2835
            if (byte != PYMEM_FORBIDDENBYTE)
  Branch (2835:17): [True: 0, False: 0]
2836
                fputs(" *** OUCH", stderr);
2837
            fputc('\n', stderr);
2838
        }
2839
    }
2840
2841
#ifdef PYMEM_DEBUG_SERIALNO
2842
    size_t serial = read_size_t(tail + SST);
2843
    fprintf(stderr,
2844
            "    The block was made by call #%zu to debug malloc/realloc.\n",
2845
            serial);
2846
#endif
2847
2848
    if (nbytes > 0) {
  Branch (2848:9): [True: 0, False: 0]
2849
        i = 0;
2850
        fputs("    Data at p:", stderr);
2851
        /* print up to 8 bytes at the start */
2852
        while (q < tail && i < 8) {
  Branch (2852:16): [True: 0, False: 0]
  Branch (2852:28): [True: 0, False: 0]
2853
            fprintf(stderr, " %02x", *q);
2854
            ++i;
2855
            ++q;
2856
        }
2857
        /* and up to 8 at the end */
2858
        if (q < tail) {
  Branch (2858:13): [True: 0, False: 0]
2859
            if (tail - q > 8) {
  Branch (2859:17): [True: 0, False: 0]
2860
                fputs(" ...", stderr);
2861
                q = tail - 8;
2862
            }
2863
            while (q < tail) {
  Branch (2863:20): [True: 0, False: 0]
2864
                fprintf(stderr, " %02x", *q);
2865
                ++q;
2866
            }
2867
        }
2868
        fputc('\n', stderr);
2869
    }
2870
    fputc('\n', stderr);
2871
2872
    fflush(stderr);
2873
    _PyMem_DumpTraceback(fileno(stderr), p);
2874
}
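Assembled from the format strings above, the dump for an intact block looks roughly like the following; the address, API letter, serial number, data bytes and the 100-byte request size are illustration values only, SST is assumed to be 8, and the serial line appears only when PYMEM_DEBUG_SERIALNO is defined. A traceback written by _PyMem_DumpTraceback follows the trailing blank line.

    Debug memory block at address p=0x7f3a2c1b4e30: API 'o'
        100 bytes originally requested
        The 7 pad bytes at p-7 are FORBIDDENBYTE, as expected.
        The 8 pad bytes at tail=0x7f3a2c1b4e94 are FORBIDDENBYTE, as expected.
        The block was made by call #123456 to debug malloc/realloc.
        Data at p: cd cd cd cd cd cd cd cd ... cd cd cd cd cd cd cd cd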
2875
2876
2877
static size_t
2878
printone(FILE *out, const char* msg, size_t value)
2879
{
2880
    int i, k;
2881
    char buf[100];
2882
    size_t origvalue = value;
2883
2884
    fputs(msg, out);
2885
    for (i = (int)strlen(msg); i < 35; ++i)
  Branch (2885:32): [True: 0, False: 0]
2886
        fputc(' ', out);
2887
    fputc('=', out);
2888
2889
    /* Write the value with commas. */
2890
    i = 22;
2891
    buf[i--] = '\0';
2892
    buf[i--] = '\n';
2893
    k = 3;
2894
    do {
2895
        size_t nextvalue = value / 10;
2896
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
2897
        value = nextvalue;
2898
        buf[i--] = (char)(digit + '0');
2899
        --k;
2900
        if (k == 0 && value && i >= 0) {
  Branch (2900:13): [True: 0, False: 0]
  Branch (2900:23): [True: 0, False: 0]
  Branch (2900:32): [True: 0, False: 0]
2901
            k = 3;
2902
            buf[i--] = ',';
2903
        }
2904
    } while (value && i >= 0);
  Branch (2904:14): [True: 0, False: 0]
  Branch (2904:23): [True: 0, False: 0]
2905
2906
    while (i >= 0)
  Branch (2906:12): [True: 0, False: 0]
2907
        buf[i--] = ' ';
2908
    fputs(buf, out);
2909
2910
    return origvalue;
2911
}
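As a worked example of the formatting above: the message is padded with spaces out to column 35, an '=' is written, and the value is rendered with thousands separators, right-aligned in a 21-character field. A hypothetical call (the value is invented):

    printone(out, "# bytes in allocated blocks", 1234567);

would produce a line of roughly this shape:

    # bytes in allocated blocks        =            1,234,567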
2912
2913
void
2914
_PyDebugAllocatorStats(FILE *out,
2915
                       const char *block_name, int num_blocks, size_t sizeof_block)
2916
{
2917
    char buf1[128];
2918
    char buf2[128];
2919
    PyOS_snprintf(buf1, sizeof(buf1),
2920
                  "%d %ss * %zd bytes each",
2921
                  num_blocks, block_name, sizeof_block);
2922
    PyOS_snprintf(buf2, sizeof(buf2),
2923
                  "%48s ", buf1);
2924
    (void)printone(out, buf2, num_blocks * sizeof_block);
2925
}
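Note that the "%d %ss * %zd bytes each" format pluralizes block_name by appending an 's', so callers pass the singular form. A hypothetical caller reporting a cached free list might look like this (the object name and counts are invented; only the signature comes from the listing above):

    /* Hypothetical: report 80 cached 56-byte objects held on a free list. */
    _PyDebugAllocatorStats(out, "free MyObject", 80, 56);
    /* printone then emits "80 free MyObjects * 56 bytes each" (right-aligned)
     * followed by "= 4,480". */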
2926
2927
2928
#ifdef WITH_PYMALLOC
2929
2930
#ifdef Py_DEBUG
2931
/* Is target in the list?  The list is traversed via the nextpool pointers.
2932
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
2933
 * list, else 0.
2934
 */
2935
static int
2936
pool_is_in_list(const poolp target, poolp list)
2937
{
2938
    poolp origlist = list;
2939
    assert(target != NULL);
2940
    if (list == NULL)
2941
        return 0;
2942
    do {
2943
        if (target == list)
2944
            return 1;
2945
        list = list->nextpool;
2946
    } while (list != NULL && list != origlist);
2947
    return 0;
2948
}
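The list != origlist test is what terminates the circular case: the walk stops either at NULL or when it returns to the pool it started from, so a target absent from a circular list is reported as 0 after one full lap. Within this listing the helper only backs the Py_DEBUG assertions in the stats walk below, along the lines of:

    /* Mirrors the Py_DEBUG checks used further down in this listing: */
    assert(pool_is_in_list(p, arenas[i].freepools));   /* an unused pool must be on its arena's free list */
    assert(pool_is_in_list(p, usedpools[sz + sz]));    /* a partly used pool must be on usedpools for class sz */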
2949
#endif
2950
2951
/* Print summary info to "out" about the state of pymalloc's structures.
2952
 * In Py_DEBUG mode, also perform some expensive internal consistency
2953
 * checks.
2954
 *
2955
 * Return 0 if the memory debug hooks are not installed or no statistics were
2956
 * written into out; return 1 otherwise.
2957
 */
2958
int
2959
_PyObject_DebugMallocStats(FILE *out)
2960
{
2961
    if (!_PyMem_PymallocEnabled()) {
  Branch (2961:9): [True: 3, False: 0]
2962
        return 0;
2963
    }
2964
2965
    uint i;
2966
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2967
    /* # of pools, allocated blocks, and free blocks per class index */
2968
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2969
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2970
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2971
    /* total # of allocated bytes in used and full pools */
2972
    size_t allocated_bytes = 0;
2973
    /* total # of available bytes in used pools */
2974
    size_t available_bytes = 0;
2975
    /* # of free pools + pools not yet carved out of current arena */
2976
    uint numfreepools = 0;
2977
    /* # of bytes for arena alignment padding */
2978
    size_t arena_alignment = 0;
2979
    /* # of bytes in used and full pools used for pool_headers */
2980
    size_t pool_header_bytes = 0;
2981
    /* # of bytes in used and full pools wasted due to quantization,
2982
     * i.e. the necessarily leftover space at the ends of used and
2983
     * full pools.
2984
     */
2985
    size_t quantization = 0;
2986
    /* # of arenas actually allocated. */
2987
    size_t narenas = 0;
2988
    /* running total -- should equal narenas * ARENA_SIZE */
2989
    size_t total;
2990
    char buf[128];
2991
2992
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2993
            SMALL_REQUEST_THRESHOLD, numclasses);
2994
2995
    for (i = 0; i < numclasses; ++i)
  Branch (2995:17): [True: 0, False: 0]
2996
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2997
2998
    /* Because full pools aren't linked to from anything, it's easiest
2999
     * to march over all the arenas.  If we're lucky, most of the memory
3000
     * will be living in full pools -- would be a shame to miss them.
3001
     */
3002
    for (i = 0; i < maxarenas; ++i) {
  Branch (3002:17): [True: 0, False: 0]
3003
        uint j;
3004
        uintptr_t base = arenas[i].address;
3005
3006
        /* Skip arenas which are not allocated. */
3007
        if (arenas[i].address == (uintptr_t)NULL)
  Branch (3007:13): [True: 0, False: 0]
3008
            continue;
3009
        narenas += 1;
3010
3011
        numfreepools += arenas[i].nfreepools;
3012
3013
        /* round up to pool alignment */
3014
        if (base & (uintptr_t)POOL_SIZE_MASK) {
  Branch (3014:13): [True: 0, False: 0]
3015
            arena_alignment += POOL_SIZE;
3016
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3017
            base += POOL_SIZE;
3018
        }
3019
3020
        /* visit every pool in the arena */
3021
        assert(base <= (uintptr_t) arenas[i].pool_address);
3022
        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
  Branch (3022:21): [True: 0, False: 0]
3023
             ++j, base += POOL_SIZE) {
3024
            poolp p = (poolp)base;
3025
            const uint sz = p->szidx;
3026
            uint freeblocks;
3027
3028
            if (p->ref.count == 0) {
  Branch (3028:17): [True: 0, False: 0]
3029
                /* currently unused */
3030
#ifdef Py_DEBUG
3031
                assert(pool_is_in_list(p, arenas[i].freepools));
3032
#endif
3033
                continue;
3034
            }
3035
            ++numpools[sz];
3036
            numblocks[sz] += p->ref.count;
3037
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3038
            numfreeblocks[sz] += freeblocks;
3039
#ifdef Py_DEBUG
3040
            if (freeblocks > 0)
3041
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3042
#endif
3043
        }
3044
    }
3045
    assert(narenas == narenas_currently_allocated);
3046
3047
    fputc('\n', out);
3048
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3049
          "-----   ----   ---------   -------------  ------------\n",
3050
          out);
3051
3052
    for (i = 0; i < numclasses; ++i) {
  Branch (3052:17): [True: 0, False: 0]
3053
        size_t p = numpools[i];
3054
        size_t b = numblocks[i];
3055
        size_t f = numfreeblocks[i];
3056
        uint size = INDEX2SIZE(i);
3057
        if (p == 0) {
  Branch (3057:13): [True: 0, False: 0]
3058
            assert(b == 0 && f == 0);
3059
            continue;
3060
        }
3061
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3062
                i, size, p, b, f);
3063
        allocated_bytes += b * size;
3064
        available_bytes += f * size;
3065
        pool_header_bytes += p * POOL_OVERHEAD;
3066
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3067
    }
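    /* Worked example of the quantization term above, using illustrative
     * constants that are assumptions rather than values from this build:
     * with POOL_SIZE = 4096, POOL_OVERHEAD = 48 and a 32-byte size class,
     * each pool holds (4096 - 48) / 32 = 126 blocks and leaves
     * (4096 - 48) % 32 = 16 bytes unusable; that per-pool remainder, summed
     * over the p pools of the class, is what "quantization" accumulates. */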
3068
    fputc('\n', out);
3069
#ifdef PYMEM_DEBUG_SERIALNO
3070
    if (_PyMem_DebugEnabled()) {
3071
        (void)printone(out, "# times object malloc called", serialno);
3072
    }
3073
#endif
3074
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3075
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3076
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3077
    (void)printone(out, "# arenas allocated current", narenas);
3078
3079
    PyOS_snprintf(buf, sizeof(buf),
3080
                  "%zu arenas * %d bytes/arena",
3081
                  narenas, ARENA_SIZE);
3082
    (void)printone(out, buf, narenas * ARENA_SIZE);
3083
3084
    fputc('\n', out);
3085
3086
    /* Account for what all of those arena bytes are being used for. */
3087
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3088
    total += printone(out, "# bytes in available blocks", available_bytes);
3089
3090
    PyOS_snprintf(buf, sizeof(buf),
3091
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3092
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3093
3094
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3095
    total += printone(out, "# bytes lost to quantization", quantization);
3096
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3097
    (void)printone(out, "Total", total);
3098
    assert(narenas * ARENA_SIZE == total);
3099
3100
#if WITH_PYMALLOC_RADIX_TREE
3101
    fputs("\narena map counts\n", out);
3102
#ifdef USE_INTERIOR_NODES
3103
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3104
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3105
    fputc('\n', out);
3106
#endif
3107
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3108
#ifdef USE_INTERIOR_NODES
3109
    total += printone(out, "# bytes lost to arena map mid",
3110
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3111
    total += printone(out, "# bytes lost to arena map bot",
3112
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3113
    (void)printone(out, "Total", total);
3114
#endif
3115
#endif
3116
3117
    return 1;
3118
}
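A minimal sketch of producing this report directly, assuming only the return-value contract stated in the comment before the function (this is essentially what sys._debugmallocstats() does, writing to stderr):

    /* Print pymalloc statistics, or a short note when pymalloc is not the
     * active allocator (for example under PYTHONMALLOC=malloc). */
    if (!_PyObject_DebugMallocStats(stderr)) {
        fputs("pymalloc is not enabled; no statistics to report\n", stderr);
    }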
3119
3120
#endif /* #ifdef WITH_PYMALLOC */