Skip to content

Commit 509a5c7

Browse files
committed
Add USE_RADIX_TREE flag.
If disabled, the old version of address_in_range() is used.
1 parent 94ad760 commit 509a5c7

File tree

1 file changed

+124
-7
lines changed

1 file changed

+124
-7
lines changed

Objects/obmalloc.c

Lines changed: 124 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,18 @@ static int running_on_valgrind = -1;
894894
#endif
895895
#endif
896896

897+
/* use radix-tree to track arena memory regions, for address_in_range() */
898+
#define USE_RADIX_TREE
899+
900+
#if SIZEOF_VOID_P > 4
901+
/* on 64-bit platforms use larger pools and arenas if we can */
902+
#define USE_LARGE_ARENAS
903+
#ifdef USE_RADIX_TREE
904+
/* large pools only supported if radix-tree is enabled */
905+
#define USE_LARGE_POOLS
906+
#endif
907+
#endif
908+
897909
/*
898910
* The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
899911
* on a page boundary. This is a reserved virtual address space for the
@@ -907,7 +919,7 @@ static int running_on_valgrind = -1;
907919
* Arenas are allocated with mmap() on systems supporting anonymous memory
908920
* mappings to reduce heap fragmentation.
909921
*/
910-
#if SIZEOF_VOID_P > 4
922+
#ifdef USE_LARGE_ARENAS
911923
#define ARENA_BITS 20 /* 1 MiB */
912924
#else
913925
#define ARENA_BITS 18 /* 256 KiB */
@@ -922,14 +934,20 @@ static int running_on_valgrind = -1;
922934
/*
923935
* Size of the pools used for small blocks. Must be a power of 2.
924936
*/
925-
#if SIZEOF_VOID_P > 4
937+
#ifdef USE_LARGE_POOLS
926938
#define POOL_BITS 14 /* 16 KiB */
927939
#else
928940
#define POOL_BITS 12 /* 4 KiB */
929941
#endif
930942
#define POOL_SIZE (1 << POOL_BITS)
931943
#define POOL_SIZE_MASK (POOL_SIZE - 1)
932944

945+
#ifndef USE_RADIX_TREE
946+
#if POOL_SIZE != SYSTEM_PAGE_SIZE
947+
# error "pool size must be system page size"
948+
#endif
949+
#endif
950+
933951
#define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE)
934952
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
935953
# error "arena size not an exact multiple of pool size"
@@ -1243,12 +1261,13 @@ _Py_GetAllocatedBlocks(void)
12431261
return n;
12441262
}
12451263

1264+
#ifdef USE_RADIX_TREE
12461265
/*==========================================================================*/
12471266
/* radix tree for tracking arena usage
12481267
1249-
bit allocation for keys (2^20 arena size)
1268+
bit allocation for keys
12501269
1251-
64-bit pointers:
1270+
64-bit pointers and 2^20 arena size:
12521271
16 -> ignored (BITS - PHYSICAL_BITS)
12531272
10 -> MAP_TOP
12541273
10 -> MAP_MID
@@ -1257,9 +1276,9 @@ _Py_GetAllocatedBlocks(void)
12571276
----
12581277
64
12591278
1260-
32-bit pointers:
1261-
12 -> MAP_BOT
1262-
20 -> ideal aligned arena
1279+
32-bit pointers and 2^18 arena size:
1280+
14 -> MAP_BOT
1281+
18 -> ideal aligned arena
12631282
----
12641283
32
12651284
@@ -1495,6 +1514,7 @@ arena_map_is_used(block *p)
14951514

14961515
/* end of radix tree logic */
14971516
/*==========================================================================*/
1517+
#endif /* USE_RADIX_TREE */
14981518

14991519

15001520
/* Allocate a new arena. If we run out of memory, return NULL. Else
@@ -1565,13 +1585,15 @@ new_arena(void)
15651585
unused_arena_objects = arenaobj->nextarena;
15661586
assert(arenaobj->address == 0);
15671587
address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1588+
#ifdef USE_RADIX_TREE
15681589
if (address != NULL) {
15691590
if (!arena_map_mark_used((uintptr_t)address, 1)) {
15701591
/* marking arena in radix tree failed, abort */
15711592
_PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
15721593
address = NULL;
15731594
}
15741595
}
1596+
#endif
15751597
if (address == NULL) {
15761598
/* The allocation failed: return NULL after putting the
15771599
* arenaobj back.
@@ -1603,6 +1625,7 @@ new_arena(void)
16031625

16041626

16051627

1628+
#ifdef USE_RADIX_TREE
16061629
/* Return true if and only if P is an address that was allocated by
16071630
pymalloc. When the radix tree is used, 'pool' is unused.
16081631
*/
@@ -1611,7 +1634,99 @@ address_in_range(void *p, poolp pool)
16111634
{
16121635
return arena_map_is_used(p);
16131636
}
1637+
#else
1638+
/*
1639+
address_in_range(P, POOL)
1640+
1641+
Return true if and only if P is an address that was allocated by pymalloc.
1642+
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1643+
(the caller is asked to compute this because the macro expands POOL more than
1644+
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1645+
variable and pass the latter to the macro; because address_in_range is
1646+
called on every alloc/realloc/free, micro-efficiency is important here).
1647+
1648+
Tricky: Let B be the arena base address associated with the pool, B =
1649+
arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1650+
1651+
B <= P < B + ARENA_SIZE
1652+
1653+
Subtracting B throughout, this is true iff
1654+
1655+
0 <= P-B < ARENA_SIZE
1656+
1657+
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1658+
1659+
Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1660+
before the first arena has been allocated. `arenas` is still NULL in that
1661+
case. We rely on maxarenas also being 0 in that case, so that
1662+
(POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1663+
into a NULL arenas.
1664+
1665+
Details: given P and POOL, the arena_object corresponding to P is AO =
1666+
arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1667+
stores, etc), POOL is the correct address of P's pool, AO.address is the
1668+
correct base address of the pool's arena, and P must be within ARENA_SIZE of
1669+
AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1670+
(NULL)). Therefore address_in_range correctly reports that obmalloc
1671+
controls P.
1672+
1673+
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1674+
call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1675+
in this case -- it may even be uninitialized trash. If the trash arenaindex
1676+
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1677+
control P.
1678+
1679+
Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
1680+
allocated arena, obmalloc controls all the memory in slice AO.address :
1681+
AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1682+
so P doesn't lie in that slice, so the macro correctly reports that P is not
1683+
controlled by obmalloc.
1684+
1685+
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1686+
arena_object (one not currently associated with an allocated arena),
1687+
AO.address is 0, and the second test in the macro reduces to:
1688+
1689+
P < ARENA_SIZE
1690+
1691+
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1692+
that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1693+
of the test still passes, and the third clause (AO.address != 0) is necessary
1694+
to get the correct result: AO.address is 0 in this case, so the macro
1695+
correctly reports that P is not controlled by obmalloc (despite that P lies in
1696+
slice AO.address : AO.address + ARENA_SIZE).
1697+
1698+
Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1699+
2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1700+
corresponded to a currently-allocated arena, so the "P is not controlled by
1701+
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1702+
was impossible.
1703+
1704+
Note that the logic is excruciating, and reading up possibly uninitialized
1705+
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1706+
creates problems for some memory debuggers. The overwhelming advantage is
1707+
that this test determines whether an arbitrary address is controlled by
1708+
obmalloc in a small constant time, independent of the number of arenas
1709+
obmalloc controls. Since this test is needed at every entry point, it's
1710+
extremely desirable that it be this fast.
1711+
*/
1712+
1713+
static bool _Py_NO_SANITIZE_ADDRESS
1714+
_Py_NO_SANITIZE_THREAD
1715+
_Py_NO_SANITIZE_MEMORY
1716+
address_in_range(void *p, poolp pool)
1717+
{
1718+
// Since address_in_range may be reading from memory which was not allocated
1719+
// by Python, it is important that pool->arenaindex is read only once, as
1720+
// another thread may be concurrently modifying the value without holding
1721+
// the GIL. The following dance forces the compiler to read pool->arenaindex
1722+
// only once.
1723+
uint arenaindex = *((volatile uint *)&pool->arenaindex);
1724+
return arenaindex < maxarenas &&
1725+
(uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1726+
arenas[arenaindex].address != 0;
1727+
}
16141728

1729+
#endif /* !USE_RADIX_TREE */
16151730

16161731
/*==========================================================================*/
16171732

@@ -1957,8 +2072,10 @@ insert_to_freepool(poolp pool)
19572072
ao->nextarena = unused_arena_objects;
19582073
unused_arena_objects = ao;
19592074

2075+
#ifdef USE_RADIX_TREE
19602076
/* mark arena region as not under control of obmalloc */
19612077
arena_map_mark_used(ao->address, 0);
2078+
#endif
19622079

19632080
/* Free the entire arena. */
19642081
_PyObject_Arena.free(_PyObject_Arena.ctx,

0 commit comments

Comments
 (0)