@@ -894,6 +894,18 @@ static int running_on_valgrind = -1;
 #endif
 #endif

+/* use radix-tree to track arena memory regions, for address_in_range() */
+#define USE_RADIX_TREE
+
+#if SIZEOF_VOID_P > 4
+/* on 64-bit platforms use larger pools and arenas if we can */
+#define USE_LARGE_ARENAS
+#ifdef USE_RADIX_TREE
+/* large pools only supported if radix-tree is enabled */
+#define USE_LARGE_POOLS
+#endif
+#endif
+
 /*
  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
  * on a page boundary. This is a reserved virtual address space for the
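
Taken together, the switches added above resolve to a simple matrix: USE_RADIX_TREE is always on, while USE_LARGE_ARENAS and USE_LARGE_POOLS appear only on 64-bit builds. A minimal standalone sketch of that logic (using UINTPTR_MAX as a stand-in for pyconfig.h's SIZEOF_VOID_P so it compiles outside CPython; everything here is illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the patch's SIZEOF_VOID_P > 4 test. */
    #if UINTPTR_MAX > 0xFFFFFFFFu
    #  define SKETCH_LARGE "USE_LARGE_ARENAS and USE_LARGE_POOLS are defined"
    #else
    #  define SKETCH_LARGE "large arenas/pools stay disabled"
    #endif

    int main(void)
    {
        printf("USE_RADIX_TREE is defined unconditionally; %s\n", SKETCH_LARGE);
        return 0;
    }
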
@@ -907,7 +919,7 @@ static int running_on_valgrind = -1;
  * Arenas are allocated with mmap() on systems supporting anonymous memory
  * mappings to reduce heap fragmentation.
  */
-#if SIZEOF_VOID_P > 4
+#ifdef USE_LARGE_ARENAS
 #define ARENA_BITS              20    /* 1 MiB */
 #else
 #define ARENA_BITS              18    /* 256 KiB */
@@ -922,14 +934,20 @@ static int running_on_valgrind = -1;
 /*
  * Size of the pools used for small blocks. Must be a power of 2.
  */
-#if SIZEOF_VOID_P > 4
+#ifdef USE_LARGE_POOLS
 #define POOL_BITS               14    /* 16 KiB */
 #else
 #define POOL_BITS               12    /* 4 KiB */
 #endif
 #define POOL_SIZE               (1 << POOL_BITS)
 #define POOL_SIZE_MASK          (POOL_SIZE - 1)

+#ifndef USE_RADIX_TREE
+#if POOL_SIZE != SYSTEM_PAGE_SIZE
+#  error "pool size must be system page size"
+#endif
+#endif
+
 #define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
 #  error "arena size not an exact multiple of pool size"
@@ -1243,12 +1261,13 @@ _Py_GetAllocatedBlocks(void)
     return n;
 }

+#ifdef USE_RADIX_TREE
 /*==========================================================================*/
 /* radix tree for tracking arena usage

-   bit allocation for keys (2^20 arena size)
+   bit allocation for keys

-   64-bit pointers:
+   64-bit pointers and 2^20 arena size:
     16 -> ignored (BITS - PHYSICAL_BITS)
     10 -> MAP_TOP
     10 -> MAP_MID
@@ -1257,9 +1276,9 @@ _Py_GetAllocatedBlocks(void)
    ----
      64

-   32-bit pointers:
-     12 -> MAP_BOT
-     20 -> ideal aligned arena
+   32-bit pointers and 2^18 arena size:
+     14 -> MAP_BOT
+     18 -> ideal aligned arena
    ----
      32

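
To make the key layout concrete, here is a standalone sketch of index extraction for the 32-bit case shown above (the low 18 bits address bytes inside an ideally aligned arena, the remaining 14 bits select a MAP_BOT slot); the names are illustrative assumptions, not the identifiers used in the patch:

    #include <assert.h>
    #include <stdint.h>

    #define SKETCH_ARENA_BITS 18   /* 2^18 arena size, 32-bit layout */
    #define SKETCH_BOT_BITS   14   /* MAP_BOT index width */

    /* Drop the in-arena offset, keep the 14-bit MAP_BOT index. */
    static unsigned int sketch_map_bot_index(uintptr_t p)
    {
        return (unsigned int)((p >> SKETCH_ARENA_BITS) &
                              ((1u << SKETCH_BOT_BITS) - 1u));
    }

    int main(void)
    {
        /* Addresses in the same 256 KiB-aligned region share one slot... */
        assert(sketch_map_bot_index(0x40000) == sketch_map_bot_index(0x7FFFF));
        /* ...and the next region maps to the next slot. */
        assert(sketch_map_bot_index(0x80000) == sketch_map_bot_index(0x40000) + 1);
        return 0;
    }
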
@@ -1495,6 +1514,7 @@ arena_map_is_used(block *p)

 /* end of radix tree logic */
 /*==========================================================================*/
+#endif /* USE_RADIX_TREE */


 /* Allocate a new arena. If we run out of memory, return NULL. Else
@@ -1565,13 +1585,15 @@ new_arena(void)
     unused_arena_objects = arenaobj->nextarena;
     assert(arenaobj->address == 0);
     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
+#ifdef USE_RADIX_TREE
     if (address != NULL) {
         if (!arena_map_mark_used((uintptr_t)address, 1)) {
             /* marking arena in radix tree failed, abort */
             _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
             address = NULL;
         }
     }
+#endif
     if (address == NULL) {
         /* The allocation failed: return NULL after putting the
          * arenaobj back.
@@ -1603,6 +1625,7 @@ new_arena(void)



+#ifdef USE_RADIX_TREE
 /* Return true if and only if P is an address that was allocated by
    pymalloc. When the radix tree is used, 'poolp' is unused.
  */
@@ -1611,7 +1634,99 @@ address_in_range(void *p, poolp pool)
 {
     return arena_map_is_used(p);
 }
+#else
+/*
+address_in_range(P, POOL)
+
+Return true if and only if P is an address that was allocated by pymalloc.
+POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
+(the caller is asked to compute this because the macro expands POOL more than
+once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
+variable and pass the latter to the macro; because address_in_range is
+called on every alloc/realloc/free, micro-efficiency is important here).
+
+Tricky: Let B be the arena base address associated with the pool, B =
+arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
+
+    B <= P < B + ARENA_SIZE
+
+Subtracting B throughout, this is true iff
+
+    0 <= P-B < ARENA_SIZE
+
+By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
+
+Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
+before the first arena has been allocated. `arenas` is still NULL in that
+case. We're relying on maxarenas also being 0 in that case, so that
+(POOL)->arenaindex < maxarenas must be false, saving us from trying to index
+into a NULL arenas.
+
+Details: given P and POOL, the arena_object corresponding to P is AO =
+arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
+stores, etc), POOL is the correct address of P's pool, AO.address is the
+correct base address of the pool's arena, and P must be within ARENA_SIZE of
+AO.address. In addition, AO.address is not 0 (no arena can start at address 0
+(NULL)). Therefore address_in_range correctly reports that obmalloc
+controls P.
+
+Now suppose obmalloc does not control P (e.g., P was obtained via a direct
+call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
+in this case -- it may even be uninitialized trash. If the trash arenaindex
+is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
+control P.
+
+Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
+allocated arena, obmalloc controls all the memory in slice AO.address :
+AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
+so P doesn't lie in that slice, so the macro correctly reports that P is not
+controlled by obmalloc.
+
+Finally, if P is not controlled by obmalloc and AO corresponds to an unused
+arena_object (one not currently associated with an allocated arena),
+AO.address is 0, and the second test in the macro reduces to:
+
+    P < ARENA_SIZE
+
+If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
+that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
+of the test still passes, and the third clause (AO.address != 0) is necessary
+to get the correct result: AO.address is 0 in this case, so the macro
+correctly reports that P is not controlled by obmalloc (despite that P lies in
+slice AO.address : AO.address + ARENA_SIZE).
+
+Note: The third (AO.address != 0) clause was added in Python 2.5. Before
+2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
+corresponded to a currently-allocated arena, so the "P is not controlled by
+obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
+was impossible.
+
+Note that the logic is excruciating, and reading up possibly uninitialized
+memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
+creates problems for some memory debuggers. The overwhelming advantage is
+that this test determines whether an arbitrary address is controlled by
+obmalloc in a small constant time, independent of the number of arenas
+obmalloc controls. Since this test is needed at every entry point, it's
+extremely desirable that it be this fast.
+*/
+
+static bool _Py_NO_SANITIZE_ADDRESS
+            _Py_NO_SANITIZE_THREAD
+            _Py_NO_SANITIZE_MEMORY
+address_in_range(void *p, poolp pool)
+{
+    // Since address_in_range may be reading from memory which was not allocated
+    // by Python, it is important that pool->arenaindex is read only once, as
+    // another thread may be concurrently modifying the value without holding
+    // the GIL. The following dance forces the compiler to read pool->arenaindex
+    // only once.
+    uint arenaindex = *((volatile uint *)&pool->arenaindex);
+    return arenaindex < maxarenas &&
+        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
+        arenas[arenaindex].address != 0;
+}

+#endif /* !USE_RADIX_TREE */

 /*==========================================================================*/

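
The comment's point that unsigned arithmetic makes the "0 <=" half of the range test unnecessary is easy to verify in isolation. A standalone sketch with hypothetical base and size values (nothing below comes from the patch itself):

    #include <assert.h>
    #include <stdint.h>

    /* (uintptr_t)p - base < arena_size rejects p below base, because the
     * unsigned subtraction wraps around to a huge value, and rejects p at
     * or past base + arena_size for the usual reason. */
    int main(void)
    {
        const uintptr_t arena_size = (uintptr_t)1 << 18;  /* 256 KiB */
        const uintptr_t base = 0x500000;                  /* hypothetical arena base */

        assert(((base + 10) - base) < arena_size);             /* inside: accepted */
        assert(!(((base - 10) - base) < arena_size));          /* below: wraps, rejected */
        assert(!(((base + arena_size) - base) < arena_size));  /* at end: rejected */
        return 0;
    }
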
@@ -1957,8 +2072,10 @@ insert_to_freepool(poolp pool)
         ao->nextarena = unused_arena_objects;
         unused_arena_objects = ao;

+#ifdef USE_RADIX_TREE
         /* mark arena region as not under control of obmalloc */
         arena_map_mark_used(ao->address, 0);
+#endif

         /* Free the entire arena. */
         _PyObject_Arena.free(_PyObject_Arena.ctx,