Skip to content

Commit b868f21

Browse files
committed
Add radix tree implementation for obmalloc address_in_range().
The radix tree approach is a relatively simple and memory sanitary alternative to the current (slightly) unsanitary address_in_range(). The radix tree is currently only implemented for 64-bit platforms. Adding a 32-bit version would be relatively easy.
1 parent 9048c49 commit b868f21

File tree

1 file changed

+260
-6
lines changed

1 file changed

+260
-6
lines changed

Objects/obmalloc.c

Lines changed: 260 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,14 @@ static int running_on_valgrind = -1;
832832
* -- Main tunable settings section --
833833
*/
834834

835+
/* If defined, use radix tree to find if address is controlled by
836+
* obmalloc. Otherwise, we use a slightly memory unsanitary scheme that
837+
* has the advantage of performing very well.
838+
*/
839+
#if SIZEOF_VOID_P == 8
840+
#define WITH_RADIX_TREE
841+
#endif
842+
835843
/*
836844
* Alignment of addresses returned to the user. 8-bytes alignment works
837845
* on most current architectures (with 32-bit or 64-bit address busses).
@@ -905,18 +913,32 @@ static int running_on_valgrind = -1;
905913
* Arenas are allocated with mmap() on systems supporting anonymous memory
906914
* mappings to reduce heap fragmentation.
907915
*/
908-
#define ARENA_SIZE (256 << 10) /* 256KB */
916+
#define ARENA_BITS 18
917+
#define ARENA_SIZE (1 << ARENA_BITS) /* 256 KiB */
909918

910919
#ifdef WITH_MEMORY_LIMITS
911920
#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
912921
#endif
913922

914923
/*
915-
* Size of the pools used for small blocks. Should be a power of 2,
916-
* between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
924+
* Size of the pools used for small blocks. Must be a power of 2.
917925
*/
918-
#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
919-
#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
926+
#ifdef WITH_RADIX_TREE
927+
928+
#define POOL_BITS 12 /* 4 KiB */
929+
#define POOL_SIZE (1 << POOL_BITS) /* 4 KiB */
930+
931+
#else
932+
933+
/*
934+
* For non-radix tree, must be between 1K and SYSTEM_PAGE_SIZE. E.g. 1k, 2k,
935+
* 4k.
936+
*/
937+
#define POOL_SIZE SYSTEM_PAGE_SIZE
938+
939+
#endif /* !WITH_RADIX_TREE */
940+
941+
#define POOL_SIZE_MASK (POOL_SIZE - 1)
920942

921943
#define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE)
922944
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
@@ -1214,6 +1236,14 @@ _Py_GetAllocatedBlocks(void)
12141236
return _Py_AllocatedBlocks;
12151237
}
12161238

1239+
#ifdef WITH_RADIX_TREE
1240+
static int arena_map_is_marked(block *op);
1241+
static int arena_map_mark_used(uintptr_t arena_base, int is_used);
1242+
1243+
/* number of used radix tree nodes */
1244+
static int arena_map1_count;
1245+
static int arena_map2_count;
1246+
#endif
12171247

12181248
/* Allocate a new arena. If we run out of memory, return NULL. Else
12191249
* allocate a new arena, and return the address of an arena_object
@@ -1283,6 +1313,15 @@ new_arena(void)
12831313
unused_arena_objects = arenaobj->nextarena;
12841314
assert(arenaobj->address == 0);
12851315
address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1316+
#ifdef WITH_RADIX_TREE
1317+
if (address != NULL) {
1318+
if (!arena_map_mark_used((uintptr_t)address, 1)) {
1319+
/* marking arena in radix tree failed, abort */
1320+
_PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1321+
address = NULL;
1322+
}
1323+
}
1324+
#endif
12861325
if (address == NULL) {
12871326
/* The allocation failed: return NULL after putting the
12881327
* arenaobj back.
@@ -1313,6 +1352,19 @@ new_arena(void)
13131352
}
13141353

13151354

1355+
#ifdef WITH_RADIX_TREE
1356+
1357+
/* Return true if and only if P is an address that was allocated by
1358+
pymalloc. When the radix tree is used, 'poolp' is unused.
1359+
*/
1360+
static bool
1361+
address_in_range(void *p, poolp pool)
1362+
{
1363+
return arena_map_is_marked(p);
1364+
}
1365+
1366+
#else /* !WITH_RADIX_TREE */
1367+
13161368
/*
13171369
address_in_range(P, POOL)
13181370
@@ -1403,7 +1455,7 @@ address_in_range(void *p, poolp pool)
14031455
(uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
14041456
arenas[arenaindex].address != 0;
14051457
}
1406-
1458+
#endif /* !WITH_RADIX_TREE */
14071459

14081460
/*==========================================================================*/
14091461

@@ -1805,6 +1857,11 @@ pymalloc_free(void *ctx, void *p)
18051857
ao->nextarena = unused_arena_objects;
18061858
unused_arena_objects = ao;
18071859

1860+
#ifdef WITH_RADIX_TREE
1861+
/* mark arena as not under control of obmalloc */
1862+
arena_map_mark_used(ao->address, 0);
1863+
#endif
1864+
18081865
/* Free the entire arena. */
18091866
_PyObject_Arena.free(_PyObject_Arena.ctx,
18101867
(void *)ao->address, ARENA_SIZE);
@@ -2717,6 +2774,12 @@ _PyObject_DebugMallocStats(FILE *out)
27172774
(void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
27182775
(void)printone(out, "# arenas highwater mark", narenas_highwater);
27192776
(void)printone(out, "# arenas allocated current", narenas);
2777+
#ifdef WITH_RADIX_TREE
2778+
(void)printone(out, "# arena map level 1 nodes", arena_map1_count);
2779+
(void)printone(out, "# arena map level 2 nodes", arena_map2_count);
2780+
fputc('\n', out);
2781+
#endif
2782+
27202783

27212784
PyOS_snprintf(buf, sizeof(buf),
27222785
"%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
@@ -2739,4 +2802,195 @@ _PyObject_DebugMallocStats(FILE *out)
27392802
return 1;
27402803
}
27412804

2805+
2806+
#ifdef WITH_RADIX_TREE
/* Radix tree for tracking arena coverage.

   Key format (64-bit pointers, ARENA_BITS = 18):

     16 -> MAP1 index
     16 -> MAP2 index
     14 -> MAP3 index
     18 -> offset within an ideally aligned arena
     ----
     64

   The per-level widths are derived below from BITS and ARENA_BITS, so
   they adjust automatically if the arena size changes.
*/

/* number of bits in a pointer */
#define BITS 64

#if SIZEOF_VOID_P != 8
/* Currently this code works for 64-bit pointers only.  For 32-bits, we
 * could use a two-layer tree but it hasn't been implemented yet. */
#error "Radix tree requires 64-bit pointers."
#endif

/* masks the offset of an address within its (ideally aligned) arena */
#define ARENA_MASK (ARENA_SIZE - 1)

/* arena_coverage_t members require this to be true */
#if ARENA_BITS >= 32
#   error "arena size must be < 2^32"
#endif

/* bits used for MAP1 and MAP2 nodes */
#define INTERIOR_BITS ((BITS - ARENA_BITS + 2) / 3)

#define MAP1_BITS INTERIOR_BITS
#define MAP1_LENGTH (1 << MAP1_BITS)

#define MAP2_BITS INTERIOR_BITS
#define MAP2_LENGTH (1 << MAP2_BITS)
#define MAP2_MASK (MAP2_LENGTH - 1)

/* MAP3 gets whatever bits remain after MAP1, MAP2 and the arena offset */
#define MAP3_BITS (BITS - ARENA_BITS - 2*INTERIOR_BITS)
#define MAP3_LENGTH (1 << MAP3_BITS)
#define MAP3_MASK (MAP3_LENGTH - 1)

#define MAP3_SHIFT ARENA_BITS
#define MAP2_SHIFT (MAP3_BITS + MAP3_SHIFT)
#define MAP1_SHIFT (MAP2_BITS + MAP2_SHIFT)

#define AS_UINT(p) ((uintptr_t)(p))
#define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK)
#define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK)
#define MAP1_INDEX(p) (AS_UINT(p) >> MAP1_SHIFT)

/* See arena_map_mark_used() for the meaning of these members. */
typedef struct {
    int32_t tail_hi;
    int32_t tail_lo;
} arena_coverage_t;

typedef struct arena_map3 {
    /* The members tail_hi and tail_lo are accessed together.  So, it
     * is better to have them as an array of structs, rather than two
     * arrays.
     */
    arena_coverage_t arenas[MAP3_LENGTH];
} arena_map3_t;

typedef struct arena_map2 {
    struct arena_map3 *ptrs[MAP2_LENGTH];
} arena_map2_t;

typedef struct arena_map1 {
    struct arena_map2 *ptrs[MAP1_LENGTH];
} arena_map1_t;

/* The root of the tree (MAP1); it contains all MAP2 nodes.  Note that
 * by initializing it like this, the memory should be in the BSS.  The
 * OS will only map in pages as the MAP2 entries get used (OS pages are
 * demand loaded as needed).
 */
static arena_map1_t arena_map_root;
2885+
2886+
/* Return a pointer to a MAP3 node, return NULL if it doesn't exist
2887+
* or it cannot be created */
2888+
static arena_map3_t *
2889+
arena_map_get(block *p, int create)
2890+
{
2891+
int i1 = MAP1_INDEX(p);
2892+
if (arena_map_root.ptrs[i1] == NULL) {
2893+
if (!create) {
2894+
return NULL;
2895+
}
2896+
arena_map2_t *n = PyMem_RawCalloc(1, sizeof(arena_map2_t));
2897+
if (n == NULL) {
2898+
return NULL;
2899+
}
2900+
arena_map_root.ptrs[i1] = n;
2901+
arena_map1_count++;
2902+
}
2903+
int i2 = MAP2_INDEX(p);
2904+
if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
2905+
if (!create) {
2906+
return NULL;
2907+
}
2908+
arena_map3_t *n = PyMem_RawCalloc(1, sizeof(arena_map3_t));
2909+
if (n == NULL) {
2910+
return NULL;
2911+
}
2912+
arena_map_root.ptrs[i1]->ptrs[i2] = n;
2913+
arena_map2_count++;
2914+
}
2915+
return arena_map_root.ptrs[i1]->ptrs[i2];
2916+
}
2917+
2918+
/* The radix tree only tracks arenas.  So, for 256 KiB arenas, we throw
 * away 18 bits of the address.  That reduces the space requirement of
2920+
* the tree compared to similar radix tree page-map schemes. In
2921+
* exchange for slashing the space requirement, it needs more
2922+
* computation to check an address.
2923+
*
2924+
* Tracking coverage is done by "ideal" arena address. It is easier to
2925+
* explain in decimal so let's say that the arena size is 100 bytes.
2926+
* Then, ideal addresses are 100, 200, 300, etc. For checking if a
2927+
* pointer address is inside an actual arena, we have to check two ideal
2928+
* arena addresses. E.g. if pointer is 357, we need to check 200 and
2929+
* 300. In the rare case that an arena is aligned in the ideal way
2930+
* (e.g. base address of arena is 200) then we only have to check one
2931+
* ideal address.
2932+
*
2933+
* The tree nodes for 200 and 300 both store the address of arena.
2934+
* There are two cases: the arena starts at a lower ideal arena and
2935+
* extends to this one, or the arena starts in this arena and extends to
2936+
* the next ideal arena. The tail_lo and tail_hi members correspond to
2937+
* these two cases.
2938+
*/
2939+
2940+
2941+
/* mark or unmark addresses covered by arena */
2942+
static int
2943+
arena_map_mark_used(uintptr_t arena_base, int is_used)
2944+
{
2945+
arena_map3_t *n_hi = arena_map_get((block *)arena_base, is_used);
2946+
if (n_hi == NULL) {
2947+
assert(is_used); /* otherwise node should already exist */
2948+
return 0; /* failed to allocate space for node */
2949+
}
2950+
int i3 = MAP3_INDEX((block *)arena_base);
2951+
int32_t tail = (int32_t)(arena_base & ARENA_MASK);
2952+
if (tail == 0) {
2953+
/* is ideal arena address */
2954+
n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
2955+
}
2956+
else {
2957+
/* arena_base address is not ideal (aligned to arena size) and
2958+
* so it potentially covers two MAP3 nodes. Get the MAP3 node
2959+
* for the next arena. Note that it might be in different MAP1
2960+
* and MAP2 nodes as well so we need to call arena_map_get()
2961+
* again (do the full tree traversal).
2962+
*/
2963+
n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
2964+
uintptr_t arena_base_next = arena_base + ARENA_SIZE;
2965+
arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
2966+
if (n_lo == NULL) {
2967+
assert(is_used); /* otherwise should already exist */
2968+
n_hi->arenas[i3].tail_hi = 0;
2969+
return 0; /* failed to allocate space for node */
2970+
}
2971+
int i3_next = MAP3_INDEX(arena_base_next);
2972+
n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
2973+
}
2974+
return 1;
2975+
}
2976+
2977+
/* Return true if 'p' is a pointer inside an obmalloc arena.
2978+
* _PyObject_Free() calls this so it needs to be very fast. */
2979+
static int
2980+
arena_map_is_marked(block *p)
2981+
{
2982+
arena_map3_t *n = arena_map_get(p, 0);
2983+
if (n == NULL) {
2984+
return 0;
2985+
}
2986+
int i3 = MAP3_INDEX(p);
2987+
/* in order to fit tail into 32-bits, ARENA_BITS must be <= 32 */
2988+
int32_t hi = n->arenas[i3].tail_hi;
2989+
int32_t lo = n->arenas[i3].tail_lo;
2990+
int32_t tail = (int32_t)(AS_UINT(p) & ARENA_MASK);
2991+
return (tail < lo) || (tail >= hi && hi != 0);
2992+
}
2993+
2994+
#endif /* WITH_RADIX_TREE */
2995+
27422996
#endif /* #ifdef WITH_PYMALLOC */

0 commit comments

Comments
 (0)