
Commit a95018e

thestinger authored and jasone committed
Attempt to expand huge allocations in-place.
This adds support for expanding huge allocations in-place by requesting
memory at a specific address from the chunk allocator. It's currently
only implemented for the chunk recycling path, although in theory it
could also be done by optimistically allocating new chunks. On Linux,
it could attempt an in-place mremap. However, that won't work in
practice since the heap is grown downwards and memory is not unmapped
(in a normal build, at least).

Repeated vector reallocation micro-benchmark:

    #include <string.h>
    #include <stdlib.h>

    int main(void) {
        for (size_t i = 0; i < 100; i++) {
            void *ptr = NULL;
            size_t old_size = 0;
            for (size_t size = 4; size < (1 << 30); size *= 2) {
                ptr = realloc(ptr, size);
                if (!ptr) return 1;
                memset(ptr + old_size, 0xff, size - old_size);
                old_size = size;
            }
            free(ptr);
        }
    }

The glibc allocator fails to do any in-place reallocations on this
benchmark once it passes the M_MMAP_THRESHOLD (default 128k), but it
elides the cost of copies via mremap, which is currently not something
that jemalloc can use.

With this improvement, jemalloc still fails to do any in-place huge
reallocations for the first outer loop, but then succeeds 100% of the
time for the remaining 99 iterations. The time spent doing allocations
and copies drops down to under 5%, with nearly all of it spent doing
purging + faulting (when huge pages are disabled) and the array memset.

An improved mremap API (MREMAP_RETAIN - jemalloc#138) would be far more
general but this is a portable optimization and would still be useful
on Linux for xallocx.

Numbers with transparent huge pages enabled:

    glibc (copies elided via MREMAP_MAYMOVE): 8.471s
    jemalloc: 17.816s
    jemalloc + no-op madvise: 13.236s
    jemalloc + this commit: 6.787s
    jemalloc + this commit + no-op madvise: 6.144s

Numbers with transparent huge pages disabled:

    glibc (copies elided via MREMAP_MAYMOVE): 15.403s
    jemalloc: 39.456s
    jemalloc + no-op madvise: 12.768s
    jemalloc + this commit: 15.534s
    jemalloc + this commit + no-op madvise: 6.354s

Closes jemalloc#137
1 parent f11a677 commit a95018e
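
The expansion path above is what realloc and the non-standard xallocx exercise
for huge sizes, so its effect can be observed from the application side. A
hedged sketch follows, assuming a jemalloc build that exposes the *allocx API;
the starting size and the doubling loop are arbitrary choices, not taken from
the commit:

    /*
     * Minimal probe for in-place huge expansion (sketch). mallocx/xallocx/
     * dallocx are jemalloc's non-standard API; xallocx() resizes ptr without
     * moving it and returns the resulting usable size, so a result smaller
     * than the requested size means the allocation could not grow in place.
     */
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        size_t size = (size_t)8 << 20;          /* start at 8 MiB (arbitrary) */
        void *ptr = mallocx(size, 0);
        if (ptr == NULL)
            return (1);

        for (int i = 0; i < 4; i++) {
            size_t want = size * 2;
            size_t got = xallocx(ptr, want, 0, 0);  /* no extra, default flags */
            printf("grow to %zu MiB: usable size now %zu MiB (%s)\n",
                want >> 20, got >> 20,
                got >= want ? "expanded in place" : "would need a move");
            if (got < want)
                break;
            size = want;
        }
        dallocx(ptr, 0);
        return (0);
    }

As the commit message notes, the first growth sequence after startup may find
nothing to recycle at the required addresses, so in-place success rates improve
once freed chunks accumulate.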

10 files changed: +118 additions, -41 deletions


doc/jemalloc.xml.in

Lines changed: 5 additions & 2 deletions
@@ -1351,6 +1351,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         function that knows how to deallocate the chunks.
         <funcprototype>
           <funcdef>typedef void *<function>(chunk_alloc_t)</function></funcdef>
+          <paramdef>void *<parameter>chunk</parameter></paramdef>
           <paramdef>size_t <parameter>size</parameter></paramdef>
           <paramdef>size_t <parameter>alignment</parameter></paramdef>
           <paramdef>bool *<parameter>zero</parameter></paramdef>
@@ -1367,8 +1368,10 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <parameter>size</parameter> parameter is always a multiple of the chunk
         size. The <parameter>alignment</parameter> parameter is always a power
         of two at least as large as the chunk size. Zeroing is mandatory if
-        <parameter>*zero</parameter> is true upon function
-        entry.</para>
+        <parameter>*zero</parameter> is true upon function entry. If
+        <parameter>chunk</parameter> is not <constant>NULL</constant>, the
+        returned pointer must be <parameter>chunk</parameter> or
+        <constant>NULL</constant> if it could not be allocated.</para>
 
         <para>Note that replacing the default chunk allocation function makes
         the arena's <link
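
To make the new contract concrete, the following is a hedged sketch of a
user-supplied chunk allocation function that honors the leading chunk
parameter: when it is non-NULL, the hook either returns exactly that address
or fails. The one-slot cache, the mmap fallback, and the function name are
illustrative assumptions rather than part of this change, and a real hook
would also have to honor the alignment parameter and pair this with a
matching deallocation hook that populates the cache.

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>
    #include <sys/mman.h>

    /* Hypothetical one-slot cache, filled by a matching dalloc hook (not shown). */
    static void *cached_addr;
    static size_t cached_size;

    void *
    example_chunk_alloc(void *chunk, size_t size, size_t alignment, bool *zero,
        unsigned arena_ind)
    {
        void *ret;

        (void)alignment;  /* a real hook must honor this; omitted for brevity */
        (void)arena_ind;

        if (chunk != NULL) {
            /* Address-constrained request: return exactly chunk, or NULL. */
            if (chunk != cached_addr || size > cached_size)
                return (NULL);
            cached_addr = NULL;
            cached_size = 0;
            if (*zero)
                memset(chunk, 0, size);
            return (chunk);
        }

        /* Unconstrained request: fresh anonymous mapping, zeroed by the kernel. */
        ret = mmap(NULL, size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ret == MAP_FAILED)
            return (NULL);
        *zero = true;
        return (ret);
    }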

include/jemalloc/internal/arena.h

Lines changed: 2 additions & 2 deletions
@@ -343,8 +343,8 @@ extern arena_bin_info_t arena_bin_info[NBINS];
 /* Number of large size classes. */
 #define nlclasses (chunk_npages - map_bias)
 
-void *arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment,
-    bool *zero);
+void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero);
 void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size);
 void arena_purge_all(arena_t *arena);
 void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,

include/jemalloc/internal/chunk.h

Lines changed: 4 additions & 4 deletions
@@ -46,10 +46,10 @@ extern size_t arena_maxclass; /* Max size class for arenas. */
 
 void *chunk_alloc_base(size_t size);
 void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc,
-    chunk_dalloc_t *chunk_dalloc, unsigned arena_ind, size_t size,
-    size_t alignment, bool *zero);
-void *chunk_alloc_default(size_t size, size_t alignment, bool *zero,
-    unsigned arena_ind);
+    chunk_dalloc_t *chunk_dalloc, unsigned arena_ind, void *new_addr,
+    size_t size, size_t alignment, bool *zero);
+void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment,
+    bool *zero, unsigned arena_ind);
 void chunk_unmap(void *chunk, size_t size);
 bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind);
 bool chunk_boot(void);

include/jemalloc/internal/huge.h

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero);
 void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
     bool zero);
 bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
-    size_t extra);
+    size_t extra, bool zero);
 void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t size, size_t extra, size_t alignment, bool zero,
     bool try_tcache_dalloc);

include/jemalloc/internal/jemalloc_internal.h.in

Lines changed: 1 addition & 1 deletion
@@ -902,7 +902,7 @@ ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
 	if (size <= arena_maxclass)
 		return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
 	else
-		return (huge_ralloc_no_move(ptr, oldsize, size, extra));
+		return (huge_ralloc_no_move(ptr, oldsize, size, extra, zero));
 }
 #endif

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
-typedef void *(chunk_alloc_t)(size_t, size_t, bool *, unsigned);
+typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, unsigned);
 typedef bool (chunk_dalloc_t)(void *, size_t, unsigned);

src/arena.c

Lines changed: 4 additions & 4 deletions
@@ -450,7 +450,7 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment,
 	chunk_dalloc = arena->chunk_dalloc;
 	malloc_mutex_unlock(&arena->lock);
 	chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc,
-	    arena->ind, size, alignment, zero);
+	    arena->ind, NULL, size, alignment, zero);
 	malloc_mutex_lock(&arena->lock);
 	if (config_stats && chunk != NULL)
 		arena->stats.mapped += chunksize;
@@ -459,8 +459,8 @@ arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment,
 }
 
 void *
-arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment,
-    bool *zero)
+arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero)
 {
 	void *ret;
 	chunk_alloc_t *chunk_alloc;
@@ -480,7 +480,7 @@ arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment,
 	malloc_mutex_unlock(&arena->lock);
 
 	ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind,
-	    size, alignment, zero);
+	    new_addr, size, alignment, zero);
 	if (config_stats) {
 		if (ret != NULL)
 			stats_cactive_add(size);

src/chunk.c

Lines changed: 27 additions & 20 deletions
@@ -42,8 +42,8 @@ static void chunk_dalloc_core(void *chunk, size_t size);
 /******************************************************************************/
 
 static void *
-chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
-    size_t alignment, bool base, bool *zero)
+chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad,
+    void *new_addr, size_t size, size_t alignment, bool base, bool *zero)
 {
 	void *ret;
 	extent_node_t *node;
@@ -65,11 +65,11 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
 	/* Beware size_t wrap-around. */
 	if (alloc_size < size)
 		return (NULL);
-	key.addr = NULL;
+	key.addr = new_addr;
 	key.size = alloc_size;
 	malloc_mutex_lock(&chunks_mtx);
 	node = extent_tree_szad_nsearch(chunks_szad, &key);
-	if (node == NULL) {
+	if (node == NULL || (new_addr && node->addr != new_addr)) {
 		malloc_mutex_unlock(&chunks_mtx);
 		return (NULL);
 	}
@@ -142,8 +142,8 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
  * them if they are returned.
  */
 static void *
-chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero,
-    dss_prec_t dss_prec)
+chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base,
+    bool *zero, dss_prec_t dss_prec)
 {
 	void *ret;
 
@@ -154,24 +154,30 @@ chunk_alloc_core(size_t size, size_t alignment, bool base, bool *zero,
 
 	/* "primary" dss. */
 	if (have_dss && dss_prec == dss_prec_primary) {
-		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
-		    alignment, base, zero)) != NULL)
+		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss,
+		    new_addr, size, alignment, base, zero)) != NULL)
 			return (ret);
-		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
+		/* requesting an address only implemented for recycle */
+		if (new_addr == NULL
+		    && (ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
 			return (ret);
 	}
 	/* mmap. */
-	if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
-	    alignment, base, zero)) != NULL)
+	if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, new_addr,
+	    size, alignment, base, zero)) != NULL)
 		return (ret);
-	if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
+	/* requesting an address only implemented for recycle */
+	if (new_addr == NULL &&
+	    (ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
 		return (ret);
 	/* "secondary" dss. */
 	if (have_dss && dss_prec == dss_prec_secondary) {
-		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
-		    alignment, base, zero)) != NULL)
+		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss,
+		    new_addr, size, alignment, base, zero)) != NULL)
 			return (ret);
-		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
+		/* requesting an address only implemented for recycle */
+		if (new_addr == NULL &&
+		    (ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
 			return (ret);
 	}
 
@@ -219,7 +225,7 @@ chunk_alloc_base(size_t size)
 	bool zero;
 
 	zero = false;
-	ret = chunk_alloc_core(size, chunksize, true, &zero,
+	ret = chunk_alloc_core(NULL, size, chunksize, true, &zero,
 	    chunk_dss_prec_get());
 	if (ret == NULL)
 		return (NULL);
@@ -232,11 +238,12 @@ chunk_alloc_base(size_t size)
 
 void *
 chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc,
-    unsigned arena_ind, size_t size, size_t alignment, bool *zero)
+    unsigned arena_ind, void *new_addr, size_t size, size_t alignment,
+    bool *zero)
 {
 	void *ret;
 
-	ret = chunk_alloc(size, alignment, zero, arena_ind);
+	ret = chunk_alloc(new_addr, size, alignment, zero, arena_ind);
 	if (ret != NULL && chunk_register(ret, size, false)) {
 		chunk_dalloc(ret, size, arena_ind);
 		ret = NULL;
@@ -247,11 +254,11 @@ chunk_alloc_arena(chunk_alloc_t *chunk_alloc, chunk_dalloc_t *chunk_dalloc,
 
 /* Default arena chunk allocation routine in the absence of user override. */
 void *
-chunk_alloc_default(size_t size, size_t alignment, bool *zero,
+chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
     unsigned arena_ind)
 {
 
-	return (chunk_alloc_core(size, alignment, false, zero,
+	return (chunk_alloc_core(new_addr, size, alignment, false, zero,
 	    arenas[arena_ind]->dss_prec));
 }
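
The essential change to chunk_recycle() above is that callers can now pin
recycling to a specific address: the tree-search key is seeded with new_addr,
and if the extent found does not start exactly there the attempt fails rather
than returning a chunk elsewhere. Below is a hedged toy model of that rule
using a flat array in place of jemalloc's size/address-ordered extent tree;
the names and the first-fit scan are illustrative only.

    #include <stddef.h>

    typedef struct {
        void	*addr;	/* start of a free extent */
        size_t	size;	/* length of the free extent */
    } free_extent_t;

    /*
     * Toy equivalent of the recycle lookup: find a free extent large enough
     * for size, and when new_addr is non-NULL additionally require that the
     * extent start exactly at new_addr. Returning NULL corresponds to
     * chunk_recycle() giving up, which for address-constrained requests is
     * final: chunk_alloc_core() does not fall back to dss or mmap then.
     */
    static void *
    recycle_find(free_extent_t *extents, size_t nextents, void *new_addr,
        size_t size)
    {
        size_t i;

        for (i = 0; i < nextents; i++) {
            if (extents[i].size < size)
                continue;	/* too small */
            if (new_addr != NULL && extents[i].addr != new_addr)
                continue;	/* wrong place for a pinned request */
            return (extents[i].addr);
        }
        return (NULL);
    }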

src/huge.c

Lines changed: 70 additions & 4 deletions
@@ -47,7 +47,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
 	 */
 	is_zeroed = zero;
 	arena = choose_arena(tsd, arena);
-	ret = arena_chunk_alloc_huge(arena, csize, alignment, &is_zeroed);
+	ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed);
 	if (ret == NULL) {
 		base_node_dalloc(node);
 		return (NULL);
@@ -95,8 +95,66 @@ huge_dalloc_junk(void *ptr, size_t usize)
 huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl);
 #endif
 
+static bool
+huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) {
+	size_t csize;
+	void *expand_addr;
+	size_t expand_size;
+	extent_node_t *node, key;
+	arena_t *arena;
+	bool is_zeroed;
+	void *ret;
+
+	csize = CHUNK_CEILING(size);
+	if (csize == 0) {
+		/* size is large enough to cause size_t wrap-around. */
+		return (true);
+	}
+
+	expand_addr = ptr + oldsize;
+	expand_size = csize - oldsize;
+
+	malloc_mutex_lock(&huge_mtx);
+
+	key.addr = ptr;
+	node = extent_tree_ad_search(&huge, &key);
+	assert(node != NULL);
+	assert(node->addr == ptr);
+
+	/* Find the current arena. */
+	arena = node->arena;
+
+	malloc_mutex_unlock(&huge_mtx);
+
+	/*
+	 * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that
+	 * it is possible to make correct junk/zero fill decisions below.
+	 */
+	is_zeroed = zero;
+	ret = arena_chunk_alloc_huge(arena, expand_addr, expand_size, chunksize,
+	    &is_zeroed);
+	if (ret == NULL)
+		return (true);
+
+	assert(ret == expand_addr);
+
+	malloc_mutex_lock(&huge_mtx);
+	/* Update the size of the huge allocation. */
+	node->size = csize;
+	malloc_mutex_unlock(&huge_mtx);
+
+	if (config_fill && !zero) {
+		if (unlikely(opt_junk))
+			memset(expand_addr, 0xa5, expand_size);
+		else if (unlikely(opt_zero) && !is_zeroed)
+			memset(expand_addr, 0, expand_size);
+	}
+	return (false);
+}
+
 bool
-huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
+huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
+    bool zero)
 {
 
 	/* Both allocations must be huge to avoid a move. */
@@ -145,7 +203,15 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
 		return (false);
 	}
 
-	return (true);
+	/* Attempt to expand the allocation in-place. */
+	if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, zero)) {
+		if (extra == 0)
+			return (true);
+
+		/* Try again, this time without extra. */
+		return (huge_ralloc_no_move_expand(ptr, oldsize, size, zero));
+	}
+	return (false);
 }
 
 void *
@@ -156,7 +222,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size,
 	size_t copysize;
 
 	/* Try to avoid moving the allocation. */
-	if (!huge_ralloc_no_move(ptr, oldsize, size, extra))
+	if (!huge_ralloc_no_move(ptr, oldsize, size, extra, zero))
 		return (ptr);
 
 	/*
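
One detail of huge_ralloc_no_move_expand() worth spelling out: the csize == 0
early return is an integer-overflow guard. Rounding a size up to the next
chunk multiple can wrap around size_t, and the wrapped result is exactly zero.
A small self-contained check follows, assuming the usual power-of-two round-up
idiom and a 4 MiB chunk size (both assumptions for illustration, not taken
from this diff):

    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>

    #define CHUNKSIZE	((size_t)1 << 22)	/* assumed 4 MiB chunk size */
    #define CHUNK_CEIL(s)	(((s) + CHUNKSIZE - 1) & ~(CHUNKSIZE - 1))

    int
    main(void)
    {
        /* Ordinary requests round up to the next chunk boundary. */
        assert(CHUNK_CEIL((size_t)1) == CHUNKSIZE);
        assert(CHUNK_CEIL(CHUNKSIZE + 1) == 2 * CHUNKSIZE);
        /* A request near SIZE_MAX overflows the addition and wraps to 0. */
        assert(CHUNK_CEIL(SIZE_MAX) == 0);
        return (0);
    }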

test/integration/chunk.c

Lines changed: 3 additions & 2 deletions
@@ -11,10 +11,11 @@ chunk_dalloc(void *chunk, size_t size, unsigned arena_ind)
 }
 
 void *
-chunk_alloc(size_t size, size_t alignment, bool *zero, unsigned arena_ind)
+chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero,
+    unsigned arena_ind)
 {
 
-	return (old_alloc(size, alignment, zero, arena_ind));
+	return (old_alloc(new_addr, size, alignment, zero, arena_ind));
 }
 
 TEST_BEGIN(test_chunk)
