Skip to content

Commit dc65903

Browse files
fmayerGerrit Code Review
authored and committed
Merge "[MTE] allocate ring buffer for stack history" into main
2 parents 2377d17 + f2910f8 commit dc65903

13 files changed

+147
-22
lines changed

libc/bionic/heap_tagging.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ void SetDefaultHeapTaggingLevel() {
5353
heap_tagging_level = __libc_shared_globals()->initial_heap_tagging_level;
5454
#endif
5555

56+
__libc_memtag_stack_abi = __libc_shared_globals()->initial_memtag_stack_abi;
57+
5658
__libc_globals.mutate([](libc_globals* globals) {
5759
switch (heap_tagging_level) {
5860
case M_HEAP_TAGGING_LEVEL_TBI:

libc/bionic/libc_init_common.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ extern "C" void scudo_malloc_set_pattern_fill_contents(int);
5858

5959
__LIBC_HIDDEN__ constinit WriteProtected<libc_globals> __libc_globals;
6060
__LIBC_HIDDEN__ constinit _Atomic(bool) __libc_memtag_stack;
61+
__LIBC_HIDDEN__ constinit bool __libc_memtag_stack_abi;
6162

6263
// Not public, but well-known in the BSDs.
6364
__BIONIC_WEAK_VARIABLE_FOR_NATIVE_BRIDGE

libc/bionic/libc_init_static.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -289,11 +289,7 @@ static HeapTaggingLevel __get_tagging_level(const memtag_dynamic_entries_t* memt
289289

290290
// We can't short-circuit the environment override, as `stack` is still inherited from the
291291
// binary's settings.
292-
if (get_environment_memtag_setting(&level)) {
293-
if (level == M_HEAP_TAGGING_LEVEL_NONE || level == M_HEAP_TAGGING_LEVEL_TBI) {
294-
*stack = false;
295-
}
296-
}
292+
get_environment_memtag_setting(&level);
297293
return level;
298294
}
299295

@@ -329,13 +325,14 @@ __attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte(
329325
bool memtag_stack = false;
330326
HeapTaggingLevel level =
331327
__get_tagging_level(memtag_dynamic_entries, phdr_start, phdr_ct, load_bias, &memtag_stack);
332-
// This is used by the linker (in linker.cpp) to communicate than any library linked by this
333-
// executable enables memtag-stack.
334-
if (__libc_shared_globals()->initial_memtag_stack) {
335-
if (!memtag_stack) {
336-
async_safe_format_log(ANDROID_LOG_INFO, "libc", "enabling PROT_MTE as requested by linker");
337-
}
328+
// initial_memtag_stack is used by the linker (in linker.cpp) to communicate that any library
329+
// linked by this executable enables memtag-stack.
330+
// memtag_stack is also set for static executables if they request memtag stack via the note,
331+
// in which case it will differ from initial_memtag_stack.
332+
if (__libc_shared_globals()->initial_memtag_stack || memtag_stack) {
338333
memtag_stack = true;
334+
__libc_shared_globals()->initial_memtag_stack_abi = true;
335+
__get_bionic_tcb()->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, nullptr);
339336
}
340337
if (int64_t timed_upgrade = __get_memtag_upgrade_secs()) {
341338
if (level == M_HEAP_TAGGING_LEVEL_ASYNC) {

libc/bionic/pthread_create.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void __init_tcb_stack_guard(bionic_tcb* tcb) {
6565
}
6666

6767
void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
68+
tcb->thread()->bionic_tcb = tcb;
6869
tcb->thread()->bionic_tls = tls;
6970
tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
7071
}
@@ -443,6 +444,14 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
443444

444445
ScopedReadLock locker(&g_thread_creation_lock);
445446

447+
// This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
448+
// __pthread_internal_remap_stack_with_mte.
449+
#ifdef __aarch64__
450+
if (__libc_memtag_stack_abi) {
451+
tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
452+
}
453+
#endif
454+
446455
sigset64_t block_all_mask;
447456
sigfillset64(&block_all_mask);
448457
__rt_sigprocmask(SIG_SETMASK, &block_all_mask, &thread->start_mask, sizeof(thread->start_mask));

libc/bionic/pthread_internal.cpp

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@
3333
#include <stdlib.h>
3434
#include <string.h>
3535
#include <sys/mman.h>
36+
#include <sys/prctl.h>
3637

3738
#include <async_safe/log.h>
3839
#include <bionic/reserved_signals.h>
3940

41+
#include "bionic/tls_defines.h"
4042
#include "private/ErrnoRestorer.h"
4143
#include "private/ScopedRWLock.h"
4244
#include "private/bionic_futex.h"
@@ -71,8 +73,21 @@ void __pthread_internal_remove(pthread_internal_t* thread) {
7173
g_thread_list = thread->next;
7274
}
7375
}
76+
// N.B.: this is NOT the page size, but a fixed 4096. This is hardcoded in the codegen.
77+
// See
78+
// https://github.com/search?q=repo%3Allvm/llvm-project%20AArch64StackTagging%3A%3AinsertBaseTaggedPointer&type=code
79+
constexpr size_t kStackMteRingbufferSizeMultiplier = 4096;
7480

7581
static void __pthread_internal_free(pthread_internal_t* thread) {
82+
#ifdef __aarch64__
83+
if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
84+
size_t size =
85+
kStackMteRingbufferSizeMultiplier * (reinterpret_cast<uintptr_t>(stack_mte_tls) >> 56ULL);
86+
void* ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(stack_mte_tls) &
87+
((1ULL << 56ULL) - 1ULL));
88+
munmap(ptr, size);
89+
}
90+
#endif
7691
if (thread->mmap_size != 0) {
7792
// Free mapped space, including thread stack and pthread_internal_t.
7893
munmap(thread->mmap_base, thread->mmap_size);
@@ -176,21 +191,77 @@ void __find_main_stack_limits(uintptr_t* low, uintptr_t* high) {
176191
async_safe_fatal("stack not found in /proc/self/maps");
177192
}
178193

194+
__LIBC_HIDDEN__ void* __allocate_stack_mte_ringbuffer(size_t n, pthread_internal_t* thread) {
195+
if (n > 7) async_safe_fatal("error: invalid mte stack ring buffer size");
196+
// Allocation needs to be aligned to 2*size to make the fancy code-gen work.
197+
// So we allocate 3*size - pagesz bytes, which will always contain size bytes
198+
// aligned to 2*size, and unmap the unneeded part.
199+
// See
200+
// https://github.com/search?q=repo%3Allvm/llvm-project%20AArch64StackTagging%3A%3AinsertBaseTaggedPointer&type=code
201+
//
202+
// In the worst case, we get an allocation that is one page past the properly
203+
// aligned address, in which case we have to unmap the previous
204+
// 2*size - pagesz bytes. In that case, we still have size properly aligned
205+
// bytes left.
206+
size_t size = (1 << n) * kStackMteRingbufferSizeMultiplier;
207+
size_t pgsize = page_size();
208+
209+
size_t alloc_size = __BIONIC_ALIGN(3 * size - pgsize, pgsize);
210+
void* allocation_ptr =
211+
mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
212+
if (allocation_ptr == MAP_FAILED)
213+
async_safe_fatal("error: failed to allocate stack mte ring buffer");
214+
uintptr_t allocation = reinterpret_cast<uintptr_t>(allocation_ptr);
215+
216+
size_t alignment = 2 * size;
217+
uintptr_t aligned_allocation = __BIONIC_ALIGN(allocation, alignment);
218+
if (allocation != aligned_allocation) {
219+
munmap(reinterpret_cast<void*>(allocation), aligned_allocation - allocation);
220+
}
221+
if (aligned_allocation + size != allocation + alloc_size) {
222+
munmap(reinterpret_cast<void*>(aligned_allocation + size),
223+
(allocation + alloc_size) - (aligned_allocation + size));
224+
}
225+
226+
const char* name;
227+
if (thread == nullptr) {
228+
name = "stack_mte_ring:main";
229+
} else {
230+
// The kernel doesn't copy the name string, but this variable will last at least as long as the
231+
// mapped area. We unmap the ring buffer before unmapping the rest of the thread storage.
232+
auto& name_buffer = thread->stack_mte_ringbuffer_vma_name_buffer;
233+
static_assert(arraysize(name_buffer) >= arraysize("stack_mte_ring:") + 11 + 1);
234+
async_safe_format_buffer(name_buffer, arraysize(name_buffer), "stack_mte_ring:%d", thread->tid);
235+
name = name_buffer;
236+
}
237+
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, reinterpret_cast<void*>(aligned_allocation), size, name);
238+
239+
// We store the size, in units of kStackMteRingbufferSizeMultiplier, in the top byte of the pointer (which is ignored).
240+
return reinterpret_cast<void*>(aligned_allocation | ((1ULL << n) << 56ULL));
241+
}
242+
179243
void __pthread_internal_remap_stack_with_mte() {
180244
#if defined(__aarch64__)
181-
// If process doesn't have MTE enabled, we don't need to do anything.
245+
ScopedWriteLock creation_locker(&g_thread_creation_lock);
246+
ScopedReadLock list_locker(&g_thread_list_lock);
247+
// If process already uses memtag-stack ABI, we don't need to do anything.
248+
if (__libc_memtag_stack_abi) return;
249+
__libc_memtag_stack_abi = true;
250+
251+
for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
252+
if (t->terminating) continue;
253+
t->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) =
254+
__allocate_stack_mte_ringbuffer(0, t->is_main() ? nullptr : t);
255+
}
182256
if (!atomic_load(&__libc_globals->memtag)) return;
183-
bool prev = atomic_exchange(&__libc_memtag_stack, true);
184-
if (prev) return;
257+
if (atomic_exchange(&__libc_memtag_stack, true)) return;
185258
uintptr_t lo, hi;
186259
__find_main_stack_limits(&lo, &hi);
187260

188261
if (mprotect(reinterpret_cast<void*>(lo), hi - lo,
189262
PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
190263
async_safe_fatal("error: failed to set PROT_MTE on main thread");
191264
}
192-
ScopedWriteLock creation_locker(&g_thread_creation_lock);
193-
ScopedReadLock list_locker(&g_thread_list_lock);
194265
for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
195266
if (t->terminating || t->is_main()) continue;
196267
if (mprotect(t->mmap_base_unguarded, t->mmap_size_unguarded,

libc/bionic/pthread_internal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ class pthread_internal_t {
178178
bionic_tls* bionic_tls;
179179

180180
int errno_value;
181+
182+
bionic_tcb* bionic_tcb;
183+
char stack_mte_ringbuffer_vma_name_buffer[32];
184+
181185
bool is_main() { return start_routine == nullptr; }
182186
};
183187

@@ -209,6 +213,7 @@ __LIBC_HIDDEN__ pid_t __pthread_internal_gettid(pthread_t pthread_id, const char
209213
__LIBC_HIDDEN__ void __pthread_internal_remove(pthread_internal_t* thread);
210214
__LIBC_HIDDEN__ void __pthread_internal_remove_and_free(pthread_internal_t* thread);
211215
__LIBC_HIDDEN__ void __find_main_stack_limits(uintptr_t* low, uintptr_t* high);
216+
__LIBC_HIDDEN__ void* __allocate_stack_mte_ringbuffer(size_t n, pthread_internal_t* thread);
212217

213218
static inline __always_inline bionic_tcb* __get_bionic_tcb() {
214219
return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);

libc/platform/bionic/tls_defines.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@
8585
// [1] "Addenda to, and Errata in, the ABI for the ARM Architecture". Section 3.
8686
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045e/IHI0045E_ABI_addenda.pdf
8787

88-
#define MIN_TLS_SLOT (-2) // update this value when reserving a slot
88+
#define MIN_TLS_SLOT (-3) // update this value when reserving a slot
89+
#define TLS_SLOT_STACK_MTE (-3)
8990
#define TLS_SLOT_NATIVE_BRIDGE_GUEST_STATE (-2)
9091
#define TLS_SLOT_BIONIC_TLS (-1)
9192
#define TLS_SLOT_DTV 0

libc/private/bionic_globals.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,23 @@ struct memtag_dynamic_entries_t {
7676
};
7777

7878
__LIBC_HIDDEN__ extern WriteProtected<libc_globals> __libc_globals;
79-
// This cannot be in __libc_globals, because we cannot access the
79+
// These cannot be in __libc_globals, because we cannot access the
8080
// WriteProtected in a thread-safe way.
8181
// See b/328256432.
82+
//
83+
// __libc_memtag_stack says whether stack MTE is enabled on the process, i.e.
84+
// whether the stack pages are mapped with PROT_MTE. This is always false if
85+
// MTE is disabled for the process (i.e. libc_globals.memtag is false).
8286
__LIBC_HIDDEN__ extern _Atomic(bool) __libc_memtag_stack;
87+
// __libc_memtag_stack_abi says whether the process contains any code that was
88+
// compiled with memtag-stack. This is true even if the process does not have
89+
// MTE enabled (e.g. because it was overridden using MEMTAG_OPTIONS, or because
90+
// MTE is disabled for the device).
91+
// Code compiled with memtag-stack needs a stack history buffer in
92+
// TLS_SLOT_STACK_MTE, because the codegen will emit an unconditional
93+
// (to keep the code branchless) write to it.
94+
// Protected by g_thread_creation_lock.
95+
__LIBC_HIDDEN__ extern bool __libc_memtag_stack_abi;
8396

8497
struct abort_msg_t;
8598
struct crash_detail_page_t;
@@ -133,7 +146,9 @@ struct libc_shared_globals {
133146
size_t scudo_stack_depot_size = 0;
134147

135148
HeapTaggingLevel initial_heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
149+
// See comments for __libc_memtag_stack / __libc_memtag_stack_abi above.
136150
bool initial_memtag_stack = false;
151+
bool initial_memtag_stack_abi = false;
137152
int64_t heap_tagging_upgrade_timer_sec = 0;
138153

139154
void (*memtag_stack_dlopen_callback)() = nullptr;

linker/Android.bp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ cc_defaults {
362362

363363
sanitize: {
364364
hwaddress: false,
365+
memtag_stack: false,
365366
},
366367

367368
static_libs: [

tests/libs/testbinary_is_stack_mte.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@
3636
#if defined(__BIONIC__) && defined(__aarch64__)
3737

3838
extern "C" int main(int, char**) {
39-
int ret = is_stack_mte_on() ? 0 : 1;
39+
void* mte_tls_ptr = mte_tls();
40+
*reinterpret_cast<uintptr_t*>(mte_tls_ptr) = 1;
41+
int ret = is_stack_mte_on() && mte_tls_ptr != nullptr ? 0 : 1;
4042
printf("RAN\n");
4143
return ret;
4244
}

tests/libs/testbinary_is_stack_mte_after_dlopen.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ extern "C" int main(int argc, char** argv) {
9696
State state = kInit;
9797

9898
bool is_early_thread_mte_on = false;
99+
void* early_thread_mte_tls = nullptr;
99100
std::thread early_th([&] {
100101
{
101102
std::lock_guard lk(m);
@@ -107,6 +108,8 @@ extern "C" int main(int argc, char** argv) {
107108
cv.wait(lk, [&] { return state == kStackRemapped; });
108109
}
109110
is_early_thread_mte_on = is_stack_mte_on();
111+
early_thread_mte_tls = mte_tls();
112+
*reinterpret_cast<uintptr_t*>(early_thread_mte_tls) = 1;
110113
});
111114
{
112115
std::unique_lock lk(m);
@@ -120,6 +123,7 @@ extern "C" int main(int argc, char** argv) {
120123
cv.notify_one();
121124
CHECK(handle != nullptr);
122125
CHECK(is_stack_mte_on());
126+
CHECK(mte_tls() != nullptr);
123127

124128
bool new_stack_page_mte_on = false;
125129
uintptr_t low;
@@ -129,11 +133,18 @@ extern "C" int main(int argc, char** argv) {
129133
CHECK(new_stack_page_mte_on);
130134

131135
bool is_late_thread_mte_on = false;
132-
std::thread late_th([&] { is_late_thread_mte_on = is_stack_mte_on(); });
136+
void* late_thread_mte_tls = nullptr;
137+
std::thread late_th([&] {
138+
is_late_thread_mte_on = is_stack_mte_on();
139+
late_thread_mte_tls = mte_tls();
140+
*reinterpret_cast<uintptr_t*>(late_thread_mte_tls) = 1;
141+
});
133142
late_th.join();
134143
early_th.join();
135144
CHECK(is_late_thread_mte_on);
136145
CHECK(is_early_thread_mte_on);
146+
CHECK(late_thread_mte_tls != nullptr);
147+
CHECK(early_thread_mte_tls != nullptr);
137148
printf("RAN\n");
138149
return 0;
139150
}

tests/mte_utils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,10 @@ __attribute__((target("mte"))) static bool is_stack_mte_on() {
4040
return p == p_cpy;
4141
}
4242

43+
static void* mte_tls() {
44+
void** dst;
45+
__asm__("mrs %0, TPIDR_EL0" : "=r"(dst) :);
46+
return dst[-3];
47+
}
48+
4349
#endif

tests/struct_layout_test.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
3030
#define CHECK_OFFSET(name, field, offset) \
3131
check_offset(#name, #field, offsetof(name, field), offset);
3232
#ifdef __LP64__
33-
CHECK_SIZE(pthread_internal_t, 776);
33+
CHECK_SIZE(pthread_internal_t, 816);
3434
CHECK_OFFSET(pthread_internal_t, next, 0);
3535
CHECK_OFFSET(pthread_internal_t, prev, 8);
3636
CHECK_OFFSET(pthread_internal_t, tid, 16);
@@ -55,6 +55,8 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
5555
CHECK_OFFSET(pthread_internal_t, dlerror_buffer, 248);
5656
CHECK_OFFSET(pthread_internal_t, bionic_tls, 760);
5757
CHECK_OFFSET(pthread_internal_t, errno_value, 768);
58+
CHECK_OFFSET(pthread_internal_t, bionic_tcb, 776);
59+
CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 784);
5860
CHECK_SIZE(bionic_tls, 12200);
5961
CHECK_OFFSET(bionic_tls, key_data, 0);
6062
CHECK_OFFSET(bionic_tls, locale, 2080);
@@ -72,7 +74,7 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
7274
CHECK_OFFSET(bionic_tls, bionic_systrace_disabled, 12193);
7375
CHECK_OFFSET(bionic_tls, padding, 12194);
7476
#else
75-
CHECK_SIZE(pthread_internal_t, 668);
77+
CHECK_SIZE(pthread_internal_t, 704);
7678
CHECK_OFFSET(pthread_internal_t, next, 0);
7779
CHECK_OFFSET(pthread_internal_t, prev, 4);
7880
CHECK_OFFSET(pthread_internal_t, tid, 8);
@@ -97,6 +99,8 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
9799
CHECK_OFFSET(pthread_internal_t, dlerror_buffer, 148);
98100
CHECK_OFFSET(pthread_internal_t, bionic_tls, 660);
99101
CHECK_OFFSET(pthread_internal_t, errno_value, 664);
102+
CHECK_OFFSET(pthread_internal_t, bionic_tcb, 668);
103+
CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 672);
100104
CHECK_SIZE(bionic_tls, 11080);
101105
CHECK_OFFSET(bionic_tls, key_data, 0);
102106
CHECK_OFFSET(bionic_tls, locale, 1040);

0 commit comments

Comments
 (0)