[TSan] Clarify and enforce shadow end alignment #144648
Conversation
@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Kunqiu Chen (Camsyn)

Changes

This is an improvement that enhances the robustness of the code. Previously, the correct calculation of the exclusive EndShadow relied on the assumption that `addr_end % kShadowCell == 0`; however, in many current usages, this assumption was not strictly guaranteed (although it was in fact met).

In addition, computing EndShadow does not require the corresponding address to be AppMem; for example, HighAppEnd is not AppMem, but can still be used to calculate EndShadow.

For example, for the AppMem range [0, 1), `s = MemToShadow(0)` is equal to `MemToShadow(1)`. The previous logic would incorrectly deduce an empty shadow range [s, s), while the correct shadow range should be [s, s + kShadowSize * kShadowCnt) to cover all the related shadow memory for the accessed cell.

This commit addresses this in two ways:

1. It introduces a dedicated utility function, `MemToEndShadow`, to correctly calculate the end of a shadow memory range, accounting for the memory cell granularity.
2. It replaces existing (and potentially incorrect) calculations of the shadow end with this new utility function.

Additionally, the previous commit 4052de6 resolved a problem with overestimating the shadow end, but it did not consider `kShadowCell` and could therefore lead to underestimates. This is also corrected by utilizing the `MemToEndShadow` function.

Full diff: https://github.com/llvm/llvm-project/pull/144648.diff

5 Files Affected:
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
index 7c15a16388268..3e324eef9cb8d 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
@@ -122,7 +122,6 @@ void __tsan_java_move(jptr src, jptr dst, jptr size) {
DCHECK_GE(dst, jctx->heap_begin);
DCHECK_LE(dst + size, jctx->heap_begin + jctx->heap_size);
DCHECK_NE(dst, src);
- DCHECK_NE(size, 0);
// Assuming it's not running concurrently with threads that do
// memory accesses and mutex operations (stop-the-world phase).
@@ -132,7 +131,7 @@ void __tsan_java_move(jptr src, jptr dst, jptr size) {
// We used to move shadow from src to dst, but the trace format does not
// support that anymore as it contains addresses of accesses.
RawShadow *d = MemToShadow(dst);
- RawShadow *dend = MemToShadow(dst + size);
+ RawShadow *dend = MemToEndShadow(dst + size);
ShadowSet(d, dend, Shadow::kEmpty);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h
index 354f6da6a64a1..5b589df482256 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h
@@ -968,6 +968,24 @@ RawShadow *MemToShadow(uptr x) {
return reinterpret_cast<RawShadow *>(SelectMapping<MemToShadowImpl>(x));
}
+struct MemToEndShadowImpl {
+ template <typename Mapping>
+ static uptr Apply(uptr x) {
+ return (((x + kShadowCell - 1) &
+ ~(Mapping::kShadowMsk | (kShadowCell - 1))) ^
+ Mapping::kShadowXor) *
+ kShadowMultiplier +
+ Mapping::kShadowAdd;
+ }
+};
+
+// If addr % kShadowCell == 0, then MemToEndShadow(addr) == MemToShadow(addr)
+// Otherwise, MemToEndShadow(addr) == MemToShadow(addr) + kShadowCnt
+ALWAYS_INLINE
+RawShadow *MemToEndShadow(uptr x) {
+ return reinterpret_cast<RawShadow *>(SelectMapping<MemToEndShadowImpl>(x));
+}
+
struct MemToMetaImpl {
template <typename Mapping>
static u32 *Apply(uptr x) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
index 2c55645a15479..dbf583b362359 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -195,7 +195,7 @@ static NOINLINE void MapRodata(char* buffer, uptr size) {
!segment.IsWritable() && IsAppMem(segment.start)) {
// Assume it's .rodata
char *shadow_start = (char *)MemToShadow(segment.start);
- char *shadow_end = (char *)MemToShadow(segment.end);
+ char *shadow_end = (char *)MemToEndShadow(segment.end);
for (char *p = shadow_start; p < shadow_end;
p += marker.size() * sizeof(RawShadow)) {
internal_mmap(
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index c83efec8eaca2..4afb84e89936a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -532,7 +532,7 @@ static void StopBackgroundThread() {
void DontNeedShadowFor(uptr addr, uptr size) {
ReleaseMemoryPagesToOS(reinterpret_cast<uptr>(MemToShadow(addr)),
- reinterpret_cast<uptr>(MemToShadow(addr + size)));
+ reinterpret_cast<uptr>(MemToEndShadow(addr + size)));
}
#if !SANITIZER_GO
@@ -588,12 +588,12 @@ void MapShadow(uptr addr, uptr size) {
// CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // windows wants 64K alignment
const uptr kPageSize = GetPageSizeCached();
uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), kPageSize);
- uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), kPageSize);
+ uptr shadow_end = RoundUpTo((uptr)MemToEndShadow(addr + size), kPageSize);
if (!MmapFixedNoReserve(shadow_begin, shadow_end - shadow_begin, "shadow"))
Die();
#else
uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), (64 << 10));
- uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), (64 << 10));
+ uptr shadow_end = RoundUpTo((uptr)MemToEndShadow(addr + size), (64 << 10));
VPrintf(2, "MapShadow for (0x%zx-0x%zx), begin/end: (0x%zx-0x%zx)\n",
addr, addr + size, shadow_begin, shadow_end);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
index cf07686d968dc..9652cd0267a79 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
@@ -684,16 +684,15 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
DCHECK(IsShadowMem(shadow_mem));
}
- RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
- reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
- if (!IsShadowMem(shadow_mem_end)) {
- Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
+ RawShadow* shadow_mem_end = MemToEndShadow(addr + size);
+ if (size > 0 && !IsShadowMem(shadow_mem_end - 1)) {
+ Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end - 1,
(void*)(addr + size - 1));
Printf(
"Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
"%zx\n",
shadow_mem, (void*)addr, size, kShadowMultiplier);
- DCHECK(IsShadowMem(shadow_mem_end));
+ DCHECK(IsShadowMem(shadow_mem_end - 1));
}
#endif
|
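For readers skimming the diff, a standalone sketch of the arithmetic that `MemToEndShadowImpl` performs may help. The constants and the flat, mask-free mapping below are assumptions chosen for illustration; they are not TSan's real mapping parameters.

```cpp
#include <cassert>
#include <cstdint>

// Simplified model: each kShadowCell bytes of app memory map to one shadow
// cell of kShadowCnt slots, each kShadowSize bytes. These values mirror a
// common TSan configuration but are assumptions for this sketch.
constexpr uint64_t kShadowCell = 8;
constexpr uint64_t kShadowCnt = 4;
constexpr uint64_t kShadowSize = 4;
constexpr uint64_t kShadowMultiplier = kShadowSize * kShadowCnt / kShadowCell;

// Flat mapping (no mask/xor/add), enough to show the alignment issue.
uint64_t MemToShadow(uint64_t addr) {
  return (addr & ~(kShadowCell - 1)) * kShadowMultiplier;
}

// Exclusive shadow end: round the app end address up to the next cell first.
uint64_t MemToEndShadow(uint64_t addr_end) {
  uint64_t rounded = (addr_end + kShadowCell - 1) & ~(kShadowCell - 1);
  return rounded * kShadowMultiplier;
}

int main() {
  // Aligned end: both computations agree.
  assert(MemToEndShadow(16) == MemToShadow(16));
  // Unaligned end: MemToShadow(1) lands in the same cell as MemToShadow(0),
  // so a range [MemToShadow(0), MemToShadow(1)) would be empty, while
  // MemToEndShadow covers the full cell's shadow.
  assert(MemToShadow(1) == MemToShadow(0));
  assert(MemToEndShadow(1) == MemToShadow(0) + kShadowCnt * kShadowSize);
  return 0;
}
```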
If it is not appropriate to add a new utility function for shadow end computation, an alternative would be to round the end address up to the `kShadowCell` boundary in place before calling `MemToShadow`. |
Is there a bug? If there's a bug, this needs a test. Otherwise, it's unclear what this is improving. |
I acknowledge that "This is an improvement that enhances the robustness of the code", because the potential 'bugs' can hardly be triggered under the current implementation.

In tsan_interface_java.cpp:
```cpp
DCHECK_EQ(dst % kHeapAlignment, 0);
DCHECK_EQ(size % kHeapAlignment, 0);
...
// Duplicated assertion
DCHECK_NE(size, 0);
...
RawShadow *dend = MemToShadow(dst + size);
```
It should be a bug if `dst + size` were not `kShadowCell`-aligned, but the `kHeapAlignment` checks above happen to guarantee the alignment here.

In tsan_rtl_access.cpp:
```cpp
RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
    reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
if (!IsShadowMem(shadow_mem_end)) {
```
The calculation of `shadow_mem_end` does not account for `kShadowCell`, so it can fall short of the real shadow end. What's more, if `size == 0`, it points before `shadow_mem` itself.

In tsan_platform_linux.cpp: TSan iterates the proc map and, for each read-only segment, fills the shadow up to `MemToShadow(segment.end)`, but `segment.end` is not guaranteed to be `kShadowCell`-aligned.

In tsan_rtl.cpp:
```cpp
void DontNeedShadowFor(uptr addr, uptr size) {
  ReleaseMemoryPagesToOS(reinterpret_cast<uptr>(MemToShadow(addr)),
                         reinterpret_cast<uptr>(MemToShadow(addr + size)));
}
```
E.g., for the range [0, 1), `MemToShadow(addr + size) == MemToShadow(addr)`, so the released shadow range is empty.

In addition, another interface has a similar issue. By the way, it seems this problem can be triggered by a well-designed test case. If necessary, I will design one when I have time. |
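To make the `DontNeedShadowFor` concern concrete, here is a hedged numeric sketch (same illustrative constants and flat mapping as above, not the real TSan layout) showing how an end computed with plain `MemToShadow(addr + size)` drops the tail cell when `size % kShadowCell != 0`:

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint64_t kShadowCell = 8;        // assumed app bytes per cell
constexpr uint64_t kShadowMultiplier = 2;  // assumed shadow bytes per app byte

uint64_t MemToShadow(uint64_t a) { return (a & ~(kShadowCell - 1)) * kShadowMultiplier; }

int main() {
  uint64_t addr = 0, size = 4096 + 1;  // one page plus one byte
  uint64_t begin = MemToShadow(addr);
  uint64_t naive_end = MemToShadow(addr + size);  // drops the tail cell
  uint64_t rounded = (addr + size + kShadowCell - 1) & ~(kShadowCell - 1);
  uint64_t full_end = MemToShadow(rounded);       // covers the tail cell
  std::printf("naive range:   [%llu, %llu) -> %llu shadow bytes\n",
              (unsigned long long)begin, (unsigned long long)naive_end,
              (unsigned long long)(naive_end - begin));
  std::printf("rounded range: [%llu, %llu) -> %llu shadow bytes\n",
              (unsigned long long)begin, (unsigned long long)full_end,
              (unsigned long long)(full_end - begin));
  return 0;
}
```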
Thanks for the patch!
If the assertion is added, do all the tests pass, and does it fix all the problems that adding `MemToEndShadow` would? |
✅ With the latest revision this PR passed the C/C++ code formatter. |
I also really think it's not good to add a new utility function for this. |
…#145472) Once part of PR #144648; following the reviewer's advice, it was split into this separate PR. `unmap` works at page granularity, but supports an arbitrary non-zero size as an argument, which results in possible shadow under-cleaning in the existing TSan implementation when `size % kShadowCell != 0`. This change introduces two test cases to verify the shadow cleaning effect of `unmap`:
- java_heap_init2.cpp: imitating java_heap_init.cpp, verifies the incomplete cleaning of meta
- munmap_clear_shadow.c: verifies the incomplete cleaning of shadow
This is an improvement that enhances the robustness of the code. Previously, the correct calculation of the exclusive EndShadow relied on the assumption that `addr_end % kShadowCell == 0`; however, in many current usages, this assumption was not strictly guaranteed (although it was in fact met).

In addition, computing EndShadow does not require the corresponding address to be AppMem; for example, HighAppEnd is not AppMem, but can still be used to calculate EndShadow.

For example, for the AppMem range [0, 1), `s = MemToShadow(0)` is equal to `MemToShadow(1)`. The previous logic would incorrectly deduce an empty shadow range [s, s), while the correct shadow range should be [s, s + kShadowSize * kShadowCnt) to cover all the related shadow memory for the accessed cell.

This commit addresses this in two ways:

1. It introduces a dedicated utility function, `MemToEndShadow`, to correctly calculate the end of a shadow memory range, accounting for the memory cell granularity.
2. It replaces existing (and potentially incorrect) calculations of the shadow end with this new utility function.

Additionally, the previous commit 4052de6 resolved a problem with overestimating the shadow end, but it did not consider `kShadowCell` and could therefore lead to underestimates. This is also corrected by utilizing the `MemToEndShadow` function.
We don't always need to get the real shadow end; for some shadow-clearing operations, using the real shadow end would cause over-cleaning (e.g., clearing [0, 1) would make [1, 8) inaccessible when kShadowCell == 8). So when we do need the real end, we round up in place. For example, `unmap(addr, sz)` makes `[addr + sz, addr + PageSize)` inaccessible, so we can safely clean up the full shadow (by getting the real shadow end).
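A minimal sketch of this "round up in place" approach, assuming a helper equivalent to sanitizer_common's `RoundUpTo` and the usual cell size; the function name `ClearShadowForUnmap` is a placeholder for illustration, not actual TSan code:

```cpp
#include <cstdint>

constexpr uintptr_t kShadowCell = 8;  // assumed cell granularity

// Stand-in for sanitizer_common's RoundUpTo (power-of-two boundary).
constexpr uintptr_t RoundUpTo(uintptr_t x, uintptr_t boundary) {
  return (x + boundary - 1) & ~(boundary - 1);
}

static_assert(RoundUpTo(4097, kShadowCell) == 4104, "a tail byte pulls in a whole cell");

// Hypothetical caller: after unmap, the rest of the last page is inaccessible
// anyway, so it is safe to round the size up to the cell boundary and clear
// the shadow of the final, partially covered cell as well.
void ClearShadowForUnmap(uintptr_t addr, uintptr_t size) {
  uintptr_t end = addr + RoundUpTo(size, kShadowCell);
  // ... clear shadow for [MemToShadow(addr), MemToShadow(end)) ...
  (void)end;
}

int main() {
  ClearShadowForUnmap(0x1000, 4097);
  return 0;
}
```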
Friendly ping @melver. |
In TSan, every `k` bytes of application memory (where `k = 8`) maps to a single shadow/meta cell. This design leads to two distinct outcomes when calculating the end of a shadow range using `MemToShadow(addr_end)`, depending on the alignment of `addr_end`:

- **Exclusive End:** If `addr_end` is aligned (`addr_end % k == 0`), `MemToShadow(addr_end)` points to the first shadow cell *past* the intended range. This address is an exclusive boundary marker, not a cell to be operated on.
- **Inclusive End:** If `addr_end` is not aligned (`addr_end % k != 0`), `MemToShadow(addr_end)` points to the last shadow cell that *is* part of the range (i.e., the same cell as `MemToShadow(addr_end - 1)`).

Different TSan functions have different expectations for whether the shadow end should be inclusive or exclusive. However, these expectations are not always explicitly enforced, which can lead to subtle bugs or reliance on unstated invariants.

The core of this patch is to ensure that functions ONLY requiring an **exclusive shadow end** behave correctly.

1. Enforcing Existing Invariants: For functions like `MetaMap::MoveMemory` and `MapShadow`, the assumption is that the end address is always `k`-aligned. While this holds true in the current codebase (e.g., due to some external implicit conditions), this invariant is not guaranteed by the function's internal context. We add explicit assertions to make this requirement clear and to catch any future changes that might violate this assumption.
2. Fixing Latent Bugs: In other cases, unaligned end addresses are possible, representing a latent bug. This was the case in `UnmapShadow`. The `size` of a memory region being unmapped is not always a multiple of `k`. When this happens, `UnmapShadow` would fail to clear the final (tail) portion of the shadow memory. This patch fixes `UnmapShadow` by rounding up the `size` to the next multiple of `k` before clearing the shadow memory. This is safe because the underlying OS `unmap` operation is page-granular, and the page size is guaranteed to be a multiple of `k`. Notably, this fix makes `UnmapShadow` consistent with its inverse operation, `MemoryRangeImitateWriteOrResetRange`, which already performs a similar size round-up.

In summary, this PR:

- **Adds assertions** to `MetaMap::MoveMemory` and `MapShadow` to enforce their implicit requirement for k-aligned end addresses.
- **Fixes a latent bug** in `UnmapShadow` by rounding up the size to ensure the entire shadow range is cleared. Two new test cases have been added to cover this scenario.
- Removes a redundant assertion in `__tsan_java_move`.
- Fixes an incorrect shadow end calculation introduced in commit 4052de6. The previous logic, while fixing an overestimation issue, did not properly account for `kShadowCell` alignment and could lead to underestimation.
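The "enforce the invariant" half of the patch can be illustrated with a small, hedged sketch: assert that the end address is cell-aligned before treating `MemToShadow(addr_end)` as an exclusive end. Plain `assert` stands in for TSan's `DCHECK`, and the toy mapping and `MapShadowRange` name are illustrative assumptions:

```cpp
#include <cassert>
#include <cstdint>

constexpr uintptr_t kShadowCell = 8;  // assumed cell granularity

// Toy mapping: one 16-byte shadow cell per kShadowCell bytes of app memory.
uintptr_t MemToShadow(uintptr_t a) { return (a / kShadowCell) * 16; }

// Callers that need an exclusive shadow end must pass a cell-aligned end;
// the assertion documents and enforces that requirement.
void MapShadowRange(uintptr_t addr, uintptr_t size) {
  assert((addr + size) % kShadowCell == 0 && "end must be kShadowCell-aligned");
  uintptr_t shadow_begin = MemToShadow(addr);
  uintptr_t shadow_end = MemToShadow(addr + size);  // exclusive end, safe after the check
  assert(shadow_begin <= shadow_end);
  (void)shadow_begin;
  (void)shadow_end;
}

int main() {
  MapShadowRange(0, 64);  // 64 % 8 == 0, so the invariant holds
  return 0;
}
```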
It looks like this broke a test in iOS CI: https://ci.swift.org/job/llvm.org/job/clang-san-iossim/9785/testReport/junit/ThreadSanitizer-x86_64-iossim/ThreadSanitizer-x86_64-iossim/munmap_clear_shadow_c/ |
@thetruestblue Hi, I got a 404 when accessing this link. Could you paste the relevant test output to help me debug the issue? |
Consider reverting this commit, and then re-committing with the fix. |
After debugging, I found that it is a behavior difference of `ReleaseMemoryPagesToOS` across platforms. On iOS, it executes `madvise` with `MADV_FREE`, which only tags the pages as reclaimable and does not release them immediately, so the shadow is not guaranteed to read back as zero right away. Therefore, on iOS, I think we need to change the test case to one of the following fixes:
Which fix do you think is better, or can you give a better suggestion? @melver After confirming the fix, I will propose another reland PR. Or, for such a simple testcase-only fix, should I just fix forward without reverting? |
Turning off the test case is unacceptable - that'd be a regression. Could you explain why this commit introduced the regression? |
This commit triggered a failure in the newly added test case `munmap_clear_shadow.c`. I did pass the test in my environment (Linux/x86_64), but had not considered other platforms.
|
Good point. If this is a new test, feel free to disable it on iOS. |
Reverts #144648 due to a test failure of the newly added test case `munmap_clear_shadow.c` on iOS.
#144648 was reverted because it failed the new sanitizer test `munmap_clear_shadow.c` in iOS CI. That issue can be fixed by disabling the test on the platforms where it is incompatible: FreeBSD, Apple, NetBSD, Solaris, and Haiku, where `ReleaseMemoryPagesToOS` executes `madvise(beg, end, MADV_FREE)`, which tags the relevant pages as 'FREE' and does not release them immediately.
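For context on why the test behaves differently across platforms, here is a small Linux-oriented sketch (an illustration under stated assumptions, not the actual lit test): after `MADV_DONTNEED` an anonymous private page reads back as zero immediately, while `MADV_FREE`, where available, only marks the page reclaimable and may keep the old contents until the kernel reclaims it.

```cpp
#include <sys/mman.h>

#include <cassert>
#include <cstdio>
#include <cstring>

int main() {
  const size_t len = 4096;
  void *mem = mmap(nullptr, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(mem != MAP_FAILED);
  char *p = static_cast<char *>(mem);
  std::memset(p, 0xAB, len);

  // MADV_DONTNEED: the mapping stays valid, but subsequent reads see zero pages.
  madvise(p, len, MADV_DONTNEED);
  std::printf("after MADV_DONTNEED: %d (expected 0)\n", p[0]);

#ifdef MADV_FREE
  // MADV_FREE: the pages are only tagged as reclaimable; reads may still return
  // the old data until the kernel actually reclaims them, which is why a test
  // expecting freshly zeroed shadow right after the call can fail on such platforms.
  std::memset(p, 0xAB, len);
  madvise(p, len, MADV_FREE);
  std::printf("after MADV_FREE: %d (may still be the old value)\n", p[0]);
#endif

  munmap(p, len);
  return 0;
}
```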
…#146674) Reverts llvm/llvm-project#144648 due to a test failure of the newly added test case `munmap_clear_shadow.c` on iOS.