Skip to content

Commit 45f6d55

Browse files
committed
[DFSan] Change shadow and origin memory layouts to match MSan.
Previously on x86_64:

  +--------------------+ 0x800000000000 (top of memory)
  | application memory |
  +--------------------+ 0x700000008000 (kAppAddr)
  |                    |
  |       unused       |
  |                    |
  +--------------------+ 0x300000000000 (kUnusedAddr)
  |       origin       |
  +--------------------+ 0x200000008000 (kOriginAddr)
  |       unused       |
  +--------------------+ 0x200000000000
  |   shadow memory    |
  +--------------------+ 0x100000008000 (kShadowAddr)
  |       unused       |
  +--------------------+ 0x000000010000
  | reserved by kernel |
  +--------------------+ 0x000000000000

  MEM_TO_SHADOW(mem) = mem & ~0x600000000000
  SHADOW_TO_ORIGIN(shadow) = kOriginAddr - kShadowAddr + shadow

Now for x86_64:

  +--------------------+ 0x800000000000 (top of memory)
  |   application 3    |
  +--------------------+ 0x700000000000
  |      invalid       |
  +--------------------+ 0x610000000000
  |      origin 1      |
  +--------------------+ 0x600000000000
  |   application 2    |
  +--------------------+ 0x510000000000
  |      shadow 1      |
  +--------------------+ 0x500000000000
  |      invalid       |
  +--------------------+ 0x400000000000
  |      origin 3      |
  +--------------------+ 0x300000000000
  |      shadow 3      |
  +--------------------+ 0x200000000000
  |      origin 2      |
  +--------------------+ 0x110000000000
  |      invalid       |
  +--------------------+ 0x100000000000
  |      shadow 2      |
  +--------------------+ 0x010000000000
  |   application 1    |
  +--------------------+ 0x000000000000

  MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
  SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000

Reviewed By: stephan.yichao.zhao, gbalats

Differential Revision: https://reviews.llvm.org/D104896
1 parent 2e9c75d commit 45f6d55

File tree

13 files changed

+417
-231
lines changed

13 files changed

+417
-231
lines changed

clang/docs/DataFlowSanitizerDesign.rst

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,25 +76,41 @@ The following is the memory layout for Linux/x86\_64:
7676
+---------------+---------------+--------------------+
7777
| Start | End | Use |
7878
+===============+===============+====================+
79-
| 0x700000008000|0x800000000000 | application memory |
79+
| 0x700000000000|0x800000000000 | application 3 |
8080
+---------------+---------------+--------------------+
81-
| 0x300000000000|0x700000008000 | unused |
81+
| 0x610000000000|0x700000000000 | unused |
8282
+---------------+---------------+--------------------+
83-
| 0x200000008000|0x300000000000 | origin |
83+
| 0x600000000000|0x610000000000 | origin 1 |
8484
+---------------+---------------+--------------------+
85-
| 0x200000000000|0x200000008000 | unused |
85+
| 0x510000000000|0x600000000000 | application 2 |
8686
+---------------+---------------+--------------------+
87-
| 0x100000008000|0x200000000000 | shadow memory |
87+
| 0x500000000000|0x510000000000 | shadow 1 |
8888
+---------------+---------------+--------------------+
89-
| 0x000000010000|0x100000008000 | unused |
89+
| 0x400000000000|0x500000000000 | unused |
9090
+---------------+---------------+--------------------+
91-
| 0x000000000000|0x000000010000 | reserved by kernel |
91+
| 0x300000000000|0x400000000000 | origin 3 |
92+
+---------------+---------------+--------------------+
93+
| 0x200000000000|0x300000000000 | shadow 3 |
94+
+---------------+---------------+--------------------+
95+
| 0x110000000000|0x200000000000 | origin 2 |
96+
+---------------+---------------+--------------------+
97+
| 0x100000000000|0x110000000000 | unused |
98+
+---------------+---------------+--------------------+
99+
| 0x010000000000|0x100000000000 | shadow 2 |
100+
+---------------+---------------+--------------------+
101+
| 0x000000000000|0x010000000000 | application 1 |
92102
+---------------+---------------+--------------------+
93103

94104
Each byte of application memory corresponds to a single byte of shadow
95-
memory, which is used to store its taint label. As for LLVM SSA
96-
registers, we have not found it necessary to associate a label with
97-
each byte or bit of data, as some other tools do. Instead, labels are
105+
memory, which is used to store its taint label. We map memory, shadow, and
106+
origin regions to each other with these masks and offsets:
107+
108+
* shadow_addr = memory_addr ^ 0x500000000000
109+
110+
* origin_addr = shadow_addr + 0x100000000000
111+
112+
As for LLVM SSA registers, we have not found it necessary to associate a label
113+
with each byte or bit of data, as some other tools do. Instead, labels are
98114
associated directly with registers. Loads will result in a union of
99115
all shadow labels corresponding to bytes loaded, and stores will
100116
result in a copy of the label of the stored value to the shadow of all

compiler-rt/lib/dfsan/dfsan.cpp

Lines changed: 184 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -64,30 +64,34 @@ int __dfsan_get_track_origins() {
6464

6565
// On Linux/x86_64, memory is laid out as follows:
6666
//
67-
// +--------------------+ 0x800000000000 (top of memory)
68-
// | application memory |
69-
// +--------------------+ 0x700000008000 (kAppAddr)
70-
// | |
71-
// | unused |
72-
// | |
73-
// +--------------------+ 0x300000000000 (kUnusedAddr)
74-
// | origin |
75-
// +--------------------+ 0x200000008000 (kOriginAddr)
76-
// | unused |
77-
// +--------------------+ 0x200000000000
78-
// | shadow memory |
79-
// +--------------------+ 0x100000008000 (kShadowAddr)
80-
// | unused |
81-
// +--------------------+ 0x000000010000
82-
// | reserved by kernel |
83-
// +--------------------+ 0x000000000000
67+
// +--------------------+ 0x800000000000 (top of memory)
68+
// | application 3 |
69+
// +--------------------+ 0x700000000000
70+
// | invalid |
71+
// +--------------------+ 0x610000000000
72+
// | origin 1 |
73+
// +--------------------+ 0x600000000000
74+
// | application 2 |
75+
// +--------------------+ 0x510000000000
76+
// | shadow 1 |
77+
// +--------------------+ 0x500000000000
78+
// | invalid |
79+
// +--------------------+ 0x400000000000
80+
// | origin 3 |
81+
// +--------------------+ 0x300000000000
82+
// | shadow 3 |
83+
// +--------------------+ 0x200000000000
84+
// | origin 2 |
85+
// +--------------------+ 0x110000000000
86+
// | invalid |
87+
// +--------------------+ 0x100000000000
88+
// | shadow 2 |
89+
// +--------------------+ 0x010000000000
90+
// | application 1 |
91+
// +--------------------+ 0x000000000000
8492
//
85-
// To derive a shadow memory address from an application memory address, bits
86-
// 45-46 are cleared to bring the address into the range
87-
// [0x100000008000,0x200000000000). See the function shadow_for below.
88-
//
89-
//
90-
93+
// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
94+
// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
9195

9296
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
9397
dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) {
@@ -160,14 +164,12 @@ static uptr OriginAlignDown(uptr u) { return u & kOriginAlignMask; }
160164
static dfsan_origin GetOriginIfTainted(uptr addr, uptr size) {
161165
for (uptr i = 0; i < size; ++i, ++addr) {
162166
dfsan_label *s = shadow_for((void *)addr);
163-
if (!is_shadow_addr_valid((uptr)s)) {
164-
// The current DFSan memory layout is not always correct. For example,
165-
// addresses (0, 0x10000) are mapped to (0, 0x10000). Before fixing the
166-
// issue, we ignore such addresses.
167-
continue;
168-
}
169-
if (*s)
167+
168+
if (*s) {
169+
// Validate address region.
170+
CHECK(MEM_IS_SHADOW(s));
170171
return *(dfsan_origin *)origin_for((void *)addr);
172+
}
171173
}
172174
return 0;
173175
}
@@ -317,10 +319,12 @@ static void ReverseCopyOrigin(const void *dst, const void *src, uptr size,
317319
// operation.
318320
static void MoveOrigin(const void *dst, const void *src, uptr size,
319321
StackTrace *stack) {
320-
if (!has_valid_shadow_addr(dst) ||
321-
!has_valid_shadow_addr((void *)((uptr)dst + size)) ||
322-
!has_valid_shadow_addr(src) ||
323-
!has_valid_shadow_addr((void *)((uptr)src + size))) {
322+
// Validate address regions.
323+
if (!MEM_IS_SHADOW(shadow_for(dst)) ||
324+
!MEM_IS_SHADOW(shadow_for((void *)((uptr)dst + size))) ||
325+
!MEM_IS_SHADOW(shadow_for(src)) ||
326+
!MEM_IS_SHADOW(shadow_for((void *)((uptr)src + size)))) {
327+
CHECK(false);
324328
return;
325329
}
326330
// If destination origin range overlaps with source origin range, move
@@ -833,8 +837,149 @@ void dfsan_clear_thread_local_state() {
833837
}
834838

835839
extern "C" void dfsan_flush() {
836-
if (!MmapFixedSuperNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
837-
Die();
840+
const uptr maxVirtualAddress = GetMaxUserVirtualAddress();
841+
for (unsigned i = 0; i < kMemoryLayoutSize; ++i) {
842+
uptr start = kMemoryLayout[i].start;
843+
uptr end = kMemoryLayout[i].end;
844+
uptr size = end - start;
845+
MappingDesc::Type type = kMemoryLayout[i].type;
846+
847+
if (type != MappingDesc::SHADOW && type != MappingDesc::ORIGIN)
848+
continue;
849+
850+
// Check if the segment should be mapped based on platform constraints.
851+
if (start >= maxVirtualAddress)
852+
continue;
853+
854+
if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name)) {
855+
Printf("FATAL: DataFlowSanitizer: failed to clear memory region\n");
856+
Die();
857+
}
858+
}
859+
}
860+
861+
// TODO: CheckMemoryLayoutSanity is based on msan.
862+
// Consider refactoring these into a shared implementation.
863+
static void CheckMemoryLayoutSanity() {
864+
uptr prev_end = 0;
865+
for (unsigned i = 0; i < kMemoryLayoutSize; ++i) {
866+
uptr start = kMemoryLayout[i].start;
867+
uptr end = kMemoryLayout[i].end;
868+
MappingDesc::Type type = kMemoryLayout[i].type;
869+
CHECK_LT(start, end);
870+
CHECK_EQ(prev_end, start);
871+
CHECK(addr_is_type(start, type));
872+
CHECK(addr_is_type((start + end) / 2, type));
873+
CHECK(addr_is_type(end - 1, type));
874+
if (type == MappingDesc::APP) {
875+
uptr addr = start;
876+
CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));
877+
CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));
878+
CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)));
879+
880+
addr = (start + end) / 2;
881+
CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));
882+
CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));
883+
CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)));
884+
885+
addr = end - 1;
886+
CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));
887+
CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));
888+
CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)));
889+
}
890+
prev_end = end;
891+
}
892+
}
893+
894+
// TODO: CheckMemoryRangeAvailability is based on msan.
895+
// Consider refactoring these into a shared implementation.
896+
static bool CheckMemoryRangeAvailability(uptr beg, uptr size) {
897+
if (size > 0) {
898+
uptr end = beg + size - 1;
899+
if (!MemoryRangeIsAvailable(beg, end)) {
900+
Printf("FATAL: Memory range %p - %p is not available.\n", beg, end);
901+
return false;
902+
}
903+
}
904+
return true;
905+
}
906+
907+
// TODO: ProtectMemoryRange is based on msan.
908+
// Consider refactoring these into a shared implementation.
909+
static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) {
910+
if (size > 0) {
911+
void *addr = MmapFixedNoAccess(beg, size, name);
912+
if (beg == 0 && addr) {
913+
// Depending on the kernel configuration, we may not be able to protect
914+
// the page at address zero.
915+
uptr gap = 16 * GetPageSizeCached();
916+
beg += gap;
917+
size -= gap;
918+
addr = MmapFixedNoAccess(beg, size, name);
919+
}
920+
if ((uptr)addr != beg) {
921+
uptr end = beg + size - 1;
922+
Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg, end,
923+
name);
924+
return false;
925+
}
926+
}
927+
return true;
928+
}
929+
930+
// TODO: InitShadow is based on msan.
931+
// Consider refactoring these into a shared implementation.
932+
bool InitShadow(bool init_origins) {
933+
// Let user know mapping parameters first.
934+
VPrintf(1, "dfsan_init %p\n", &__dfsan::dfsan_init);
935+
for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
936+
VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout[i].name, kMemoryLayout[i].start,
937+
kMemoryLayout[i].end - 1);
938+
939+
CheckMemoryLayoutSanity();
940+
941+
if (!MEM_IS_APP(&__dfsan::dfsan_init)) {
942+
Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
943+
(uptr)&__dfsan::dfsan_init);
944+
return false;
945+
}
946+
947+
const uptr maxVirtualAddress = GetMaxUserVirtualAddress();
948+
949+
for (unsigned i = 0; i < kMemoryLayoutSize; ++i) {
950+
uptr start = kMemoryLayout[i].start;
951+
uptr end = kMemoryLayout[i].end;
952+
uptr size = end - start;
953+
MappingDesc::Type type = kMemoryLayout[i].type;
954+
955+
// Check if the segment should be mapped based on platform constraints.
956+
if (start >= maxVirtualAddress)
957+
continue;
958+
959+
bool map = type == MappingDesc::SHADOW ||
960+
(init_origins && type == MappingDesc::ORIGIN);
961+
bool protect = type == MappingDesc::INVALID ||
962+
(!init_origins && type == MappingDesc::ORIGIN);
963+
CHECK(!(map && protect));
964+
if (!map && !protect)
965+
CHECK(type == MappingDesc::APP);
966+
if (map) {
967+
if (!CheckMemoryRangeAvailability(start, size))
968+
return false;
969+
if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
970+
return false;
971+
if (common_flags()->use_madv_dontdump)
972+
DontDumpShadowMemory(start, size);
973+
}
974+
if (protect) {
975+
if (!CheckMemoryRangeAvailability(start, size))
976+
return false;
977+
if (!ProtectMemoryRange(start, size, kMemoryLayout[i].name))
978+
return false;
979+
}
980+
}
981+
982+
return true;
838983
}
839984

840985
static void DFsanInit(int argc, char **argv, char **envp) {
@@ -848,18 +993,9 @@ static void DFsanInit(int argc, char **argv, char **envp) {
848993

849994
InitializeFlags();
850995

851-
dfsan_flush();
852-
if (common_flags()->use_madv_dontdump)
853-
DontDumpShadowMemory(ShadowAddr(), UnusedAddr() - ShadowAddr());
854-
855-
// Protect the region of memory we don't use, to preserve the one-to-one
856-
// mapping from application to shadow memory. But if ASLR is disabled, Linux
857-
// will load our executable in the middle of our unused region. This mostly
858-
// works so long as the program doesn't use too much memory. We support this
859-
// case by disabling memory protection when ASLR is disabled.
860-
uptr init_addr = (uptr)&DFsanInit;
861-
if (!(init_addr >= UnusedAddr() && init_addr < AppAddr()))
862-
MmapFixedNoAccess(UnusedAddr(), AppAddr() - UnusedAddr());
996+
CheckASLR();
997+
998+
InitShadow(__dfsan_get_track_origins());
863999

8641000
initialize_interceptors();
8651001

compiler-rt/lib/dfsan/dfsan.h

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,14 @@ extern bool dfsan_init_is_running;
6161
void initialize_interceptors();
6262

6363
inline dfsan_label *shadow_for(void *ptr) {
64-
return (dfsan_label *)(((uptr)ptr) & ShadowMask());
64+
return (dfsan_label *)MEM_TO_SHADOW(ptr);
6565
}
6666

6767
inline const dfsan_label *shadow_for(const void *ptr) {
6868
return shadow_for(const_cast<void *>(ptr));
6969
}
7070

71-
inline uptr unaligned_origin_for(uptr ptr) {
72-
return OriginAddr() - ShadowAddr() + (ptr & ShadowMask());
73-
}
71+
inline uptr unaligned_origin_for(uptr ptr) { return MEM_TO_ORIGIN(ptr); }
7472

7573
inline dfsan_origin *origin_for(void *ptr) {
7674
auto aligned_addr = unaligned_origin_for(reinterpret_cast<uptr>(ptr)) &
@@ -82,24 +80,6 @@ inline const dfsan_origin *origin_for(const void *ptr) {
8280
return origin_for(const_cast<void *>(ptr));
8381
}
8482

85-
inline bool is_shadow_addr_valid(uptr shadow_addr) {
86-
return (uptr)shadow_addr >= ShadowAddr() && (uptr)shadow_addr < OriginAddr();
87-
}
88-
89-
inline bool has_valid_shadow_addr(const void *ptr) {
90-
const dfsan_label *ptr_s = shadow_for(ptr);
91-
return is_shadow_addr_valid((uptr)ptr_s);
92-
}
93-
94-
inline bool is_origin_addr_valid(uptr origin_addr) {
95-
return (uptr)origin_addr >= OriginAddr() && (uptr)origin_addr < UnusedAddr();
96-
}
97-
98-
inline bool has_valid_origin_addr(const void *ptr) {
99-
const dfsan_origin *ptr_orig = origin_for(ptr);
100-
return is_origin_addr_valid((uptr)ptr_orig);
101-
}
102-
10383
void dfsan_copy_memory(void *dst, const void *src, uptr size);
10484

10585
void dfsan_allocator_init();

compiler-rt/lib/dfsan/dfsan_allocator.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,11 @@ struct DFsanMapUnmapCallback {
3333
void OnUnmap(uptr p, uptr size) const { dfsan_set_label(0, (void *)p, size); }
3434
};
3535

36+
static const uptr kAllocatorSpace = 0x700000000000ULL;
3637
static const uptr kMaxAllowedMallocSize = 8UL << 30;
3738

3839
struct AP64 { // Allocator64 parameters. Deliberately using a short name.
39-
// TODO: DFSan assumes application memory starts from 0x700000008000. For
40-
// unknown reason, the sanitizer allocator does not support any start address
41-
// between 0x701000000000 and 0x700000008000. After switching to fast8labels
42-
// mode, DFSan memory layout will be changed to the same to MSan's. Then we
43-
// set the start address to 0x700000000000 as MSan.
44-
static const uptr kSpaceBeg = 0x701000000000ULL;
40+
static const uptr kSpaceBeg = kAllocatorSpace;
4541
static const uptr kSpaceSize = 0x40000000000; // 4T.
4642
static const uptr kMetadataSize = sizeof(Metadata);
4743
typedef DefaultSizeClassMap SizeClassMap;

0 commit comments

Comments
 (0)