Skip to content

Commit 5b56688

Browse files
committed
[scudo] Split the code paths which enable memory tagging
Memory tagging is a flag that requires a system reboot to enable, which means the code paths for memory tagging will never be executed if it is set to off. However, we only mark those paths with UNLIKELY(), which doesn't properly annotate that expectation. As a result, the generated assembly always interleaves the instructions for the paths with and without memory tagging enabled. The direct impact is that the I-cache may always hold many unused instructions. This change explicitly splits the paths into different code blocks. It introduces a small amount of duplicated code, but it creates two independent execution paths and will improve cache locality.
1 parent 0f0f0ff commit 5b56688

File tree

1 file changed

+175
-125
lines changed

1 file changed

+175
-125
lines changed

compiler-rt/lib/scudo/standalone/combined.h

Lines changed: 175 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -401,133 +401,18 @@ class Allocator {
401401
reportOutOfMemory(NeededSize);
402402
}
403403

404-
const uptr BlockUptr = reinterpret_cast<uptr>(Block);
405-
const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize();
406-
const uptr UserPtr = roundUp(UnalignedUserPtr, Alignment);
407-
408-
void *Ptr = reinterpret_cast<void *>(UserPtr);
409-
void *TaggedPtr = Ptr;
410-
if (LIKELY(ClassId)) {
411-
// We only need to zero or tag the contents for Primary backed
412-
// allocations. We only set tags for primary allocations in order to avoid
413-
// faulting potentially large numbers of pages for large secondary
414-
// allocations. We assume that guard pages are enough to protect these
415-
// allocations.
416-
//
417-
// FIXME: When the kernel provides a way to set the background tag of a
418-
// mapping, we should be able to tag secondary allocations as well.
419-
//
420-
// When memory tagging is enabled, zeroing the contents is done as part of
421-
// setting the tag.
422-
if (UNLIKELY(useMemoryTagging<Config>(Options))) {
423-
uptr PrevUserPtr;
424-
Chunk::UnpackedHeader Header;
425-
const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId);
426-
const uptr BlockEnd = BlockUptr + BlockSize;
427-
// If possible, try to reuse the UAF tag that was set by deallocate().
428-
// For simplicity, only reuse tags if we have the same start address as
429-
// the previous allocation. This handles the majority of cases since
430-
// most allocations will not be more aligned than the minimum alignment.
431-
//
432-
// We need to handle situations involving reclaimed chunks, and retag
433-
// the reclaimed portions if necessary. In the case where the chunk is
434-
// fully reclaimed, the chunk's header will be zero, which will trigger
435-
// the code path for new mappings and invalid chunks that prepares the
436-
// chunk from scratch. There are three possibilities for partial
437-
// reclaiming:
438-
//
439-
// (1) Header was reclaimed, data was partially reclaimed.
440-
// (2) Header was not reclaimed, all data was reclaimed (e.g. because
441-
// data started on a page boundary).
442-
// (3) Header was not reclaimed, data was partially reclaimed.
443-
//
444-
// Case (1) will be handled in the same way as for full reclaiming,
445-
// since the header will be zero.
446-
//
447-
// We can detect case (2) by loading the tag from the start
448-
// of the chunk. If it is zero, it means that either all data was
449-
// reclaimed (since we never use zero as the chunk tag), or that the
450-
// previous allocation was of size zero. Either way, we need to prepare
451-
// a new chunk from scratch.
452-
//
453-
// We can detect case (3) by moving to the next page (if covered by the
454-
// chunk) and loading the tag of its first granule. If it is zero, it
455-
// means that all following pages may need to be retagged. On the other
456-
// hand, if it is nonzero, we can assume that all following pages are
457-
// still tagged, according to the logic that if any of the pages
458-
// following the next page were reclaimed, the next page would have been
459-
// reclaimed as well.
460-
uptr TaggedUserPtr;
461-
if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) &&
462-
PrevUserPtr == UserPtr &&
463-
(TaggedUserPtr = loadTag(UserPtr)) != UserPtr) {
464-
uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes;
465-
const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached());
466-
if (NextPage < PrevEnd && loadTag(NextPage) != NextPage)
467-
PrevEnd = NextPage;
468-
TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
469-
resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd);
470-
if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) {
471-
// If an allocation needs to be zeroed (i.e. calloc) we can normally
472-
// avoid zeroing the memory now since we can rely on memory having
473-
// been zeroed on free, as this is normally done while setting the
474-
// UAF tag. But if tagging was disabled per-thread when the memory
475-
// was freed, it would not have been retagged and thus zeroed, and
476-
// therefore it needs to be zeroed now.
477-
memset(TaggedPtr, 0,
478-
Min(Size, roundUp(PrevEnd - TaggedUserPtr,
479-
archMemoryTagGranuleSize())));
480-
} else if (Size) {
481-
// Clear any stack metadata that may have previously been stored in
482-
// the chunk data.
483-
memset(TaggedPtr, 0, archMemoryTagGranuleSize());
484-
}
485-
} else {
486-
const uptr OddEvenMask =
487-
computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId);
488-
TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd);
489-
}
490-
storePrimaryAllocationStackMaybe(Options, Ptr);
491-
} else {
492-
Block = addHeaderTag(Block);
493-
Ptr = addHeaderTag(Ptr);
494-
if (UNLIKELY(FillContents != NoFill)) {
495-
// This condition is not necessarily unlikely, but since memset is
496-
// costly, we might as well mark it as such.
497-
memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte,
498-
PrimaryT::getSizeByClassId(ClassId));
499-
}
500-
}
404+
const uptr UserPtr = roundUp(
405+
reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize(), Alignment);
406+
const uptr SizeOrUnusedBytes =
407+
ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size);
408+
409+
if (LIKELY(!useMemoryTagging<Config>(Options))) {
410+
return initChunk(ClassId, Origin, Block, UserPtr, SizeOrUnusedBytes,
411+
FillContents);
501412
} else {
502-
Block = addHeaderTag(Block);
503-
Ptr = addHeaderTag(Ptr);
504-
if (UNLIKELY(useMemoryTagging<Config>(Options))) {
505-
storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr));
506-
storeSecondaryAllocationStackMaybe(Options, Ptr, Size);
507-
}
508-
}
509-
510-
Chunk::UnpackedHeader Header = {};
511-
if (UNLIKELY(UnalignedUserPtr != UserPtr)) {
512-
const uptr Offset = UserPtr - UnalignedUserPtr;
513-
DCHECK_GE(Offset, 2 * sizeof(u32));
514-
// The BlockMarker has no security purpose, but is specifically meant for
515-
// the chunk iteration function that can be used in debugging situations.
516-
// It is the only situation where we have to locate the start of a chunk
517-
// based on its block address.
518-
reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
519-
reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
520-
Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
413+
return initChunkWithMemoryTagging(ClassId, Origin, Block, UserPtr, Size,
414+
SizeOrUnusedBytes, FillContents);
521415
}
522-
Header.ClassId = ClassId & Chunk::ClassIdMask;
523-
Header.State = Chunk::State::Allocated;
524-
Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
525-
Header.SizeOrUnusedBytes =
526-
(ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) &
527-
Chunk::SizeOrUnusedBytesMask;
528-
Chunk::storeHeader(Cookie, Ptr, &Header);
529-
530-
return TaggedPtr;
531416
}
532417

533418
NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0,
@@ -1148,6 +1033,171 @@ class Allocator {
11481033
reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes;
11491034
}
11501035

1036+
// Initializes a freshly obtained block on the path where memory tagging is
// not in use: optionally fills the block, writes the chunk header in front of
// the user memory and returns the user pointer.
//
// ClassId            Size class of the block; zero means the block does not
//                    come from the primary allocator.
// Origin             Allocation origin recorded in the header.
// Block              Start of the backing block (without header tag).
// UserPtr            Address handed back to the caller, already aligned.
// SizeOrUnusedBytes  Value stored in the header's SizeOrUnusedBytes field.
// FillContents       Whether and how to fill primary-backed blocks.
//
// Returns UserPtr as a pointer.
ALWAYS_INLINE void *initChunk(const uptr ClassId, const Chunk::Origin Origin,
                              void *Block, const uptr UserPtr,
                              const uptr SizeOrUnusedBytes,
                              const FillContentsMode FillContents) {
  // Compute the default pointer before adding the header tag. UserPtr was
  // derived from the untagged block address, so deriving this value from the
  // header-tagged Block would make the comparison below fail spuriously and
  // produce a bogus offset whenever addHeaderTag() alters the pointer bits.
  const uptr DefaultAlignedPtr =
      reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize();

  Block = addHeaderTag(Block);
  // Only do content fill when it's from primary allocator because secondary
  // allocator has filled the content.
  if (ClassId != 0 && UNLIKELY(FillContents != NoFill)) {
    // This condition is not necessarily unlikely, but since memset is
    // costly, we might as well mark it as such.
    memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte,
           PrimaryT::getSizeByClassId(ClassId));
  }

  Chunk::UnpackedHeader Header = {};

  if (UNLIKELY(DefaultAlignedPtr != UserPtr)) {
    const uptr Offset = UserPtr - DefaultAlignedPtr;
    DCHECK_GE(Offset, 2 * sizeof(u32));
    // The BlockMarker has no security purpose, but is specifically meant for
    // the chunk iteration function that can be used in debugging situations.
    // It is the only situation where we have to locate the start of a chunk
    // based on its block address.
    reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
    reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
    Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
  }

  Header.ClassId = ClassId & Chunk::ClassIdMask;
  Header.State = Chunk::State::Allocated;
  Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
  Header.SizeOrUnusedBytes = SizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask;
  // The header is stored through the header-tagged alias of the user pointer.
  Chunk::storeHeader(Cookie, reinterpret_cast<void *>(addHeaderTag(UserPtr)),
                     &Header);

  return reinterpret_cast<void *>(UserPtr);
}
1075+
1076+
// Initializes a freshly obtained block on the path where memory tagging is in
// use: tags (and thereby zeroes) the user memory for primary-backed blocks,
// tags the header region for other blocks, writes the chunk header and
// returns the pointer to hand back to the caller.
//
// ClassId            Size class of the block; zero means the block does not
//                    come from the primary allocator.
// Origin             Allocation origin recorded in the header.
// Block              Start of the backing block (without header tag).
// UserPtr            Untagged, already aligned user address.
// Size               Requested allocation size.
// SizeOrUnusedBytes  Value stored in the header's SizeOrUnusedBytes field.
// FillContents       Whether the caller asked for the contents to be filled.
//
// Returns the tagged user pointer for primary-backed blocks, and the plain
// user pointer otherwise.
NOINLINE void *
initChunkWithMemoryTagging(const uptr ClassId, const Chunk::Origin Origin,
                           void *Block, const uptr UserPtr, const uptr Size,
                           const uptr SizeOrUnusedBytes,
                           const FillContentsMode FillContents) {
  const Options Options = Primary.Options.load();
  DCHECK(useMemoryTagging<Config>(Options));

  // Compute the default pointer before adding the header tag. UserPtr was
  // derived from the untagged block address, and the non-primary branch below
  // replaces Block with its header-tagged alias; deriving this value
  // afterwards would make the alignment check fail spuriously and produce a
  // bogus offset.
  const uptr DefaultAlignedPtr =
      reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize();

  void *Ptr = reinterpret_cast<void *>(UserPtr);
  void *TaggedPtr = Ptr;

  if (LIKELY(ClassId)) {
    // We only need to zero or tag the contents for Primary backed
    // allocations. We only set tags for primary allocations in order to avoid
    // faulting potentially large numbers of pages for large secondary
    // allocations. We assume that guard pages are enough to protect these
    // allocations.
    //
    // FIXME: When the kernel provides a way to set the background tag of a
    // mapping, we should be able to tag secondary allocations as well.
    //
    // When memory tagging is enabled, zeroing the contents is done as part of
    // setting the tag.

    uptr PrevUserPtr;
    // Header of the chunk that previously occupied this block, if any.
    Chunk::UnpackedHeader PrevHeader;
    const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId);
    const uptr BlockUptr = reinterpret_cast<uptr>(Block);
    const uptr BlockEnd = BlockUptr + BlockSize;
    // If possible, try to reuse the UAF tag that was set by deallocate().
    // For simplicity, only reuse tags if we have the same start address as
    // the previous allocation. This handles the majority of cases since
    // most allocations will not be more aligned than the minimum alignment.
    //
    // We need to handle situations involving reclaimed chunks, and retag
    // the reclaimed portions if necessary. In the case where the chunk is
    // fully reclaimed, the chunk's header will be zero, which will trigger
    // the code path for new mappings and invalid chunks that prepares the
    // chunk from scratch. There are three possibilities for partial
    // reclaiming:
    //
    // (1) Header was reclaimed, data was partially reclaimed.
    // (2) Header was not reclaimed, all data was reclaimed (e.g. because
    //     data started on a page boundary).
    // (3) Header was not reclaimed, data was partially reclaimed.
    //
    // Case (1) will be handled in the same way as for full reclaiming,
    // since the header will be zero.
    //
    // We can detect case (2) by loading the tag from the start
    // of the chunk. If it is zero, it means that either all data was
    // reclaimed (since we never use zero as the chunk tag), or that the
    // previous allocation was of size zero. Either way, we need to prepare
    // a new chunk from scratch.
    //
    // We can detect case (3) by moving to the next page (if covered by the
    // chunk) and loading the tag of its first granule. If it is zero, it
    // means that all following pages may need to be retagged. On the other
    // hand, if it is nonzero, we can assume that all following pages are
    // still tagged, according to the logic that if any of the pages
    // following the next page were reclaimed, the next page would have been
    // reclaimed as well.
    uptr TaggedUserPtr;
    if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &PrevHeader) &&
        PrevUserPtr == UserPtr &&
        (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) {
      uptr PrevEnd = TaggedUserPtr + PrevHeader.SizeOrUnusedBytes;
      const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached());
      if (NextPage < PrevEnd && loadTag(NextPage) != NextPage)
        PrevEnd = NextPage;
      TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
      resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd);
      if (UNLIKELY(FillContents != NoFill && !PrevHeader.OriginOrWasZeroed)) {
        // If an allocation needs to be zeroed (i.e. calloc) we can normally
        // avoid zeroing the memory now since we can rely on memory having
        // been zeroed on free, as this is normally done while setting the
        // UAF tag. But if tagging was disabled per-thread when the memory
        // was freed, it would not have been retagged and thus zeroed, and
        // therefore it needs to be zeroed now.
        memset(TaggedPtr, 0,
               Min(Size, roundUp(PrevEnd - TaggedUserPtr,
                                 archMemoryTagGranuleSize())));
      } else if (Size) {
        // Clear any stack metadata that may have previously been stored in
        // the chunk data.
        memset(TaggedPtr, 0, archMemoryTagGranuleSize());
      }
    } else {
      const uptr OddEvenMask =
          computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId);
      TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd);
    }
    storePrimaryAllocationStackMaybe(Options, Ptr);
  } else {
    // Not primary-backed: only the region from the block start up to the user
    // pointer is tagged, using the header tag.
    Block = addHeaderTag(Block);
    Ptr = addHeaderTag(Ptr);
    storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr));
    storeSecondaryAllocationStackMaybe(Options, Ptr, Size);
  }

  Chunk::UnpackedHeader Header = {};

  if (UNLIKELY(DefaultAlignedPtr != UserPtr)) {
    const uptr Offset = UserPtr - DefaultAlignedPtr;
    DCHECK_GE(Offset, 2 * sizeof(u32));
    // The BlockMarker has no security purpose, but is specifically meant for
    // the chunk iteration function that can be used in debugging situations.
    // It is the only situation where we have to locate the start of a chunk
    // based on its block address.
    reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
    reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
    Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
  }

  Header.ClassId = ClassId & Chunk::ClassIdMask;
  Header.State = Chunk::State::Allocated;
  Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
  Header.SizeOrUnusedBytes = SizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask;
  Chunk::storeHeader(Cookie, Ptr, &Header);

  return TaggedPtr;
}
1200+
11511201
void quarantineOrDeallocateChunk(const Options &Options, void *TaggedPtr,
11521202
Chunk::UnpackedHeader *Header,
11531203
uptr Size) NO_THREAD_SAFETY_ANALYSIS {

0 commit comments

Comments
 (0)