Commit e3178e9

[scudo] Split the code path of memory tagging out from allocate()
Memory tagging is a feature that requires a system reboot to enable, which means the code paths guarded by it are never executed when it is off. However, those paths were only marked with UNLIKELY(), which does not convey that expectation well: the generated assembly interleaves instructions for the tagged and untagged cases, so the I-cache may end up holding many instructions that are never used. This change explicitly splits the two cases into separate code blocks. It introduces a small amount of duplicated code, but it creates two independent execution paths and improves cache locality.
1 parent a1c2928 commit e3178e9
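
To illustrate the pattern the commit message describes, here is a minimal, self-contained sketch (not part of the patch): instead of guarding a rarely-taken path with UNLIKELY() inside one large function, the cold path is moved into a separate out-of-line function so its instructions no longer interleave with the hot path. The names FeatureEnabled, initFast, initSlowFeature, and allocateLike are hypothetical; scudo's own NOINLINE macro is approximated here with the GCC/Clang noinline attribute.

    #include <cstdio>

    namespace {

    // Stand-in for a feature that is fixed at boot, like memory tagging.
    bool FeatureEnabled = false;

    // Hot path: small and inlinable into the caller.
    inline void *initFast(void *Block) { return Block; }

    // Cold path: kept out of line so the caller's hot path stays compact and
    // the unused instructions do not occupy the same I-cache lines.
    __attribute__((noinline)) void *initSlowFeature(void *Block) {
      std::puts("feature-specific initialization");
      return Block;
    }

    void *allocateLike(void *Block) {
      // One branch selects between two self-contained code paths, mirroring
      // the initChunk() / initChunkWithMemoryTagging() split in this commit.
      if (__builtin_expect(!FeatureEnabled, 1))
        return initFast(Block);
      return initSlowFeature(Block);
    }

    } // namespace

    int main() {
      int Dummy = 0;
      std::printf("user pointer: %p\n", allocateLike(&Dummy));
      return 0;
    }
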

File tree

1 file changed: +175 −125 lines changed


compiler-rt/lib/scudo/standalone/combined.h

Lines changed: 175 additions & 125 deletions
@@ -401,133 +401,18 @@ class Allocator {
       reportOutOfMemory(NeededSize);
     }
 
-    const uptr BlockUptr = reinterpret_cast<uptr>(Block);
-    const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize();
-    const uptr UserPtr = roundUp(UnalignedUserPtr, Alignment);
-
-    void *Ptr = reinterpret_cast<void *>(UserPtr);
-    void *TaggedPtr = Ptr;
-    if (LIKELY(ClassId)) {
-      // We only need to zero or tag the contents for Primary backed
-      // allocations. We only set tags for primary allocations in order to avoid
-      // faulting potentially large numbers of pages for large secondary
-      // allocations. We assume that guard pages are enough to protect these
-      // allocations.
-      //
-      // FIXME: When the kernel provides a way to set the background tag of a
-      // mapping, we should be able to tag secondary allocations as well.
-      //
-      // When memory tagging is enabled, zeroing the contents is done as part of
-      // setting the tag.
-      if (UNLIKELY(useMemoryTagging<Config>(Options))) {
-        uptr PrevUserPtr;
-        Chunk::UnpackedHeader Header;
-        const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId);
-        const uptr BlockEnd = BlockUptr + BlockSize;
-        // If possible, try to reuse the UAF tag that was set by deallocate().
-        // For simplicity, only reuse tags if we have the same start address as
-        // the previous allocation. This handles the majority of cases since
-        // most allocations will not be more aligned than the minimum alignment.
-        //
-        // We need to handle situations involving reclaimed chunks, and retag
-        // the reclaimed portions if necessary. In the case where the chunk is
-        // fully reclaimed, the chunk's header will be zero, which will trigger
-        // the code path for new mappings and invalid chunks that prepares the
-        // chunk from scratch. There are three possibilities for partial
-        // reclaiming:
-        //
-        // (1) Header was reclaimed, data was partially reclaimed.
-        // (2) Header was not reclaimed, all data was reclaimed (e.g. because
-        //     data started on a page boundary).
-        // (3) Header was not reclaimed, data was partially reclaimed.
-        //
-        // Case (1) will be handled in the same way as for full reclaiming,
-        // since the header will be zero.
-        //
-        // We can detect case (2) by loading the tag from the start
-        // of the chunk. If it is zero, it means that either all data was
-        // reclaimed (since we never use zero as the chunk tag), or that the
-        // previous allocation was of size zero. Either way, we need to prepare
-        // a new chunk from scratch.
-        //
-        // We can detect case (3) by moving to the next page (if covered by the
-        // chunk) and loading the tag of its first granule. If it is zero, it
-        // means that all following pages may need to be retagged. On the other
-        // hand, if it is nonzero, we can assume that all following pages are
-        // still tagged, according to the logic that if any of the pages
-        // following the next page were reclaimed, the next page would have been
-        // reclaimed as well.
-        uptr TaggedUserPtr;
-        if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) &&
-            PrevUserPtr == UserPtr &&
-            (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) {
-          uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes;
-          const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached());
-          if (NextPage < PrevEnd && loadTag(NextPage) != NextPage)
-            PrevEnd = NextPage;
-          TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
-          resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd);
-          if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) {
-            // If an allocation needs to be zeroed (i.e. calloc) we can normally
-            // avoid zeroing the memory now since we can rely on memory having
-            // been zeroed on free, as this is normally done while setting the
-            // UAF tag. But if tagging was disabled per-thread when the memory
-            // was freed, it would not have been retagged and thus zeroed, and
-            // therefore it needs to be zeroed now.
-            memset(TaggedPtr, 0,
-                   Min(Size, roundUp(PrevEnd - TaggedUserPtr,
-                                     archMemoryTagGranuleSize())));
-          } else if (Size) {
-            // Clear any stack metadata that may have previously been stored in
-            // the chunk data.
-            memset(TaggedPtr, 0, archMemoryTagGranuleSize());
-          }
-        } else {
-          const uptr OddEvenMask =
-              computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId);
-          TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd);
-        }
-        storePrimaryAllocationStackMaybe(Options, Ptr);
-      } else {
-        Block = addHeaderTag(Block);
-        Ptr = addHeaderTag(Ptr);
-        if (UNLIKELY(FillContents != NoFill)) {
-          // This condition is not necessarily unlikely, but since memset is
-          // costly, we might as well mark it as such.
-          memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte,
-                 PrimaryT::getSizeByClassId(ClassId));
-        }
-      }
+    const uptr UserPtr = roundUp(
+        reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize(), Alignment);
+    const uptr SizeOrUnusedBytes =
+        ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size);
+
+    if (LIKELY(!useMemoryTagging<Config>(Options))) {
+      return initChunk(ClassId, Origin, Block, UserPtr, SizeOrUnusedBytes,
+                       FillContents);
     } else {
-      Block = addHeaderTag(Block);
-      Ptr = addHeaderTag(Ptr);
-      if (UNLIKELY(useMemoryTagging<Config>(Options))) {
-        storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr));
-        storeSecondaryAllocationStackMaybe(Options, Ptr, Size);
-      }
-    }
-
-    Chunk::UnpackedHeader Header = {};
-    if (UNLIKELY(UnalignedUserPtr != UserPtr)) {
-      const uptr Offset = UserPtr - UnalignedUserPtr;
-      DCHECK_GE(Offset, 2 * sizeof(u32));
-      // The BlockMarker has no security purpose, but is specifically meant for
-      // the chunk iteration function that can be used in debugging situations.
-      // It is the only situation where we have to locate the start of a chunk
-      // based on its block address.
-      reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
-      reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
-      Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
+      return initChunkWithMemoryTagging(ClassId, Origin, Block, UserPtr, Size,
+                                        SizeOrUnusedBytes, FillContents);
     }
-    Header.ClassId = ClassId & Chunk::ClassIdMask;
-    Header.State = Chunk::State::Allocated;
-    Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
-    Header.SizeOrUnusedBytes =
-        (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) &
-        Chunk::SizeOrUnusedBytesMask;
-    Chunk::storeHeader(Cookie, Ptr, &Header);
-
-    return TaggedPtr;
   }
 
   NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0,
@@ -1143,6 +1028,171 @@ class Allocator {
           reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes;
   }
 
+  ALWAYS_INLINE void *initChunk(const uptr ClassId, const Chunk::Origin Origin,
+                                void *Block, const uptr UserPtr,
+                                const uptr SizeOrUnusedBytes,
+                                const FillContentsMode FillContents) {
+    Block = addHeaderTag(Block);
+    // Only do content fill when it's from primary allocator because secondary
+    // allocator has filled the content.
+    if (ClassId != 0 && UNLIKELY(FillContents != NoFill)) {
+      // This condition is not necessarily unlikely, but since memset is
+      // costly, we might as well mark it as such.
+      memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte,
+             PrimaryT::getSizeByClassId(ClassId));
+    }
+
+    Chunk::UnpackedHeader Header = {};
+
+    const uptr DefaultAlignedPtr =
+        reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize();
+    if (UNLIKELY(DefaultAlignedPtr != UserPtr)) {
+      const uptr Offset = UserPtr - DefaultAlignedPtr;
+      DCHECK_GE(Offset, 2 * sizeof(u32));
+      // The BlockMarker has no security purpose, but is specifically meant for
+      // the chunk iteration function that can be used in debugging situations.
+      // It is the only situation where we have to locate the start of a chunk
+      // based on its block address.
+      reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
+      reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
+      Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
+    }
+
+    Header.ClassId = ClassId & Chunk::ClassIdMask;
+    Header.State = Chunk::State::Allocated;
+    Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
+    Header.SizeOrUnusedBytes = SizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask;
+    Chunk::storeHeader(Cookie, reinterpret_cast<void *>(addHeaderTag(UserPtr)),
+                       &Header);
+
+    return reinterpret_cast<void *>(UserPtr);
+  }
+
+  NOINLINE void *
+  initChunkWithMemoryTagging(const uptr ClassId, const Chunk::Origin Origin,
+                             void *Block, const uptr UserPtr, const uptr Size,
+                             const uptr SizeOrUnusedBytes,
+                             const FillContentsMode FillContents) {
+    const Options Options = Primary.Options.load();
+    DCHECK(useMemoryTagging<Config>(Options));
+
+    void *Ptr = reinterpret_cast<void *>(UserPtr);
+    void *TaggedPtr = Ptr;
+
+    if (LIKELY(ClassId)) {
+      // We only need to zero or tag the contents for Primary backed
+      // allocations. We only set tags for primary allocations in order to avoid
+      // faulting potentially large numbers of pages for large secondary
+      // allocations. We assume that guard pages are enough to protect these
+      // allocations.
+      //
+      // FIXME: When the kernel provides a way to set the background tag of a
+      // mapping, we should be able to tag secondary allocations as well.
+      //
+      // When memory tagging is enabled, zeroing the contents is done as part of
+      // setting the tag.
+
+      uptr PrevUserPtr;
+      Chunk::UnpackedHeader Header;
+      const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId);
+      const uptr BlockUptr = reinterpret_cast<uptr>(Block);
+      const uptr BlockEnd = BlockUptr + BlockSize;
+      // If possible, try to reuse the UAF tag that was set by deallocate().
+      // For simplicity, only reuse tags if we have the same start address as
+      // the previous allocation. This handles the majority of cases since
+      // most allocations will not be more aligned than the minimum alignment.
+      //
+      // We need to handle situations involving reclaimed chunks, and retag
+      // the reclaimed portions if necessary. In the case where the chunk is
+      // fully reclaimed, the chunk's header will be zero, which will trigger
+      // the code path for new mappings and invalid chunks that prepares the
+      // chunk from scratch. There are three possibilities for partial
+      // reclaiming:
+      //
+      // (1) Header was reclaimed, data was partially reclaimed.
+      // (2) Header was not reclaimed, all data was reclaimed (e.g. because
+      //     data started on a page boundary).
+      // (3) Header was not reclaimed, data was partially reclaimed.
+      //
+      // Case (1) will be handled in the same way as for full reclaiming,
+      // since the header will be zero.
+      //
+      // We can detect case (2) by loading the tag from the start
+      // of the chunk. If it is zero, it means that either all data was
+      // reclaimed (since we never use zero as the chunk tag), or that the
+      // previous allocation was of size zero. Either way, we need to prepare
+      // a new chunk from scratch.
+      //
+      // We can detect case (3) by moving to the next page (if covered by the
+      // chunk) and loading the tag of its first granule. If it is zero, it
+      // means that all following pages may need to be retagged. On the other
+      // hand, if it is nonzero, we can assume that all following pages are
+      // still tagged, according to the logic that if any of the pages
+      // following the next page were reclaimed, the next page would have been
+      // reclaimed as well.
+      uptr TaggedUserPtr;
+      if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) &&
+          PrevUserPtr == UserPtr &&
+          (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) {
+        uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes;
+        const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached());
+        if (NextPage < PrevEnd && loadTag(NextPage) != NextPage)
+          PrevEnd = NextPage;
+        TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
+        resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd);
+        if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) {
+          // If an allocation needs to be zeroed (i.e. calloc) we can normally
+          // avoid zeroing the memory now since we can rely on memory having
+          // been zeroed on free, as this is normally done while setting the
+          // UAF tag. But if tagging was disabled per-thread when the memory
+          // was freed, it would not have been retagged and thus zeroed, and
+          // therefore it needs to be zeroed now.
+          memset(TaggedPtr, 0,
+                 Min(Size, roundUp(PrevEnd - TaggedUserPtr,
+                                   archMemoryTagGranuleSize())));
+        } else if (Size) {
+          // Clear any stack metadata that may have previously been stored in
+          // the chunk data.
+          memset(TaggedPtr, 0, archMemoryTagGranuleSize());
+        }
+      } else {
+        const uptr OddEvenMask =
+            computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId);
+        TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd);
+      }
+      storePrimaryAllocationStackMaybe(Options, Ptr);
+    } else {
+      Block = addHeaderTag(Block);
+      Ptr = addHeaderTag(Ptr);
+      storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr));
+      storeSecondaryAllocationStackMaybe(Options, Ptr, Size);
+    }
+
+    Chunk::UnpackedHeader Header = {};
+
+    const uptr DefaultAlignedPtr =
+        reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize();
+    if (UNLIKELY(DefaultAlignedPtr != UserPtr)) {
+      const uptr Offset = UserPtr - DefaultAlignedPtr;
+      DCHECK_GE(Offset, 2 * sizeof(u32));
+      // The BlockMarker has no security purpose, but is specifically meant for
+      // the chunk iteration function that can be used in debugging situations.
+      // It is the only situation where we have to locate the start of a chunk
+      // based on its block address.
+      reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
+      reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
+      Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
+    }
+
+    Header.ClassId = ClassId & Chunk::ClassIdMask;
+    Header.State = Chunk::State::Allocated;
+    Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
+    Header.SizeOrUnusedBytes = SizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask;
+    Chunk::storeHeader(Cookie, Ptr, &Header);
+
+    return TaggedPtr;
+  }
+
   void quarantineOrDeallocateChunk(const Options &Options, void *TaggedPtr,
                                    Chunk::UnpackedHeader *Header,
                                    uptr Size) NO_THREAD_SAFETY_ANALYSIS {
