Skip to content

Commit 73078ec

Browse files
committed
hwasan: Move memory access checks into small outlined functions on aarch64.
Each hwasan check requires emitting a small piece of code like this: https://clang.llvm.org/docs/HardwareAssistedAddressSanitizerDesign.html#memory-accesses The problem with this is that these code blocks typically bloat code size significantly. An obvious solution is to outline these blocks of code. In fact, this has already been implemented under the -hwasan-instrument-with-calls flag. However, as currently implemented this has a number of problems: - The functions use the same calling convention as regular C functions. This means that the backend must spill all temporary registers as required by the platform's C calling convention, even though the check only needs two registers on the hot path. - The functions take the address to be checked in a fixed register, which increases register pressure. Both of these factors can diminish the code size effect and increase the performance hit of -hwasan-instrument-with-calls. The solution that this patch implements is to involve the aarch64 backend in outlining the checks. An intrinsic and pseudo-instruction are created to represent a hwasan check. The pseudo-instruction is register allocated like any other instruction, and we allow the register allocator to select almost any register for the address to check. A particular combination of (register selection, type of check) triggers the creation in the backend of a function to handle the check for specifically that pair. The resulting functions are deduplicated by the linker. The pseudo-instruction (really the function) is specified to preserve all registers except for the registers that the AAPCS specifies may be clobbered by a call. To measure the code size and performance effect of this change, I took a number of measurements using Chromium for Android on aarch64, comparing a browser with inlined checks (the baseline) against a browser with outlined checks. Code size: Size of .text decreases from 243897420 to 171619972 bytes, or a 30% decrease. Performance: Using Chromium's blink_perf.layout microbenchmarks I measured a median performance regression of 6.24%. The fact that a perf/size tradeoff is evident here suggests that we might want to make the new behaviour conditional on -Os/-Oz. But for now I've enabled it unconditionally, my reasoning being that hwasan users typically expect a relatively large perf hit, and ~6% isn't really adding much. We may want to revisit this decision in the future, though. I also tried experimenting with varying the number of registers selectable by the hwasan check pseudo-instruction (which would result in fewer variants being created), on the hypothesis that creating fewer variants of the function would expose another perf/size tradeoff by reducing icache pressure from the check functions at the cost of register pressure. Although I did observe a code size increase with fewer registers, I did not observe a strong correlation between the number of registers and the performance of the resulting browser on the microbenchmarks, so I conclude that we might as well use ~all registers to get the maximum code size improvement. My results are below: Regs | .text size | Perf hit -----+------------+--------- ~all | 171619972 | 6.24% 16 | 171765192 | 7.03% 8 | 172917788 | 5.82% 4 | 177054016 | 6.89% Differential Revision: https://reviews.llvm.org/D56954 llvm-svn: 351920
1 parent 7cf2720 commit 73078ec

File tree

13 files changed

+435
-202
lines changed

13 files changed

+435
-202
lines changed

compiler-rt/lib/hwasan/hwasan_linux.cc

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -368,22 +368,27 @@ static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
368368
return AccessInfo{addr, size, is_store, !is_store, recover};
369369
}
370370

371-
static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) {
372-
AccessInfo ai = GetAccessInfo(info, uc);
373-
if (!ai.is_store && !ai.is_load)
374-
return false;
375-
371+
static void HandleTagMismatch(AccessInfo ai, uptr pc, uptr frame,
372+
ucontext_t *uc) {
376373
InternalMmapVector<BufferedStackTrace> stack_buffer(1);
377374
BufferedStackTrace *stack = stack_buffer.data();
378375
stack->Reset();
379-
SignalContext sig{info, uc};
380-
GetStackTrace(stack, kStackTraceMax, StackTrace::GetNextInstructionPc(sig.pc),
381-
sig.bp, uc, common_flags()->fast_unwind_on_fatal);
376+
GetStackTrace(stack, kStackTraceMax, pc, frame, uc,
377+
common_flags()->fast_unwind_on_fatal);
382378

383379
++hwasan_report_count;
384380

385381
bool fatal = flags()->halt_on_error || !ai.recover;
386382
ReportTagMismatch(stack, ai.addr, ai.size, ai.is_store, fatal);
383+
}
384+
385+
static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) {
386+
AccessInfo ai = GetAccessInfo(info, uc);
387+
if (!ai.is_store && !ai.is_load)
388+
return false;
389+
390+
SignalContext sig{info, uc};
391+
HandleTagMismatch(ai, StackTrace::GetNextInstructionPc(sig.pc), sig.bp, uc);
387392

388393
#if defined(__aarch64__)
389394
uc->uc_mcontext.pc += 4;
@@ -394,6 +399,19 @@ static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) {
394399
return true;
395400
}
396401

402+
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __hwasan_tag_mismatch(
403+
uptr addr, uptr access_info) {
404+
AccessInfo ai;
405+
ai.is_store = access_info & 0x10;
406+
ai.recover = false;
407+
ai.addr = addr;
408+
ai.size = 1 << (access_info & 0xf);
409+
410+
HandleTagMismatch(ai, (uptr)__builtin_return_address(0),
411+
(uptr)__builtin_frame_address(0), nullptr);
412+
__builtin_unreachable();
413+
}
414+
397415
static void OnStackUnwind(const SignalContext &sig, const void *,
398416
BufferedStackTrace *stack) {
399417
GetStackTrace(stack, kStackTraceMax, StackTrace::GetNextInstructionPc(sig.pc),

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,6 +1048,9 @@ def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>;
10481048
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
10491049
[IntrReadMem, IntrArgMemOnly]>;
10501050

1051+
def int_hwasan_check_memaccess :
1052+
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly]>;
1053+
10511054
// Xray intrinsics
10521055
//===----------------------------------------------------------------------===//
10531056
// Custom event logging for x-ray.

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/ADT/Triple.h"
2929
#include "llvm/ADT/Twine.h"
3030
#include "llvm/BinaryFormat/COFF.h"
31+
#include "llvm/BinaryFormat/ELF.h"
3132
#include "llvm/CodeGen/AsmPrinter.h"
3233
#include "llvm/CodeGen/MachineBasicBlock.h"
3334
#include "llvm/CodeGen/MachineFunction.h"
@@ -43,6 +44,7 @@
4344
#include "llvm/MC/MCContext.h"
4445
#include "llvm/MC/MCInst.h"
4546
#include "llvm/MC/MCInstBuilder.h"
47+
#include "llvm/MC/MCSectionELF.h"
4648
#include "llvm/MC/MCStreamer.h"
4749
#include "llvm/MC/MCSymbol.h"
4850
#include "llvm/Support/Casting.h"
@@ -95,6 +97,10 @@ class AArch64AsmPrinter : public AsmPrinter {
9597
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
9698
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
9799

100+
std::map<std::pair<unsigned, uint32_t>, MCSymbol *> HwasanMemaccessSymbols;
101+
void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI);
102+
void EmitHwasanMemaccessSymbols(Module &M);
103+
98104
void EmitSled(const MachineInstr &MI, SledKind Kind);
99105

100106
/// tblgen'erated driver function for lowering simple MI->MC
@@ -229,7 +235,109 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
229235
recordSled(CurSled, MI, Kind);
230236
}
231237

238+
void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
239+
unsigned Reg = MI.getOperand(0).getReg();
240+
uint32_t AccessInfo = MI.getOperand(1).getImm();
241+
MCSymbol *&Sym = HwasanMemaccessSymbols[{Reg, AccessInfo}];
242+
if (!Sym) {
243+
// FIXME: Make this work on non-ELF.
244+
if (!TM.getTargetTriple().isOSBinFormatELF())
245+
report_fatal_error("llvm.hwasan.check.memaccess only supported on ELF");
246+
247+
std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
248+
utostr(AccessInfo);
249+
Sym = OutContext.getOrCreateSymbol(SymName);
250+
}
251+
252+
EmitToStreamer(*OutStreamer,
253+
MCInstBuilder(AArch64::BL)
254+
.addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
255+
}
256+
257+
void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
258+
if (HwasanMemaccessSymbols.empty())
259+
return;
260+
261+
const Triple &TT = TM.getTargetTriple();
262+
assert(TT.isOSBinFormatELF());
263+
std::unique_ptr<MCSubtargetInfo> STI(
264+
TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
265+
266+
MCSymbol *HwasanTagMismatchSym =
267+
OutContext.getOrCreateSymbol("__hwasan_tag_mismatch");
268+
269+
for (auto &P : HwasanMemaccessSymbols) {
270+
unsigned Reg = P.first.first;
271+
uint32_t AccessInfo = P.first.second;
272+
MCSymbol *Sym = P.second;
273+
274+
OutStreamer->SwitchSection(OutContext.getELFSection(
275+
".text.hot", ELF::SHT_PROGBITS,
276+
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
277+
Sym->getName()));
278+
279+
OutStreamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
280+
OutStreamer->EmitSymbolAttribute(Sym, MCSA_Weak);
281+
OutStreamer->EmitSymbolAttribute(Sym, MCSA_Hidden);
282+
OutStreamer->EmitLabel(Sym);
283+
284+
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::UBFMXri)
285+
.addReg(AArch64::X16)
286+
.addReg(Reg)
287+
.addImm(4)
288+
.addImm(55),
289+
*STI);
290+
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBroX)
291+
.addReg(AArch64::W16)
292+
.addReg(AArch64::X9)
293+
.addReg(AArch64::X16)
294+
.addImm(0)
295+
.addImm(0),
296+
*STI);
297+
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::UBFMXri)
298+
.addReg(AArch64::X17)
299+
.addReg(Reg)
300+
.addImm(56)
301+
.addImm(63),
302+
*STI);
303+
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs)
304+
.addReg(AArch64::WZR)
305+
.addReg(AArch64::W16)
306+
.addReg(AArch64::W17)
307+
.addImm(0),
308+
*STI);
309+
MCSymbol *HandleMismatchSym = OutContext.createTempSymbol();
310+
OutStreamer->EmitInstruction(
311+
MCInstBuilder(AArch64::Bcc)
312+
.addImm(AArch64CC::NE)
313+
.addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
314+
*STI);
315+
OutStreamer->EmitInstruction(
316+
MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI);
317+
318+
OutStreamer->EmitLabel(HandleMismatchSym);
319+
if (Reg != AArch64::X0)
320+
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ORRXrs)
321+
.addReg(AArch64::X0)
322+
.addReg(AArch64::XZR)
323+
.addReg(Reg)
324+
.addImm(0),
325+
*STI);
326+
OutStreamer->EmitInstruction(MCInstBuilder(AArch64::MOVZXi)
327+
.addReg(AArch64::X1)
328+
.addImm(AccessInfo)
329+
.addImm(0),
330+
*STI);
331+
OutStreamer->EmitInstruction(
332+
MCInstBuilder(AArch64::B)
333+
.addExpr(MCSymbolRefExpr::create(HwasanTagMismatchSym, OutContext)),
334+
*STI);
335+
}
336+
}
337+
232338
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
339+
EmitHwasanMemaccessSymbols(M);
340+
233341
const Triple &TT = TM.getTargetTriple();
234342
if (TT.isOSBinFormatMachO()) {
235343
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -883,6 +991,10 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
883991
LowerPATCHABLE_TAIL_CALL(*MI);
884992
return;
885993

994+
case AArch64::HWASAN_CHECK_MEMACCESS:
995+
LowerHWASAN_CHECK_MEMACCESS(*MI);
996+
return;
997+
886998
case AArch64::SEH_StackAlloc:
887999
TS->EmitARM64WinCFIAllocStack(MI->getOperand(0).getImm());
8881000
return;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,13 @@ def MSRpstateImm4 : MSRpstateImm0_15;
763763
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
764764
[(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
765765

766+
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
767+
def HWASAN_CHECK_MEMACCESS : Pseudo<
768+
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
769+
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
770+
Sched<[]>;
771+
}
772+
766773
// The cycle counter PMC register is PMCCNTR_EL0.
767774
let Predicates = [HasPerfMon] in
768775
def : Pat<(readcyclecounter), (MRS 0xdce8)>;

llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,9 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass(
248248
case AArch64::GPR64spRegClassID:
249249
case AArch64::GPR64sponlyRegClassID:
250250
case AArch64::GPR64allRegClassID:
251+
case AArch64::GPR64noipRegClassID:
252+
case AArch64::GPR64common_and_GPR64noipRegClassID:
253+
case AArch64::GPR64noip_and_tcGPR64RegClassID:
251254
case AArch64::tcGPR64RegClassID:
252255
case AArch64::WSeqPairsClassRegClassID:
253256
case AArch64::XSeqPairsClassRegClassID:

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,11 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
205205
// BTI-protected function.
206206
def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
207207

208+
// Register set that excludes registers that are reserved for procedure calls.
209+
// This is used for pseudo-instructions that are actually implemented using a
210+
// procedure call.
211+
def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
212+
208213
// GPR register classes for post increment amount of vector load/store that
209214
// has alternate printing when Rm=31 and prints a constant immediate value
210215
// equal to the total number of bytes transferred.

0 commit comments

Comments
 (0)