Skip to content

Commit 72bff78

Browse files
scottconstable authored and tstellar committed
[X86] Add an Unoptimized Load Value Injection (LVI) Load Hardening Pass
@nikic raised an issue on D75936 that the added complexity to the O0 pipeline was causing noticeable slowdowns for `-O0` builds. This patch addresses the issue by adding a pass with equal security properties, but without any optimizations (and more importantly, without the need for expensive analysis dependencies). Reviewers: nikic, craig.topper, mattdr Reviewed By: craig.topper, mattdr Differential Revision: https://reviews.llvm.org/D80964
1 parent 8aa8aba commit 72bff78

File tree

5 files changed

+129
-5
lines changed

5 files changed

+129
-5
lines changed

llvm/lib/Target/X86/X86.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
134134
X86RegisterBankInfo &);
135135

136136
FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
137+
FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
137138
FunctionPass *createX86LoadValueInjectionRetHardeningPass();
138139
FunctionPass *createX86SpeculativeLoadHardeningPass();
139140

@@ -150,6 +151,7 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
150151
void initializeX86ExecutionDomainFixPass(PassRegistry &);
151152
void initializeX86ExpandPseudoPass(PassRegistry &);
152153
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
154+
void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
153155
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
154156
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
155157
void initializeX86OptimizeLEAPassPass(PassRegistry &);

llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,3 +822,79 @@ INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
822822
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
823823
return new X86LoadValueInjectionLoadHardeningPass();
824824
}
825+
826+
namespace {
827+
828+
/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
829+
/// analysis passes that add complexity to the pipeline. This complexity
830+
/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
831+
/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
832+
/// provide the same security as the optimized pass, but without adding
833+
/// unnecessary complexity to the LLVM pipeline.
834+
///
835+
/// The behavior of this pass is simply to insert an LFENCE after every load
836+
/// instruction.
837+
class X86LoadValueInjectionLoadHardeningUnoptimizedPass
838+
: public MachineFunctionPass {
839+
public:
840+
X86LoadValueInjectionLoadHardeningUnoptimizedPass()
841+
: MachineFunctionPass(ID) {}
842+
843+
StringRef getPassName() const override {
844+
return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
845+
}
846+
bool runOnMachineFunction(MachineFunction &MF) override;
847+
static char ID;
848+
};
849+
850+
} // end anonymous namespace
851+
852+
char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
853+
854+
bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
855+
MachineFunction &MF) {
856+
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
857+
<< " *****\n");
858+
const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>(); // mitigation is opt-in per subtarget
859+
if (!STI->useLVILoadHardening())
860+
return false;
861+
862+
// FIXME: support 32-bit
863+
if (!STI->is64Bit())
864+
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
865+
866+
// Don't skip functions with the "optnone" attr, but do participate in opt-bisect.
867+
const Function &F = MF.getFunction();
868+
if (!F.hasOptNone() && skipFunction(F))
869+
return false;
870+
871+
bool Modified = false;
872+
++NumFunctionsConsidered;
873+
874+
const TargetInstrInfo *TII = STI->getInstrInfo();
875+
for (auto &MBB : MF) {
876+
for (auto &MI : MBB) {
877+
if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
878+
MI.getOpcode() == X86::MFENCE)
879+
continue; // non-loads and existing fences need no extra hardening
880+
881+
MachineBasicBlock::iterator InsertionPt =
882+
MI.getNextNode() ? MI.getNextNode() : MBB.end(); // load may be last in its block
883+
BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); // fence immediately after the load
884+
++NumFences;
885+
Modified = true;
886+
}
887+
}
888+
889+
if (Modified)
890+
++NumFunctionsMitigated;
891+
892+
return Modified;
893+
}
894+
895+
INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
896+
"X86 LVI load hardening", false, false)
897+
898+
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
899+
return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
900+
}

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
498498

499499
void X86PassConfig::addPostRegAlloc() {
500500
addPass(createX86FloatingPointStackifierPass());
501-
addPass(createX86LoadValueInjectionLoadHardeningPass());
501+
if (getOptLevel() != CodeGenOpt::None) // optimized LVI hardening needs analysis passes
502+
addPass(createX86LoadValueInjectionLoadHardeningPass());
503+
else // at -O0, use the cheap fence-after-every-load variant with equal security
504+
addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
502505
}
503506

504507
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }

llvm/test/CodeGen/X86/O0-pipeline.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,7 @@
5555
; CHECK-NEXT: Fast Register Allocator
5656
; CHECK-NEXT: Bundle Machine CFG Edges
5757
; CHECK-NEXT: X86 FP Stackifier
58-
; CHECK-NEXT: MachineDominator Tree Construction
59-
; CHECK-NEXT: Machine Natural Loop Construction
60-
; CHECK-NEXT: Machine Dominance Frontier Construction
61-
; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening
58+
; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening (Unoptimized)
6259
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
6360
; CHECK-NEXT: Machine Optimization Remark Emitter
6461
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization

llvm/test/CodeGen/X86/lvi-hardening-loads.ll

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-ALL
22
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64
3+
; RUN: llc -O0 -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64-NOOPT
34

45
; Function Attrs: noinline nounwind optnone uwtable
56
define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 {
@@ -24,6 +25,13 @@ entry:
2425
; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp)
2526
; X64-NEXT: jmp .LBB0_1
2627

28+
; X64-NOOPT: # %bb.0: # %entry
29+
; X64-NOOPT-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
30+
; X64-NOOPT-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
31+
; X64-NOOPT-NEXT: movl $0, -{{[0-9]+}}(%rsp)
32+
; X64-NOOPT-NEXT: lfence
33+
; X64-NOOPT-NEXT: movl $0, -{{[0-9]+}}(%rsp)
34+
2735
for.cond: ; preds = %for.inc, %entry
2836
%0 = load i32, i32* %i, align 4
2937
%1 = load i32, i32* %secret_size.addr, align 4
@@ -38,6 +46,14 @@ for.cond: ; preds = %for.inc, %entry
3846
; X64-ALL-NEXT: lfence
3947
; X64-NEXT: jge .LBB0_5
4048

49+
; X64-NOOPT: .LBB0_1: # %for.cond
50+
; X64-NOOPT-NEXT: # =>This Inner Loop Header: Depth=1
51+
; X64-NOOPT-NEXT: movl -{{[0-9]+}}(%rsp), %eax
52+
; X64-NOOPT-NEXT: lfence
53+
; X64-NOOPT-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
54+
; X64-NOOPT-NEXT: lfence
55+
; X64-NOOPT-NEXT: jge .LBB0_6
56+
4157
for.body: ; preds = %for.cond
4258
%2 = load i32, i32* %i, align 4
4359
%rem = srem i32 %2, 2
@@ -55,6 +71,16 @@ for.body: ; preds = %for.cond
5571
; X64-NEXT: cmpl %ecx, %eax
5672
; X64-NEXT: jne .LBB0_4
5773

74+
; X64-NOOPT: # %bb.2: # %for.body
75+
; X64-NOOPT-NEXT: # in Loop: Header=BB0_1 Depth=1
76+
; X64-NOOPT-NEXT: movl -{{[0-9]+}}(%rsp), %eax
77+
; X64-NOOPT-NEXT: lfence
78+
; X64-NOOPT-NEXT: cltd
79+
; X64-NOOPT-NEXT: movl $2, %ecx
80+
; X64-NOOPT-NEXT: idivl %ecx
81+
; X64-NOOPT-NEXT: cmpl $0, %edx
82+
; X64-NOOPT-NEXT: jne .LBB0_4
83+
5884
if.then: ; preds = %for.body
5985
%3 = load i32**, i32*** %secret.addr, align 8
6086
%4 = load i32, i32* %ret_val, align 4
@@ -77,6 +103,18 @@ if.then: ; preds = %for.body
77103
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
78104
; X64-NEXT: jmp .LBB0_4
79105

106+
; X64-NOOPT: # %bb.3: # %if.then
107+
; X64-NOOPT-NEXT: # in Loop: Header=BB0_1 Depth=1
108+
; X64-NOOPT-NEXT: movq -{{[0-9]+}}(%rsp), %rax
109+
; X64-NOOPT-NEXT: lfence
110+
; X64-NOOPT-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
111+
; X64-NOOPT-NEXT: lfence
112+
; X64-NOOPT-NEXT: movq (%rax,%rcx,8), %rax
113+
; X64-NOOPT-NEXT: lfence
114+
; X64-NOOPT-NEXT: movl (%rax), %eax
115+
; X64-NOOPT-NEXT: lfence
116+
; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
117+
80118
if.end: ; preds = %if.then, %for.body
81119
br label %for.inc
82120

@@ -86,6 +124,14 @@ for.inc: ; preds = %if.end
86124
store i32 %inc, i32* %i, align 4
87125
br label %for.cond
88126

127+
; X64-NOOPT: .LBB0_5: # %for.inc
128+
; X64-NOOPT-NEXT: # in Loop: Header=BB0_1 Depth=1
129+
; X64-NOOPT-NEXT: movl -{{[0-9]+}}(%rsp), %eax
130+
; X64-NOOPT-NEXT: lfence
131+
; X64-NOOPT-NEXT: addl $1, %eax
132+
; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
133+
; X64-NOOPT-NEXT: jmp .LBB0_1
134+
89135
for.end: ; preds = %for.cond
90136
%8 = load i32, i32* %ret_val, align 4
91137
ret i32 %8

0 commit comments

Comments (0)