Skip to content

Commit dc120ae

Browse files
omjavaidtomtor
authored andcommitted
[CodeGen] Inline stack guard check on Windows (llvm#136290)
This patch optimizes the Windows security cookie check mechanism by moving the comparison inline and only calling __security_check_cookie when the check fails. This reduces the overhead of making a DLL call for every function return. Previously, we implemented this optimization through a machine pass (X86WinFixupBufferSecurityCheckPass) in PR llvm#95904 submitted by @mahesh-attarde. We have reverted that pass in favor of this new approach. Also we have abandoned the AArch64 specific implementation of same pass in PR llvm#121938 in favor of this more general solution. The old machine instruction pass approach: - Scanned the generated code to find __security_check_cookie calls - Modified these calls by splitting basic blocks - Added comparison logic and conditional branching - Required complex block management and live register computation The new approach: - Implements the same optimization during instruction selection - Directly emits the comparison and conditional branching - No need for post-processing or basic block manipulation - Disables optimization at -Oz. Thanks @tamaspetz, @efriedma-quic and @arsenm for their help.
1 parent 533349a commit dc120ae

File tree

12 files changed

+333
-313
lines changed

12 files changed

+333
-313
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 78 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3037,8 +3037,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
30373037

30383038
// First create the loads to the guard/stack slot for the comparison.
30393039
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3040-
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
3041-
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
3040+
auto &DL = DAG.getDataLayout();
3041+
EVT PtrTy = TLI.getFrameIndexTy(DL);
3042+
EVT PtrMemTy = TLI.getPointerMemTy(DL, DL.getAllocaAddrSpace());
30423043

30433044
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
30443045
int FI = MFI.getStackProtectorIndex();
@@ -3047,8 +3048,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
30473048
SDLoc dl = getCurSDLoc();
30483049
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
30493050
const Module &M = *ParentBB->getParent()->getFunction().getParent();
3050-
Align Align =
3051-
DAG.getDataLayout().getPrefTypeAlign(PointerType::get(M.getContext(), 0));
3051+
Align Align = DL.getPrefTypeAlign(
3052+
PointerType::get(M.getContext(), DL.getAllocaAddrSpace()));
30523053

30533054
// Generate code to load the content of the guard slot.
30543055
SDValue GuardVal = DAG.getLoad(
@@ -3059,8 +3060,14 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
30593060
if (TLI.useStackGuardXorFP())
30603061
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
30613062

3062-
// Retrieve guard check function, nullptr if instrumentation is inlined.
3063-
if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
3063+
// If we're using function-based instrumentation, call the guard check
3064+
// function
3065+
if (SPD.shouldEmitFunctionBasedCheckStackProtector()) {
3066+
// Get the guard check function from the target and verify it exists since
3067+
// we're using function-based instrumentation
3068+
const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M);
3069+
assert(GuardCheckFn && "Guard check function is null");
3070+
30643071
// The target provides a guard check function to validate the guard value.
30653072
// Generate a call to that function with the content of the guard slot as
30663073
// argument.
@@ -3101,10 +3108,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
31013108
}
31023109

31033110
// Perform the comparison via a getsetcc.
3104-
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
3105-
*DAG.getContext(),
3106-
Guard.getValueType()),
3107-
Guard, GuardVal, ISD::SETNE);
3111+
SDValue Cmp = DAG.getSetCC(
3112+
dl, TLI.getSetCCResultType(DL, *DAG.getContext(), Guard.getValueType()),
3113+
Guard, GuardVal, ISD::SETNE);
31083114

31093115
// If the guard/stackslot do not equal, branch to failure MBB.
31103116
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
@@ -3126,14 +3132,69 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
31263132
/// For a high level explanation of how this fits into the stack protector
31273133
/// generation see the comment on the declaration of class
31283134
/// StackProtectorDescriptor.
3129-
void
3130-
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
3135+
void SelectionDAGBuilder::visitSPDescriptorFailure(
3136+
StackProtectorDescriptor &SPD) {
3137+
31313138
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3132-
TargetLowering::MakeLibCallOptions CallOptions;
3133-
CallOptions.setDiscardResult(true);
3134-
SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
3135-
MVT::isVoid, {}, CallOptions, getCurSDLoc())
3136-
.second;
3139+
MachineBasicBlock *ParentBB = SPD.getParentMBB();
3140+
const Module &M = *ParentBB->getParent()->getFunction().getParent();
3141+
SDValue Chain;
3142+
3143+
// For -Oz builds with a guard check function, we use function-based
3144+
// instrumentation. Otherwise, if we have a guard check function, we call it
3145+
// in the failure block.
3146+
auto *GuardCheckFn = TLI.getSSPStackGuardCheck(M);
3147+
if (GuardCheckFn && !SPD.shouldEmitFunctionBasedCheckStackProtector()) {
3148+
// First create the loads to the guard/stack slot for the comparison.
3149+
auto &DL = DAG.getDataLayout();
3150+
EVT PtrTy = TLI.getFrameIndexTy(DL);
3151+
EVT PtrMemTy = TLI.getPointerMemTy(DL, DL.getAllocaAddrSpace());
3152+
3153+
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
3154+
int FI = MFI.getStackProtectorIndex();
3155+
3156+
SDLoc dl = getCurSDLoc();
3157+
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
3158+
Align Align = DL.getPrefTypeAlign(
3159+
PointerType::get(M.getContext(), DL.getAllocaAddrSpace()));
3160+
3161+
// Generate code to load the content of the guard slot.
3162+
SDValue GuardVal = DAG.getLoad(
3163+
PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
3164+
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
3165+
MachineMemOperand::MOVolatile);
3166+
3167+
if (TLI.useStackGuardXorFP())
3168+
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
3169+
3170+
// The target provides a guard check function to validate the guard value.
3171+
// Generate a call to that function with the content of the guard slot as
3172+
// argument.
3173+
FunctionType *FnTy = GuardCheckFn->getFunctionType();
3174+
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
3175+
3176+
TargetLowering::ArgListTy Args;
3177+
TargetLowering::ArgListEntry Entry;
3178+
Entry.Node = GuardVal;
3179+
Entry.Ty = FnTy->getParamType(0);
3180+
if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
3181+
Entry.IsInReg = true;
3182+
Args.push_back(Entry);
3183+
3184+
TargetLowering::CallLoweringInfo CLI(DAG);
3185+
CLI.setDebugLoc(getCurSDLoc())
3186+
.setChain(DAG.getEntryNode())
3187+
.setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
3188+
getValue(GuardCheckFn), std::move(Args));
3189+
3190+
Chain = TLI.LowerCallTo(CLI).second;
3191+
} else {
3192+
TargetLowering::MakeLibCallOptions CallOptions;
3193+
CallOptions.setDiscardResult(true);
3194+
Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
3195+
{}, CallOptions, getCurSDLoc())
3196+
.second;
3197+
}
31373198

31383199
// Emit a trap instruction if we are required to do so.
31393200
const TargetOptions &TargetOpts = DAG.getTarget().Options;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,7 +1877,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
18771877

18781878
if (SP->shouldEmitSDCheck(*LLVMBB)) {
18791879
bool FunctionBasedInstrumentation =
1880-
TLI->getSSPStackGuardCheck(*Fn.getParent());
1880+
TLI->getSSPStackGuardCheck(*Fn.getParent()) && Fn.hasMinSize();
18811881
SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->getMBB(LLVMBB),
18821882
FunctionBasedInstrumentation);
18831883
}
@@ -1950,8 +1950,7 @@ SelectionDAGISel::FinishBasicBlock() {
19501950

19511951
// Add load and check to the basicblock.
19521952
FuncInfo->MBB = ParentMBB;
1953-
FuncInfo->InsertPt =
1954-
findSplitPointForStackProtector(ParentMBB, *TII);
1953+
FuncInfo->InsertPt = findSplitPointForStackProtector(ParentMBB, *TII);
19551954
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
19561955
CurDAG->setRoot(SDB->getRoot());
19571956
SDB->clear();
@@ -1973,8 +1972,7 @@ SelectionDAGISel::FinishBasicBlock() {
19731972
findSplitPointForStackProtector(ParentMBB, *TII);
19741973

19751974
// Splice the terminator of ParentMBB into SuccessMBB.
1976-
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
1977-
SplitPoint,
1975+
SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
19781976
ParentMBB->end());
19791977

19801978
// Add compare/jump on neq/jump to the parent BB.

llvm/lib/Target/X86/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ set(sources
8585
X86VZeroUpper.cpp
8686
X86WinEHState.cpp
8787
X86WinEHUnwindV2.cpp
88-
X86WinFixupBufferSecurityCheck.cpp
8988
X86InsertWait.cpp
9089
GISel/X86CallLowering.cpp
9190
GISel/X86InstructionSelector.cpp

llvm/lib/Target/X86/X86.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,6 @@ FunctionPass *createX86OptimizeLEAs();
7373
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
7474
FunctionPass *createX86FixupSetCC();
7575

76-
/// Return a pass that transform inline buffer security check into seperate bb
77-
FunctionPass *createX86WinFixupBufferSecurityCheckPass();
78-
7976
/// Return a pass that avoids creating store forward block issues in the hardware.
8077
FunctionPass *createX86AvoidStoreForwardingBlocks();
8178

@@ -195,7 +192,6 @@ void initializeX86ExpandPseudoPass(PassRegistry &);
195192
void initializeX86FastPreTileConfigPass(PassRegistry &);
196193
void initializeX86FastTileConfigPass(PassRegistry &);
197194
void initializeX86FixupSetCCPassPass(PassRegistry &);
198-
void initializeX86WinFixupBufferSecurityCheckPassPass(PassRegistry &);
199195
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
200196
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
201197
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,6 @@ bool X86PassConfig::addPreISel() {
554554
void X86PassConfig::addPreRegAlloc() {
555555
if (getOptLevel() != CodeGenOptLevel::None) {
556556
addPass(&LiveRangeShrinkID);
557-
addPass(createX86WinFixupBufferSecurityCheckPass());
558557
addPass(createX86FixupSetCC());
559558
addPass(createX86OptimizeLEAs());
560559
addPass(createX86CallFrameOptimization());

0 commit comments

Comments
 (0)