[X86,SimplifyCFG] Support conditional faulting load or store only #132032
Conversation
This fixes a bug when a target supports only conditional faulting loads; see the test case hoist_store_without_cstore. It adds the option disable-cload-cstore to emulate the single-load/store case.
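To make the failure mode concrete, here is a minimal C-level sketch (a hypothetical function, not taken from the test file) of the shape of code involved: when only the conditional-faulting load is legal, the guarded load is turned into a masked load, while the store is speculated by rewriting its stored value with a spec.store.select; the masked load must then be inserted before that select, which is what the new Sel parameter arranges.

```c++
// Hypothetical illustration (assumed names; not from the test file).
// With a target that supports conditional-faulting loads but not stores:
//   - the load of *p inside the branch becomes a <1 x i32> masked load;
//   - the store to *q is speculated, storing select(cond, loaded, 0);
// so the masked load has to be emitted before the select that uses it.
void hoist_store_without_cstore_like(bool cond, int *p, int *q) {
  *q = 0; // a prior store to q makes the guarded store speculatable
  if (cond)
    *q = *p;
}
```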
@llvm/pr-subscribers-llvm-transforms
Author: Phoebe Wang (phoebewang)
Changes: This fixes a bug when a target supports only conditional faulting loads; see the test case hoist_store_without_cstore. It adds the option disable-cload-cstore to emulate the single-load/store case.
Patch is 60.97 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/132032.diff
2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17c5fa6503f20..8a8baf9073f48 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -132,6 +132,11 @@ static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"to speculatively execute to eliminate conditional branch "
"(default = 6)"));
+static cl::opt<unsigned> DisableCloadCstore(
+ "disable-cload-cstore", cl::Hidden, cl::init(0),
+ cl::desc("Control to disable cond-faulting-load(1)/cond-faulting-store(2)"
+ "/all(3)"));
+
static cl::opt<unsigned>
HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
cl::init(20),
@@ -1682,22 +1687,22 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
static void hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
- std::optional<bool> Invert) {
+ std::optional<bool> Invert, Instruction *Sel) {
auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
// Construct the condition if needed.
BasicBlock *BB = BI->getParent();
- IRBuilder<> Builder(
- Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
Value *Mask = nullptr;
Value *MaskFalse = nullptr;
Value *MaskTrue = nullptr;
if (Invert.has_value()) {
+ IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
Mask = Builder.CreateBitCast(
*Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
} else {
+ IRBuilder<> Builder(BI);
MaskFalse = Builder.CreateBitCast(
Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
@@ -1723,13 +1728,20 @@ static void hoistConditionalLoadsStores(
PHINode *PN = nullptr;
Value *PassThru = nullptr;
if (Invert.has_value())
- for (User *U : I->users())
+ for (User *U : I->users()) {
if ((PN = dyn_cast<PHINode>(U))) {
PassThru = Builder.CreateBitCast(
PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
FixedVectorType::get(Ty, 1));
- break;
+ } else if (auto *Ins = cast<Instruction>(U);
+ Sel && Ins->getParent() == BB) {
+ // This happens when a store and/or a speculative instruction between
+ // the load and store were hoisted into BB. Make sure the masked load
+ // is inserted before its use.
+ // We assume there is only one such use.
+ Builder.SetInsertPoint(Ins);
}
+ }
MaskedLoadStore = Builder.CreateMaskedLoad(
FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1769,10 +1781,10 @@ static bool isSafeCheapLoadStore(const Instruction *I,
const TargetTransformInfo &TTI) {
// Not handle volatile or atomic.
if (auto *L = dyn_cast<LoadInst>(I)) {
- if (!L->isSimple())
+ if (!L->isSimple() || (DisableCloadCstore & 1))
return false;
} else if (auto *S = dyn_cast<StoreInst>(I)) {
- if (!S->isSimple())
+ if (!S->isSimple() || (DisableCloadCstore & 2))
return false;
} else
return false;
@@ -3308,6 +3320,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
// If we get here, we can hoist the instruction and if-convert.
LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ Instruction *Sel = nullptr;
// Insert a select of the value of the speculated store.
if (SpeculatedStoreValue) {
IRBuilder<NoFolder> Builder(BI);
@@ -3318,6 +3331,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
std::swap(TrueV, FalseV);
Value *S = Builder.CreateSelect(
BrCond, TrueV, FalseV, "spec.store.select", BI);
+ Sel = cast<Instruction>(S);
SpeculatedStore->setOperand(0, S);
SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
SpeculatedStore->getDebugLoc());
@@ -3390,7 +3404,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
std::prev(ThenBB->end()));
if (!SpeculatedConditionalLoadsStores.empty())
- hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
+ hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
+ Sel);
// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
@@ -8042,7 +8057,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (CanSpeculateConditionalLoadsStores()) {
hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
- std::nullopt);
+ std::nullopt, nullptr);
return requestResimplify();
}
}
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 5c9058b482320..eacabde9f3d61 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -1,24 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=0 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=1 -S | FileCheck %s --check-prefixes=CHECK,NONE,STOREONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=2 -S | FileCheck %s --check-prefixes=CHECK,NONE,LOADONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=3 -S | FileCheck %s --check-prefixes=NONE,NONEONLY
;; Basic case: check masked.load/store is generated for i16/i32/i64.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @basic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
-; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @basic(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; LOADSTORE-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; LOADSTORE-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @basic(
+; NONE-NEXT: entry:
+; NONE-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_END:%.*]]
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT: [[TMP1:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
+; NONE-NEXT: store i16 [[TMP0]], ptr [[B]], align 2
+; NONE-NEXT: store i32 [[TMP1]], ptr [[P]], align 4
+; NONE-NEXT: store i64 [[TMP2]], ptr [[Q]], align 8
+; NONE-NEXT: br label [[IF_END]]
+; NONE: if.end:
+; NONE-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
@@ -41,16 +59,27 @@ if.end:
;; Successor 1 branches to successor 0.
define void @succ1to0(ptr %p, ptr %q, i32 %a) {
-; CHECK-LABEL: @succ1to0(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @succ1to0(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @succ1to0(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; NONE: if.end:
+; NONE-NEXT: ret void
+; NONE: if.then:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[IF_END]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -67,14 +96,45 @@ if.then:
;; Successor 1 branches to successor 0 and there is a phi node.
define i32 @succ1to0_phi(ptr %p) {
-; CHECK-LABEL: @succ1to0_phi(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LOADSTORE-LABEL: @succ1to0_phi(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: ret i32 [[TMP3]]
+;
+; STOREONLY-LABEL: @succ1to0_phi(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; STOREONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; STOREONLY: if.false:
+; STOREONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; STOREONLY-NEXT: br label [[IF_TRUE]]
+; STOREONLY: if.true:
+; STOREONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; STOREONLY-NEXT: ret i32 [[RES]]
+;
+; LOADONLY-LABEL: @succ1to0_phi(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADONLY-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADONLY-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADONLY-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADONLY-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADONLY-NEXT: ret i32 [[TMP3]]
+;
+; NONEONLY-LABEL: @succ1to0_phi(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONEONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; NONEONLY-NEXT: br label [[IF_TRUE]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONEONLY-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
@@ -91,16 +151,28 @@ if.true:
;; Successor 0 branches to successor 1.
define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @succ0to1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @succ0to1(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @succ0to1(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE: if.false:
+; NONE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; NONE-NEXT: ret void
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[IF_FALSE]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -121,16 +193,29 @@ if.end:
;; Load after store can be hoisted.
define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
-; CHECK-LABEL: @load_after_store(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
-; CHECK-NEXT: ret i64 [[SPEC_SELECT]]
+; LOADSTORE-LABEL: @load_after_store(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; LOADSTORE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
+; LOADSTORE-NEXT: ret i64 [[SPEC_SELECT]]
+;
+; NONE-LABEL: @load_after_store(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[COMMON_RET:%.*]]
+; NONE: common.ret:
+; NONE-NEXT: [[COMMON_RET_OP:%.*]] = phi i64 [ [[ZEXT:%.*]], [[IF_TRUE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONE-NEXT: ret i64 [[COMMON_RET_OP]]
+; NONE: if.true:
+; NONE-NEXT: store i32 1, ptr [[B:%.*]], align 4
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT: [[ZEXT]] = zext i16 [[TMP0]] to i64
+; NONE-NEXT: br label [[COMMON_RET]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -148,15 +233,49 @@ if.end:
;; Speculatable memory read doesn't prevent the hoist.
define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
-; CHECK-LABEL: @load_skip_speculatable_memory_read(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @load_skip_speculatable_memory_read(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: ret void
+;
+; STOREONLY-LABEL: @load_skip_speculatable_memory_read(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND...
[truncated]
I recommend splitting the TTI hook into hasConditionalLoadForType and hasConditionalStoreForType.
How about adding one parameter? #132153 |
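For reference, a sketch of how the one-parameter form might be queried (the IsStore parameter name follows the follow-up change; treat the exact signature as an assumption):

```c++
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: query one TTI hook, distinguishing loads from stores with a
// parameter, so a target can legalize conditional loads and stores
// independently. Signature assumed from the discussion above.
static bool canUseCondFaulting(const TargetTransformInfo &TTI,
                               const Instruction *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return TTI.hasConditionalLoadStoreForType(LI->getType(),
                                              /*IsStore=*/false);
  if (auto *SI = dyn_cast<StoreInst>(I))
    return TTI.hasConditionalLoadStoreForType(
        SI->getValueOperand()->getType(), /*IsStore=*/true);
  return false;
}
```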
…reForType (#132153) Address llvm/llvm-project#132032 (comment)
"disable-cload-cstore", cl::Hidden, cl::init(0), | ||
cl::desc("Control to disable cond-faulting-load(1)/cond-faulting-store(2)" | ||
"/all(3)")); |
Quite strange here. Probably we should split hasConditionalLoadStoreForType into hasConditionalLoadForType and hasConditionalStoreForType.
It is used to test the functionality of the code. We don't have a target with only a conditional load or only a conditional store, so changing the TTI doesn't help with this patch.
But X86TTI can return true for both when CF is available, while still providing other targets the flexibility?
The problem is not the TTI; it's that the design doesn't work with only a single conditional load. X86 returns true for both and other targets return false, but we need a case where ConditionalLoad is true and ConditionalStore is false.
Similarly, we can split --simplifycfg-hoist-loads-stores-with-cond-faulting into two flags (if it has not been widely used by downstream projects).
Good point! ff8789f
✅ With the latest revision this PR passed the C/C++ code formatter.
LGTM. The description needs to be updated:
This fixes a bug when a target supports only conditional faulting loads; see the test case hoist_store_without_cstore.
Split -simplifycfg-hoist-loads-stores-with-cond-faulting into -simplifycfg-hoist-loads-with-cond-faulting and -simplifycfg-hoist-stores-with-cond-faulting to control conditional faulting loads and stores respectively.
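A sketch of the split options this describes (flag names are taken from the description above; variable names, defaults, and cl::desc strings are assumptions):

```c++
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Two independent switches replacing the combined
// -simplifycfg-hoist-loads-stores-with-cond-faulting flag.
static cl::opt<bool> HoistLoadsWithCondFaulting(
    "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
    cl::desc("Hoist loads if the target supports conditional faulting"));

static cl::opt<bool> HoistStoresWithCondFaulting(
    "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
    cl::desc("Hoist stores if the target supports conditional faulting"));
```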