Skip to content

Commit 1ff10fa

Browse files
authored
[LV] Limits the splat operations be hoisted must not be defined by a recipe. (#117138)
Issue #117139
1 parent 148a7ae commit 1ff10fa

File tree

6 files changed

+22
-7
lines changed

6 files changed

+22
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "VPlan.h"
2020
#include "LoopVectorizationPlanner.h"
2121
#include "VPlanCFG.h"
22+
#include "VPlanDominatorTree.h"
2223
#include "VPlanHelpers.h"
2324
#include "VPlanPatternMatch.h"
2425
#include "VPlanTransforms.h"
@@ -221,7 +222,7 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
221222
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222223
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
223224
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy) {}
225+
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
225226

226227
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227228
if (Def->isLiveIn())
@@ -264,7 +265,11 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
264265
return Data.VPV2Vector[Def];
265266

266267
auto GetBroadcastInstrs = [this, Def](Value *V) {
267-
bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
268+
bool SafeToHoist =
269+
!Def->hasDefiningRecipe() ||
270+
VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
271+
Plan->getVectorPreheader());
272+
268273
if (VF.isScalar())
269274
return V;
270275
// Place the code for broadcasting invariant variables in the new preheader.
@@ -929,6 +934,10 @@ void VPlan::execute(VPTransformState *State) {
929934
State->CFG.PrevVPBB = nullptr;
930935
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
931936

937+
// Update VPDominatorTree since VPBasicBlock may be removed after State was
938+
// constructed.
939+
State->VPDT.recalculate(*this);
940+
932941
// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
933942
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
934943
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);

llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
3939

4040
public:
4141
VPDominatorTree() = default;
42+
explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
4243

4344
/// Returns true if \p A properly dominates \p B.
45+
using Base::properlyDominates;
4446
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B);
4547
};
4648

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
1717

1818
#include "VPlanAnalysis.h"
19+
#include "VPlanDominatorTree.h"
1920
#include "llvm/ADT/DenseMap.h"
2021
#include "llvm/ADT/SmallPtrSet.h"
2122
#include "llvm/ADT/SmallVector.h"
@@ -360,6 +361,9 @@ struct VPTransformState {
360361

361362
/// VPlan-based type analysis.
362363
VPTypeAnalysis TypeAnalysis;
364+
365+
/// VPlan-based dominator tree.
366+
VPDominatorTree VPDT;
363367
};
364368

365369
/// Struct to hold various analysis needed for cost computations.

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
; Check that the extractvalue operands are actually free in vector code.
2424

2525
; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
26-
; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
2726
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
2827
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
28+
; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
2929
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
3030
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
3131
; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind
7575
; FORCED-LABEL: define void @test_getVectorCallCost
7676

7777
; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
78-
; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
7978
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
8079
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
80+
; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
8181
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
8282
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
8383

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
55
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
66
; CHECK-LABEL: @widen_extractvalue(
77
; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
8-
; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
98
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
109
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
1111
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
1212
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
1313
; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,10 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
276276
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
277277
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64
278278
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
279-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
280-
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
281279
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
282280
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
281+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
282+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
283283
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
284284
; CHECK: [[VECTOR_BODY]]:
285285
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]

0 commit comments

Comments
 (0)