Skip to content

Commit 5d5e706

Browse files
committed
[VPlan] Restrict hoisting of broadcast operations using VPDominatorTree (#117138)
This patch restricts broadcast operations from being hoisted to the vector preheader unless the basic block that defines the broadcasted value properly dominates the vector preheader. This prevents potential use-before-definition issues when the broadcasted value is defined within the plan. VPDominatorTree is used to confirm this restriction while still allowing safe hoisting for broadcasted values defined outside the plan. Issue #117139
1 parent ffe202c commit 5d5e706

File tree

6 files changed

+22
-7
lines changed

6 files changed

+22
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "VPlan.h"
2020
#include "LoopVectorizationPlanner.h"
2121
#include "VPlanCFG.h"
22+
#include "VPlanDominatorTree.h"
2223
#include "VPlanHelpers.h"
2324
#include "VPlanPatternMatch.h"
2425
#include "VPlanTransforms.h"
@@ -221,7 +222,7 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
221222
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222223
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
223224
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy) {}
225+
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
225226

226227
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227228
if (Def->isLiveIn())
@@ -264,7 +265,11 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
264265
return Data.VPV2Vector[Def];
265266

266267
auto GetBroadcastInstrs = [this, Def](Value *V) {
267-
bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
268+
bool SafeToHoist =
269+
!Def->hasDefiningRecipe() ||
270+
VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
271+
Plan->getVectorPreheader());
272+
268273
if (VF.isScalar())
269274
return V;
270275
// Place the code for broadcasting invariant variables in the new preheader.
@@ -929,6 +934,10 @@ void VPlan::execute(VPTransformState *State) {
929934
State->CFG.PrevVPBB = nullptr;
930935
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
931936

937+
// Update VPDominatorTree since VPBasicBlock may be removed after State was
938+
// constructed.
939+
State->VPDT.recalculate(*this);
940+
932941
// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
933942
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
934943
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);

llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
3939

4040
public:
4141
VPDominatorTree() = default;
42+
explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
4243

4344
/// Returns true if \p A properly dominates \p B.
45+
using Base::properlyDominates;
4446
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B);
4547
};
4648

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
1717

1818
#include "VPlanAnalysis.h"
19+
#include "VPlanDominatorTree.h"
1920
#include "llvm/ADT/DenseMap.h"
2021
#include "llvm/ADT/SmallPtrSet.h"
2122
#include "llvm/ADT/SmallVector.h"
@@ -360,6 +361,9 @@ struct VPTransformState {
360361

361362
/// VPlan-based type analysis.
362363
VPTypeAnalysis TypeAnalysis;
364+
365+
/// VPlan-based dominator tree.
366+
VPDominatorTree VPDT;
363367
};
364368

365369
/// Struct to hold various analysis needed for cost computations.

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
; Check that the extractvalue operands are actually free in vector code.
2424

2525
; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
26-
; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
2726
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
2827
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
28+
; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
2929
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
3030
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
3131
; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind
7575
; FORCED-LABEL: define void @test_getVectorCallCost
7676

7777
; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
78-
; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
7978
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
8079
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
80+
; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
8181
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
8282
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
8383

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
55
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
66
; CHECK-LABEL: @widen_extractvalue(
77
; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
8-
; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
98
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
109
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
1111
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
1212
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
1313
; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,10 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
276276
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
277277
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64
278278
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
279-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
280-
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
281279
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
282280
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
281+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
282+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
283283
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
284284
; CHECK: [[VECTOR_BODY]]:
285285
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]

0 commit comments

Comments
 (0)