Skip to content

Commit e86a9a0

Browse files
committed
[VPlan] Set debug loc for VPReduction/VPWidenIntrinsicRecipe.
This patch adds the missing debug locations for VPReduction/VPWidenIntrinsicRecipe.
1 parent 7508a6d commit e86a9a0

File tree

9 files changed

+103
-70
lines changed

9 files changed

+103
-70
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9575,9 +9575,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
95759575
if (CM.blockNeedsPredicationForAnyReason(BB))
95769576
CondOp = RecipeBuilder.getBlockInMask(BB);
95779577

9578-
VPReductionRecipe *RedRecipe =
9579-
new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
9580-
CondOp, CM.useOrderedReductions(RdxDesc));
9578+
auto *RedRecipe = new VPReductionRecipe(
9579+
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
9580+
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
95819581
// Append the recipe to the end of the VPBasicBlock because we need to
95829582
// ensure that it comes after all of its inputs, including CondOp.
95839583
// Note that this transformation may leave over dead recipes (including

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,7 +1673,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
16731673
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
16741674
ArrayRef<VPValue *> CallArguments, Type *Ty,
16751675
DebugLoc DL = {})
1676-
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
1676+
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
16771677
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
16781678
LLVMContext &Ctx = Ty->getContext();
16791679
AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
@@ -2312,8 +2312,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
23122312

23132313
public:
23142314
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
2315-
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
2316-
: VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
2315+
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
2316+
: VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
23172317
if (Start)
23182318
addOperand(Start);
23192319
}
@@ -2614,8 +2614,9 @@ class VPReductionRecipe : public VPSingleDefRecipe {
26142614
protected:
26152615
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
26162616
Instruction *I, ArrayRef<VPValue *> Operands,
2617-
VPValue *CondOp, bool IsOrdered)
2618-
: VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2617+
VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2618+
: VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2619+
IsOrdered(IsOrdered) {
26192620
if (CondOp) {
26202621
IsConditional = true;
26212622
addOperand(CondOp);
@@ -2625,16 +2626,17 @@ class VPReductionRecipe : public VPSingleDefRecipe {
26252626
public:
26262627
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
26272628
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2628-
bool IsOrdered)
2629+
bool IsOrdered, DebugLoc DL = {})
26292630
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
26302631
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2631-
IsOrdered) {}
2632+
IsOrdered, DL) {}
26322633

26332634
~VPReductionRecipe() override = default;
26342635

26352636
VPReductionRecipe *clone() override {
26362637
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2637-
getVecOp(), getCondOp(), IsOrdered);
2638+
getVecOp(), getCondOp(), IsOrdered,
2639+
getDebugLoc());
26382640
}
26392641

26402642
static inline bool classof(const VPRecipeBase *R) {
@@ -2689,7 +2691,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
26892691
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
26902692
cast_or_null<Instruction>(R.getUnderlyingValue()),
26912693
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2692-
R.isOrdered()) {}
2694+
R.isOrdered(), R.getDebugLoc()) {}
26932695

26942696
~VPReductionEVLRecipe() override = default;
26952697

llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
308308
// Phi node's operands may not have been visited at this point. We create
309309
// an empty VPInstruction that we will fix once the whole plain CFG has
310310
// been built.
311-
NewVPV = new VPWidenPHIRecipe(Phi);
311+
NewVPV = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc());
312312
VPBB->appendRecipe(cast<VPWidenPHIRecipe>(NewVPV));
313313
PhisToFix.push_back(Phi);
314314
} else {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,6 +2114,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
21142114
// Propagate the fast-math flags carried by the underlying instruction.
21152115
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
21162116
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2117+
State.setDebugLocFrom(getDebugLoc());
21172118
Value *NewVecOp = State.get(getVecOp());
21182119
if (VPValue *Cond = getCondOp()) {
21192120
Value *NewCond = State.get(Cond, State.VF.isScalar());
@@ -3456,6 +3457,7 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
34563457
assert(EnableVPlanNativePath &&
34573458
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
34583459

3460+
State.setDebugLocFrom(getDebugLoc());
34593461
Value *Op0 = State.get(getOperand(0));
34603462
Type *VecTy = Op0->getType();
34613463
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");

llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,37 @@
33
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
44
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s
55

6-
; Testing the debug locations of the generated vector intstruction are same as
7-
; their scalar instruction.
6+
; Testing that the debug location of the generated vector intrinsic is the same as
7+
; its scalar counterpart.
88

9+
define void @vp_select(ptr %a, ptr %b, ptr %c, i64 %N) {
910
; DEBUGLOC-LABEL: define void @vp_select(
10-
define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
1111
; DEBUGLOC: vector.body:
12-
; DEBUGLOC: %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9)
13-
; DEBUGLOC: for.body:
14-
; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39
12+
; DEBUGLOC: = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.+}}, <vscale x 4 x i32> %{{.+}}, <vscale x 4 x i32> %{{.+}}, i32 %{{.+}}), !dbg ![[SELLOC:[0-9]+]]
13+
; DEBUGLOC: loop:
14+
; DEBUGLOC: = select i1 %{{.+}}, i32 %{{.+}}, i32 %{{.+}}, !dbg ![[SELLOC]]
15+
;
1516
entry:
16-
br label %for.body
17+
br label %loop
1718

18-
for.body:
19-
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
20-
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
21-
%0 = load i32, ptr %arrayidx, align 4
22-
%arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
23-
%1 = load i32, ptr %arrayidx3, align 4
24-
%cmp4 = icmp sgt i32 %0, %1
25-
%2 = sub i32 0, %1
26-
%cond.p = select i1 %cmp4, i32 %1, i32 %2
27-
%cond = add i32 %cond.p, %0
28-
%arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
29-
store i32 %cond, ptr %arrayidx15, align 4
30-
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
31-
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
32-
br i1 %exitcond.not, label %exit, label %for.body
19+
loop:
20+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
21+
%gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
22+
%load.b = load i32, ptr %gep.b, align 4
23+
%gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
24+
%load.c = load i32, ptr %gep.c, align 4
25+
%cmp = icmp sgt i32 %load.b, %load.c
26+
%neg.c = sub i32 0, %load.c
27+
%sel = select i1 %cmp, i32 %load.c, i32 %neg.c
28+
%add = add i32 %sel, %load.b
29+
%gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
30+
store i32 %add, ptr %gep.a, align 4
31+
%iv.next = add nuw nsw i64 %iv, 1
32+
%exitcond = icmp eq i64 %iv.next, %N
33+
br i1 %exitcond, label %exit, label %loop
3334

3435
exit:
3536
ret void
3637
}
38+
39+
; DEBUGLOC: [[SELLOC]] = !DILocation(line: 9

llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
1515
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
1616
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
1717
; CHECK: for.cond5.preheader1:
18-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
19-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
18+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
19+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
2020
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
2121
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
2222
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]

llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,30 @@ exit:
5454
ret void
5555
}
5656

57+
define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) {
58+
; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg(
59+
; DEBUGLOC: vector.body:
60+
; DEBUGLOC: = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !dbg ![[INTRINSIC_LOC:[0-9]+]]
61+
; DEBUGLOC: loop:
62+
; DEBUGLOC: = call float @llvm.sqrt.f32(float %{{.+}}), !dbg ![[INTRINSIC_LOC]]
63+
;
64+
entry:
65+
br label %loop
66+
67+
loop:
68+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
69+
%gep.y = getelementptr inbounds float, ptr %y, i64 %iv
70+
%load = load float, ptr %gep.y, align 4
71+
%call = call float @llvm.sqrt.f32(float %load)
72+
%gep.x = getelementptr inbounds float, ptr %x, i64 %iv
73+
store float %call, ptr %gep.x, align 4
74+
%iv.next = add i64 %iv, 1
75+
%exitcond = icmp eq i64 %iv.next, %n
76+
br i1 %exitcond, label %exit, label %loop
77+
78+
exit:
79+
ret void
80+
}
5781

5882
!0 = !{!0, !1}
5983
!1 = !{!"llvm.loop.vectorize.width", i32 4}
@@ -62,3 +86,4 @@ exit:
6286

6387
; DEBUGLOC: ![[RESUMELOC]] = !DILocation(line: 2
6488
; DEBUGLOC: ![[PTRIVLOC]] = !DILocation(line: 12
89+
; DEBUGLOC: ![[INTRINSIC_LOC]] = !DILocation(line: 22
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
2+
3+
; Testing that the debug locations of the generated vector instructions are the same as
4+
; those of their scalar counterparts.
5+
6+
define i32 @reduction_sum(ptr %A, ptr %B) {
7+
; DEBUGLOC-LABEL: define i32 @reduction_sum(
8+
; DEBUGLOC: vector.body:
9+
; DEBUGLOC: = load <4 x i32>, ptr %{{.+}}, align 4, !dbg ![[LOADLOC:[0-9]+]]
10+
; DEBUGLOC: = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %{{.+}}), !dbg ![[REDLOC:[0-9]+]]
11+
; DEBUGLOC: loop:
12+
; DEBUGLOC: = load i32, ptr %{{.+}}, align 4, !dbg ![[LOADLOC]]
13+
; DEBUGLOC: = add i32 %{{.+}}, %{{.+}}, !dbg ![[REDLOC]]
14+
;
15+
entry:
16+
br label %loop
17+
18+
loop:
19+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
20+
%red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
21+
%gep = getelementptr inbounds i32, ptr %A, i64 %iv
22+
%load = load i32, ptr %gep, align 4
23+
%red.next = add i32 %red, %load
24+
%iv.next = add i64 %iv, 1
25+
%exitcond = icmp eq i64 %iv.next, 256
26+
br i1 %exitcond, label %exit, label %loop
27+
28+
exit:
29+
%red.lcssa = phi i32 [ %red.next, %loop ]
30+
ret i32 %red.lcssa
31+
}
32+
33+
; DEBUGLOC: ![[LOADLOC]] = !DILocation(line: 5
34+
; DEBUGLOC: ![[REDLOC]] = !DILocation(line: 6

llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll

Lines changed: 0 additions & 33 deletions
This file was deleted.

0 commit comments

Comments
 (0)