
Commit 2cd829f

[VectorUtils][VPlan] Consolidate VPWidenIntrinsicRecipe::onlyFirstLaneUsed and isVectorIntrinsicWithScalarOpAtArg (#137497)
We can reuse isVectorIntrinsicWithScalarOpAtArg in VectorUtils to determine whether only the first lane will be used for a VPWidenIntrinsicRecipe, provided that we also move the VP EVL operand check into it. This was needed by a local patch I was working on that created a VPWidenIntrinsicRecipe with a VP intrinsic, and it avoids the need to update the scalar-argument handling in two places.
1 parent 88b03aa commit 2cd829f
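
For illustration, a minimal sketch (not part of this commit; the helper name is hypothetical) of the consolidated query: after this change a single isVectorIntrinsicWithScalarOpAtArg call answers whether an operand of a vector intrinsic only needs its first (scalar) lane, including the EVL operand of VP intrinsics, so recipe code no longer needs a separate EVL special case.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Hypothetical helper, for illustration only.
static bool operandNeedsOnlyFirstLane(Intrinsic::ID ID, unsigned OpIdx) {
  // TTI is only consulted for target-specific intrinsics; passing nullptr
  // mirrors how the recipe code in the diff below calls this.
  return isVectorIntrinsicWithScalarOpAtArg(ID, OpIdx, /*TTI=*/nullptr);
}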

3 files changed: +64, -5 lines

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 5 additions & 1 deletion
@@ -149,6 +149,10 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);

+  // Vector predication intrinsics have the EVL as the last operand.
+  if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
+    return true;
+
  switch (ID) {
  case Intrinsic::abs:
  case Intrinsic::vp_abs:
@@ -166,7 +170,7 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
  case Intrinsic::experimental_vp_splice:
-    return ScalarOpdIdx == 2 || ScalarOpdIdx == 4 || ScalarOpdIdx == 5;
+    return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
  default:
    return false;
  }
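
A quick sanity check on the experimental_vp_splice change above (a sketch, assuming the LangRef operand order of two vectors, an immediate offset, a mask, and two scalar length operands, with the trailing one being the EVL): the generic EVL check added earlier in the function already reports operand 5 as scalar, which is why the explicit ScalarOpdIdx == 5 case can be dropped while indices 2 and 4 stay in the switch.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include <cassert>

using namespace llvm;

// Illustration only: operand 5 (the trailing EVL) is now covered by the
// VPIntrinsic::getVectorLengthParamPos check, operands 2 and 4 by the
// experimental_vp_splice case, and the vector operands remain per-lane.
static void checkVPSpliceScalarOperands() {
  assert(isVectorIntrinsicWithScalarOpAtArg(Intrinsic::experimental_vp_splice,
                                            5, /*TTI=*/nullptr));
  assert(isVectorIntrinsicWithScalarOpAtArg(Intrinsic::experimental_vp_splice,
                                            2, /*TTI=*/nullptr));
  assert(!isVectorIntrinsicWithScalarOpAtArg(Intrinsic::experimental_vp_splice,
                                             0, /*TTI=*/nullptr));
}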

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 4 deletions
@@ -1364,10 +1364,11 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {

bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
-  // Vector predication intrinsics only demand the the first lane the last
-  // operand (the EVL operand).
-  return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
-         Op == getOperand(getNumOperands() - 1);
+  return all_of(enumerate(operands()), [this, &Op](const auto &X) {
+    auto [Idx, V] = X;
+    return V != Op || isVectorIntrinsicWithScalarOpAtArg(getVectorIntrinsicID(),
+                                                         Idx, nullptr);
+  });
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
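
Tying this to the new test below (a sketch, assuming llvm.powi's exponent, operand 1, is already handled as a scalar operand in VectorUtils, which the absence of a broadcast in the test output is consistent with): with the consolidated check, the widened powi recipe reports that only the first lane of the exponent is used, so the vectorizer keeps %pow scalar instead of broadcasting it.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Mimic the recipe's new check for a two-operand powi call whose operands are
// identified here just by their indices (0 = vector base, 1 = scalar exponent).
// OpIdx plays the role of the queried VPValue: the result is true only if
// every position holding that operand is a scalar-operand position.
static bool powiOnlyFirstLaneUsed(unsigned OpIdx) {
  unsigned Operands[] = {0, 1};
  return all_of(enumerate(Operands), [OpIdx](const auto &X) {
    auto [Idx, V] = X;
    return V != OpIdx ||
           isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, Idx,
                                              /*TTI=*/nullptr);
  });
}
// Expected: powiOnlyFirstLaneUsed(1) is true (the exponent, %pow in the test
// below, is not broadcast), while powiOnlyFirstLaneUsed(0) is false.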
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s

; Check that we don't unnecessarily broadcast %pow
define void @powi_only_first_lane_used_of_second_arg(ptr %p, i32 %pow) {
; CHECK-LABEL: define void @powi_only_first_lane_used_of_second_arg(
; CHECK-SAME: ptr [[P:%.*]], i32 [[POW:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[P]], i32 [[INDEX]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[WIDE_LOAD]], i32 [[POW]])
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[TMP1]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[P_GEP:%.*]] = getelementptr float, ptr [[P]], i32 [[IV]]
; CHECK-NEXT:    [[X:%.*]] = load float, ptr [[P_GEP]], align 4
; CHECK-NEXT:    [[Y:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 [[POW]])
; CHECK-NEXT:    store float [[Y]], ptr [[P_GEP]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i32 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i32 [0, %entry], [%iv.next, %loop]
  %p.gep = getelementptr float, ptr %p, i32 %iv
  %x = load float, ptr %p.gep
  %y = call float @llvm.powi(float %x, i32 %pow)
  store float %y, ptr %p.gep
  %iv.next = add i32 %iv, 1
  %done = icmp eq i32 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
