Skip to content

Commit f6460f3

Browse files
fhahn authored and ronlieb committed
[VPlan] Use op from underlying call in computeCost if needed.
This fixes a divergence between the legacy and VPlan-based cost models, e.g. if one of the operands has a first-order recurrence phi as operand. Change-Id: I818ca48f3540e8fc7d1049460516f64e12f2738f
1 parent 3d797a6 commit f6460f3

File tree

2 files changed

+63
-4
lines changed

2 files changed

+63
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -939,13 +939,19 @@ InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
939939
if (auto *FPMO = dyn_cast_or_null<FPMathOperator>(getUnderlyingValue()))
940940
FMF = FPMO->getFastMathFlags();
941941

942-
// Some backends analyze intrinsic arguments to determine cost. If all
943-
// operands are VPValues with an underlying IR value, use the original IR
944-
// values for cost computations.
942+
// Some backends analyze intrinsic arguments to determine cost. Use the
943+
// underlying value for the operand if it has one. Otherwise try to use the
944+
// operand of the underlying call instruction, if there is one. Otherwise
945+
// clear Arguments.
946+
// TODO: Rework TTI interface to be independent of concrete IR values.
945947
SmallVector<const Value *> Arguments;
946-
for (VPValue *Op : operands()) {
948+
for (const auto &[Idx, Op] : enumerate(operands())) {
947949
auto *V = Op->getUnderlyingValue();
948950
if (!V) {
951+
if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
952+
Arguments.push_back(UI->getArgOperand(Idx));
953+
continue;
954+
}
949955
Arguments.clear();
950956
break;
951957
}

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,59 @@ exit:
126126
ret void
127127
}
128128

129+
; Test input: a loop whose induction variable starts at 100 and is decremented
; by 1 each iteration, exiting once the decremented value reaches 0. Each
; iteration loads a double from %src at the decremented index. If either the
; loop-invariant argument %0 or the loaded value compares `fcmp ugt` against
; 0.0, control goes straight to the latch; otherwise the `then` block calls
; @llvm.sqrt.f64 on the loaded value and stores the result to %dst at the same
; index.
; The FileCheck assertions below match the same block structure (entry, loop
; header, then, latch, exit) with a scalar @llvm.sqrt.f64 call.
define void @call_scalarized(ptr noalias %src, ptr noalias %dst, double %0) {
130+
; CHECK-LABEL: define void @call_scalarized(
131+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], double [[TMP0:%.*]]) {
132+
; CHECK-NEXT: [[ENTRY:.*]]:
133+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
134+
; CHECK: [[LOOP_HEADER]]:
135+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
136+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
137+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV_NEXT]]
138+
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
139+
; CHECK-NEXT: [[CMP295:%.*]] = fcmp ugt double [[TMP0]], 0.000000e+00
140+
; CHECK-NEXT: [[CMP299:%.*]] = fcmp ugt double [[L]], 0.000000e+00
141+
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP295]], [[CMP299]]
142+
; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
143+
; CHECK: [[THEN]]:
144+
; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[L]])
145+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV_NEXT]]
146+
; CHECK-NEXT: store double [[SQRT]], ptr [[GEP_DST]], align 8
147+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
148+
; CHECK: [[LOOP_LATCH]]:
149+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0
150+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
151+
; CHECK: [[EXIT]]:
152+
; CHECK-NEXT: ret void
153+
;
154+
entry:
155+
br label %loop.header
156+
157+
loop.header:
158+
%iv = phi i64 [ 100, %entry ], [ %iv.next, %loop.latch ]
159+
%iv.next = add i64 %iv, -1
160+
%gep.src = getelementptr double, ptr %src, i64 %iv.next
161+
%l = load double, ptr %gep.src, align 8
162+
%cmp295 = fcmp ugt double %0, 0.000000e+00
163+
%cmp299 = fcmp ugt double %l, 0.000000e+00
164+
%or.cond = or i1 %cmp295, %cmp299
165+
br i1 %or.cond, label %loop.latch, label %then
166+
167+
then:
168+
%sqrt = call double @llvm.sqrt.f64(double %l)
169+
%gep.dst = getelementptr double, ptr %dst, i64 %iv.next
170+
store double %sqrt, ptr %gep.dst, align 8
171+
br label %loop.latch
172+
173+
loop.latch:
174+
%tobool.not = icmp eq i64 %iv.next, 0
175+
br i1 %tobool.not, label %exit, label %loop.header
176+
177+
exit:
178+
ret void
179+
}
180+
181+
declare double @llvm.sqrt.f64(double) #0
129182
declare double @llvm.powi.f64.i32(double, i32)
130183
declare i64 @llvm.fshl.i64(i64, i64, i64)
131184
;.

0 commit comments

Comments
 (0)