Skip to content

Commit dd94537

Browse files
committed
[LV] Update call widening decision when scalarizing calls.
collectInstsToScalarize may decide to scalarize a call. If so, we have to update the widening decision for the call; otherwise, the call won't be scalarized as expected during VPlan construction. This issue was uncovered by f82543d509.
1 parent 0797c18 commit dd94537

File tree

2 files changed

+64
-1
lines changed

2 files changed

+64
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5386,8 +5386,18 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
53865386
// 3. Emulated masked memrefs, if a hacked cost is needed.
53875387
if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() &&
53885388
!useEmulatedMaskMemRefHack(&I, VF) &&
5389-
computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
5389+
computePredInstDiscount(&I, ScalarCosts, VF) >= 0) {
53905390
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
5391+
// Check if we decided to scalarize a call. If so, update the widening
5392+
// decision of the call to CM_Scalarize with the computed scalar cost.
5393+
for (const auto &[I, _] : ScalarCosts) {
5394+
auto *CI = dyn_cast<CallInst>(I);
5395+
if (!CI || !CallWideningDecisions.contains({CI, VF}))
5396+
continue;
5397+
CallWideningDecisions[{CI, VF}].Kind = CM_Scalarize;
5398+
CallWideningDecisions[{CI, VF}].Cost = ScalarCosts[CI];
5399+
}
5400+
}
53915401
// Remember that BB will remain after vectorization.
53925402
PredicatedBBsAfterVectorization[VF].insert(BB);
53935403
for (auto *Pred : predecessors(BB)) {

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,59 @@ exit:
126126
ret void
127127
}
128128

129+
define void @call_scalarized(ptr noalias %src, ptr noalias %dst, double %0) {
130+
; CHECK-LABEL: define void @call_scalarized(
131+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], double [[TMP0:%.*]]) {
132+
; CHECK-NEXT: [[ENTRY:.*]]:
133+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
134+
; CHECK: [[LOOP_HEADER]]:
135+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
136+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
137+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV_NEXT]]
138+
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
139+
; CHECK-NEXT: [[CMP295:%.*]] = fcmp ugt double [[TMP0]], 0.000000e+00
140+
; CHECK-NEXT: [[CMP299:%.*]] = fcmp ugt double [[L]], 0.000000e+00
141+
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP295]], [[CMP299]]
142+
; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
143+
; CHECK: [[THEN]]:
144+
; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[L]])
145+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV_NEXT]]
146+
; CHECK-NEXT: store double [[SQRT]], ptr [[GEP_DST]], align 8
147+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
148+
; CHECK: [[LOOP_LATCH]]:
149+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0
150+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
151+
; CHECK: [[EXIT]]:
152+
; CHECK-NEXT: ret void
153+
;
154+
entry:
155+
br label %loop.header
156+
157+
loop.header:
158+
%iv = phi i64 [ 100, %entry ], [ %iv.next, %loop.latch ]
159+
%iv.next = add i64 %iv, -1
160+
%gep.src = getelementptr double, ptr %src, i64 %iv.next
161+
%l = load double, ptr %gep.src, align 8
162+
%cmp295 = fcmp ugt double %0, 0.000000e+00
163+
%cmp299 = fcmp ugt double %l, 0.000000e+00
164+
%or.cond = or i1 %cmp295, %cmp299
165+
br i1 %or.cond, label %loop.latch, label %then
166+
167+
then:
168+
%sqrt = call double @llvm.sqrt.f64(double %l)
169+
%gep.dst = getelementptr double, ptr %dst, i64 %iv.next
170+
store double %sqrt, ptr %gep.dst, align 8
171+
br label %loop.latch
172+
173+
loop.latch:
174+
%tobool.not = icmp eq i64 %iv.next, 0
175+
br i1 %tobool.not, label %exit, label %loop.header
176+
177+
exit:
178+
ret void
179+
}
180+
181+
declare double @llvm.sqrt.f64(double) #0
129182
declare double @llvm.powi.f64.i32(double, i32)
130183
declare i64 @llvm.fshl.i64(i64, i64, i64)
131184
;.

0 commit comments

Comments
 (0)