Skip to content

Commit 3200385

Browse files
committed
[VPlan] Return cost of PHI for scalar VFs in computeCost for FORs.
This fixes a crash in the cost computation for first-order recurrences (FORs) when the VF is scalar. Fixes #116375.
1 parent 07507cb commit 3200385

File tree

2 files changed: 46 additions and 1 deletion

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3317,6 +3317,10 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
33173317
InstructionCost
33183318
VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
33193319
VPCostContext &Ctx) const {
3320+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
3321+
if (VF.isScalar())
3322+
return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
3323+
33203324
if (VF.isScalable() && VF.getKnownMinValue() == 1)
33213325
return InstructionCost::getInvalid();
33223326

@@ -3325,7 +3329,6 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
33253329
Type *VectorTy =
33263330
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
33273331

3328-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
33293332
return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
33303333
cast<VectorType>(VectorTy), Mask, CostKind,
33313334
VF.getKnownMinValue() - 1);
Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s
3+
; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s
4+
5+
; REMARKS: the cost-model indicates that vectorization is not beneficial
6+
7+
; Test for https://github.com/llvm/llvm-project/issues/116375.
8+
define void @test_i24_load_for(ptr noalias %src, ptr %dst) {
9+
; CHECK-LABEL: define void @test_i24_load_for(
10+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
11+
; CHECK-NEXT: [[ENTRY:.*]]:
12+
; CHECK-NEXT: br label %[[LOOP:.*]]
13+
; CHECK: [[LOOP]]:
14+
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
15+
; CHECK-NEXT: [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
16+
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
17+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]]
18+
; CHECK-NEXT: [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1
19+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]]
20+
; CHECK-NEXT: store i24 [[FOR]], ptr [[GEP_DST]], align 4
21+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000
22+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
23+
; CHECK: [[EXIT]]:
24+
; CHECK-NEXT: ret void
25+
;
26+
entry:
27+
br label %loop
28+
29+
loop:
30+
%iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
31+
%for = phi i24 [ 0, %entry ], [ %for.next, %loop ]
32+
%iv.next = add i16 %iv, 1
33+
%gep.src = getelementptr inbounds i24, ptr %src, i16 %iv
34+
%for.next = load i24, ptr %gep.src, align 1
35+
%gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv
36+
store i24 %for, ptr %gep.dst
37+
%ec = icmp eq i16 %iv.next, 1000
38+
br i1 %ec, label %exit, label %loop
39+
40+
exit:
41+
ret void
42+
}

0 commit comments

Comments
 (0)