Skip to content

Commit ed6f499

Browse files
committed
[VPlan] Handle conditional ordered reductions with scalar VFs.
VPReductionRecipe::execute was not handling predicates for ordered reductions with scalar VFs, which was causing a crash. This patch adds dedicated handling for scalar VFs when dealing with the condition. The other operands are already handled in a similar fashion below. Fixes #70988.
1 parent 4effdc4 commit ed6f499

File tree

2 files changed

+64
-5
lines changed

2 files changed

+64
-5
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9399,12 +9399,16 @@ void VPReductionRecipe::execute(VPTransformState &State) {
93999399
Value *NewVecOp = State.get(getVecOp(), Part);
94009400
if (VPValue *Cond = getCondOp()) {
94019401
Value *NewCond = State.get(Cond, Part);
9402-
VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
9403-
Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, VecTy->getElementType(),
9402+
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
9403+
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
9404+
Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
94049405
RdxDesc.getFastMathFlags());
9405-
Value *IdenVec =
9406-
State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
9407-
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
9406+
if (State.VF.isVector()) {
9407+
Iden =
9408+
State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
9409+
}
9410+
9411+
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
94089412
NewVecOp = Select;
94099413
}
94109414
Value *NewRed;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-interleave=2 -force-vector-width=1 -force-ordered-reductions -S %s | FileCheck %s
3+
4+
define float @pr70988() {
5+
; CHECK-LABEL: define float @pr70988() {
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8+
; CHECK: vector.ph:
9+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
10+
; CHECK: vector.body:
11+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
12+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
13+
; CHECK-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX1]], 0
14+
; CHECK-NEXT: [[VEC_IV2:%.*]] = add i32 [[INDEX1]], 1
15+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[VEC_IV]], 1020
16+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV2]], 1020
17+
; CHECK-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP0]], float 1.000000e+00, float -0.000000e+00
18+
; CHECK-NEXT: [[TMP3:%.*]] = fadd contract float [[VEC_PHI]], [[TMP2]]
19+
; CHECK-NEXT: [[TMP4:%.*]] = select contract i1 [[TMP1]], float 1.000000e+00, float -0.000000e+00
20+
; CHECK-NEXT: [[TMP5]] = fadd contract float [[TMP3]], [[TMP4]]
21+
; CHECK-NEXT: [[INDEX_NEXT3]] = add i32 [[INDEX1]], 2
22+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022
23+
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
24+
; CHECK: middle.block:
25+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
26+
; CHECK: scalar.ph:
27+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
28+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
29+
; CHECK-NEXT: br label [[LOOP:%.*]]
30+
; CHECK: loop:
31+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
32+
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], [[LOOP]] ]
33+
; CHECK-NEXT: [[RDX_NEXT]] = fadd contract float [[RDX]], 1.000000e+00
34+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1
35+
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[INDEX_NEXT]], 1021
36+
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
37+
; CHECK: exit:
38+
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
39+
; CHECK-NEXT: ret float [[DOTLCSSA]]
40+
;
41+
entry:
42+
br label %loop
43+
44+
loop:
45+
%index = phi i32 [ 0, %entry ], [ %index.next, %loop ]
46+
%rdx = phi float [ 0.000000e+00, %entry ], [ %rdx.next, %loop ]
47+
%rdx.next = fadd contract float %rdx, 1.000000e+00
48+
%index.next = add nuw nsw i32 %index, 1
49+
%cond = icmp ult i32 %index.next, 1021
50+
br i1 %cond, label %loop, label %exit
51+
52+
exit:
53+
%.lcssa = phi float [ %rdx.next, %loop ]
54+
ret float %.lcssa
55+
}

0 commit comments

Comments
 (0)