Skip to content

Commit e9d2429

Browse files
committed
Fix assertion failure with a small epilogue VF on Apple M1
1 parent d83028e commit e9d2429

File tree

2 files changed

+115
-1
lines changed

2 files changed

+115
-1
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3422,7 +3422,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
3422 3422
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
3423 3423
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
3424 3424
// this value when we vectorize all of the instructions that use the PHI.
3425-
bool ScalarPHI = VF.isScalar() || IsInLoop;
3425+
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3426 3426
Type *VecTy =
3427 3427
ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
3428 3428

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 114 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,4 +96,118 @@ for.exit: ; preds = %for.body
96 96
ret i32 %add
97 97
}
98 98

99+
define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
100+
; CHECK-LABEL: define void @dotp_small_epilogue_vf(
101+
; CHECK-SAME: i64 [[IDX_NEG:%.*]], i8 [[A:%.*]]) #[[ATTR1:[0-9]+]] {
102+
; CHECK-NEXT: iter.check:
103+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[IDX_NEG]]
104+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
105+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[ENTRY:%.*]]
106+
; CHECK: vector.main.loop.iter.check:
107+
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
108+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
109+
; CHECK: vector.ph:
110+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
111+
; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
112+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0
113+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
114+
; CHECK-NEXT: [[TMP1:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i64>
115+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
116+
; CHECK: vector.body:
117+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
118+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
119+
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr null, align 1
120+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
121+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT2]], <16 x i8> poison, <16 x i32> zeroinitializer
122+
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT3]] to <16 x i64>
123+
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i64> [[TMP3]], [[TMP1]]
124+
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v16i64(<2 x i64> [[VEC_PHI]], <16 x i64> [[TMP4]])
125+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
126+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
127+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
128+
; CHECK: middle.block:
129+
; CHECK-NEXT: [[ADD:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[PARTIAL_REDUCE]])
130+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]]
131+
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY:%.*]]
132+
; CHECK: vec.epilog.iter.check:
133+
; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]]
134+
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[IV_NEXT]]
135+
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
136+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
137+
; CHECK: vec.epilog.ph:
138+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
139+
; CHECK-NEXT: [[ACCUM:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
140+
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8
141+
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
142+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
143+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
144+
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT7]], <8 x i8> poison, <8 x i32> zeroinitializer
145+
; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i8> [[BROADCAST_SPLAT8]] to <8 x i64>
146+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <1 x i64> zeroinitializer, i64 [[ACCUM]], i32 0
147+
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
148+
; CHECK: vec.epilog.vector.body:
149+
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
150+
; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <1 x i64> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[PARTIAL_REDUCE13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
151+
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr null, align 1
152+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <8 x i8> poison, i8 [[TMP9]], i64 0
153+
; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT11]], <8 x i8> poison, <8 x i32> zeroinitializer
154+
; CHECK-NEXT: [[TMP10:%.*]] = sext <8 x i8> [[BROADCAST_SPLAT12]] to <8 x i64>
155+
; CHECK-NEXT: [[TMP11:%.*]] = mul <8 x i64> [[TMP10]], [[TMP7]]
156+
; CHECK-NEXT: [[PARTIAL_REDUCE13]] = call <1 x i64> @llvm.experimental.vector.partial.reduce.add.v1i64.v8i64(<1 x i64> [[VEC_PHI10]], <8 x i64> [[TMP11]])
157+
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 8
158+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
159+
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
160+
; CHECK: vec.epilog.middle.block:
161+
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> [[PARTIAL_REDUCE13]])
162+
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
163+
; CHECK-NEXT: br i1 [[CMP_N15]], label [[WHILE_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
164+
; CHECK: vec.epilog.scalar.ph:
165+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IDX_NEG]], [[ITER_CHECK:%.*]] ], [ [[IND_END6]], [[WHILE_BODY]] ]
166+
; CHECK-NEXT: [[BC_RESUME_VAL16:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IV_NEXT]], [[WHILE_BODY]] ]
167+
; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i64 [ [[TMP13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[ADD]], [[WHILE_BODY]] ]
168+
; CHECK-NEXT: br label [[WHILE_BODY1:%.*]]
169+
; CHECK: while.body:
170+
; CHECK-NEXT: [[IV_NEG:%.*]] = phi i64 [ [[IV_NEG_NEXT:%.*]], [[WHILE_BODY1]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
171+
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[WHILE_BODY1]] ], [ [[BC_RESUME_VAL16]], [[VEC_EPILOG_SCALAR_PH]] ]
172+
; CHECK-NEXT: [[ACCUM1:%.*]] = phi i64 [ [[ADD1:%.*]], [[WHILE_BODY1]] ], [ [[BC_MERGE_RDX17]], [[VEC_EPILOG_SCALAR_PH]] ]
173+
; CHECK-NEXT: [[IV_NEG_NEXT]] = add i64 [[IV_NEG]], 1
174+
; CHECK-NEXT: [[EXT_A:%.*]] = sext i8 [[A]] to i64
175+
; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
176+
; CHECK-NEXT: [[B:%.*]] = load i8, ptr null, align 1
177+
; CHECK-NEXT: [[EXT_B:%.*]] = sext i8 [[B]] to i64
178+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[EXT_B]], [[EXT_A]]
179+
; CHECK-NEXT: [[ADD1]] = add i64 [[MUL]], [[ACCUM1]]
180+
; CHECK-NEXT: [[CMP_IV_NEG:%.*]] = icmp ugt i64 [[IV_NEG]], 0
181+
; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[IV1]], -1
182+
; CHECK-NEXT: [[EXITCOND:%.*]] = and i1 [[CMP_IV_NEG]], [[CMP_IV]]
183+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
184+
; CHECK: while.end.loopexit:
185+
; CHECK-NEXT: [[RESULT:%.*]] = phi i64 [ [[ADD1]], [[WHILE_BODY1]] ], [ [[ADD]], [[MIDDLE_BLOCK]] ], [ [[TMP13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
186+
; CHECK-NEXT: ret void
187+
;
188+
entry:
189+
br label %while.body
190+
191+
while.body: ; preds = %while.body, %entry
192+
%iv.neg = phi i64 [ %iv.neg.next, %while.body ], [ %idx.neg, %entry ]
193+
%iv = phi i64 [ %iv.next, %while.body ], [ 0, %entry ]
194+
%accum = phi i64 [ %add, %while.body ], [ 0, %entry ]
195+
%iv.neg.next = add i64 %iv.neg, 1
196+
%ext.a = sext i8 %a to i64
197+
%iv.next = add i64 %iv, 1
198+
%b = load i8, ptr null, align 1
199+
%ext.b = sext i8 %b to i64
200+
%mul = mul i64 %ext.b, %ext.a
201+
%add = add i64 %mul, %accum
202+
%cmp.iv.neg = icmp ugt i64 %iv.neg, 0
203+
%cmp.iv = icmp ne i64 %iv, -1
204+
%exitcond = and i1 %cmp.iv.neg, %cmp.iv
205+
br i1 %exitcond, label %while.body, label %while.end.loopexit
206+
207+
while.end.loopexit: ; preds = %while.body
208+
%result = phi i64 [ %add, %while.body ]
209+
ret void
210+
}
211+
99 212
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
213+
attributes #1 = { "target-cpu"="apple-m1" }

0 commit comments

Comments
 (0)