Skip to content

Commit 78077d4

Browse files
committed
resolve conflicts with upstream
1 parent 08f0cda commit 78077d4

File tree

3 files changed

+135
-101
lines changed

3 files changed

+135
-101
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3523,10 +3523,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
35233523
if (hasIrregularType(ScalarTy, DL))
35243524
return false;
35253525

3526-
// We currently only know how to emit interleave/deinterleave with
3527-
// Factor=2 for scalable vectors. This is purely an implementation
3528-
// limit.
3529-
if (VF.isScalable() && InterleaveFactor != 2)
3526+
// For scalable vectors, only power-of-2 interleave factors are currently
3527+
// supported, since we rely on the (de)interleave2 intrinsics instead of
3528+
// shufflevectors.
3529+
if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
35303530
return false;
35313531

35323532
// If the group involves a non-integral pointer, we may not be able to

llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -172,30 +172,26 @@ define void @interleave_deinterleave(ptr writeonly %dst, ptr readonly %a, ptr re
172172
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
173173
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
174174
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A]], i64 [[TMP7]]
175-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
176-
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP9]])
177-
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
178-
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
179-
; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
180-
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3
181-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[TMP7]]
182-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
183-
; CHECK-NEXT: [[LDN14:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP15]])
184-
; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 0
185-
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 1
186-
; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 2
187-
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 3
188-
; CHECK-NEXT: [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP16]], [[TMP10]]
189-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[TMP7]]
190-
; CHECK-NEXT: [[TMP22:%.*]] = sub nsw <vscale x 4 x i32> [[TMP11]], [[TMP17]]
191-
; CHECK-NEXT: [[TMP23:%.*]] = shl <vscale x 4 x i32> [[TMP12]], [[TMP18]]
192-
; CHECK-NEXT: [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP13]], [[TMP19]]
193-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i64 12
194-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 -3
195-
; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP23]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP26]])
175+
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP8]])
176+
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
177+
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
178+
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
179+
; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3
180+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[TMP7]]
181+
; CHECK-NEXT: [[LDN14:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP13]])
182+
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 0
183+
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 1
184+
; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 2
185+
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN14]], 3
186+
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[TMP14]], [[TMP9]]
187+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[TMP7]]
188+
; CHECK-NEXT: [[TMP20:%.*]] = sub nsw <vscale x 4 x i32> [[TMP10]], [[TMP15]]
189+
; CHECK-NEXT: [[TMP21:%.*]] = shl <vscale x 4 x i32> [[TMP11]], [[TMP16]]
190+
; CHECK-NEXT: [[TMP22:%.*]] = ashr <vscale x 4 x i32> [[TMP12]], [[TMP17]]
191+
; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP21]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP19]])
196192
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
197-
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
198-
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
193+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
194+
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
199195
; CHECK: middle.block:
200196
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
201197
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -205,31 +201,31 @@ define void @interleave_deinterleave(ptr writeonly %dst, ptr readonly %a, ptr re
205201
; CHECK: for.body:
206202
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
207203
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[A]], i64 [[INDVARS_IV]]
208-
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
204+
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
209205
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[INDVARS_IV]]
210-
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
211-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
206+
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
207+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP24]]
212208
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[INDVARS_IV]]
213209
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
214210
; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
215-
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[Y]], align 4
211+
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[Y]], align 4
216212
; CHECK-NEXT: [[Y11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 4
217-
; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[Y11]], align 4
218-
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP30]], [[TMP31]]
213+
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[Y11]], align 4
214+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], [[TMP27]]
219215
; CHECK-NEXT: [[Y14:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 4
220216
; CHECK-NEXT: store i32 [[SUB]], ptr [[Y14]], align 4
221217
; CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8
222-
; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[Z]], align 4
218+
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[Z]], align 4
223219
; CHECK-NEXT: [[Z19:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 8
224-
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[Z19]], align 4
225-
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP32]], [[TMP33]]
220+
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[Z19]], align 4
221+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP28]], [[TMP29]]
226222
; CHECK-NEXT: [[Z22:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 8
227223
; CHECK-NEXT: store i32 [[SHL]], ptr [[Z22]], align 4
228224
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12
229-
; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[T]], align 4
225+
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[T]], align 4
230226
; CHECK-NEXT: [[T27:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 12
231-
; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[T27]], align 4
232-
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[TMP34]], [[TMP35]]
227+
; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[T27]], align 4
228+
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[TMP30]], [[TMP31]]
233229
; CHECK-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 12
234230
; CHECK-NEXT: store i32 [[SHR]], ptr [[T30]], align 4
235231
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1

0 commit comments

Comments
 (0)