Skip to content

Commit 6a3a5ca

Browse files
committed
Revert "[SLP]Add support for strided loads."
This reverts commit 0940f90 to fix issues reported in #80310.
1 parent 595d8d4 commit 6a3a5ca

File tree

7 files changed

+530
-456
lines changed

7 files changed

+530
-456
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 133 additions & 155 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll

Lines changed: 274 additions & 272 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll

Lines changed: 111 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,61 @@ define void @test(ptr %p, ptr noalias %s) {
55
; CHECK-LABEL: @test(
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
8+
; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
89
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
10+
; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
11+
; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
912
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
10-
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
11-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
12-
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
13-
; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
13+
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
14+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
15+
; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
16+
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
17+
; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
18+
; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
19+
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
20+
; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
21+
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
22+
; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
23+
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
24+
; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
25+
; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
26+
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
27+
; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
28+
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
29+
; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
30+
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
31+
; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
32+
; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
33+
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
34+
; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
35+
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
36+
; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
37+
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
38+
; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
39+
; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
40+
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
41+
; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
42+
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
43+
; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
44+
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
45+
; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
46+
; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
47+
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
48+
; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
49+
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
50+
; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
51+
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
52+
; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
53+
; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
54+
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
55+
; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
56+
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
57+
; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
58+
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
59+
; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
60+
; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
61+
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
62+
; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
1463
; CHECK-NEXT: ret void
1564
;
1665
entry:
@@ -213,40 +262,67 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
213262
; CHECK-NEXT: entry:
214263
; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
215264
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
265+
; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
216266
; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
217267
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
218268
; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
269+
; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
219270
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
271+
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
272+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
273+
; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
220274
; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
221275
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
222276
; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
277+
; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
278+
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
279+
; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
280+
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
281+
; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
223282
; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
224283
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
225284
; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
285+
; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
286+
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
287+
; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
288+
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
289+
; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
226290
; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
227291
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
228292
; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
293+
; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
294+
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
295+
; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
296+
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
229297
; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
298+
; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
230299
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
231300
; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
301+
; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
302+
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
303+
; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
304+
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
305+
; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
232306
; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
233307
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
234308
; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
309+
; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
310+
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
311+
; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
312+
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
313+
; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
235314
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
236315
; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
316+
; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
317+
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
318+
; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
319+
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
320+
; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
237321
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
238322
; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
239-
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
240-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[I1]], i32 0
241-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I3]], i32 1
242-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I5]], i32 2
243-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I7]], i32 3
244-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I9]], i32 4
245-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I11]], i32 5
246-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I13]], i32 6
247-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[I15]], i32 7
248-
; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <8 x float> [[TMP8]], [[TMP0]]
249-
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[ARRAYIDX2]], align 4
323+
; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
324+
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
325+
; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
250326
; CHECK-NEXT: ret void
251327
;
252328
entry:
@@ -321,12 +397,27 @@ define void @test3(ptr %p, ptr noalias %s) {
321397
; CHECK-NEXT: entry:
322398
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
323399
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
400+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
401+
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
402+
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
403+
; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
404+
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
405+
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
406+
; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
324407
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23
325-
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
326-
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
327-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
328-
; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
329-
; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
408+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX]], i32 0
409+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> [[TMP0]], ptr [[ARRAYIDX4]], i32 1
410+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> [[TMP1]], ptr [[ARRAYIDX11]], i32 2
411+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX18]], i32 3
412+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX25]], i32 4
413+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX32]], i32 5
414+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX39]], i32 6
415+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX46]], i32 7
416+
; CHECK-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
417+
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
418+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
419+
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <8 x float> [[TMP10]], [[TMP8]]
420+
; CHECK-NEXT: store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4
330421
; CHECK-NEXT: ret void
331422
;
332423
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define i16 @test() {
88
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
99
; CHECK-NEXT: entry:
1010
; CHECK-NEXT: [[PPREV_058_I:%.*]] = getelementptr [[S:%.*]], ptr null, i64 -1
11-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr null, ptr poison>, ptr [[PPREV_058_I]], i32 1
11+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[PPREV_058_I]], i32 0
1212
; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]]
1313
; CHECK: while.body.i:
1414
; CHECK-NEXT: [[TMP1:%.*]] = phi i16 [ 0, [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ]
@@ -17,7 +17,7 @@ define i16 @test() {
1717
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP3]], i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> poison)
1818
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
1919
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
20-
; CHECK-NEXT: [[CMP_I178:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
20+
; CHECK-NEXT: [[CMP_I178:%.*]] = icmp ult i16 [[TMP6]], [[TMP5]]
2121
; CHECK-NEXT: br label [[WHILE_BODY_I]]
2222
;
2323
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) {
55
; CHECK-LABEL: define i32 @sum_of_abs
66
; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[A]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
9-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false)
10-
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i32>
11-
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
12-
; CHECK-NEXT: ret i32 [[TMP3]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i32 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448>
11+
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
12+
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP3]], i1 false)
13+
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i32>
14+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
15+
; CHECK-NEXT: ret i32 [[TMP6]]
1316
;
1417
entry:
1518
%0 = load i8, ptr %a, align 1

llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ define void @test() {
3030
; CHECK-SLP-THRESHOLD: bb:
3131
; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0
3232
; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer
33-
; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 12, i64 8, i64 4, i64 0>
33+
; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
3434
; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
3535
; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer
3636
; CHECK-SLP-THRESHOLD-NEXT: ret void

llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0
99
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> <i32 15, i32 13, i32 11, i32 9, i32 7, i32 5, i32 3, i32 1>
10+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1111
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
1212
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0
1313
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer

0 commit comments

Comments
 (0)