@@ -5,61 +5,12 @@ define void @test(ptr %p, ptr noalias %s) {
5
5
; CHECK-LABEL: @test(
6
6
; CHECK-NEXT: entry:
7
7
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
8
- ; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
9
8
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
10
- ; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
11
- ; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
12
9
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
13
- ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
14
- ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
15
- ; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
16
- ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
17
- ; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
18
- ; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
19
- ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
20
- ; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
21
- ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
22
- ; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
23
- ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
24
- ; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
25
- ; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
26
- ; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
27
- ; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
28
- ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
29
- ; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
30
- ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
31
- ; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
32
- ; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
33
- ; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
34
- ; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
35
- ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
36
- ; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
37
- ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
38
- ; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
39
- ; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
40
- ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
41
- ; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
42
- ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
43
- ; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
44
- ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
45
- ; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
46
- ; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
47
- ; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
48
- ; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
49
- ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
50
- ; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
51
- ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
52
- ; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
53
- ; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
54
- ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
55
- ; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
56
- ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
57
- ; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
58
- ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
59
- ; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
60
- ; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
61
- ; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
62
- ; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
10
+ ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
11
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
12
+ ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
13
+ ; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
63
14
; CHECK-NEXT: ret void
64
15
;
65
16
entry:
@@ -262,67 +213,40 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
262
213
; CHECK-NEXT: entry:
263
214
; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
264
215
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
265
- ; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
266
216
; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
267
217
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
268
218
; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
269
- ; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
270
219
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
271
- ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
272
- ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
273
- ; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
274
220
; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
275
221
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
276
222
; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
277
- ; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
278
- ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
279
- ; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
280
- ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
281
- ; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
282
223
; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
283
224
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
284
225
; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
285
- ; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
286
- ; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
287
- ; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
288
- ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
289
- ; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
290
226
; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
291
227
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
292
228
; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
293
- ; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
294
- ; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
295
- ; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
296
- ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
297
229
; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
298
- ; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
299
230
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
300
231
; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
301
- ; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
302
- ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
303
- ; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
304
- ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
305
- ; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
306
232
; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
307
233
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
308
234
; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
309
- ; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
310
- ; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
311
- ; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
312
- ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
313
- ; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
314
235
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
315
236
; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
316
- ; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
317
- ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
318
- ; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
319
- ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
320
- ; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
321
237
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
322
238
; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
323
- ; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
324
- ; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
325
- ; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
239
+ ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
240
+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[I1]], i32 0
241
+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I3]], i32 1
242
+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I5]], i32 2
243
+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I7]], i32 3
244
+ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I9]], i32 4
245
+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I11]], i32 5
246
+ ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I13]], i32 6
247
+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[I15]], i32 7
248
+ ; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <8 x float> [[TMP8]], [[TMP0]]
249
+ ; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[ARRAYIDX2]], align 4
326
250
; CHECK-NEXT: ret void
327
251
;
328
252
entry:
@@ -397,27 +321,12 @@ define void @test3(ptr %p, ptr noalias %s) {
397
321
; CHECK-NEXT: entry:
398
322
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
399
323
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
400
- ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
401
- ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
402
- ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
403
- ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
404
- ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
405
- ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
406
- ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
407
324
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23
408
- ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX]], i32 0
409
- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> [[TMP0]], ptr [[ARRAYIDX4]], i32 1
410
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> [[TMP1]], ptr [[ARRAYIDX11]], i32 2
411
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX18]], i32 3
412
- ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX25]], i32 4
413
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX32]], i32 5
414
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX39]], i32 6
415
- ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX46]], i32 7
416
- ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
417
- ; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
418
- ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
419
- ; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <8 x float> [[TMP10]], [[TMP8]]
420
- ; CHECK-NEXT: store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4
325
+ ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
326
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
327
+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
328
+ ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
329
+ ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
421
330
; CHECK-NEXT: ret void
422
331
;
423
332
entry:
0 commit comments