Skip to content

Commit 1a8f0b9

Browse files
davemgreen authored and tstellar committed
[ARM] Clean up some tests, removing dead instructions. NFC
1 parent e2e2057 commit 1a8f0b9

23 files changed

+39
-735
lines changed

llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll

Lines changed: 10 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -48,22 +48,13 @@ entry:
4848
vector.ph: ; preds = %entry
4949
%n.rnd.up = add i32 %N, 3
5050
%n.vec = and i32 %n.rnd.up, -4
51-
%trip.count.minus.1 = add i32 %N, -1
52-
%broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
53-
%broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer
5451
br label %vector.body
5552

5653
vector.body: ; preds = %vector.body, %vector.ph
5754
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
5855
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ]
59-
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
60-
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
61-
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
6256
%tmp = getelementptr inbounds i32, i32* %a, i32 %index
63-
64-
; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12
6557
%tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
66-
6758
%tmp2 = bitcast i32* %tmp to <4 x i32>*
6859
%wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef)
6960
%tmp3 = getelementptr inbounds i32, i32* %b, i32 %index
@@ -147,22 +138,13 @@ entry:
147138
vector.ph: ; preds = %entry
148139
%n.rnd.up = add i32 %N, 3
149140
%n.vec = and i32 %n.rnd.up, -4
150-
%trip.count.minus.1 = add i32 %N, -1
151-
%broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
152-
%broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer
153141
br label %vector.body
154142

155143
vector.body: ; preds = %vector.body, %vector.ph
156144
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
157145
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ]
158-
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
159-
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
160-
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
161146
%tmp = getelementptr inbounds i32, i32* %a, i32 %index
162-
163-
; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12
164147
%tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
165-
166148
%tmp2 = bitcast i32* %tmp to <4 x i32>*
167149
%wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef)
168150
%tmp3 = getelementptr inbounds i32, i32* %b, i32 %index
@@ -205,13 +187,12 @@ define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32
205187
; CHECK-NEXT: cmp.w r12, #0
206188
; CHECK-NEXT: beq .LBB2_4
207189
; CHECK-NEXT: @ %bb.1: @ %vector.ph
208-
; CHECK-NEXT: add.w r4, r12, #3
209-
; CHECK-NEXT: vmov.i32 q1, #0x0
210-
; CHECK-NEXT: bic r4, r4, #3
211-
; CHECK-NEXT: sub.w lr, r4, #4
190+
; CHECK-NEXT: add.w lr, r12, #3
212191
; CHECK-NEXT: movs r4, #1
192+
; CHECK-NEXT: bic lr, lr, #3
193+
; CHECK-NEXT: vmov.i32 q1, #0x0
194+
; CHECK-NEXT: sub.w lr, lr, #4
213195
; CHECK-NEXT: add.w lr, r4, lr, lsr #2
214-
; CHECK-NEXT: movs r4, #0
215196
; CHECK-NEXT: dls lr, lr
216197
; CHECK-NEXT: .LBB2_2: @ %vector.body
217198
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -222,12 +203,11 @@ define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32
222203
; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
223204
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
224205
; CHECK-NEXT: vsub.i32 q1, q2, q1
225-
; CHECK-NEXT: adds r4, #4
206+
; CHECK-NEXT: sub.w r12, r12, #4
226207
; CHECK-NEXT: vpsttt
227208
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
228209
; CHECK-NEXT: vldrwt.u32 q1, [r3], #16
229210
; CHECK-NEXT: vldrwt.u32 q2, [r2], #16
230-
; CHECK-NEXT: sub.w r12, r12, #4
231211
; CHECK-NEXT: vmul.i32 q1, q2, q1
232212
; CHECK-NEXT: vadd.i32 q1, q1, q0
233213
; CHECK-NEXT: le lr, .LBB2_2
@@ -249,22 +229,13 @@ entry:
249229
vector.ph: ; preds = %entry
250230
%n.rnd.up = add i32 %N, 3
251231
%n.vec = and i32 %n.rnd.up, -4
252-
%trip.count.minus.1 = add i32 %N, -1
253-
%broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
254-
%broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer
255232
br label %vector.body
256233

257234
vector.body: ; preds = %vector.body, %vector.ph
258235
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
259236
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ]
260-
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
261-
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
262-
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
263237
%tmp = getelementptr inbounds i32, i32* %a, i32 %index
264-
265-
; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12
266238
%tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
267-
268239
%tmp2 = bitcast i32* %tmp to <4 x i32>*
269240
%wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef)
270241
%tmp3 = getelementptr inbounds i32, i32* %b, i32 %index
@@ -304,13 +275,12 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32*
304275
; CHECK-NEXT: cmp.w r12, #0
305276
; CHECK-NEXT: beq .LBB3_4
306277
; CHECK-NEXT: @ %bb.1: @ %vector.ph
307-
; CHECK-NEXT: add.w r4, r12, #3
308-
; CHECK-NEXT: vmov.i32 q1, #0x0
309-
; CHECK-NEXT: bic r4, r4, #3
310-
; CHECK-NEXT: sub.w lr, r4, #4
278+
; CHECK-NEXT: add.w lr, r12, #3
311279
; CHECK-NEXT: movs r4, #1
280+
; CHECK-NEXT: bic lr, lr, #3
281+
; CHECK-NEXT: vmov.i32 q1, #0x0
282+
; CHECK-NEXT: sub.w lr, lr, #4
312283
; CHECK-NEXT: add.w lr, r4, lr, lsr #2
313-
; CHECK-NEXT: movs r4, #0
314284
; CHECK-NEXT: dls lr, lr
315285
; CHECK-NEXT: .LBB3_2: @ %vector.body
316286
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -326,9 +296,8 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32*
326296
; CHECK-NEXT: vcmpt.i32 ne, q1, zr
327297
; CHECK-NEXT: vldrwe.u32 q1, [r3], #16
328298
; CHECK-NEXT: vldrwe.u32 q2, [r2], #16
329-
; CHECK-NEXT: adds r4, #4
330-
; CHECK-NEXT: vmul.i32 q1, q2, q1
331299
; CHECK-NEXT: sub.w r12, r12, #4
300+
; CHECK-NEXT: vmul.i32 q1, q2, q1
332301
; CHECK-NEXT: vadd.i32 q1, q1, q0
333302
; CHECK-NEXT: le lr, .LBB3_2
334303
; CHECK-NEXT: @ %bb.3: @ %middle.block
@@ -348,22 +317,13 @@ entry:
348317
vector.ph: ; preds = %entry
349318
%n.rnd.up = add i32 %N, 3
350319
%n.vec = and i32 %n.rnd.up, -4
351-
%trip.count.minus.1 = add i32 %N, -1
352-
%broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
353-
%broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer
354320
br label %vector.body
355321

356322
vector.body: ; preds = %vector.body, %vector.ph
357323
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
358324
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ]
359-
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
360-
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
361-
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
362325
%tmp = getelementptr inbounds i32, i32* %a, i32 %index
363-
364-
; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12
365326
%tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
366-
367327
%tmp2 = bitcast i32* %tmp to <4 x i32>*
368328
%wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef)
369329
%tmp3 = getelementptr inbounds i32, i32* %b, i32 %index
@@ -402,11 +362,9 @@ define dso_local void @continue_on_zero(i32* noalias nocapture %arg, i32* noalia
402362
; CHECK-NEXT: it eq
403363
; CHECK-NEXT: popeq {r7, pc}
404364
; CHECK-NEXT: .LBB4_1: @ %bb3
405-
; CHECK-NEXT: movs r3, #0
406365
; CHECK-NEXT: dlstp.32 lr, r2
407366
; CHECK-NEXT: .LBB4_2: @ %bb9
408367
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
409-
; CHECK-NEXT: adds r3, #4
410368
; CHECK-NEXT: vldrw.u32 q0, [r1], #16
411369
; CHECK-NEXT: vpt.i32 ne, q0, zr
412370
; CHECK-NEXT: vldrwt.u32 q1, [r0]
@@ -423,21 +381,12 @@ bb:
423381
bb3: ; preds = %bb
424382
%tmp4 = add i32 %arg2, 3
425383
%tmp5 = and i32 %tmp4, -4
426-
%tmp6 = add i32 %arg2, -1
427-
%tmp7 = insertelement <4 x i32> undef, i32 %tmp6, i32 0
428-
%tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer
429384
br label %bb9
430385

431386
bb9: ; preds = %bb9, %bb3
432387
%tmp10 = phi i32 [ 0, %bb3 ], [ %tmp25, %bb9 ]
433-
%tmp11 = insertelement <4 x i32> undef, i32 %tmp10, i32 0
434-
%tmp12 = shufflevector <4 x i32> %tmp11, <4 x i32> undef, <4 x i32> zeroinitializer
435-
%tmp13 = add <4 x i32> %tmp12, <i32 0, i32 1, i32 2, i32 3>
436388
%tmp14 = getelementptr inbounds i32, i32* %arg1, i32 %tmp10
437-
438-
; %tmp15 = icmp ule <4 x i32> %tmp13, %tmp8
439389
%tmp15 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %tmp10, i32 %arg2)
440-
441390
%tmp16 = bitcast i32* %tmp14 to <4 x i32>*
442391
%tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp16, i32 4, <4 x i1> %tmp15, <4 x i32> undef)
443392
%tmp18 = icmp ne <4 x i32> %tmp17, zeroinitializer
@@ -464,15 +413,13 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i
464413
; CHECK-NEXT: it eq
465414
; CHECK-NEXT: popeq {r7, pc}
466415
; CHECK-NEXT: .LBB5_1: @ %bb4
467-
; CHECK-NEXT: mov.w r12, #0
468416
; CHECK-NEXT: dlstp.32 lr, r3
469417
; CHECK-NEXT: .LBB5_2: @ %bb12
470418
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
471419
; CHECK-NEXT: vldrw.u32 q0, [r0]
472420
; CHECK-NEXT: vptt.i32 ne, q0, zr
473421
; CHECK-NEXT: vcmpt.s32 le, q0, r2
474422
; CHECK-NEXT: vldrwt.u32 q1, [r1], #16
475-
; CHECK-NEXT: add.w r12, r12, #4
476423
; CHECK-NEXT: vmul.i32 q0, q1, q0
477424
; CHECK-NEXT: vpst
478425
; CHECK-NEXT: vstrwt.32 q0, [r0], #16
@@ -486,23 +433,14 @@ bb:
486433
bb4: ; preds = %bb
487434
%tmp5 = add i32 %arg3, 3
488435
%tmp6 = and i32 %tmp5, -4
489-
%tmp7 = add i32 %arg3, -1
490-
%tmp8 = insertelement <4 x i32> undef, i32 %tmp7, i32 0
491-
%tmp9 = shufflevector <4 x i32> %tmp8, <4 x i32> undef, <4 x i32> zeroinitializer
492436
%tmp10 = insertelement <4 x i32> undef, i32 %arg2, i32 0
493437
%tmp11 = shufflevector <4 x i32> %tmp10, <4 x i32> undef, <4 x i32> zeroinitializer
494438
br label %bb12
495439

496440
bb12: ; preds = %bb12, %bb4
497441
%tmp13 = phi i32 [ 0, %bb4 ], [ %tmp30, %bb12 ]
498-
%tmp14 = insertelement <4 x i32> undef, i32 %tmp13, i32 0
499-
%tmp15 = shufflevector <4 x i32> %tmp14, <4 x i32> undef, <4 x i32> zeroinitializer
500-
%tmp16 = add <4 x i32> %tmp15, <i32 0, i32 1, i32 2, i32 3>
501442
%tmp17 = getelementptr inbounds i32, i32* %arg, i32 %tmp13
502-
503-
; %tmp18 = icmp ule <4 x i32> %tmp16, %tmp9
504443
%tmp18= call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %tmp13, i32 %arg3)
505-
506444
%tmp19 = bitcast i32* %tmp17 to <4 x i32>*
507445
%tmp20 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp19, i32 4, <4 x i1> %tmp18, <4 x i32> undef)
508446
%tmp21 = icmp ne <4 x i32> %tmp20, zeroinitializer

llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll

Lines changed: 0 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,9 @@ define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* no
99
; CHECK-NEXT: it eq
1010
; CHECK-NEXT: popeq {r7, pc}
1111
; CHECK-NEXT: .LBB0_1: @ %vector.ph
12-
; CHECK-NEXT: movs r3, #0
1312
; CHECK-NEXT: dlstp.16 lr, r2
1413
; CHECK-NEXT: .LBB0_2: @ %vector.body
1514
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
16-
; CHECK-NEXT: adds r3, #8
1715
; CHECK-NEXT: vldrb.s16 q0, [r1], #8
1816
; CHECK-NEXT: vldrh.u16 q1, [r0]
1917
; CHECK-NEXT: vadd.i16 q0, q1, q0
@@ -28,21 +26,12 @@ entry:
2826
vector.ph: ; preds = %entry
2927
%n.rnd.up = add i32 %N, 7
3028
%n.vec = and i32 %n.rnd.up, -8
31-
%trip.count.minus.1 = add i32 %N, -1
32-
%broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0
33-
%broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer
3429
br label %vector.body
3530

3631
vector.body: ; preds = %vector.body, %vector.ph
3732
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
38-
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
39-
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
40-
%induction = or <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4133
%0 = getelementptr inbounds i8, i8* %b, i32 %index
42-
43-
; %1 = icmp ule <8 x i32> %induction, %broadcast.splat11
4434
%1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
45-
4635
%2 = bitcast i8* %0 to <8 x i8>*
4736
%wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef)
4837
%3 = sext <8 x i8> %wide.masked.load to <8 x i16>
@@ -69,11 +58,9 @@ define dso_local arm_aapcs_vfpcc void @zext_i8(i16* noalias nocapture %a, i8* no
6958
; CHECK-NEXT: it eq
7059
; CHECK-NEXT: popeq {r7, pc}
7160
; CHECK-NEXT: .LBB1_1: @ %vector.ph
72-
; CHECK-NEXT: movs r3, #0
7361
; CHECK-NEXT: dlstp.16 lr, r2
7462
; CHECK-NEXT: .LBB1_2: @ %vector.body
7563
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
76-
; CHECK-NEXT: adds r3, #8
7764
; CHECK-NEXT: vldrb.u16 q0, [r1], #8
7865
; CHECK-NEXT: vldrh.u16 q1, [r0]
7966
; CHECK-NEXT: vadd.i16 q0, q1, q0
@@ -88,21 +75,12 @@ entry:
8875
vector.ph: ; preds = %entry
8976
%n.rnd.up = add i32 %N, 7
9077
%n.vec = and i32 %n.rnd.up, -8
91-
%trip.count.minus.1 = add i32 %N, -1
92-
%broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0
93-
%broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer
9478
br label %vector.body
9579

9680
vector.body: ; preds = %vector.body, %vector.ph
9781
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
98-
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
99-
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
100-
%induction = or <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10182
%0 = getelementptr inbounds i8, i8* %b, i32 %index
102-
103-
; %1 = icmp ule <8 x i32> %induction, %broadcast.splat11
10483
%1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N)
105-
10684
%2 = bitcast i8* %0 to <8 x i8>*
10785
%wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef)
10886
%3 = zext <8 x i8> %wide.masked.load to <8 x i16>
@@ -129,11 +107,9 @@ define dso_local arm_aapcs_vfpcc void @sext_i16(i32* noalias nocapture %a, i16*
129107
; CHECK-NEXT: it eq
130108
; CHECK-NEXT: popeq {r7, pc}
131109
; CHECK-NEXT: .LBB2_1: @ %vector.ph
132-
; CHECK-NEXT: movs r3, #0
133110
; CHECK-NEXT: dlstp.32 lr, r2
134111
; CHECK-NEXT: .LBB2_2: @ %vector.body
135112
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
136-
; CHECK-NEXT: adds r3, #4
137113
; CHECK-NEXT: vldrh.s32 q0, [r1], #8
138114
; CHECK-NEXT: vldrw.u32 q1, [r0]
139115
; CHECK-NEXT: vadd.i32 q0, q1, q0
@@ -148,21 +124,12 @@ entry:
148124
vector.ph: ; preds = %entry
149125
%n.rnd.up = add i32 %N, 3
150126
%n.vec = and i32 %n.rnd.up, -4
151-
%trip.count.minus.1 = add i32 %N, -1
152-
%broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
153-
%broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
154127
br label %vector.body
155128

156129
vector.body: ; preds = %vector.body, %vector.ph
157130
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
158-
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
159-
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
160-
%induction = or <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
161131
%0 = getelementptr inbounds i16, i16* %b, i32 %index
162-
163-
; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9
164132
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
165-
166133
%2 = bitcast i16* %0 to <4 x i16>*
167134
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef)
168135
%3 = sext <4 x i16> %wide.masked.load to <4 x i32>
@@ -189,11 +156,9 @@ define dso_local arm_aapcs_vfpcc void @zext_i16(i32* noalias nocapture %a, i16*
189156
; CHECK-NEXT: it eq
190157
; CHECK-NEXT: popeq {r7, pc}
191158
; CHECK-NEXT: .LBB3_1: @ %vector.ph
192-
; CHECK-NEXT: movs r3, #0
193159
; CHECK-NEXT: dlstp.32 lr, r2
194160
; CHECK-NEXT: .LBB3_2: @ %vector.body
195161
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
196-
; CHECK-NEXT: adds r3, #4
197162
; CHECK-NEXT: vldrh.u32 q0, [r1], #8
198163
; CHECK-NEXT: vldrw.u32 q1, [r0]
199164
; CHECK-NEXT: vadd.i32 q0, q1, q0
@@ -208,21 +173,12 @@ entry:
208173
vector.ph: ; preds = %entry
209174
%n.rnd.up = add i32 %N, 3
210175
%n.vec = and i32 %n.rnd.up, -4
211-
%trip.count.minus.1 = add i32 %N, -1
212-
%broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
213-
%broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer
214176
br label %vector.body
215177

216178
vector.body: ; preds = %vector.body, %vector.ph
217179
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
218-
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
219-
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
220-
%induction = or <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
221180
%0 = getelementptr inbounds i16, i16* %b, i32 %index
222-
223-
; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9
224181
%1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
225-
226182
%2 = bitcast i16* %0 to <4 x i16>*
227183
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef)
228184
%3 = zext <4 x i16> %wide.masked.load to <4 x i32>

0 commit comments

Comments
 (0)