Skip to content

Commit 4b1aa41

Browse files
committed
[DAG] SimplifyMultipleUseDemandedBits - bypass ADD nodes if either operand is zero
The dpbusd_const.ll test change is due to us losing the expanded add reduction pattern, as one of the elements is known to be zero (removing one of the adds from the reduction pyramid). I don't think it's of concern. Noticed while working on #107423.
1 parent 1164bd7 commit 4b1aa41

18 files changed

+334
-322
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,16 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
793793
return Op.getOperand(1);
794794
break;
795795
}
796+
case ISD::ADD: {
797+
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
798+
if (RHSKnown.isZero())
799+
return Op.getOperand(0);
800+
801+
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
802+
if (LHSKnown.isZero())
803+
return Op.getOperand(1);
804+
break;
805+
}
796806
case ISD::SHL: {
797807
// If we are only demanding sign bits then we can use the shift source
798808
// directly.

llvm/test/CodeGen/AArch64/srem-lkk.ll

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
define i32 @fold_srem_positive_odd(i32 %x) {
55
; CHECK-LABEL: fold_srem_positive_odd:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: mov w8, #37253
7+
; CHECK-NEXT: mov w8, #37253 // =0x9185
88
; CHECK-NEXT: movk w8, #44150, lsl #16
99
; CHECK-NEXT: smull x8, w0, w8
1010
; CHECK-NEXT: lsr x8, x8, #32
1111
; CHECK-NEXT: add w8, w8, w0
1212
; CHECK-NEXT: asr w9, w8, #6
1313
; CHECK-NEXT: add w8, w9, w8, lsr #31
14-
; CHECK-NEXT: mov w9, #95
14+
; CHECK-NEXT: mov w9, #95 // =0x5f
1515
; CHECK-NEXT: msub w0, w8, w9, w0
1616
; CHECK-NEXT: ret
1717
%1 = srem i32 %x, 95
@@ -22,13 +22,12 @@ define i32 @fold_srem_positive_odd(i32 %x) {
2222
define i32 @fold_srem_positive_even(i32 %x) {
2323
; CHECK-LABEL: fold_srem_positive_even:
2424
; CHECK: // %bb.0:
25-
; CHECK-NEXT: mov w8, #36849
25+
; CHECK-NEXT: mov w8, #36849 // =0x8ff1
26+
; CHECK-NEXT: mov w9, #1060 // =0x424
2627
; CHECK-NEXT: movk w8, #15827, lsl #16
2728
; CHECK-NEXT: smull x8, w0, w8
28-
; CHECK-NEXT: lsr x9, x8, #63
2929
; CHECK-NEXT: asr x8, x8, #40
30-
; CHECK-NEXT: add w8, w8, w9
31-
; CHECK-NEXT: mov w9, #1060
30+
; CHECK-NEXT: add w8, w8, w8, lsr #31
3231
; CHECK-NEXT: msub w0, w8, w9, w0
3332
; CHECK-NEXT: ret
3433
%1 = srem i32 %x, 1060
@@ -39,13 +38,12 @@ define i32 @fold_srem_positive_even(i32 %x) {
3938
define i32 @fold_srem_negative_odd(i32 %x) {
4039
; CHECK-LABEL: fold_srem_negative_odd:
4140
; CHECK: // %bb.0:
42-
; CHECK-NEXT: mov w8, #65445
41+
; CHECK-NEXT: mov w8, #65445 // =0xffa5
42+
; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
4343
; CHECK-NEXT: movk w8, #42330, lsl #16
4444
; CHECK-NEXT: smull x8, w0, w8
45-
; CHECK-NEXT: lsr x9, x8, #63
4645
; CHECK-NEXT: asr x8, x8, #40
47-
; CHECK-NEXT: add w8, w8, w9
48-
; CHECK-NEXT: mov w9, #-723
46+
; CHECK-NEXT: add w8, w8, w8, lsr #31
4947
; CHECK-NEXT: msub w0, w8, w9, w0
5048
; CHECK-NEXT: ret
5149
%1 = srem i32 %x, -723
@@ -56,13 +54,12 @@ define i32 @fold_srem_negative_odd(i32 %x) {
5654
define i32 @fold_srem_negative_even(i32 %x) {
5755
; CHECK-LABEL: fold_srem_negative_even:
5856
; CHECK: // %bb.0:
59-
; CHECK-NEXT: mov w8, #62439
57+
; CHECK-NEXT: mov w8, #62439 // =0xf3e7
58+
; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
6059
; CHECK-NEXT: movk w8, #64805, lsl #16
6160
; CHECK-NEXT: smull x8, w0, w8
62-
; CHECK-NEXT: lsr x9, x8, #63
6361
; CHECK-NEXT: asr x8, x8, #40
64-
; CHECK-NEXT: add w8, w8, w9
65-
; CHECK-NEXT: mov w9, #-22981
62+
; CHECK-NEXT: add w8, w8, w8, lsr #31
6663
; CHECK-NEXT: msub w0, w8, w9, w0
6764
; CHECK-NEXT: ret
6865
%1 = srem i32 %x, -22981
@@ -74,14 +71,14 @@ define i32 @fold_srem_negative_even(i32 %x) {
7471
define i32 @combine_srem_sdiv(i32 %x) {
7572
; CHECK-LABEL: combine_srem_sdiv:
7673
; CHECK: // %bb.0:
77-
; CHECK-NEXT: mov w8, #37253
74+
; CHECK-NEXT: mov w8, #37253 // =0x9185
7875
; CHECK-NEXT: movk w8, #44150, lsl #16
7976
; CHECK-NEXT: smull x8, w0, w8
8077
; CHECK-NEXT: lsr x8, x8, #32
8178
; CHECK-NEXT: add w8, w8, w0
8279
; CHECK-NEXT: asr w9, w8, #6
8380
; CHECK-NEXT: add w8, w9, w8, lsr #31
84-
; CHECK-NEXT: mov w9, #95
81+
; CHECK-NEXT: mov w9, #95 // =0x5f
8582
; CHECK-NEXT: msub w9, w8, w9, w0
8683
; CHECK-NEXT: add w0, w9, w8
8784
; CHECK-NEXT: ret
@@ -95,14 +92,14 @@ define i32 @combine_srem_sdiv(i32 %x) {
9592
define i64 @dont_fold_srem_i64(i64 %x) {
9693
; CHECK-LABEL: dont_fold_srem_i64:
9794
; CHECK: // %bb.0:
98-
; CHECK-NEXT: mov x8, #58849
95+
; CHECK-NEXT: mov x8, #58849 // =0xe5e1
9996
; CHECK-NEXT: movk x8, #48148, lsl #16
10097
; CHECK-NEXT: movk x8, #33436, lsl #32
10198
; CHECK-NEXT: movk x8, #21399, lsl #48
10299
; CHECK-NEXT: smulh x8, x0, x8
103100
; CHECK-NEXT: asr x9, x8, #5
104101
; CHECK-NEXT: add x8, x9, x8, lsr #63
105-
; CHECK-NEXT: mov w9, #98
102+
; CHECK-NEXT: mov w9, #98 // =0x62
106103
; CHECK-NEXT: msub x0, x8, x9, x0
107104
; CHECK-NEXT: ret
108105
%1 = srem i64 %x, 98

llvm/test/CodeGen/AArch64/srem-vector-lkk.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -263,16 +263,14 @@ define <2 x i32> @fold_srem_v2i32(<2 x i32> %x) {
263263
; CHECK-LABEL: fold_srem_v2i32:
264264
; CHECK: // %bb.0:
265265
; CHECK-NEXT: mov w8, #26215 // =0x6667
266-
; CHECK-NEXT: movi v3.2s, #10
266+
; CHECK-NEXT: movi v2.2s, #10
267267
; CHECK-NEXT: movk w8, #26214, lsl #16
268268
; CHECK-NEXT: dup v1.2s, w8
269269
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
270-
; CHECK-NEXT: ushr v2.2d, v1.2d, #63
271270
; CHECK-NEXT: sshr v1.2d, v1.2d, #34
272-
; CHECK-NEXT: xtn v2.2s, v2.2d
273271
; CHECK-NEXT: xtn v1.2s, v1.2d
274-
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
275-
; CHECK-NEXT: mls v0.2s, v1.2s, v3.2s
272+
; CHECK-NEXT: usra v1.2s, v1.2s, #31
273+
; CHECK-NEXT: mls v0.2s, v1.2s, v2.2s
276274
; CHECK-NEXT: ret
277275
%1 = srem <2 x i32> %x, <i32 10, i32 10>
278276
ret <2 x i32> %1

llvm/test/CodeGen/AMDGPU/srem.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ define amdgpu_kernel void @srem_i16_7(ptr addrspace(1) %out, ptr addrspace(1) %i
3939
; TAHITI-NEXT: s_waitcnt vmcnt(0)
4040
; TAHITI-NEXT: v_readfirstlane_b32 s0, v0
4141
; TAHITI-NEXT: s_mulk_i32 s0, 0x4925
42-
; TAHITI-NEXT: s_lshr_b32 s1, s0, 31
4342
; TAHITI-NEXT: s_ashr_i32 s0, s0, 17
43+
; TAHITI-NEXT: s_bfe_u32 s1, s0, 0x1000f
4444
; TAHITI-NEXT: s_add_i32 s0, s0, s1
4545
; TAHITI-NEXT: s_mul_i32 s0, s0, 7
4646
; TAHITI-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
@@ -72,7 +72,7 @@ define amdgpu_kernel void @srem_i16_7(ptr addrspace(1) %out, ptr addrspace(1) %i
7272
; EG: ; %bb.0:
7373
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
7474
; EG-NEXT: TEX 0 @6
75-
; EG-NEXT: ALU 22, @9, KC0[CB0:0-32], KC1[]
75+
; EG-NEXT: ALU 23, @9, KC0[CB0:0-32], KC1[]
7676
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
7777
; EG-NEXT: CF_END
7878
; EG-NEXT: PAD
@@ -85,10 +85,11 @@ define amdgpu_kernel void @srem_i16_7(ptr addrspace(1) %out, ptr addrspace(1) %i
8585
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
8686
; EG-NEXT: MULLO_INT * T0.Y, PV.W, literal.x,
8787
; EG-NEXT: 18725(2.623931e-41), 0(0.000000e+00)
88-
; EG-NEXT: ASHR T0.W, PS, literal.x,
89-
; EG-NEXT: LSHR * T1.W, PS, literal.y,
90-
; EG-NEXT: 17(2.382207e-44), 31(4.344025e-44)
91-
; EG-NEXT: ADD_INT * T0.W, PV.W, PS,
88+
; EG-NEXT: ASHR * T0.W, PS, literal.x,
89+
; EG-NEXT: 17(2.382207e-44), 0(0.000000e+00)
90+
; EG-NEXT: BFE_UINT * T1.W, PV.W, literal.x, 1,
91+
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
92+
; EG-NEXT: ADD_INT * T0.W, T0.W, PV.W,
9293
; EG-NEXT: MULLO_INT * T0.Y, PV.W, literal.x,
9394
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
9495
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,

llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,45 +11,35 @@ define dso_local fastcc void @BuildVectorICE() unnamed_addr {
1111
; 32BIT-NEXT: stwu 1, -48(1)
1212
; 32BIT-NEXT: .cfi_def_cfa_offset 48
1313
; 32BIT-NEXT: lxvw4x 34, 0, 3
14-
; 32BIT-NEXT: li 3, .LCPI0_0@l
15-
; 32BIT-NEXT: lis 4, .LCPI0_0@ha
1614
; 32BIT-NEXT: li 5, 0
17-
; 32BIT-NEXT: xxlxor 36, 36, 36
18-
; 32BIT-NEXT: lxvw4x 35, 4, 3
1915
; 32BIT-NEXT: addi 3, 1, 16
2016
; 32BIT-NEXT: addi 4, 1, 32
21-
; 32BIT-NEXT: .p2align 4
17+
; 32BIT-NEXT: xxspltw 35, 34, 1
18+
; 32BIT-NEXT: .p2align 5
2219
; 32BIT-NEXT: .LBB0_1: # %while.body
2320
; 32BIT-NEXT: #
2421
; 32BIT-NEXT: stw 5, 16(1)
25-
; 32BIT-NEXT: lxvw4x 37, 0, 3
26-
; 32BIT-NEXT: vperm 5, 5, 4, 3
27-
; 32BIT-NEXT: vadduwm 5, 2, 5
28-
; 32BIT-NEXT: xxspltw 32, 37, 1
29-
; 32BIT-NEXT: vadduwm 5, 5, 0
30-
; 32BIT-NEXT: stxvw4x 37, 0, 4
22+
; 32BIT-NEXT: lxvw4x 36, 0, 3
23+
; 32BIT-NEXT: vadduwm 4, 2, 4
24+
; 32BIT-NEXT: vadduwm 4, 4, 3
25+
; 32BIT-NEXT: stxvw4x 36, 0, 4
3126
; 32BIT-NEXT: lwz 5, 32(1)
3227
; 32BIT-NEXT: b .LBB0_1
3328
;
3429
; 64BIT-LABEL: BuildVectorICE:
3530
; 64BIT: # %bb.0: # %entry
3631
; 64BIT-NEXT: lxvw4x 34, 0, 3
3732
; 64BIT-NEXT: li 3, 0
38-
; 64BIT-NEXT: rldimi 3, 3, 32, 0
39-
; 64BIT-NEXT: mtfprd 0, 3
40-
; 64BIT-NEXT: li 3, 0
41-
; 64BIT-NEXT: .p2align 4
33+
; 64BIT-NEXT: xxspltw 35, 34, 1
34+
; 64BIT-NEXT: .p2align 5
4235
; 64BIT-NEXT: .LBB0_1: # %while.body
4336
; 64BIT-NEXT: #
44-
; 64BIT-NEXT: li 4, 0
45-
; 64BIT-NEXT: rldimi 4, 3, 32, 0
46-
; 64BIT-NEXT: mtfprd 1, 4
47-
; 64BIT-NEXT: xxmrghd 35, 1, 0
48-
; 64BIT-NEXT: vadduwm 3, 2, 3
49-
; 64BIT-NEXT: xxspltw 36, 35, 1
50-
; 64BIT-NEXT: vadduwm 3, 3, 4
51-
; 64BIT-NEXT: xxsldwi 1, 35, 35, 3
52-
; 64BIT-NEXT: mffprwz 3, 1
37+
; 64BIT-NEXT: sldi 3, 3, 32
38+
; 64BIT-NEXT: mtvsrd 36, 3
39+
; 64BIT-NEXT: vadduwm 4, 2, 4
40+
; 64BIT-NEXT: vadduwm 4, 4, 3
41+
; 64BIT-NEXT: xxsldwi 0, 36, 36, 3
42+
; 64BIT-NEXT: mffprwz 3, 0
5343
; 64BIT-NEXT: b .LBB0_1
5444
entry:
5545
br label %while.body

llvm/test/CodeGen/RISCV/div-by-constant.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,8 @@ define i32 @sdiv_constant_srai(i32 %a) nounwind {
310310
; RV64-NEXT: lui a1, 419430
311311
; RV64-NEXT: addiw a1, a1, 1639
312312
; RV64-NEXT: mul a0, a0, a1
313-
; RV64-NEXT: srli a1, a0, 63
314313
; RV64-NEXT: srai a0, a0, 33
314+
; RV64-NEXT: srliw a1, a0, 31
315315
; RV64-NEXT: add a0, a0, a1
316316
; RV64-NEXT: ret
317317
%1 = sdiv i32 %a, 5
@@ -755,8 +755,9 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
755755
; RV32IM-NEXT: lui a1, 6
756756
; RV32IM-NEXT: addi a1, a1, 1639
757757
; RV32IM-NEXT: mul a0, a0, a1
758-
; RV32IM-NEXT: srli a1, a0, 31
759758
; RV32IM-NEXT: srai a0, a0, 17
759+
; RV32IM-NEXT: slli a1, a0, 16
760+
; RV32IM-NEXT: srli a1, a1, 31
760761
; RV32IM-NEXT: add a0, a0, a1
761762
; RV32IM-NEXT: ret
762763
;
@@ -766,8 +767,9 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
766767
; RV32IMZB-NEXT: lui a1, 6
767768
; RV32IMZB-NEXT: addi a1, a1, 1639
768769
; RV32IMZB-NEXT: mul a0, a0, a1
769-
; RV32IMZB-NEXT: srli a1, a0, 31
770770
; RV32IMZB-NEXT: srai a0, a0, 17
771+
; RV32IMZB-NEXT: slli a1, a0, 16
772+
; RV32IMZB-NEXT: srli a1, a1, 31
771773
; RV32IMZB-NEXT: add a0, a0, a1
772774
; RV32IMZB-NEXT: ret
773775
;

llvm/test/CodeGen/RISCV/div.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -640,8 +640,8 @@ define i32 @sdiv_constant(i32 %a) nounwind {
640640
; RV64IM-NEXT: lui a1, 419430
641641
; RV64IM-NEXT: addiw a1, a1, 1639
642642
; RV64IM-NEXT: mul a0, a0, a1
643-
; RV64IM-NEXT: srli a1, a0, 63
644643
; RV64IM-NEXT: srai a0, a0, 33
644+
; RV64IM-NEXT: srliw a1, a0, 31
645645
; RV64IM-NEXT: add a0, a0, a1
646646
; RV64IM-NEXT: ret
647647
%1 = sdiv i32 %a, 5
@@ -1169,8 +1169,9 @@ define i16 @sdiv16_constant(i16 %a) nounwind {
11691169
; RV32IM-NEXT: lui a1, 6
11701170
; RV32IM-NEXT: addi a1, a1, 1639
11711171
; RV32IM-NEXT: mul a0, a0, a1
1172-
; RV32IM-NEXT: srli a1, a0, 31
11731172
; RV32IM-NEXT: srai a0, a0, 17
1173+
; RV32IM-NEXT: slli a1, a0, 16
1174+
; RV32IM-NEXT: srli a1, a1, 31
11741175
; RV32IM-NEXT: add a0, a0, a1
11751176
; RV32IM-NEXT: ret
11761177
;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,8 +1004,8 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
10041004
; RV64M-NEXT: lui a1, 322639
10051005
; RV64M-NEXT: addiw a1, a1, -945
10061006
; RV64M-NEXT: mul a0, a0, a1
1007-
; RV64M-NEXT: srli a1, a0, 63
10081007
; RV64M-NEXT: srai a0, a0, 34
1008+
; RV64M-NEXT: srliw a1, a0, 31
10091009
; RV64M-NEXT: add a0, a0, a1
10101010
; RV64M-NEXT: ret
10111011
%bo = sdiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13487,7 +13487,6 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
1348713487
; RV32ZVE32F-NEXT: vid.v v8
1348813488
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4
1348913489
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
13490-
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
1349113490
; RV32ZVE32F-NEXT: lw a3, 0(a1)
1349213491
; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill
1349313492
; RV32ZVE32F-NEXT: lw a1, 4(a1)
@@ -13587,10 +13586,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
1358713586
; RV32ZVE32F-NEXT: lw s9, 4(a1)
1358813587
; RV32ZVE32F-NEXT: lw s10, 0(a2)
1358913588
; RV32ZVE32F-NEXT: lw s11, 4(a2)
13590-
; RV32ZVE32F-NEXT: lw t5, 0(a3)
13591-
; RV32ZVE32F-NEXT: lw t6, 4(a3)
13592-
; RV32ZVE32F-NEXT: lw s2, 0(a4)
13593-
; RV32ZVE32F-NEXT: lw s3, 4(a4)
13589+
; RV32ZVE32F-NEXT: lw s4, 0(a3)
13590+
; RV32ZVE32F-NEXT: lw s5, 4(a3)
13591+
; RV32ZVE32F-NEXT: lw s6, 0(a4)
13592+
; RV32ZVE32F-NEXT: lw s7, 4(a4)
1359413593
; RV32ZVE32F-NEXT: lw a2, 336(sp)
1359513594
; RV32ZVE32F-NEXT: lw a4, 340(sp)
1359613595
; RV32ZVE32F-NEXT: lw a5, 344(sp)
@@ -13607,8 +13606,8 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
1360713606
; RV32ZVE32F-NEXT: lw a6, 356(sp)
1360813607
; RV32ZVE32F-NEXT: lw t3, 360(sp)
1360913608
; RV32ZVE32F-NEXT: lw t4, 364(sp)
13610-
; RV32ZVE32F-NEXT: lw s4, 0(a5)
13611-
; RV32ZVE32F-NEXT: sw s4, 116(sp) # 4-byte Folded Spill
13609+
; RV32ZVE32F-NEXT: lw t5, 0(a5)
13610+
; RV32ZVE32F-NEXT: sw t5, 116(sp) # 4-byte Folded Spill
1361213611
; RV32ZVE32F-NEXT: lw a5, 4(a5)
1361313612
; RV32ZVE32F-NEXT: sw a5, 112(sp) # 4-byte Folded Spill
1361413613
; RV32ZVE32F-NEXT: lw a5, 0(a6)
@@ -13626,10 +13625,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
1362613625
; RV32ZVE32F-NEXT: lw a6, 372(sp)
1362713626
; RV32ZVE32F-NEXT: lw t3, 376(sp)
1362813627
; RV32ZVE32F-NEXT: lw t4, 380(sp)
13629-
; RV32ZVE32F-NEXT: lw s4, 0(a5)
13630-
; RV32ZVE32F-NEXT: lw s5, 4(a5)
13631-
; RV32ZVE32F-NEXT: lw s6, 0(a6)
13632-
; RV32ZVE32F-NEXT: lw s7, 4(a6)
13628+
; RV32ZVE32F-NEXT: lw t5, 0(a5)
13629+
; RV32ZVE32F-NEXT: lw t6, 4(a5)
13630+
; RV32ZVE32F-NEXT: lw s2, 0(a6)
13631+
; RV32ZVE32F-NEXT: lw s3, 4(a6)
1363313632
; RV32ZVE32F-NEXT: lw a5, 0(t3)
1363413633
; RV32ZVE32F-NEXT: lw a6, 4(t3)
1363513634
; RV32ZVE32F-NEXT: lw t3, 0(t4)
@@ -13642,10 +13641,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
1364213641
; RV32ZVE32F-NEXT: sw t0, 164(a0)
1364313642
; RV32ZVE32F-NEXT: sw t1, 168(a0)
1364413643
; RV32ZVE32F-NEXT: sw t2, 172(a0)
13645-
; RV32ZVE32F-NEXT: sw t5, 144(a0)
13646-
; RV32ZVE32F-NEXT: sw t6, 148(a0)
13647-
; RV32ZVE32F-NEXT: sw s2, 152(a0)
13648-
; RV32ZVE32F-NEXT: sw s3, 156(a0)
13644+
; RV32ZVE32F-NEXT: sw s4, 144(a0)
13645+
; RV32ZVE32F-NEXT: sw s5, 148(a0)
13646+
; RV32ZVE32F-NEXT: sw s6, 152(a0)
13647+
; RV32ZVE32F-NEXT: sw s7, 156(a0)
1364913648
; RV32ZVE32F-NEXT: sw s8, 128(a0)
1365013649
; RV32ZVE32F-NEXT: sw s9, 132(a0)
1365113650
; RV32ZVE32F-NEXT: sw s10, 136(a0)
@@ -13686,10 +13685,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
1368613685
; RV32ZVE32F-NEXT: sw a6, 244(a0)
1368713686
; RV32ZVE32F-NEXT: sw t3, 248(a0)
1368813687
; RV32ZVE32F-NEXT: sw t4, 252(a0)
13689-
; RV32ZVE32F-NEXT: sw s4, 224(a0)
13690-
; RV32ZVE32F-NEXT: sw s5, 228(a0)
13691-
; RV32ZVE32F-NEXT: sw s6, 232(a0)
13692-
; RV32ZVE32F-NEXT: sw s7, 236(a0)
13688+
; RV32ZVE32F-NEXT: sw t5, 224(a0)
13689+
; RV32ZVE32F-NEXT: sw t6, 228(a0)
13690+
; RV32ZVE32F-NEXT: sw s2, 232(a0)
13691+
; RV32ZVE32F-NEXT: sw s3, 236(a0)
1369313692
; RV32ZVE32F-NEXT: sw ra, 208(a0)
1369413693
; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
1369513694
; RV32ZVE32F-NEXT: sw a1, 212(a0)

0 commit comments

Comments
 (0)