Skip to content

Commit 4e0bd3f

Browse files
authored
[MachineLICM] Hoist copies of constant physical register (#93285)
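Previously, we only checked whether the source of the copy was a virtual register, which prevented some potential hoists. Copies whose source is a constant physical register are now treated as hoistable as well. This yields improvements in the AArch64/RISCV tests.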
1 parent 5162027 commit 4e0bd3f

12 files changed

+218
-165
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1269,8 +1269,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
12691269
Register DefReg = MI.getOperand(0).getReg();
12701270
if (DefReg.isVirtual() &&
12711271
all_of(MI.uses(),
1272-
[](const MachineOperand &UseOp) {
1273-
return !UseOp.isReg() || UseOp.getReg().isVirtual();
1272+
[this](const MachineOperand &UseOp) {
1273+
return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1274+
MRI->isConstantPhysReg(UseOp.getReg());
12741275
}) &&
12751276
IsLoopInvariantInst(MI, CurLoop) &&
12761277
any_of(MRI->use_nodbg_instructions(DefReg),

llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -55,15 +55,15 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
5555
define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
5656
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
5757
; CHECK: // %bb.0:
58-
; CHECK-NEXT: mov x8, x0
5958
; CHECK-NEXT: .LBB3_1: // %atomicrmw.start
6059
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
61-
; CHECK-NEXT: ldaxr x0, [x8]
62-
; CHECK-NEXT: cmp x0, x1
63-
; CHECK-NEXT: csinc x9, xzr, x0, hs
64-
; CHECK-NEXT: stlxr w10, x9, [x8]
60+
; CHECK-NEXT: ldaxr x8, [x0]
61+
; CHECK-NEXT: cmp x8, x1
62+
; CHECK-NEXT: csinc x9, xzr, x8, hs
63+
; CHECK-NEXT: stlxr w10, x9, [x0]
6564
; CHECK-NEXT: cbnz w10, .LBB3_1
6665
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
66+
; CHECK-NEXT: mov x0, x8
6767
; CHECK-NEXT: ret
6868
%result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
6969
ret i64 %result

llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -8,57 +8,57 @@ declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x
88
define fastcc i8 @allocno_reload_assign() {
99
; CHECK-LABEL: allocno_reload_assign:
1010
; CHECK: // %bb.0:
11-
; CHECK-NEXT: mov z0.b, #0 // =0x0
12-
; CHECK-NEXT: mov z16.d, #0 // =0x0
11+
; CHECK-NEXT: fmov d0, xzr
1312
; CHECK-NEXT: ptrue p0.d
14-
; CHECK-NEXT: ptrue p1.b
13+
; CHECK-NEXT: mov z16.d, #0 // =0x0
14+
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
15+
; CHECK-NEXT: uzp1 p0.s, p0.s, p0.s
16+
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
17+
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
18+
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
19+
; CHECK-NEXT: ptrue p0.b
20+
; CHECK-NEXT: fmov w8, s0
21+
; CHECK-NEXT: mov z0.b, #0 // =0x0
22+
; CHECK-NEXT: sbfx x8, x8, #0, #1
1523
; CHECK-NEXT: uunpklo z1.h, z0.b
1624
; CHECK-NEXT: uunpkhi z0.h, z0.b
25+
; CHECK-NEXT: whilelo p1.b, xzr, x8
26+
; CHECK-NEXT: not p0.b, p0/z, p1.b
1727
; CHECK-NEXT: uunpklo z2.s, z1.h
1828
; CHECK-NEXT: uunpkhi z3.s, z1.h
1929
; CHECK-NEXT: uunpklo z5.s, z0.h
2030
; CHECK-NEXT: uunpkhi z7.s, z0.h
31+
; CHECK-NEXT: punpklo p1.h, p0.b
32+
; CHECK-NEXT: punpkhi p0.h, p0.b
33+
; CHECK-NEXT: punpklo p2.h, p1.b
2134
; CHECK-NEXT: uunpklo z0.d, z2.s
2235
; CHECK-NEXT: uunpkhi z1.d, z2.s
36+
; CHECK-NEXT: punpkhi p3.h, p1.b
2337
; CHECK-NEXT: uunpklo z2.d, z3.s
2438
; CHECK-NEXT: uunpkhi z3.d, z3.s
39+
; CHECK-NEXT: punpklo p5.h, p0.b
2540
; CHECK-NEXT: uunpklo z4.d, z5.s
2641
; CHECK-NEXT: uunpkhi z5.d, z5.s
42+
; CHECK-NEXT: punpkhi p7.h, p0.b
2743
; CHECK-NEXT: uunpklo z6.d, z7.s
2844
; CHECK-NEXT: uunpkhi z7.d, z7.s
29-
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
30-
; CHECK-NEXT: fmov d17, xzr
31-
; CHECK-NEXT: cmpeq p2.d, p0/z, z17.d, #0
32-
; CHECK-NEXT: uzp1 p2.s, p2.s, p0.s
33-
; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h
34-
; CHECK-NEXT: uzp1 p2.b, p2.b, p0.b
35-
; CHECK-NEXT: mov z17.b, p2/z, #1 // =0x1
36-
; CHECK-NEXT: fmov w8, s17
37-
; CHECK-NEXT: sbfx x8, x8, #0, #1
38-
; CHECK-NEXT: whilelo p2.b, xzr, x8
39-
; CHECK-NEXT: not p2.b, p1/z, p2.b
40-
; CHECK-NEXT: punpklo p3.h, p2.b
41-
; CHECK-NEXT: punpkhi p2.h, p2.b
42-
; CHECK-NEXT: punpklo p4.h, p3.b
43-
; CHECK-NEXT: punpkhi p3.h, p3.b
44-
; CHECK-NEXT: punpklo p5.h, p4.b
45-
; CHECK-NEXT: punpkhi p4.h, p4.b
46-
; CHECK-NEXT: st1b { z0.d }, p5, [z16.d]
47-
; CHECK-NEXT: st1b { z1.d }, p4, [z16.d]
48-
; CHECK-NEXT: punpklo p4.h, p3.b
45+
; CHECK-NEXT: punpklo p0.h, p2.b
46+
; CHECK-NEXT: punpkhi p1.h, p2.b
47+
; CHECK-NEXT: punpklo p2.h, p3.b
4948
; CHECK-NEXT: punpkhi p3.h, p3.b
50-
; CHECK-NEXT: st1b { z2.d }, p4, [z16.d]
49+
; CHECK-NEXT: punpklo p4.h, p5.b
50+
; CHECK-NEXT: punpkhi p5.h, p5.b
51+
; CHECK-NEXT: punpklo p6.h, p7.b
52+
; CHECK-NEXT: punpkhi p7.h, p7.b
53+
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
54+
; CHECK-NEXT: st1b { z0.d }, p0, [z16.d]
55+
; CHECK-NEXT: st1b { z1.d }, p1, [z16.d]
56+
; CHECK-NEXT: st1b { z2.d }, p2, [z16.d]
5157
; CHECK-NEXT: st1b { z3.d }, p3, [z16.d]
52-
; CHECK-NEXT: punpklo p3.h, p2.b
53-
; CHECK-NEXT: punpkhi p2.h, p2.b
54-
; CHECK-NEXT: punpklo p4.h, p3.b
55-
; CHECK-NEXT: punpkhi p3.h, p3.b
5658
; CHECK-NEXT: st1b { z4.d }, p4, [z16.d]
57-
; CHECK-NEXT: st1b { z5.d }, p3, [z16.d]
58-
; CHECK-NEXT: punpklo p3.h, p2.b
59-
; CHECK-NEXT: punpkhi p2.h, p2.b
60-
; CHECK-NEXT: st1b { z6.d }, p3, [z16.d]
61-
; CHECK-NEXT: st1b { z7.d }, p2, [z16.d]
59+
; CHECK-NEXT: st1b { z5.d }, p5, [z16.d]
60+
; CHECK-NEXT: st1b { z6.d }, p6, [z16.d]
61+
; CHECK-NEXT: st1b { z7.d }, p7, [z16.d]
6262
; CHECK-NEXT: b .LBB0_1
6363
br label %1
6464

llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,20 @@ target triple = "arm64-apple-macosx13.5.0"
99
define i32 @nsis_BZ2_bzDecompress(ptr %pos.i, i1 %cmp661.not3117.i, i1 %exitcond.not.i) {
1010
; CHECK-LABEL: nsis_BZ2_bzDecompress:
1111
; CHECK: // %bb.0: // %entry
12+
; CHECK-NEXT: mov x8, xzr
1213
; CHECK-NEXT: b .LBB0_2
1314
; CHECK-NEXT: .LBB0_1: // %while.end671.i
1415
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
15-
; CHECK-NEXT: strb w8, [x0]
16+
; CHECK-NEXT: strb w9, [x0]
1617
; CHECK-NEXT: tbnz w2, #0, .LBB0_4
1718
; CHECK-NEXT: .LBB0_2: // %for.body653.i
1819
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
19-
; CHECK-NEXT: ldrb w8, [x0]
20+
; CHECK-NEXT: ldrb w9, [x0]
2021
; CHECK-NEXT: tbnz w1, #0, .LBB0_1
2122
; CHECK-NEXT: // %bb.3: // %while.body663.i
2223
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
23-
; CHECK-NEXT: mov x9, xzr
24-
; CHECK-NEXT: ldrb w9, [x9]
25-
; CHECK-NEXT: strb wzr, [x0, x9]
24+
; CHECK-NEXT: ldrb w10, [x8]
25+
; CHECK-NEXT: strb wzr, [x0, x10]
2626
; CHECK-NEXT: b .LBB0_1
2727
; CHECK-NEXT: .LBB0_4: // %for.end677.i
2828
; CHECK-NEXT: mov w0, wzr

llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll

Lines changed: 76 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -8,36 +8,39 @@
88
define dso_local void @run_test() local_unnamed_addr uwtable {
99
; CHECK-LABEL: run_test:
1010
; CHECK: // %bb.0: // %entry
11-
; CHECK-NEXT: sub sp, sp, #192
12-
; CHECK-NEXT: .cfi_def_cfa_offset 192
11+
; CHECK-NEXT: sub sp, sp, #208
12+
; CHECK-NEXT: .cfi_def_cfa_offset 208
1313
; CHECK-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill
1414
; CHECK-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill
1515
; CHECK-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill
1616
; CHECK-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill
17-
; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
18-
; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
17+
; CHECK-NEXT: str x23, [sp, #160] // 8-byte Folded Spill
18+
; CHECK-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
19+
; CHECK-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
1920
; CHECK-NEXT: .cfi_offset w19, -8
2021
; CHECK-NEXT: .cfi_offset w20, -16
2122
; CHECK-NEXT: .cfi_offset w21, -24
2223
; CHECK-NEXT: .cfi_offset w22, -32
23-
; CHECK-NEXT: .cfi_offset b8, -40
24-
; CHECK-NEXT: .cfi_offset b9, -48
25-
; CHECK-NEXT: .cfi_offset b10, -56
26-
; CHECK-NEXT: .cfi_offset b11, -64
27-
; CHECK-NEXT: .cfi_offset b12, -72
28-
; CHECK-NEXT: .cfi_offset b13, -80
29-
; CHECK-NEXT: .cfi_offset b14, -88
30-
; CHECK-NEXT: .cfi_offset b15, -96
24+
; CHECK-NEXT: .cfi_offset w23, -48
25+
; CHECK-NEXT: .cfi_offset b8, -56
26+
; CHECK-NEXT: .cfi_offset b9, -64
27+
; CHECK-NEXT: .cfi_offset b10, -72
28+
; CHECK-NEXT: .cfi_offset b11, -80
29+
; CHECK-NEXT: .cfi_offset b12, -88
30+
; CHECK-NEXT: .cfi_offset b13, -96
31+
; CHECK-NEXT: .cfi_offset b14, -104
32+
; CHECK-NEXT: .cfi_offset b15, -112
3133
; CHECK-NEXT: movi v2.2d, #0000000000000000
3234
; CHECK-NEXT: // implicit-def: $q1
3335
; CHECK-NEXT: mov x8, xzr
34-
; CHECK-NEXT: mov x9, xzr
35-
; CHECK-NEXT: adrp x10, B+48
36-
; CHECK-NEXT: add x10, x10, :lo12:B+48
37-
; CHECK-NEXT: adrp x11, A
38-
; CHECK-NEXT: add x11, x11, :lo12:A
36+
; CHECK-NEXT: adrp x9, B+48
37+
; CHECK-NEXT: add x9, x9, :lo12:B+48
38+
; CHECK-NEXT: adrp x10, A
39+
; CHECK-NEXT: add x10, x10, :lo12:A
40+
; CHECK-NEXT: mov x11, xzr
3941
; CHECK-NEXT: // kill: killed $q1
4042
; CHECK-NEXT: // implicit-def: $q1
43+
; CHECK-NEXT: mov x12, xzr
4144
; CHECK-NEXT: // implicit-def: $q0
4245
; CHECK-NEXT: // implicit-def: $q3
4346
; CHECK-NEXT: // implicit-def: $q4
@@ -69,103 +72,102 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
6972
; CHECK-NEXT: // kill: killed $q1
7073
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
7174
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
72-
; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
73-
; CHECK-NEXT: ldr q14, [x8]
74-
; CHECK-NEXT: mov x12, xzr
75-
; CHECK-NEXT: ldr x14, [x12]
7675
; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill
77-
; CHECK-NEXT: add x19, x11, x8
78-
; CHECK-NEXT: fmov x15, d14
79-
; CHECK-NEXT: mov x16, v14.d[1]
80-
; CHECK-NEXT: ldr q15, [x12]
81-
; CHECK-NEXT: ldr q14, [x10], #64
76+
; CHECK-NEXT: ldr q15, [x8]
77+
; CHECK-NEXT: ldr x15, [x8]
78+
; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
79+
; CHECK-NEXT: add x20, x10, x11
8280
; CHECK-NEXT: mov v8.16b, v28.16b
83-
; CHECK-NEXT: fmov x13, d15
84-
; CHECK-NEXT: mov x18, v15.d[1]
81+
; CHECK-NEXT: fmov x2, d15
82+
; CHECK-NEXT: mov x17, v15.d[1]
83+
; CHECK-NEXT: ldr q14, [x8]
8584
; CHECK-NEXT: mov v28.16b, v24.16b
86-
; CHECK-NEXT: mul x17, x15, x14
87-
; CHECK-NEXT: mov x12, v14.d[1]
88-
; CHECK-NEXT: fmov x4, d14
8985
; CHECK-NEXT: mov v24.16b, v20.16b
9086
; CHECK-NEXT: mov v20.16b, v17.16b
87+
; CHECK-NEXT: fmov x13, d14
88+
; CHECK-NEXT: mov x16, v14.d[1]
9189
; CHECK-NEXT: mov v17.16b, v5.16b
92-
; CHECK-NEXT: mul x1, x16, x14
90+
; CHECK-NEXT: mul x3, x2, x15
91+
; CHECK-NEXT: ldr q14, [x9], #64
9392
; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload
94-
; CHECK-NEXT: ldr x5, [x8]
95-
; CHECK-NEXT: ldr x19, [x19, #128]
93+
; CHECK-NEXT: ldr x6, [x8]
94+
; CHECK-NEXT: ldr x20, [x20, #128]
95+
; CHECK-NEXT: mul x1, x17, x15
96+
; CHECK-NEXT: mov x14, v14.d[1]
97+
; CHECK-NEXT: fmov x5, d14
9698
; CHECK-NEXT: mov v29.16b, v21.16b
9799
; CHECK-NEXT: mov v21.16b, v0.16b
98-
; CHECK-NEXT: mul x0, x13, x14
99100
; CHECK-NEXT: mov v25.16b, v6.16b
101+
; CHECK-NEXT: mul x18, x13, x15
100102
; CHECK-NEXT: mov v6.16b, v2.16b
101-
; CHECK-NEXT: fmov d15, x17
102103
; CHECK-NEXT: mov v26.16b, v22.16b
104+
; CHECK-NEXT: fmov d15, x3
103105
; CHECK-NEXT: mov v22.16b, v18.16b
104-
; CHECK-NEXT: mul x2, x18, x14
105106
; CHECK-NEXT: mov v18.16b, v7.16b
107+
; CHECK-NEXT: mul x0, x16, x15
106108
; CHECK-NEXT: mov v7.16b, v3.16b
107109
; CHECK-NEXT: mov v16.16b, v4.16b
108-
; CHECK-NEXT: add x8, x8, #8
109-
; CHECK-NEXT: add x9, x9, #1
110+
; CHECK-NEXT: add x11, x11, #8
111+
; CHECK-NEXT: add x12, x12, #1
110112
; CHECK-NEXT: mov v15.d[1], x1
111-
; CHECK-NEXT: mul x3, x12, x14
112-
; CHECK-NEXT: cmp x8, #64
113-
; CHECK-NEXT: fmov d14, x0
114-
; CHECK-NEXT: mul x14, x4, x14
113+
; CHECK-NEXT: mul x4, x14, x15
114+
; CHECK-NEXT: cmp x11, #64
115+
; CHECK-NEXT: fmov d14, x18
116+
; CHECK-NEXT: mul x15, x5, x15
115117
; CHECK-NEXT: add v5.2d, v5.2d, v15.2d
116-
; CHECK-NEXT: mul x20, x15, x5
117-
; CHECK-NEXT: mov v14.d[1], x2
118-
; CHECK-NEXT: mul x15, x15, x19
119-
; CHECK-NEXT: fmov d0, x14
118+
; CHECK-NEXT: mul x21, x2, x6
119+
; CHECK-NEXT: mov v14.d[1], x0
120+
; CHECK-NEXT: mul x2, x2, x20
121+
; CHECK-NEXT: fmov d0, x15
120122
; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill
121123
; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload
122-
; CHECK-NEXT: mul x21, x13, x19
124+
; CHECK-NEXT: mul x22, x13, x20
123125
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
124-
; CHECK-NEXT: fmov d3, x20
125-
; CHECK-NEXT: mul x7, x16, x5
126-
; CHECK-NEXT: mov v0.d[1], x3
127-
; CHECK-NEXT: fmov d1, x15
128-
; CHECK-NEXT: mul x16, x16, x19
126+
; CHECK-NEXT: fmov d3, x21
127+
; CHECK-NEXT: mul x19, x17, x6
128+
; CHECK-NEXT: mov v0.d[1], x4
129+
; CHECK-NEXT: fmov d1, x2
130+
; CHECK-NEXT: mul x17, x17, x20
129131
; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill
130132
; CHECK-NEXT: add v5.2d, v13.2d, v14.2d
131-
; CHECK-NEXT: fmov d2, x21
133+
; CHECK-NEXT: fmov d2, x22
132134
; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload
133-
; CHECK-NEXT: mul x6, x18, x5
135+
; CHECK-NEXT: mul x7, x16, x6
134136
; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload
135-
; CHECK-NEXT: mov v3.d[1], x7
137+
; CHECK-NEXT: mov v3.d[1], x19
136138
; CHECK-NEXT: add v13.2d, v13.2d, v0.2d
137-
; CHECK-NEXT: mul x18, x18, x19
138-
; CHECK-NEXT: mov v1.d[1], x16
139-
; CHECK-NEXT: mul x22, x4, x19
139+
; CHECK-NEXT: mul x16, x16, x20
140+
; CHECK-NEXT: mov v1.d[1], x17
141+
; CHECK-NEXT: mul x23, x5, x20
140142
; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill
141143
; CHECK-NEXT: mov v13.16b, v5.16b
142144
; CHECK-NEXT: mov v5.16b, v17.16b
143145
; CHECK-NEXT: mov v17.16b, v20.16b
144146
; CHECK-NEXT: mov v20.16b, v24.16b
145-
; CHECK-NEXT: mul x13, x13, x5
147+
; CHECK-NEXT: mul x13, x13, x6
146148
; CHECK-NEXT: mov v24.16b, v28.16b
147149
; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
148-
; CHECK-NEXT: mov v2.d[1], x18
150+
; CHECK-NEXT: mov v2.d[1], x16
149151
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
150152
; CHECK-NEXT: add v27.2d, v27.2d, v3.2d
151-
; CHECK-NEXT: mul x17, x12, x19
153+
; CHECK-NEXT: mul x18, x14, x20
152154
; CHECK-NEXT: add v23.2d, v23.2d, v3.2d
153155
; CHECK-NEXT: add v19.2d, v19.2d, v3.2d
154-
; CHECK-NEXT: fmov d4, x22
156+
; CHECK-NEXT: fmov d4, x23
155157
; CHECK-NEXT: add v10.2d, v10.2d, v3.2d
156-
; CHECK-NEXT: mul x14, x4, x5
158+
; CHECK-NEXT: mul x15, x5, x6
157159
; CHECK-NEXT: fmov d0, x13
158160
; CHECK-NEXT: add v14.2d, v14.2d, v2.2d
159161
; CHECK-NEXT: add v2.2d, v6.2d, v3.2d
160-
; CHECK-NEXT: mul x12, x12, x5
162+
; CHECK-NEXT: mul x14, x14, x6
161163
; CHECK-NEXT: mov v3.16b, v7.16b
162164
; CHECK-NEXT: mov v7.16b, v18.16b
163-
; CHECK-NEXT: mov v4.d[1], x17
165+
; CHECK-NEXT: mov v4.d[1], x18
164166
; CHECK-NEXT: mov v18.16b, v22.16b
165-
; CHECK-NEXT: mov v0.d[1], x6
166-
; CHECK-NEXT: fmov d1, x14
167+
; CHECK-NEXT: mov v0.d[1], x7
168+
; CHECK-NEXT: fmov d1, x15
167169
; CHECK-NEXT: add v28.2d, v8.2d, v4.2d
168-
; CHECK-NEXT: mov v1.d[1], x12
170+
; CHECK-NEXT: mov v1.d[1], x14
169171
; CHECK-NEXT: add v31.2d, v31.2d, v0.2d
170172
; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
171173
; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
@@ -192,11 +194,12 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
192194
; CHECK-NEXT: adrp x8, C
193195
; CHECK-NEXT: add x8, x8, :lo12:C
194196
; CHECK-NEXT: stp q11, q30, [x8, #80]
195-
; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
197+
; CHECK-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
196198
; CHECK-NEXT: str q1, [x8]
197199
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
200+
; CHECK-NEXT: ldr x23, [sp, #160] // 8-byte Folded Reload
198201
; CHECK-NEXT: stp q15, q14, [x8, #144]
199-
; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
202+
; CHECK-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
200203
; CHECK-NEXT: stp q1, q13, [x8, #16]
201204
; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
202205
; CHECK-NEXT: stp q28, q12, [x8, #176]
@@ -216,12 +219,13 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
216219
; CHECK-NEXT: stp q5, q4, [x8, #432]
217220
; CHECK-NEXT: stp q2, q3, [x8, #464]
218221
; CHECK-NEXT: str q0, [x8, #496]
219-
; CHECK-NEXT: add sp, sp, #192
222+
; CHECK-NEXT: add sp, sp, #208
220223
; CHECK-NEXT: .cfi_def_cfa_offset 0
221224
; CHECK-NEXT: .cfi_restore w19
222225
; CHECK-NEXT: .cfi_restore w20
223226
; CHECK-NEXT: .cfi_restore w21
224227
; CHECK-NEXT: .cfi_restore w22
228+
; CHECK-NEXT: .cfi_restore w23
225229
; CHECK-NEXT: .cfi_restore b8
226230
; CHECK-NEXT: .cfi_restore b9
227231
; CHECK-NEXT: .cfi_restore b10

0 commit comments

Comments
 (0)