Skip to content

Commit 461ff31

Browse files
committed
[AArch64] Fix resource length computation for STP. (llvm#81749)
On some uArchs, `STP [s|d], [s|d]` first combines the 2 input registers into a single register using a vector execution unit. As far as I understand, AArch64StorePairSuppress tries to prevent forming STPs when the critical resources are the vector units, in order to avoid adding more pressure on those units. The implementation, however, simply computes the new critical resource length by adding the resources for another STP. If the load/store units are the critical resource, this means we increase that length by one and incorrectly prevent forming the STP. This patch adjusts the resource computation by also removing 2 STRs, as introducing an STP will remove 2 single stores. This should more accurately reflect the resource usage after introducing an STP, and it does not prevent forming STPs if the load/store units are the critical resources; in those cases, an STP can actually help to reduce resource usage. PR: llvm#81749 (cherry-picked from 2f083b3)
1 parent a7b2321 commit 461ff31

File tree

9 files changed

+66
-107
lines changed

9 files changed

+66
-107
lines changed

llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,23 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
8080
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
8181
unsigned ResLength = BBTrace.getResourceLength();
8282

83-
// Get the machine model's scheduling class for STPQi.
83+
// Get the machine model's scheduling class for STPDi and STRDui.
8484
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
8585
unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
86-
const MCSchedClassDesc *SCDesc =
86+
const MCSchedClassDesc *PairSCDesc =
8787
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
8888

89-
// If a subtarget does not define resources for STPQi, bail here.
90-
if (SCDesc->isValid() && !SCDesc->isVariant()) {
91-
unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
89+
unsigned SCIdx2 = TII->get(AArch64::STRDui).getSchedClass();
90+
const MCSchedClassDesc *SingleSCDesc =
91+
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx2);
92+
93+
// If a subtarget does not define resources for STPDi, bail here.
94+
if (PairSCDesc->isValid() && !PairSCDesc->isVariant() &&
95+
SingleSCDesc->isValid() && !SingleSCDesc->isVariant()) {
96+
// Compute the new critical resource length after replacing 2 separate
97+
// STRDui with one STPDi.
98+
unsigned ResLenWithSTP = BBTrace.getResourceLength(
99+
std::nullopt, PairSCDesc, {SingleSCDesc, SingleSCDesc});
92100
if (ResLenWithSTP > ResLength) {
93101
LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
94102
<< " resources " << ResLength << " -> " << ResLenWithSTP

llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,7 @@ entry:
8282
; CHECK: fmadd [[y]]
8383
; CHECK: fmadd [[x]]
8484
; CHECK-BALFP: stp [[x]], [[y]]
85-
; CHECK-A53-DAG: str [[x]]
86-
; CHECK-A53-DAG: str [[y]]
85+
; CHECK-A53-DAG: stp [[x]], [[y]]
8786

8887
define void @f2(ptr nocapture readonly %p, ptr nocapture %q) #0 {
8988
entry:
@@ -177,8 +176,7 @@ declare void @g(...) #1
177176
; CHECK: fmadd [[y]]
178177
; CHECK: fmadd [[x]]
179178
; CHECK-BALFP: stp [[x]], [[y]]
180-
; CHECK-A53-DAG: str [[x]]
181-
; CHECK-A53-DAG: str [[y]]
179+
; CHECK-A53-DAG: stp [[x]], [[y]]
182180

183181
define void @f4(ptr nocapture readonly %p, ptr nocapture %q) #0 {
184182
entry:

llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -385,12 +385,9 @@ define void @caller_in_block() {
385385
; CHECK-NEXT: bl return_in_block
386386
; CHECK-NEXT: adrp x8, in_block_store
387387
; CHECK-NEXT: add x8, x8, :lo12:in_block_store
388-
; CHECK-NEXT: str d0, [x8]
389-
; CHECK-NEXT: str d1, [x8, #8]
390-
; CHECK-NEXT: str d2, [x8, #16]
391-
; CHECK-NEXT: str d3, [x8, #24]
392-
; CHECK-NEXT: str d4, [x8, #32]
393-
; CHECK-NEXT: str d5, [x8, #40]
388+
; CHECK-NEXT: stp d0, d1, [x8]
389+
; CHECK-NEXT: stp d2, d3, [x8, #16]
390+
; CHECK-NEXT: stp d4, d5, [x8, #32]
394391
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
395392
; CHECK-NEXT: ret
396393
%1 = call %T_IN_BLOCK @return_in_block()
@@ -403,12 +400,9 @@ define void @callee_in_block(%T_IN_BLOCK %a) {
403400
; CHECK: // %bb.0:
404401
; CHECK-NEXT: adrp x8, in_block_store
405402
; CHECK-NEXT: add x8, x8, :lo12:in_block_store
406-
; CHECK-NEXT: str d5, [x8, #40]
407-
; CHECK-NEXT: str d4, [x8, #32]
408-
; CHECK-NEXT: str d3, [x8, #24]
409-
; CHECK-NEXT: str d2, [x8, #16]
410-
; CHECK-NEXT: str d1, [x8, #8]
411-
; CHECK-NEXT: str d0, [x8]
403+
; CHECK-NEXT: stp d4, d5, [x8, #32]
404+
; CHECK-NEXT: stp d2, d3, [x8, #16]
405+
; CHECK-NEXT: stp d0, d1, [x8]
412406
; CHECK-NEXT: ret
413407
store %T_IN_BLOCK %a, ptr @in_block_store
414408
ret void

llvm/test/CodeGen/AArch64/arm64-stur.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
6565

6666
; CHECK-LABEL: unaligned:
6767
; CHECK-NOT: str q0
68-
; CHECK: str d[[REG:[0-9]+]], [x0]
69-
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
70-
; CHECK: str d[[REG2]], [x0, #8]
68+
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG:[0-9]+]], v[[REG]], #8
69+
; CHECK: stp d[[REG]], d[[REG2]], [x0]
7170
define void @unaligned(ptr %p, <4 x i32> %v) nounwind {
7271
store <4 x i32> %v, ptr %p, align 4
7372
ret void

llvm/test/CodeGen/AArch64/arm64-windows-calls.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ define void @call_copy_pod() {
152152
; CHECK-NEXT: add x19, x19, :lo12:Pod
153153
; CHECK-NEXT: mov x0, x19
154154
; CHECK-NEXT: bl copy_pod
155-
; CHECK-NEXT: str d0, [x19]
156-
; CHECK-NEXT: str d1, [x19, #8]
155+
; CHECK-NEXT: stp d0, d1, [x19]
157156
; CHECK-NEXT: .seh_startepilogue
158157
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
159158
; CHECK-NEXT: .seh_save_reg x30, 8

llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ body: |
6565
; CHECK-LABEL: name: 3-ldrspre-ldrsui-merge
6666
; CHECK: liveins: $s0, $s1, $x1
6767
; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load (s32))
68-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
69-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
68+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
7069
; CHECK: RET undef $lr
7170
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32))
7271
renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
@@ -91,8 +90,7 @@ body: |
9190
; CHECK-LABEL: name: 4-ldrqdre-ldrdui-merge
9291
; CHECK: liveins: $d0, $d1, $x1
9392
; CHECK: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load (s64))
94-
; CHECK: STRDui renamable $d0, renamable $x1, 0 :: (store (s64))
95-
; CHECK: STRDui renamable $d1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s64))
93+
; CHECK: STPDi renamable $d0, renamable $d1, renamable $x1, 0 :: (store (s64))
9694
; CHECK: RET undef $lr
9795
early-clobber renamable $x1, renamable $d0 = LDRDpre killed renamable $x1, 128 :: (load (s64))
9896
renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64))
@@ -275,8 +273,7 @@ body: |
275273
; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64))
276274
; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store (s64))
277275
; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
278-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
279-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
276+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
280277
; CHECK: RET undef $lr
281278
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32))
282279
renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64))
@@ -346,8 +343,7 @@ body: |
346343
; CHECK: liveins: $s0, $s1, $x1
347344
; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32))
348345
; CHECK: renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32))
349-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
350-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
346+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
351347
; CHECK: RET undef $lr
352348
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32))
353349
renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32))
@@ -549,8 +545,7 @@ body: |
549545
; CHECK-LABEL: name: 19-ldrspre-ldrsui-max-merge
550546
; CHECK: liveins: $s0, $s1, $x1
551547
; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load (s32))
552-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
553-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
548+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
554549
; CHECK: RET undef $lr
555550
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 252 :: (load (s32))
556551
renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
@@ -576,8 +571,7 @@ body: |
576571
; CHECK: liveins: $s0, $s1, $x1
577572
; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load (s32))
578573
; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
579-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
580-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
574+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
581575
; CHECK: RET undef $lr
582576
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 251 :: (load (s32))
583577
renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))

llvm/test/CodeGen/AArch64/merge-store.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
4545
; SPLITTING-LABEL: merge_vec_extract_stores:
4646
; SPLITTING: // %bb.0:
4747
; SPLITTING-NEXT: ext v1.16b, v0.16b, v0.16b, #8
48-
; SPLITTING-NEXT: str d0, [x0, #24]
49-
; SPLITTING-NEXT: str d1, [x0, #32]
48+
; SPLITTING-NEXT: stp d0, d1, [x0, #24]
5049
; SPLITTING-NEXT: ret
5150
;
5251
; MISALIGNED-LABEL: merge_vec_extract_stores:

llvm/test/CodeGen/AArch64/no-sve-no-neon.ll

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,26 @@ define <16 x float> @foo(<16 x i64> %a) {
1414
; CHECK-NEXT: ucvtf s2, x11
1515
; CHECK-NEXT: ucvtf s3, x12
1616
; CHECK-NEXT: ldp x11, x10, [sp]
17-
; CHECK-NEXT: str s0, [x8, #60]
17+
; CHECK-NEXT: stp s1, s0, [x8, #56]
1818
; CHECK-NEXT: ucvtf s0, x13
19-
; CHECK-NEXT: str s1, [x8, #56]
2019
; CHECK-NEXT: ucvtf s4, x9
21-
; CHECK-NEXT: str s2, [x8, #52]
20+
; CHECK-NEXT: stp s3, s2, [x8, #48]
2221
; CHECK-NEXT: ucvtf s2, x11
23-
; CHECK-NEXT: str s3, [x8, #48]
24-
; CHECK-NEXT: ucvtf s1, x10
2522
; CHECK-NEXT: ucvtf s3, x7
26-
; CHECK-NEXT: str s0, [x8, #40]
27-
; CHECK-NEXT: ucvtf s0, x5
28-
; CHECK-NEXT: str s4, [x8, #44]
23+
; CHECK-NEXT: ucvtf s1, x10
24+
; CHECK-NEXT: stp s0, s4, [x8, #40]
2925
; CHECK-NEXT: ucvtf s4, x6
30-
; CHECK-NEXT: str s2, [x8, #32]
31-
; CHECK-NEXT: ucvtf s2, x3
32-
; CHECK-NEXT: str s1, [x8, #36]
26+
; CHECK-NEXT: ucvtf s0, x5
27+
; CHECK-NEXT: stp s2, s1, [x8, #32]
3328
; CHECK-NEXT: ucvtf s1, x4
34-
; CHECK-NEXT: str s3, [x8, #28]
29+
; CHECK-NEXT: ucvtf s2, x3
30+
; CHECK-NEXT: stp s4, s3, [x8, #24]
3531
; CHECK-NEXT: ucvtf s3, x2
36-
; CHECK-NEXT: str s4, [x8, #24]
3732
; CHECK-NEXT: ucvtf s4, x1
38-
; CHECK-NEXT: str s0, [x8, #20]
33+
; CHECK-NEXT: stp s1, s0, [x8, #16]
3934
; CHECK-NEXT: ucvtf s0, x0
40-
; CHECK-NEXT: str s1, [x8, #16]
41-
; CHECK-NEXT: str s2, [x8, #12]
42-
; CHECK-NEXT: str s3, [x8, #8]
43-
; CHECK-NEXT: str s4, [x8, #4]
44-
; CHECK-NEXT: str s0, [x8]
35+
; CHECK-NEXT: stp s3, s2, [x8, #8]
36+
; CHECK-NEXT: stp s0, s4, [x8]
4537
; CHECK-NEXT: ret
4638
%conv1 = uitofp <16 x i64> %a to <16 x float>
4739
ret <16 x float> %conv1

llvm/test/CodeGen/AArch64/storepairsuppress.ll

Lines changed: 24 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
3535
; CHECK-NEXT: fmadd s0, s5, s0, s1
3636
; CHECK-NEXT: fadd s1, s4, s2
3737
; CHECK-NEXT: fadd s5, s0, s3
38-
; CHECK-NEXT: str s1, [x8]
39-
; CHECK-NEXT: str s5, [x8, #4]
38+
; CHECK-NEXT: stp s1, s5, [x8]
4039
; CHECK-NEXT: fsub s2, s2, s4
4140
; CHECK-NEXT: fsub s0, s3, s0
42-
; CHECK-NEXT: str s2, [x8, #8]
43-
; CHECK-NEXT: str s0, [x8, #12]
41+
; CHECK-NEXT: stp s2, s0, [x8, #8]
4442
; CHECK-NEXT: ldr x9, [x0, #8]
4543
; CHECK-NEXT: ldp s3, s4, [x9]
4644
; CHECK-NEXT: ldp s6, s7, [x8, #16]
@@ -51,12 +49,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
5149
; CHECK-NEXT: fmadd s3, s17, s3, s4
5250
; CHECK-NEXT: fadd s4, s16, s6
5351
; CHECK-NEXT: fadd s17, s3, s7
54-
; CHECK-NEXT: str s4, [x8, #16]
55-
; CHECK-NEXT: str s17, [x8, #20]
52+
; CHECK-NEXT: stp s4, s17, [x8, #16]
5653
; CHECK-NEXT: fsub s6, s6, s16
5754
; CHECK-NEXT: fsub s3, s7, s3
58-
; CHECK-NEXT: str s6, [x8, #24]
59-
; CHECK-NEXT: str s3, [x8, #28]
55+
; CHECK-NEXT: stp s6, s3, [x8, #24]
6056
; CHECK-NEXT: ldr x9, [x0, #8]
6157
; CHECK-NEXT: ldp s7, s16, [x9]
6258
; CHECK-NEXT: fmul s18, s16, s17
@@ -65,12 +61,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
6561
; CHECK-NEXT: fmadd s4, s16, s4, s17
6662
; CHECK-NEXT: fadd s16, s7, s1
6763
; CHECK-NEXT: fadd s17, s4, s5
68-
; CHECK-NEXT: str s16, [x8]
69-
; CHECK-NEXT: str s17, [x8, #4]
64+
; CHECK-NEXT: stp s16, s17, [x8]
7065
; CHECK-NEXT: fsub s1, s1, s7
7166
; CHECK-NEXT: fsub s4, s5, s4
72-
; CHECK-NEXT: str s1, [x8, #16]
73-
; CHECK-NEXT: str s4, [x8, #20]
67+
; CHECK-NEXT: stp s1, s4, [x8, #16]
7468
; CHECK-NEXT: ldr x10, [x0, #8]
7569
; CHECK-NEXT: lsl x9, x3, #4
7670
; CHECK-NEXT: add x10, x10, x9
@@ -81,12 +75,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
8175
; CHECK-NEXT: fmadd s3, s4, s6, s3
8276
; CHECK-NEXT: fadd s4, s1, s2
8377
; CHECK-NEXT: fadd s5, s3, s0
84-
; CHECK-NEXT: str s4, [x8, #8]
85-
; CHECK-NEXT: str s5, [x8, #12]
78+
; CHECK-NEXT: stp s4, s5, [x8, #8]
8679
; CHECK-NEXT: fsub s1, s2, s1
8780
; CHECK-NEXT: fsub s0, s0, s3
88-
; CHECK-NEXT: str s1, [x8, #24]
89-
; CHECK-NEXT: str s0, [x8, #28]
81+
; CHECK-NEXT: stp s1, s0, [x8, #24]
9082
; CHECK-NEXT: ldr x10, [x0, #8]
9183
; CHECK-NEXT: ldp s0, s1, [x10]
9284
; CHECK-NEXT: ldp s2, s3, [x8, #32]
@@ -97,12 +89,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
9789
; CHECK-NEXT: fmadd s0, s5, s0, s1
9890
; CHECK-NEXT: fadd s1, s4, s2
9991
; CHECK-NEXT: fadd s5, s0, s3
100-
; CHECK-NEXT: str s1, [x8, #32]
101-
; CHECK-NEXT: str s5, [x8, #36]
92+
; CHECK-NEXT: stp s1, s5, [x8, #32]
10293
; CHECK-NEXT: fsub s4, s2, s4
10394
; CHECK-NEXT: fsub s6, s3, s0
104-
; CHECK-NEXT: str s4, [x8, #40]
105-
; CHECK-NEXT: str s6, [x8, #44]
95+
; CHECK-NEXT: stp s4, s6, [x8, #40]
10696
; CHECK-NEXT: ldr x10, [x0, #8]
10797
; CHECK-NEXT: ldp s0, s2, [x10]
10898
; CHECK-NEXT: ldp s3, s7, [x8, #48]
@@ -113,12 +103,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
113103
; CHECK-NEXT: fmadd s0, s17, s0, s2
114104
; CHECK-NEXT: fadd s2, s16, s3
115105
; CHECK-NEXT: fadd s17, s0, s7
116-
; CHECK-NEXT: str s2, [x8, #48]
117-
; CHECK-NEXT: str s17, [x8, #52]
106+
; CHECK-NEXT: stp s2, s17, [x8, #48]
118107
; CHECK-NEXT: fsub s16, s3, s16
119108
; CHECK-NEXT: fsub s0, s7, s0
120-
; CHECK-NEXT: str s16, [x8, #56]
121-
; CHECK-NEXT: str s0, [x8, #60]
109+
; CHECK-NEXT: stp s16, s0, [x8, #56]
122110
; CHECK-NEXT: ldr x10, [x0, #8]
123111
; CHECK-NEXT: ldp s3, s7, [x10]
124112
; CHECK-NEXT: fmul s18, s7, s17
@@ -127,12 +115,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
127115
; CHECK-NEXT: fmadd s7, s7, s2, s17
128116
; CHECK-NEXT: fadd s17, s3, s1
129117
; CHECK-NEXT: fadd s18, s7, s5
130-
; CHECK-NEXT: str s17, [x8, #32]
131-
; CHECK-NEXT: str s18, [x8, #36]
118+
; CHECK-NEXT: stp s17, s18, [x8, #32]
132119
; CHECK-NEXT: fsub s2, s1, s3
133120
; CHECK-NEXT: fsub s3, s5, s7
134-
; CHECK-NEXT: str s2, [x8, #48]
135-
; CHECK-NEXT: str s3, [x8, #52]
121+
; CHECK-NEXT: stp s2, s3, [x8, #48]
136122
; CHECK-NEXT: ldr x10, [x0, #8]
137123
; CHECK-NEXT: add x9, x10, x9
138124
; CHECK-NEXT: ldp s1, s5, [x9]
@@ -142,12 +128,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
142128
; CHECK-NEXT: fmadd s5, s5, s16, s0
143129
; CHECK-NEXT: fadd s7, s1, s4
144130
; CHECK-NEXT: fadd s16, s5, s6
145-
; CHECK-NEXT: str s7, [x8, #40]
146-
; CHECK-NEXT: str s16, [x8, #44]
131+
; CHECK-NEXT: stp s7, s16, [x8, #40]
147132
; CHECK-NEXT: fsub s0, s4, s1
148133
; CHECK-NEXT: fsub s1, s6, s5
149-
; CHECK-NEXT: str s0, [x8, #56]
150-
; CHECK-NEXT: str s1, [x8, #60]
134+
; CHECK-NEXT: stp s0, s1, [x8, #56]
151135
; CHECK-NEXT: ldr x9, [x0, #8]
152136
; CHECK-NEXT: ldp s4, s5, [x9]
153137
; CHECK-NEXT: ldp s6, s19, [x8]
@@ -157,12 +141,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
157141
; CHECK-NEXT: fmadd s4, s18, s4, s5
158142
; CHECK-NEXT: fadd s5, s17, s6
159143
; CHECK-NEXT: fadd s18, s4, s19
160-
; CHECK-NEXT: str s5, [x8]
161-
; CHECK-NEXT: str s18, [x8, #4]
144+
; CHECK-NEXT: stp s5, s18, [x8]
162145
; CHECK-NEXT: fsub s5, s6, s17
163146
; CHECK-NEXT: fsub s4, s19, s4
164-
; CHECK-NEXT: str s5, [x8, #32]
165-
; CHECK-NEXT: str s4, [x8, #36]
147+
; CHECK-NEXT: stp s5, s4, [x8, #32]
166148
; CHECK-NEXT: lsl x9, x3, #32
167149
; CHECK-NEXT: ldr x10, [x0, #8]
168150
; CHECK-NEXT: add x9, x10, x9, asr #29
@@ -174,12 +156,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
174156
; CHECK-NEXT: fmadd s4, s16, s4, s5
175157
; CHECK-NEXT: fadd s5, s7, s6
176158
; CHECK-NEXT: fadd s16, s4, s17
177-
; CHECK-NEXT: str s5, [x8, #8]
178-
; CHECK-NEXT: str s16, [x8, #12]
159+
; CHECK-NEXT: stp s5, s16, [x8, #8]
179160
; CHECK-NEXT: fsub s5, s6, s7
180161
; CHECK-NEXT: fsub s4, s17, s4
181-
; CHECK-NEXT: str s5, [x8, #40]
182-
; CHECK-NEXT: str s4, [x8, #44]
162+
; CHECK-NEXT: stp s5, s4, [x8, #40]
183163
; CHECK-NEXT: lsl x9, x3, #33
184164
; CHECK-NEXT: ldr x10, [x0, #8]
185165
; CHECK-NEXT: add x9, x10, x9, asr #29
@@ -191,12 +171,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
191171
; CHECK-NEXT: fmadd s3, s3, s4, s5
192172
; CHECK-NEXT: fadd s4, s2, s6
193173
; CHECK-NEXT: fadd s5, s3, s7
194-
; CHECK-NEXT: str s4, [x8, #16]
195-
; CHECK-NEXT: str s5, [x8, #20]
174+
; CHECK-NEXT: stp s4, s5, [x8, #16]
196175
; CHECK-NEXT: fsub s2, s6, s2
197176
; CHECK-NEXT: fsub s3, s7, s3
198-
; CHECK-NEXT: str s2, [x8, #48]
199-
; CHECK-NEXT: str s3, [x8, #52]
177+
; CHECK-NEXT: stp s2, s3, [x8, #48]
200178
; CHECK-NEXT: add w9, w3, w3, lsl #1
201179
; CHECK-NEXT: lsl x9, x9, #32
202180
; CHECK-NEXT: ldr x10, [x0, #8]
@@ -209,12 +187,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
209187
; CHECK-NEXT: fmadd s1, s1, s2, s3
210188
; CHECK-NEXT: fadd s2, s0, s4
211189
; CHECK-NEXT: fadd s3, s1, s5
212-
; CHECK-NEXT: str s2, [x8, #24]
213-
; CHECK-NEXT: str s3, [x8, #28]
190+
; CHECK-NEXT: stp s2, s3, [x8, #24]
214191
; CHECK-NEXT: fsub s0, s4, s0
215192
; CHECK-NEXT: fsub s1, s5, s1
216-
; CHECK-NEXT: str s0, [x8, #56]
217-
; CHECK-NEXT: str s1, [x8, #60]
193+
; CHECK-NEXT: stp s0, s1, [x8, #56]
218194
; CHECK-NEXT: ret
219195
bb:
220196
%shl = shl i64 %arg3, 1

0 commit comments

Comments
 (0)