Skip to content

Commit 55c9123

Browse files
authored
Merge pull request #8276 from fhahn/aarch64-stp-suppress
[AArch64] Fix resource length computation for STP.
2 parents c54e986 + 461ff31 commit 55c9123

10 files changed

+556
-113
lines changed

llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -80,15 +80,23 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
8080
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
8181
unsigned ResLength = BBTrace.getResourceLength();
8282

83-
// Get the machine model's scheduling class for STPQi.
83+
// Get the machine model's scheduling class for STPDi and STRDui.
8484
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
8585
unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
86-
const MCSchedClassDesc *SCDesc =
86+
const MCSchedClassDesc *PairSCDesc =
8787
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
8888

89-
// If a subtarget does not define resources for STPQi, bail here.
90-
if (SCDesc->isValid() && !SCDesc->isVariant()) {
91-
unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
89+
unsigned SCIdx2 = TII->get(AArch64::STRDui).getSchedClass();
90+
const MCSchedClassDesc *SingleSCDesc =
91+
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx2);
92+
93+
// If a subtarget does not define resources for STPDi, bail here.
94+
if (PairSCDesc->isValid() && !PairSCDesc->isVariant() &&
95+
SingleSCDesc->isValid() && !SingleSCDesc->isVariant()) {
96+
// Compute the new critical resource length after replacing 2 separate
97+
// STRDui with one STPDi.
98+
unsigned ResLenWithSTP = BBTrace.getResourceLength(
99+
std::nullopt, PairSCDesc, {SingleSCDesc, SingleSCDesc});
92100
if (ResLenWithSTP > ResLength) {
93101
LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
94102
<< " resources " << ResLength << " -> " << ResLenWithSTP

llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -82,8 +82,7 @@ entry:
8282
; CHECK: fmadd [[y]]
8383
; CHECK: fmadd [[x]]
8484
; CHECK-BALFP: stp [[x]], [[y]]
85-
; CHECK-A53-DAG: str [[x]]
86-
; CHECK-A53-DAG: str [[y]]
85+
; CHECK-A53-DAG: stp [[x]], [[y]]
8786

8887
define void @f2(ptr nocapture readonly %p, ptr nocapture %q) #0 {
8988
entry:
@@ -177,8 +176,7 @@ declare void @g(...) #1
177176
; CHECK: fmadd [[y]]
178177
; CHECK: fmadd [[x]]
179178
; CHECK-BALFP: stp [[x]], [[y]]
180-
; CHECK-A53-DAG: str [[x]]
181-
; CHECK-A53-DAG: str [[y]]
179+
; CHECK-A53-DAG: stp [[x]], [[y]]
182180

183181
define void @f4(ptr nocapture readonly %p, ptr nocapture %q) #0 {
184182
entry:

llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -385,12 +385,9 @@ define void @caller_in_block() {
385385
; CHECK-NEXT: bl return_in_block
386386
; CHECK-NEXT: adrp x8, in_block_store
387387
; CHECK-NEXT: add x8, x8, :lo12:in_block_store
388-
; CHECK-NEXT: str d0, [x8]
389-
; CHECK-NEXT: str d1, [x8, #8]
390-
; CHECK-NEXT: str d2, [x8, #16]
391-
; CHECK-NEXT: str d3, [x8, #24]
392-
; CHECK-NEXT: str d4, [x8, #32]
393-
; CHECK-NEXT: str d5, [x8, #40]
388+
; CHECK-NEXT: stp d0, d1, [x8]
389+
; CHECK-NEXT: stp d2, d3, [x8, #16]
390+
; CHECK-NEXT: stp d4, d5, [x8, #32]
394391
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
395392
; CHECK-NEXT: ret
396393
%1 = call %T_IN_BLOCK @return_in_block()
@@ -403,12 +400,9 @@ define void @callee_in_block(%T_IN_BLOCK %a) {
403400
; CHECK: // %bb.0:
404401
; CHECK-NEXT: adrp x8, in_block_store
405402
; CHECK-NEXT: add x8, x8, :lo12:in_block_store
406-
; CHECK-NEXT: str d5, [x8, #40]
407-
; CHECK-NEXT: str d4, [x8, #32]
408-
; CHECK-NEXT: str d3, [x8, #24]
409-
; CHECK-NEXT: str d2, [x8, #16]
410-
; CHECK-NEXT: str d1, [x8, #8]
411-
; CHECK-NEXT: str d0, [x8]
403+
; CHECK-NEXT: stp d4, d5, [x8, #32]
404+
; CHECK-NEXT: stp d2, d3, [x8, #16]
405+
; CHECK-NEXT: stp d0, d1, [x8]
412406
; CHECK-NEXT: ret
413407
store %T_IN_BLOCK %a, ptr @in_block_store
414408
ret void

llvm/test/CodeGen/AArch64/arm64-stur.ll

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,9 +65,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
6565

6666
; CHECK-LABEL: unaligned:
6767
; CHECK-NOT: str q0
68-
; CHECK: str d[[REG:[0-9]+]], [x0]
69-
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
70-
; CHECK: str d[[REG2]], [x0, #8]
68+
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG:[0-9]+]], v[[REG]], #8
69+
; CHECK: stp d[[REG]], d[[REG2]], [x0]
7170
define void @unaligned(ptr %p, <4 x i32> %v) nounwind {
7271
store <4 x i32> %v, ptr %p, align 4
7372
ret void

llvm/test/CodeGen/AArch64/arm64-windows-calls.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -152,8 +152,7 @@ define void @call_copy_pod() {
152152
; CHECK-NEXT: add x19, x19, :lo12:Pod
153153
; CHECK-NEXT: mov x0, x19
154154
; CHECK-NEXT: bl copy_pod
155-
; CHECK-NEXT: str d0, [x19]
156-
; CHECK-NEXT: str d1, [x19, #8]
155+
; CHECK-NEXT: stp d0, d1, [x19]
157156
; CHECK-NEXT: .seh_startepilogue
158157
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
159158
; CHECK-NEXT: .seh_save_reg x30, 8

llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,7 @@ body: |
6565
; CHECK-LABEL: name: 3-ldrspre-ldrsui-merge
6666
; CHECK: liveins: $s0, $s1, $x1
6767
; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load (s32))
68-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
69-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
68+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
7069
; CHECK: RET undef $lr
7170
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32))
7271
renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
@@ -91,8 +90,7 @@ body: |
9190
; CHECK-LABEL: name: 4-ldrqdre-ldrdui-merge
9291
; CHECK: liveins: $d0, $d1, $x1
9392
; CHECK: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load (s64))
94-
; CHECK: STRDui renamable $d0, renamable $x1, 0 :: (store (s64))
95-
; CHECK: STRDui renamable $d1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s64))
93+
; CHECK: STPDi renamable $d0, renamable $d1, renamable $x1, 0 :: (store (s64))
9694
; CHECK: RET undef $lr
9795
early-clobber renamable $x1, renamable $d0 = LDRDpre killed renamable $x1, 128 :: (load (s64))
9896
renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64))
@@ -275,8 +273,7 @@ body: |
275273
; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64))
276274
; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store (s64))
277275
; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
278-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
279-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
276+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
280277
; CHECK: RET undef $lr
281278
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32))
282279
renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64))
@@ -346,8 +343,7 @@ body: |
346343
; CHECK: liveins: $s0, $s1, $x1
347344
; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32))
348345
; CHECK: renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32))
349-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
350-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
346+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
351347
; CHECK: RET undef $lr
352348
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32))
353349
renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32))
@@ -549,8 +545,7 @@ body: |
549545
; CHECK-LABEL: name: 19-ldrspre-ldrsui-max-merge
550546
; CHECK: liveins: $s0, $s1, $x1
551547
; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load (s32))
552-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
553-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
548+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
554549
; CHECK: RET undef $lr
555550
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 252 :: (load (s32))
556551
renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
@@ -576,8 +571,7 @@ body: |
576571
; CHECK: liveins: $s0, $s1, $x1
577572
; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load (s32))
578573
; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))
579-
; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32))
580-
; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32))
574+
; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32))
581575
; CHECK: RET undef $lr
582576
early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 251 :: (load (s32))
583577
renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32))

llvm/test/CodeGen/AArch64/merge-store.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
4545
; SPLITTING-LABEL: merge_vec_extract_stores:
4646
; SPLITTING: // %bb.0:
4747
; SPLITTING-NEXT: ext v1.16b, v0.16b, v0.16b, #8
48-
; SPLITTING-NEXT: str d0, [x0, #24]
49-
; SPLITTING-NEXT: str d1, [x0, #32]
48+
; SPLITTING-NEXT: stp d0, d1, [x0, #24]
5049
; SPLITTING-NEXT: ret
5150
;
5251
; MISALIGNED-LABEL: merge_vec_extract_stores:

llvm/test/CodeGen/AArch64/no-sve-no-neon.ll

Lines changed: 11 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -14,34 +14,26 @@ define <16 x float> @foo(<16 x i64> %a) {
1414
; CHECK-NEXT: ucvtf s2, x11
1515
; CHECK-NEXT: ucvtf s3, x12
1616
; CHECK-NEXT: ldp x11, x10, [sp]
17-
; CHECK-NEXT: str s0, [x8, #60]
17+
; CHECK-NEXT: stp s1, s0, [x8, #56]
1818
; CHECK-NEXT: ucvtf s0, x13
19-
; CHECK-NEXT: str s1, [x8, #56]
2019
; CHECK-NEXT: ucvtf s4, x9
21-
; CHECK-NEXT: str s2, [x8, #52]
20+
; CHECK-NEXT: stp s3, s2, [x8, #48]
2221
; CHECK-NEXT: ucvtf s2, x11
23-
; CHECK-NEXT: str s3, [x8, #48]
24-
; CHECK-NEXT: ucvtf s1, x10
2522
; CHECK-NEXT: ucvtf s3, x7
26-
; CHECK-NEXT: str s0, [x8, #40]
27-
; CHECK-NEXT: ucvtf s0, x5
28-
; CHECK-NEXT: str s4, [x8, #44]
23+
; CHECK-NEXT: ucvtf s1, x10
24+
; CHECK-NEXT: stp s0, s4, [x8, #40]
2925
; CHECK-NEXT: ucvtf s4, x6
30-
; CHECK-NEXT: str s2, [x8, #32]
31-
; CHECK-NEXT: ucvtf s2, x3
32-
; CHECK-NEXT: str s1, [x8, #36]
26+
; CHECK-NEXT: ucvtf s0, x5
27+
; CHECK-NEXT: stp s2, s1, [x8, #32]
3328
; CHECK-NEXT: ucvtf s1, x4
34-
; CHECK-NEXT: str s3, [x8, #28]
29+
; CHECK-NEXT: ucvtf s2, x3
30+
; CHECK-NEXT: stp s4, s3, [x8, #24]
3531
; CHECK-NEXT: ucvtf s3, x2
36-
; CHECK-NEXT: str s4, [x8, #24]
3732
; CHECK-NEXT: ucvtf s4, x1
38-
; CHECK-NEXT: str s0, [x8, #20]
33+
; CHECK-NEXT: stp s1, s0, [x8, #16]
3934
; CHECK-NEXT: ucvtf s0, x0
40-
; CHECK-NEXT: str s1, [x8, #16]
41-
; CHECK-NEXT: str s2, [x8, #12]
42-
; CHECK-NEXT: str s3, [x8, #8]
43-
; CHECK-NEXT: str s4, [x8, #4]
44-
; CHECK-NEXT: str s0, [x8]
35+
; CHECK-NEXT: stp s3, s2, [x8, #8]
36+
; CHECK-NEXT: stp s0, s4, [x8]
4537
; CHECK-NEXT: ret
4638
%conv1 = uitofp <16 x i64> %a to <16 x float>
4739
ret <16 x float> %conv1

0 commit comments

Comments
 (0)