Skip to content

Commit 35904ec

Browse files
authored
[AArch64] MI Scheduler STP combine (llvm#80188)
Add opcodes for several store instructions to the target hook so that more STP pairs can be formed. This is split off from the patch that does the same for some load instructions (llvm#79003). Patch co-authored by Cameron McInally.
1 parent c6b5ea3 commit 35904ec

File tree

2 files changed

+86
-1
lines changed

2 files changed

+86
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4206,6 +4206,21 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
42064206
switch (FirstOpc) {
42074207
default:
42084208
return false;
4209+
case AArch64::STRSui:
4210+
case AArch64::STURSi:
4211+
return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4212+
case AArch64::STRDui:
4213+
case AArch64::STURDi:
4214+
return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4215+
case AArch64::STRQui:
4216+
case AArch64::STURQi:
4217+
return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4218+
case AArch64::STRWui:
4219+
case AArch64::STURWi:
4220+
return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4221+
case AArch64::STRXui:
4222+
case AArch64::STURXi:
4223+
return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
42094224
case AArch64::LDRSui:
42104225
case AArch64::LDURSi:
42114226
return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;

llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
2+
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefixes=CHECK,CHECK-A57
33
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m3 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
44

55
; Test ldr clustering.
@@ -227,3 +227,73 @@ entry:
227227
store i64 %r53, ptr %wb
228228
ret void
229229
}
230+
231+
; CHECK: ********** MI Scheduling **********
232+
; CHECK: STURWi_STRWui:%bb.0 entry
233+
; CHECK: Cluster ld/st SU(3) - SU(4)
234+
; CHECK: SU(3): STURWi %{{[0-9]+}}:gpr32
235+
; CHECK: SU(4): STRWui %{{[0-9]+}}:gpr32
236+
;
237+
define void @STURWi_STRWui(ptr nocapture readonly %arg, i32 %b, i32 %c) {
238+
entry:
239+
%r51 = getelementptr i8, ptr %arg, i64 -4
240+
store i32 %b, ptr %r51
241+
store i32 %c, ptr %arg
242+
ret void
243+
}
244+
245+
; CHECK: ********** MI Scheduling **********
246+
; CHECK: STURXi_STRXui:%bb.0 entry
247+
; CHECK: Cluster ld/st SU(3) - SU(4)
248+
; CHECK: SU(3): STURXi %{{[0-9]+}}:gpr64
249+
; CHECK: SU(4): STRXui %{{[0-9]+}}:gpr64
250+
;
251+
define void @STURXi_STRXui(ptr nocapture readonly %arg, i64 %b, i64 %c) {
252+
entry:
253+
%r51 = getelementptr i8, ptr %arg, i64 -8
254+
store i64 %b, ptr %r51
255+
store i64 %c, ptr %arg
256+
ret void
257+
}
258+
259+
; CHECK-A57: ********** MI Scheduling **********
260+
; CHECK-A57: STURSi_STRSui:%bb.0 entry
261+
; CHECK-A57: Cluster ld/st SU(3) - SU(4)
262+
; CHECK-A57: SU(3): STURSi %{{[0-9]+}}:fpr32
263+
; CHECK-A57: SU(4): STRSui %{{[0-9]+}}:fpr32
264+
;
265+
define void @STURSi_STRSui(ptr nocapture readonly %arg, float %b, float %c) {
266+
entry:
267+
%r51 = getelementptr i8, ptr %arg, i64 -4
268+
store float %b, ptr %r51
269+
store float %c, ptr %arg
270+
ret void
271+
}
272+
273+
; CHECK-A57: ********** MI Scheduling **********
274+
; CHECK-A57: STURDi_STRDui:%bb.0 entry
275+
; CHECK-A57: Cluster ld/st SU(3) - SU(4)
276+
; CHECK-A57: SU(3): STURDi %{{[0-9]+}}:fpr64
277+
; CHECK-A57: SU(4): STRDui %{{[0-9]+}}:fpr64
278+
;
279+
define void @STURDi_STRDui(ptr nocapture readonly %arg, <2 x float> %b, <2 x float> %c) {
280+
entry:
281+
%r51 = getelementptr i8, ptr %arg, i64 -8
282+
store <2 x float> %b, ptr %r51
283+
store <2 x float> %c, ptr %arg
284+
ret void
285+
}
286+
287+
; CHECK-A57: ********** MI Scheduling **********
288+
; CHECK-A57: STURQi_STRQui:%bb.0 entry
289+
; CHECK-A57: Cluster ld/st SU(3) - SU(4)
290+
; CHECK-A57: SU(3): STURQi %{{[0-9]+}}:fpr128
291+
; CHECK-A57: SU(4): STRQui %{{[0-9]+}}:fpr128
292+
;
293+
define void @STURQi_STRQui(ptr nocapture readonly %arg, <2 x double> %b, <2 x double> %c) {
294+
entry:
295+
%r51 = getelementptr i8, ptr %arg, i64 -16
296+
store <2 x double> %b, ptr %r51
297+
store <2 x double> %c, ptr %arg
298+
ret void
299+
}

0 commit comments

Comments
 (0)