Skip to content

Commit d2b7358

Browse files
committed
[AArch64] MI Scheduler LDP combine follow up
This is a follow-up to 75d820d: it adds more opcodes to the combine target hook (canPairLdStOpc), which enables more LDP creation. Patch co-authored by Cameron McInally.
1 parent 77f2ccb commit d2b7358

File tree

3 files changed

+92
-10
lines changed

3 files changed

+92
-10
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4214,6 +4214,12 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
42144214
switch (FirstOpc) {
42154215
default:
42164216
return false;
4217+
case AArch64::LDRSui:
4218+
case AArch64::LDURSi:
4219+
return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4220+
case AArch64::LDRDui:
4221+
case AArch64::LDURDi:
4222+
return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
42174223
case AArch64::LDRQui:
42184224
case AArch64::LDURQi:
42194225
return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
@@ -4223,6 +4229,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
42234229
case AArch64::LDRSWui:
42244230
case AArch64::LDURSWi:
42254231
return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4232+
case AArch64::LDRXui:
4233+
case AArch64::LDURXi:
4234+
return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
42264235
}
42274236
// These instructions can't be paired based on their opcodes.
42284237
return false;

llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,22 @@ define <2 x i64> @ldq_cluster(ptr %p) {
114114
ret <2 x i64> %res
115115
}
116116

117+
; CHECK: ********** MI Scheduling **********
118+
; CHECK: LDURSi_LDRSui:%bb.0 entry
119+
; CHECK: Cluster ld/st SU(3) - SU(4)
120+
; CHECK: SU(3): %3:fpr32 = LDURSi %0:gpr64
121+
; CHECK: SU(4): %4:fpr32 = LDRSui %0:gpr64
122+
;
123+
define void @LDURSi_LDRSui(ptr nocapture readonly %arg, ptr nocapture readonly %wa, ptr nocapture readonly %wb) {
124+
entry:
125+
%r51 = getelementptr i8, ptr %arg, i64 -4
126+
%r52 = load float, ptr %r51, align 4
127+
%r53 = load float, ptr %arg, align 4
128+
store float %r52, ptr %wa
129+
store float %r53, ptr %wb
130+
ret void
131+
}
132+
117133
; Test LDURQi / LDRQui clustering
118134
;
119135
; CHECK: ********** MI Scheduling **********
@@ -154,3 +170,60 @@ vector_body:
154170
exit:
155171
ret void
156172
}
173+
174+
; Test LDURDi / LDRDui clustering
175+
;
176+
; CHECK: ********** MI Scheduling **********
177+
; CHECK: LDURDi_LDRDui:%bb.1 vector_body
178+
;
179+
; CHECK: Cluster ld/st SU(2) - SU(6)
180+
; CHECK: Cluster ld/st SU(3) - SU(7)
181+
;
182+
; CHECK: SU(2): %{{[0-9]+}}:fpr64 = LDURDi
183+
; CHECK: SU(3): %{{[0-9]+}}:fpr64 = LDURDi
184+
; CHECK: SU(6): %{{[0-9]+}}:fpr64 = LDRDui
185+
; CHECK: SU(7): %{{[0-9]+}}:fpr64 = LDRDui
186+
;
187+
define void @LDURDi_LDRDui(ptr nocapture readonly %arg) {
188+
entry:
189+
br label %vector_body
190+
vector_body:
191+
%phi1 = phi ptr [ null, %entry ], [ %r63, %vector_body ]
192+
%phi2 = phi ptr [ %arg, %entry ], [ %r62, %vector_body ]
193+
%phi3 = phi i32 [ 0, %entry ], [ %r61, %vector_body ]
194+
%r51 = getelementptr i8, ptr %phi1, i64 -8
195+
%r52 = load <2 x float>, ptr %r51, align 8
196+
%r53 = getelementptr i8, ptr %phi2, i64 -8
197+
%r54 = load <2 x float>, ptr %r53, align 8
198+
%r55 = fmul fast <2 x float> %r54, <float 3.0, float 4.0>
199+
%r56 = fsub fast <2 x float> %r52, %r55
200+
store <2 x float> %r56, ptr %r51, align 1
201+
%r57 = load <2 x float>, ptr %phi1, align 8
202+
%r58 = load <2 x float>, ptr %phi2, align 8
203+
%r59 = fmul fast <2 x float> %r58, <float 3.0, float 4.0>
204+
%r60 = fsub fast <2 x float> %r57, %r59
205+
store <2 x float> %r60, ptr %phi1, align 1
206+
%r61 = add i32 %phi3, 4
207+
%r62 = getelementptr i8, ptr %phi2, i64 32
208+
%r63 = getelementptr i8, ptr %phi1, i64 32
209+
%r.not = icmp eq i32 %r61, 0
210+
br i1 %r.not, label %exit, label %vector_body
211+
exit:
212+
ret void
213+
}
214+
215+
; CHECK: ********** MI Scheduling **********
216+
; CHECK: LDURXi_LDRXui:%bb.0 entry
217+
; CHECK: Cluster ld/st SU(3) - SU(4)
218+
; CHECK: SU(3): %{{[0-9]+}}:gpr64 = LDURXi
219+
; CHECK: SU(4): %{{[0-9]+}}:gpr64 = LDRXui
220+
;
221+
define void @LDURXi_LDRXui(ptr nocapture readonly %arg, ptr nocapture readonly %wa, ptr nocapture readonly %wb) {
222+
entry:
223+
%r51 = getelementptr i8, ptr %arg, i64 -8
224+
%r52 = load i64, ptr %r51, align 8
225+
%r53 = load i64, ptr %arg, align 8
226+
store i64 %r52, ptr %wa
227+
store i64 %r53, ptr %wb
228+
ret void
229+
}

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,31 +1680,31 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
16801680
; CHECK-NEXT: add x9, x0, #8
16811681
; CHECK-NEXT: LBB17_1: ; %loop
16821682
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
1683-
; CHECK-NEXT: ldp d2, d4, [x9, #-8]
1683+
; CHECK-NEXT: ldp d2, d3, [x9, #-8]
16841684
; CHECK-NEXT: add x10, x1, x8
16851685
; CHECK-NEXT: ldp q6, q5, [x10, #32]
16861686
; CHECK-NEXT: add x8, x8, #128
16871687
; CHECK-NEXT: ldp q17, q16, [x10]
16881688
; CHECK-NEXT: cmp x8, #1024
1689-
; CHECK-NEXT: tbl.16b v3, { v2 }, v1
1689+
; CHECK-NEXT: tbl.16b v4, { v2 }, v1
16901690
; CHECK-NEXT: tbl.16b v2, { v2 }, v0
1691-
; CHECK-NEXT: tbl.16b v7, { v4 }, v1
1692-
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
1691+
; CHECK-NEXT: tbl.16b v7, { v3 }, v1
1692+
; CHECK-NEXT: tbl.16b v3, { v3 }, v0
16931693
; CHECK-NEXT: add x9, x9, #16
1694-
; CHECK-NEXT: uaddw2.2d v5, v5, v3
1695-
; CHECK-NEXT: uaddw.2d v3, v6, v3
1694+
; CHECK-NEXT: uaddw2.2d v5, v5, v4
1695+
; CHECK-NEXT: uaddw.2d v4, v6, v4
16961696
; CHECK-NEXT: uaddw2.2d v6, v16, v2
16971697
; CHECK-NEXT: ldp q18, q16, [x10, #96]
16981698
; CHECK-NEXT: uaddw.2d v2, v17, v2
1699-
; CHECK-NEXT: stp q3, q5, [x10, #32]
1699+
; CHECK-NEXT: stp q4, q5, [x10, #32]
17001700
; CHECK-NEXT: ldp q17, q5, [x10, #64]
17011701
; CHECK-NEXT: uaddw2.2d v16, v16, v7
17021702
; CHECK-NEXT: uaddw.2d v7, v18, v7
17031703
; CHECK-NEXT: stp q2, q6, [x10]
1704-
; CHECK-NEXT: uaddw2.2d v3, v5, v4
1705-
; CHECK-NEXT: uaddw.2d v4, v17, v4
1704+
; CHECK-NEXT: uaddw2.2d v4, v5, v3
1705+
; CHECK-NEXT: uaddw.2d v3, v17, v3
17061706
; CHECK-NEXT: stp q7, q16, [x10, #96]
1707-
; CHECK-NEXT: stp q4, q3, [x10, #64]
1707+
; CHECK-NEXT: stp q3, q4, [x10, #64]
17081708
; CHECK-NEXT: b.ne LBB17_1
17091709
; CHECK-NEXT: ; %bb.2: ; %exit
17101710
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)