Skip to content

Commit 4a00c84

Browse files
authored
[AArch64] Allow register offset addressing mode for prefetch (llvm#124534)
Previously instruction selection failed to generate PRFM instructions with register offsets because `AArch64ISD::PREFETCH` is not a `MemSDNode`.
1 parent 0f61558 commit 4a00c84

File tree

2 files changed

+155
-4
lines changed

2 files changed

+155
-4
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,10 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
665665
}
666666
}
667667

668+
/// Return true if \p N is a memory operation (any MemSDNode) or an
/// AArch64ISD::PREFETCH node.
///
/// AArch64ISD::PREFETCH takes an address operand but is not a MemSDNode, so
/// an isa<MemSDNode> check alone does not recognise it. The addressing-mode
/// folding heuristics in this file use this predicate when inspecting the
/// users of an address computation, so that prefetch users are treated the
/// same way as load/store users.
static bool isMemOpOrPrefetch(SDNode *N) {
669+
return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
670+
}
671+
668672
/// Determine whether it is worth it to fold SHL into the addressing
669673
/// mode.
670674
static bool isWorthFoldingSHL(SDValue V) {
@@ -682,9 +686,9 @@ static bool isWorthFoldingSHL(SDValue V) {
682686
// computation, since the computation will be kept.
683687
const SDNode *Node = V.getNode();
684688
for (SDNode *UI : Node->users())
685-
if (!isa<MemSDNode>(*UI))
689+
if (!isMemOpOrPrefetch(UI))
686690
for (SDNode *UII : UI->users())
687-
if (!isa<MemSDNode>(*UII))
691+
if (!isMemOpOrPrefetch(UII))
688692
return false;
689693
return true;
690694
}
@@ -1248,7 +1252,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
12481252
// computation, since the computation will be kept.
12491253
const SDNode *Node = N.getNode();
12501254
for (SDNode *UI : Node->users()) {
1251-
if (!isa<MemSDNode>(*UI))
1255+
if (!isMemOpOrPrefetch(UI))
12521256
return false;
12531257
}
12541258

@@ -1332,7 +1336,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
13321336
// computation, since the computation will be kept.
13331337
const SDNode *Node = N.getNode();
13341338
for (SDNode *UI : Node->users()) {
1335-
if (!isa<MemSDNode>(*UI))
1339+
if (!isMemOpOrPrefetch(UI))
13361340
return false;
13371341
}
13381342

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
4+
define void @imm9(ptr %object) {
5+
; CHECK-LABEL: imm9:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: prfum pldl1keep, [x0, #7]
8+
; CHECK-NEXT: ret
9+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 7
10+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
11+
ret void
12+
}
13+
14+
define void @imm9_max(ptr %object) {
15+
; CHECK-LABEL: imm9_max:
16+
; CHECK: // %bb.0:
17+
; CHECK-NEXT: prfum pldl1keep, [x0, #255]
18+
; CHECK-NEXT: ret
19+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 255
20+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
21+
ret void
22+
}
23+
24+
define void @imm9_above_max(ptr %object) {
25+
; CHECK-LABEL: imm9_above_max:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: add x8, x0, #257
28+
; CHECK-NEXT: prfm pldl1keep, [x8]
29+
; CHECK-NEXT: ret
30+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 257 ; 256 would use the imm12 mode
31+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
32+
ret void
33+
}
34+
35+
define void @imm9_min(ptr %object) {
36+
; CHECK-LABEL: imm9_min:
37+
; CHECK: // %bb.0:
38+
; CHECK-NEXT: prfum pldl1keep, [x0, #-256]
39+
; CHECK-NEXT: ret
40+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -256
41+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
42+
ret void
43+
}
44+
45+
define void @imm9_below_min(ptr %object) {
46+
; CHECK-LABEL: imm9_below_min:
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: sub x8, x0, #257
49+
; CHECK-NEXT: prfm pldl1keep, [x8]
50+
; CHECK-NEXT: ret
51+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -257
52+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
53+
ret void
54+
}
55+
56+
define void @imm12(ptr %object) {
57+
; CHECK-LABEL: imm12:
58+
; CHECK: // %bb.0:
59+
; CHECK-NEXT: prfm pldl1keep, [x0, #8]
60+
; CHECK-NEXT: ret
61+
%incdec.ptr = getelementptr inbounds i64, ptr %object, i64 1
62+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
63+
ret void
64+
}
65+
66+
define void @imm12_max(ptr %object) {
67+
; CHECK-LABEL: imm12_max:
68+
; CHECK: // %bb.0:
69+
; CHECK-NEXT: prfm pldl1keep, [x0, #32760]
70+
; CHECK-NEXT: ret
71+
%incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4095
72+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
73+
ret void
74+
}
75+
76+
define void @imm12_above_max(ptr %object) {
77+
; CHECK-LABEL: imm12_above_max:
78+
; CHECK: // %bb.0:
79+
; CHECK-NEXT: mov w8, #32768 // =0x8000
80+
; CHECK-NEXT: prfm pldl1keep, [x0, x8]
81+
; CHECK-NEXT: ret
82+
%incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096
83+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
84+
ret void
85+
}
86+
87+
define void @reg(ptr %object, i64 %a) {
88+
; CHECK-LABEL: reg:
89+
; CHECK: // %bb.0:
90+
; CHECK-NEXT: prfm pldl1keep, [x0, x1]
91+
; CHECK-NEXT: ret
92+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a
93+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
94+
ret void
95+
}
96+
97+
define void @reg_shift(ptr %object, i64 %a) {
98+
; CHECK-LABEL: reg_shift:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: prfm pldl1keep, [x0, x1, lsl #3]
101+
; CHECK-NEXT: ret
102+
%incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a
103+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
104+
ret void
105+
}
106+
107+
define void @reg_sext(ptr %object, i32 %a) {
108+
; CHECK-LABEL: reg_sext:
109+
; CHECK: // %bb.0:
110+
; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw]
111+
; CHECK-NEXT: ret
112+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i32 %a
113+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
114+
ret void
115+
}
116+
117+
define void @reg_sext_shift(ptr %object, i32 %a) {
118+
; CHECK-LABEL: reg_sext_shift:
119+
; CHECK: // %bb.0:
120+
; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw #3]
121+
; CHECK-NEXT: ret
122+
%incdec.ptr = getelementptr inbounds i64, ptr %object, i32 %a
123+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
124+
ret void
125+
}
126+
127+
define void @reg_zext(ptr %object, i32 %a) {
128+
; CHECK-LABEL: reg_zext:
129+
; CHECK: // %bb.0:
130+
; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw]
131+
; CHECK-NEXT: ret
132+
%a.zext = zext i32 %a to i64
133+
%incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a.zext
134+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
135+
ret void
136+
}
137+
138+
define void @reg_zext_shift(ptr %object, i32 %a) {
139+
; CHECK-LABEL: reg_zext_shift:
140+
; CHECK: // %bb.0:
141+
; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw #3]
142+
; CHECK-NEXT: ret
143+
%a.zext = zext i32 %a to i64
144+
%incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a.zext
145+
call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
146+
ret void
147+
}

0 commit comments

Comments
 (0)