-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Allow register offset addressing mode for prefetch #124534
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Previously instruction selection failed to generate PRFM instructions with register offsets because `AArch64ISD::PREFETCH` is not a `MemSDNode`.
@llvm/pr-subscribers-backend-aarch64 Author: Csanád Hajdú (Il-Capitano) Changes: Previously instruction selection failed to generate PRFM instructions with register offsets because `AArch64ISD::PREFETCH` is not a `MemSDNode`. Full diff: https://github.com/llvm/llvm-project/pull/124534.diff — 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6aa8cd4f0232ac..1387a224fa660e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -665,6 +665,10 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
+static bool isMemOpOrPrefetch(SDNode *N) {
+ return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
+}
+
/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
@@ -682,9 +686,9 @@ static bool isWorthFoldingSHL(SDValue V) {
// computation, since the computation will be kept.
const SDNode *Node = V.getNode();
for (SDNode *UI : Node->users())
- if (!isa<MemSDNode>(*UI))
+ if (!isMemOpOrPrefetch(UI))
for (SDNode *UII : UI->users())
- if (!isa<MemSDNode>(*UII))
+ if (!isMemOpOrPrefetch(UII))
return false;
return true;
}
@@ -1248,7 +1252,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
// computation, since the computation will be kept.
const SDNode *Node = N.getNode();
for (SDNode *UI : Node->users()) {
- if (!isa<MemSDNode>(*UI))
+ if (!isMemOpOrPrefetch(UI))
return false;
}
@@ -1332,7 +1336,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// computation, since the computation will be kept.
const SDNode *Node = N.getNode();
for (SDNode *UI : Node->users()) {
- if (!isa<MemSDNode>(*UI))
+ if (!isMemOpOrPrefetch(UI))
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
new file mode 100644
index 00000000000000..44202ffba6374b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @imm9(ptr %object) {
+; CHECK-LABEL: imm9:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfum pldl1keep, [x0, #7]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 7
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_max(ptr %object) {
+; CHECK-LABEL: imm9_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfum pldl1keep, [x0, #255]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 255
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_above_max(ptr %object) {
+; CHECK-LABEL: imm9_above_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, #257
+; CHECK-NEXT: prfm pldl1keep, [x8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 257 ; 256 would use the imm12 mode
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_min(ptr %object) {
+; CHECK-LABEL: imm9_min:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfum pldl1keep, [x0, #-256]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -256
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm9_below_min(ptr %object) {
+; CHECK-LABEL: imm9_below_min:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub x8, x0, #257
+; CHECK-NEXT: prfm pldl1keep, [x8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -257
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm12(ptr %object) {
+; CHECK-LABEL: imm12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, #8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 1
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm12_max(ptr %object) {
+; CHECK-LABEL: imm12_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, #32760]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4095
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @imm12_above_max(ptr %object) {
+; CHECK-LABEL: imm12_above_max:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32768 // =0x8000
+; CHECK-NEXT: prfm pldl1keep, [x0, x8]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg(ptr %object, i64 %a) {
+; CHECK-LABEL: reg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, x1]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_shift(ptr %object, i64 %a) {
+; CHECK-LABEL: reg_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_sext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i32 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_sext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i32 %a
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_zext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw]
+; CHECK-NEXT: ret
+ %a.zext = zext i32 %a to i64
+ %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a.zext
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
+
+define void @reg_zext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: prfm pldl1keep, [x0, w1, uxtw #3]
+; CHECK-NEXT: ret
+ %a.zext = zext i32 %a to i64
+ %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a.zext
+ call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, it looks like prfm should be fast in the same ways as normal loads/stores.
Previously instruction selection failed to generate PRFM instructions with register offsets because `AArch64ISD::PREFETCH` is not a `MemSDNode`.