Skip to content

[AArch64] Allow register offset addressing mode for prefetch #124534

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from Il-Capitano:fix-prfm-register-offset
Jan 28, 2025

Conversation

Il-Capitano
Copy link
Contributor

Previously instruction selection failed to generate PRFM instructions with register offsets because AArch64ISD::PREFETCH is not a MemSDNode.

Previously instruction selection failed to generate PRFM instructions
with register offsets because `AArch64ISD::PREFETCH` is not a
`MemSDNode`.
@llvmbot
Copy link
Member

llvmbot commented Jan 27, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Csanád Hajdú (Il-Capitano)

Changes

Previously instruction selection failed to generate PRFM instructions with register offsets because AArch64ISD::PREFETCH is not a MemSDNode.


Full diff: https://github.com/llvm/llvm-project/pull/124534.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+8-4)
  • (added) llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll (+147)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6aa8cd4f0232ac..1387a224fa660e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -665,6 +665,10 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
   }
 }
 
+static bool isMemOpOrPrefetch(SDNode *N) {
+  return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
+}
+
 /// Determine whether it is worth it to fold SHL into the addressing
 /// mode.
 static bool isWorthFoldingSHL(SDValue V) {
@@ -682,9 +686,9 @@ static bool isWorthFoldingSHL(SDValue V) {
   // computation, since the computation will be kept.
   const SDNode *Node = V.getNode();
   for (SDNode *UI : Node->users())
-    if (!isa<MemSDNode>(*UI))
+    if (!isMemOpOrPrefetch(UI))
       for (SDNode *UII : UI->users())
-        if (!isa<MemSDNode>(*UII))
+        if (!isMemOpOrPrefetch(UII))
           return false;
   return true;
 }
@@ -1248,7 +1252,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
   // computation, since the computation will be kept.
   const SDNode *Node = N.getNode();
   for (SDNode *UI : Node->users()) {
-    if (!isa<MemSDNode>(*UI))
+    if (!isMemOpOrPrefetch(UI))
       return false;
   }
 
@@ -1332,7 +1336,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
   // computation, since the computation will be kept.
   const SDNode *Node = N.getNode();
   for (SDNode *UI : Node->users()) {
-    if (!isa<MemSDNode>(*UI))
+    if (!isMemOpOrPrefetch(UI))
       return false;
   }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
new file mode 100644
index 00000000000000..44202ffba6374b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-addrmode.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define void @imm9(ptr %object) {
+; CHECK-LABEL: imm9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfum pldl1keep, [x0, #7]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 7
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_max(ptr %object) {
+; CHECK-LABEL: imm9_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfum pldl1keep, [x0, #255]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 255
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_above_max(ptr %object) {
+; CHECK-LABEL: imm9_above_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #257
+; CHECK-NEXT:    prfm pldl1keep, [x8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 257  ; 256 would use the imm12 mode
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_min(ptr %object) {
+; CHECK-LABEL: imm9_min:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfum pldl1keep, [x0, #-256]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -256
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm9_below_min(ptr %object) {
+; CHECK-LABEL: imm9_below_min:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, #257
+; CHECK-NEXT:    prfm pldl1keep, [x8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 -257
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm12(ptr %object) {
+; CHECK-LABEL: imm12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, #8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 1
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm12_max(ptr %object) {
+; CHECK-LABEL: imm12_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, #32760]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4095
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @imm12_above_max(ptr %object) {
+; CHECK-LABEL: imm12_above_max:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    prfm pldl1keep, [x0, x8]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg(ptr %object, i64 %a) {
+; CHECK-LABEL: reg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, x1]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_shift(ptr %object, i64 %a) {
+; CHECK-LABEL: reg_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_sext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i32 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_sext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_sext_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i32 %a
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_zext(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, uxtw]
+; CHECK-NEXT:    ret
+  %a.zext = zext i32 %a to i64
+  %incdec.ptr = getelementptr inbounds i8, ptr %object, i64 %a.zext
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @reg_zext_shift(ptr %object, i32 %a) {
+; CHECK-LABEL: reg_zext_shift:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    prfm pldl1keep, [x0, w1, uxtw #3]
+; CHECK-NEXT:    ret
+  %a.zext = zext i32 %a to i64
+  %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 %a.zext
+  call void @llvm.prefetch.p0(ptr %incdec.ptr, i32 0, i32 3, i32 1)
+  ret void
+}

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, it looks like prfm should be fast in the same ways as normal loads/stores.

@davemgreen davemgreen merged commit 4a00c84 into llvm:main Jan 28, 2025
10 checks passed
@Il-Capitano Il-Capitano deleted the fix-prfm-register-offset branch January 28, 2025 09:24
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants