-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[NVPTX] Fix and refine prefetch.* intrinsics #126899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-nvptx Author: Abhilash Majumder (abhilash1910) Changes: This is a follow-up PR to #125887 that fixes the intrinsic failures. Full diff: https://github.com/llvm/llvm-project/pull/126899.diff 4 Files Affected:
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 1680b11433537..5168c6cea99d1 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -589,16 +589,13 @@ Syntax:
.. code-block:: llvm
- declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
- declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+ declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+ declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+ declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
- declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
- declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
- declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
- declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
-
- declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
- declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+ declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
+ declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6af1f2a166773..19d1535e6215d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -5001,20 +5001,19 @@ foreach dim = [1, 2, 3, 4, 5] in {
}
// Intrinsics for Prefetch and Prefetchu
-foreach level = ["L1", "L2"] in {
- foreach addr = ["global", "local", ""] in {
- foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
- defvar suffix = "" # !if(!eq(addr, ""), "", addr # "_") # level # "_" # evict;
- def int_nvvm_prefetch_ # suffix : Intrinsic<[],
- !cond(
- !eq(addr, "global") : [llvm_global_ptr_ty],
- !eq(addr, "local") : [llvm_local_ptr_ty],
- !eq(addr, "") : [llvm_ptr_ty]),
- [IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
- NoCapture<ArgIndex<0>>]>;
- }
- }
-}
+def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+
def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 39dac65d67eb9..1b81072a19769 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -741,9 +741,6 @@ foreach dim = [1, 2, 3, 4, 5] in {
}
//Prefetch and Prefetchu
-class Join<string sep, list<string> lst> {
- string ret = !foldl("", lst, a, b, !if(!eq(a, ""), b, !if(!eq(b,""), a, !strconcat(a, sep, b))));
-}
class PREFETCH_INTRS<string InstName> :
NVPTXInst<(outs), (ins Int64Regs:$addr),
@@ -753,17 +750,13 @@ class PREFETCH_INTRS<string InstName> :
Requires<[hasPTX<80>, hasSM<90>]>;
-// Only global supports evictlast and evictnormal.
-// Other variants (local and default) only support evictnormal
-foreach level = ["L1", "L2"] in {
- foreach addr = ["global", "local", ""] in {
- foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
- defvar suffix = Join<"_", [addr, level, evict]>.ret;
- defvar inst_name = "prefetch." # !subst("_", ".", suffix);
- def PREFETCH_# suffix : PREFETCH_INTRS<inst_name>;
- }
- }
-}
+def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">;
+def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">;
+def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
+def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
+def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
+def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
+
def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index cf47000ffd9aa..70fd2a24fc586 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -4,16 +4,13 @@
target triple = "nvptx64-nvidia-cuda"
-declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
-declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
+declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
-declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
-declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
-declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
-declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
-
-declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
-declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
+declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
@@ -24,11 +21,11 @@ define void @prefetch_local(ptr addrspace(5) %local_ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_local_param_0];
-; CHECK-PTX64-NEXT: prefetch.local.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.local.L2.evictnormal [%rd1];
+; CHECK-PTX64-NEXT: prefetch.local.L1 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.local.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
- tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
- tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
+ tail call void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
+ tail call void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
ret void
}
@@ -39,15 +36,11 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_global_param_0];
-; CHECK-PTX64-NEXT: prefetch.global.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.global.L2.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.global.L1.evictlast [%rd1];
-; CHECK-PTX64-NEXT: prefetch.global.L2.evictlast [%rd1];
+; CHECK-PTX64-NEXT: prefetch.global.L1 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.global.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
- tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
- tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
- tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
- tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
+ tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
+ tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
ret void
}
@@ -59,11 +52,11 @@ define void @prefetch_(ptr %ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch__param_0];
-; CHECK-PTX64-NEXT: prefetch.L1.evictnormal [%rd1];
-; CHECK-PTX64-NEXT: prefetch.L2.evictnormal [%rd1];
+; CHECK-PTX64-NEXT: prefetch.L1 [%rd1];
+; CHECK-PTX64-NEXT: prefetch.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
- tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
- tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
+ tail call void @llvm.nvvm.prefetch.L1(ptr %ptr)
+ tail call void @llvm.nvvm.prefetch.L2(ptr %ptr)
ret void
}
|
I confirm that this PR fixes the prefetch.ll test |
Thank you for the confirmation! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, the latest revision looks good to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Note: normally removing/changing public-facing APIs like intrinsics requires extra steps to avoid breaking existing users, or a fairly long deprecation sequence.
However in this case the original changes we're refining here were made only a day ago in #125887, so we're OK without autoupgrading the old intrinsics.
@abhilash1910 should I merge it? |
@durga4github could you please merge this? |
Thanks @metaflow |
This is follow-up PR from llvm#125887 which fixes the intrinsic failures . --------- Co-authored-by: abmajumder <[email protected]>
This is follow-up PR from llvm#125887 which fixes the intrinsic failures . --------- Co-authored-by: abmajumder <[email protected]>
This is a follow-up PR to #125887 that fixes the intrinsic failures.