Skip to content

[NVPTX] Fix and refine prefetch.* intrinsics #126899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions llvm/docs/NVPTXUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -589,18 +589,18 @@ Syntax:

.. code-block:: llvm

declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)

declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)

declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)

declare void @llvm.nvvm.prefetchu.L1(ptr %ptr)

Overview:
"""""""""
Expand Down
34 changes: 19 additions & 15 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -5001,22 +5001,26 @@ foreach dim = [1, 2, 3, 4, 5] in {
}

// Intrinsics for Prefetch and Prefetchu
// Generates one int_nvvm_prefetch_* intrinsic per (level, addr, evict)
// combination. Only addr == "global" iterates over both "evictlast" and
// "evictnormal"; "local" and the unqualified ("") form get "evictnormal" only.
foreach level = ["L1", "L2"] in {
foreach addr = ["global", "local", ""] in {
foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
// Record-name suffix is "<addr>_<level>_<evict>"; the "<addr>_" part is
// omitted when addr is the empty string.
defvar suffix = "" # !if(!eq(addr, ""), "", addr # "_") # level # "_" # evict;
// No results; the single pointer operand's type is selected by addr.
def int_nvvm_prefetch_ # suffix : Intrinsic<[],
!cond(
!eq(addr, "global") : [llvm_global_ptr_ty],
!eq(addr, "local") : [llvm_local_ptr_ty],
!eq(addr, "") : [llvm_ptr_ty]),
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
NoCapture<ArgIndex<0>>]>;
}
}
}
// Prefetch intrinsics without an eviction suffix, at L1 and L2.
// The unqualified form takes llvm_ptr_ty, the global form llvm_global_ptr_ty
// and the local form llvm_local_ptr_ty. Each returns nothing and marks its
// single pointer argument IntrArgMemOnly, ReadOnly and NoCapture.
def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;

// Global L2 prefetch intrinsics carrying an explicit evict_normal / evict_last
// suffix. Each takes one llvm_global_ptr_ty operand, returns nothing, and
// marks that operand IntrArgMemOnly, ReadOnly and NoCapture.
def int_nvvm_prefetch_global_L2_evict_normal: Intrinsic<[], [llvm_global_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_prefetch_global_L2_evict_last: Intrinsic<[], [llvm_global_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;


def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;


Expand Down
33 changes: 18 additions & 15 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -741,9 +741,6 @@ foreach dim = [1, 2, 3, 4, 5] in {
}

// Prefetch and Prefetchu
// Joins the strings in `lst` with `sep`, skipping empty entries.
// The joined string is exposed as the `ret` field.
class Join<string sep, list<string> lst> {
// Fold left to right: an empty accumulator takes the next element as-is, an
// empty element leaves the accumulator unchanged, otherwise append sep + b.
string ret = !foldl("", lst, a, b, !if(!eq(a, ""), b, !if(!eq(b,""), a, !strconcat(a, sep, b))));
}

class PREFETCH_INTRS<string InstName> :
NVPTXInst<(outs), (ins Int64Regs:$addr),
Expand All @@ -753,19 +750,25 @@ class PREFETCH_INTRS<string InstName> :
Requires<[hasPTX<80>, hasSM<90>]>;


// Only global supports both evictlast and evictnormal.
// Other variants (local and default) only support evictnormal.
// For each (level, addr, evict) combination one PREFETCH_<suffix> record is
// defined from PREFETCH_INTRS.
foreach level = ["L1", "L2"] in {
foreach addr = ["global", "local", ""] in {
foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
// suffix: the non-empty parts joined with "_" (addr is dropped when "").
defvar suffix = Join<"_", [addr, level, evict]>.ret;
// inst_name: "prefetch." followed by the suffix with "_" replaced by ".".
defvar inst_name = "prefetch." # !subst("_", ".", suffix);
def PREFETCH_# suffix : PREFETCH_INTRS<inst_name>;
}
}
}
// Prefetch instructions without an eviction suffix. Each record instantiates
// PREFETCH_INTRS with the instruction-name string for one combination of
// level (L1/L2) and address qualifier (none, global, local).
def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">;
def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">;
def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;

// Global L2 prefetch with an explicit eviction qualifier. These two records
// are written out as NVPTXInst directly rather than through PREFETCH_INTRS:
// the assembly string uses the "::evict_normal" / "::evict_last" spelling,
// while the matched intrinsic is looked up by its "_evict_normal" /
// "_evict_last" record name via !cast<Intrinsic>.
// Both take a single Int64Regs address operand, produce no outputs, and are
// gated on hasPTX<80> and hasSM<90>.
def PREFETCH_GLOBAL_L2_EVICT_NORMAL : NVPTXInst<(outs), (ins Int64Regs:$addr),
"prefetch.global.L2::evict_normal" # " [$addr];",
[(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_normal") i64:$addr)]>,
Requires<[hasPTX<80>, hasSM<90>]>;

def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr),
"prefetch.global.L2::evict_last" # " [$addr];",
[(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_last") i64:$addr)]>,
Requires<[hasPTX<80>, hasSM<90>]>;


def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">;

//-----------------------------------
// MBarrier Functions
Expand Down
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/NVPTX/prefetch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@

target triple = "nvptx64-nvidia-cuda"

declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)

declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)

declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)

declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetchu.L1(ptr %ptr)

define void @prefetch_local(ptr addrspace(5) %local_ptr) {
; CHECK-PTX64-LABEL: prefetch_local(
Expand All @@ -24,11 +24,11 @@ define void @prefetch_local(ptr addrspace(5) %local_ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_local_param_0];
; CHECK-PTX64-NEXT: prefetch.local.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetch.local.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetch.local.L1 [%rd1];
; CHECK-PTX64-NEXT: prefetch.local.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
tail call void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
tail call void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
ret void
}

Expand All @@ -39,15 +39,15 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_global_param_0];
; CHECK-PTX64-NEXT: prefetch.global.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L1.evictlast [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L2.evictlast [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L1 [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L2 [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L2::evict_normal [%rd1];
; CHECK-PTX64-NEXT: prefetch.global.L2::evict_last [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
tail call void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
ret void
}

Expand All @@ -59,11 +59,11 @@ define void @prefetch_(ptr %ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch__param_0];
; CHECK-PTX64-NEXT: prefetch.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetch.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetch.L1 [%rd1];
; CHECK-PTX64-NEXT: prefetch.L2 [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
tail call void @llvm.nvvm.prefetch.L1(ptr %ptr)
tail call void @llvm.nvvm.prefetch.L2(ptr %ptr)
ret void
}

Expand All @@ -74,8 +74,8 @@ define void @prefetchu_l1(ptr %ptr) {
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetchu_l1_param_0];
; CHECK-PTX64-NEXT: prefetchu.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT: prefetchu.L1 [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
tail call void @llvm.nvvm.prefetchu.L1(ptr %ptr)
ret void
}