Skip to content

Commit 55f3df8

Browse files
authored
[NVPTX] Fix and refine prefetch.* intrinsics (#126899)
This is a follow-up PR to #125887 that fixes the intrinsic failures. --------- Co-authored-by: abmajumder <[email protected]>
1 parent a663e78 commit 55f3df8

File tree

4 files changed

+74
-67
lines changed

4 files changed

+74
-67
lines changed

llvm/docs/NVPTXUsage.rst

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -598,18 +598,18 @@ Syntax:
598598

599599
.. code-block:: llvm
600600
601-
declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
602-
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
601+
declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
602+
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
603+
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
604+
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
603605
604-
declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
605-
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
606-
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
607-
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
606+
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
607+
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
608608
609-
declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
610-
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
611-
612-
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
609+
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
610+
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
611+
612+
declare void @llvm.nvvm.prefetchu.L1(ptr %ptr)
613613
614614
Overview:
615615
"""""""""

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5001,22 +5001,26 @@ foreach dim = [1, 2, 3, 4, 5] in {
50015001
}
50025002

50035003
// Intrinsics for Prefetch and Prefetchu
5004-
foreach level = ["L1", "L2"] in {
5005-
foreach addr = ["global", "local", ""] in {
5006-
foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
5007-
defvar suffix = "" # !if(!eq(addr, ""), "", addr # "_") # level # "_" # evict;
5008-
def int_nvvm_prefetch_ # suffix : Intrinsic<[],
5009-
!cond(
5010-
!eq(addr, "global") : [llvm_global_ptr_ty],
5011-
!eq(addr, "local") : [llvm_local_ptr_ty],
5012-
!eq(addr, "") : [llvm_ptr_ty]),
5013-
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
5014-
NoCapture<ArgIndex<0>>]>;
5015-
}
5016-
}
5017-
}
5004+
def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty],
5005+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5006+
def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty],
5007+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5008+
def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty],
5009+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5010+
def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty],
5011+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5012+
def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty],
5013+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5014+
def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty],
5015+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5016+
5017+
def int_nvvm_prefetch_global_L2_evict_normal: Intrinsic<[], [llvm_global_ptr_ty],
5018+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5019+
def int_nvvm_prefetch_global_L2_evict_last: Intrinsic<[], [llvm_global_ptr_ty],
5020+
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
5021+
50185022

5019-
def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
5023+
def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty],
50205024
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
50215025

50225026

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -741,9 +741,6 @@ foreach dim = [1, 2, 3, 4, 5] in {
741741
}
742742

743743
//Prefetch and Prefetchu
744-
class Join<string sep, list<string> lst> {
745-
string ret = !foldl("", lst, a, b, !if(!eq(a, ""), b, !if(!eq(b,""), a, !strconcat(a, sep, b))));
746-
}
747744

748745
class PREFETCH_INTRS<string InstName> :
749746
NVPTXInst<(outs), (ins Int64Regs:$addr),
@@ -753,19 +750,25 @@ class PREFETCH_INTRS<string InstName> :
753750
Requires<[hasPTX<80>, hasSM<90>]>;
754751

755752

756-
// Only global supports evictlast and evictnormal.
757-
// Other variants (local and default) only support evictnormal
758-
foreach level = ["L1", "L2"] in {
759-
foreach addr = ["global", "local", ""] in {
760-
foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
761-
defvar suffix = Join<"_", [addr, level, evict]>.ret;
762-
defvar inst_name = "prefetch." # !subst("_", ".", suffix);
763-
def PREFETCH_# suffix : PREFETCH_INTRS<inst_name>;
764-
}
765-
}
766-
}
753+
def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">;
754+
def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">;
755+
def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
756+
def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
757+
def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
758+
def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
759+
760+
def PREFETCH_GLOBAL_L2_EVICT_NORMAL : NVPTXInst<(outs), (ins Int64Regs:$addr),
761+
"prefetch.global.L2::evict_normal" # " [$addr];",
762+
[(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_normal") i64:$addr)]>,
763+
Requires<[hasPTX<80>, hasSM<90>]>;
764+
765+
def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr),
766+
"prefetch.global.L2::evict_last" # " [$addr];",
767+
[(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_last") i64:$addr)]>,
768+
Requires<[hasPTX<80>, hasSM<90>]>;
769+
767770

768-
def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
771+
def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">;
769772

770773
//-----------------------------------
771774
// MBarrier Functions

llvm/test/CodeGen/NVPTX/prefetch.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@
44

55
target triple = "nvptx64-nvidia-cuda"
66

7-
declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
8-
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
7+
declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
8+
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
9+
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
10+
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
911

10-
declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
11-
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
12-
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
13-
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
12+
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
13+
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
1414

15-
declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
16-
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
15+
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
16+
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
1717

18-
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
18+
declare void @llvm.nvvm.prefetchu.L1(ptr %ptr)
1919

2020
define void @prefetch_local(ptr addrspace(5) %local_ptr) {
2121
; CHECK-PTX64-LABEL: prefetch_local(
@@ -24,11 +24,11 @@ define void @prefetch_local(ptr addrspace(5) %local_ptr) {
2424
; CHECK-PTX64-EMPTY:
2525
; CHECK-PTX64-NEXT: // %bb.0:
2626
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_local_param_0];
27-
; CHECK-PTX64-NEXT: prefetch.local.L1.evictnormal [%rd1];
28-
; CHECK-PTX64-NEXT: prefetch.local.L2.evictnormal [%rd1];
27+
; CHECK-PTX64-NEXT: prefetch.local.L1 [%rd1];
28+
; CHECK-PTX64-NEXT: prefetch.local.L2 [%rd1];
2929
; CHECK-PTX64-NEXT: ret;
30-
tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
31-
tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
30+
tail call void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
31+
tail call void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
3232
ret void
3333
}
3434

@@ -39,15 +39,15 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
3939
; CHECK-PTX64-EMPTY:
4040
; CHECK-PTX64-NEXT: // %bb.0:
4141
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_global_param_0];
42-
; CHECK-PTX64-NEXT: prefetch.global.L1.evictnormal [%rd1];
43-
; CHECK-PTX64-NEXT: prefetch.global.L2.evictnormal [%rd1];
44-
; CHECK-PTX64-NEXT: prefetch.global.L1.evictlast [%rd1];
45-
; CHECK-PTX64-NEXT: prefetch.global.L2.evictlast [%rd1];
42+
; CHECK-PTX64-NEXT: prefetch.global.L1 [%rd1];
43+
; CHECK-PTX64-NEXT: prefetch.global.L2 [%rd1];
44+
; CHECK-PTX64-NEXT: prefetch.global.L2::evict_normal [%rd1];
45+
; CHECK-PTX64-NEXT: prefetch.global.L2::evict_last [%rd1];
4646
; CHECK-PTX64-NEXT: ret;
47-
tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
48-
tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
49-
tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
50-
tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
47+
tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
48+
tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
49+
tail call void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
50+
tail call void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
5151
ret void
5252
}
5353

@@ -59,11 +59,11 @@ define void @prefetch_(ptr %ptr) {
5959
; CHECK-PTX64-EMPTY:
6060
; CHECK-PTX64-NEXT: // %bb.0:
6161
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch__param_0];
62-
; CHECK-PTX64-NEXT: prefetch.L1.evictnormal [%rd1];
63-
; CHECK-PTX64-NEXT: prefetch.L2.evictnormal [%rd1];
62+
; CHECK-PTX64-NEXT: prefetch.L1 [%rd1];
63+
; CHECK-PTX64-NEXT: prefetch.L2 [%rd1];
6464
; CHECK-PTX64-NEXT: ret;
65-
tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
66-
tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
65+
tail call void @llvm.nvvm.prefetch.L1(ptr %ptr)
66+
tail call void @llvm.nvvm.prefetch.L2(ptr %ptr)
6767
ret void
6868
}
6969

@@ -74,8 +74,8 @@ define void @prefetchu_l1(ptr %ptr) {
7474
; CHECK-PTX64-EMPTY:
7575
; CHECK-PTX64-NEXT: // %bb.0:
7676
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetchu_l1_param_0];
77-
; CHECK-PTX64-NEXT: prefetchu.L1.evictnormal [%rd1];
77+
; CHECK-PTX64-NEXT: prefetchu.L1 [%rd1];
7878
; CHECK-PTX64-NEXT: ret;
79-
tail call void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
79+
tail call void @llvm.nvvm.prefetchu.L1(ptr %ptr)
8080
ret void
8181
}

0 commit comments

Comments
 (0)