; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
; RUN: %if ptxas-12.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}

; Exercises the llvm.nvvm.prefetch* intrinsics declared below. The first RUN
; line matches the llc output against the expectations in each function; the
; second feeds the same output to ptxas when the lit configuration provides it.

target triple = "nvptx64-nvidia-cuda"

; Variants that take an addrspace(5) pointer (the ".local" intrinsics).
declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)

; Variants that take an addrspace(1) pointer (the ".global" intrinsics), in
; both the evictnormal and evictlast flavours.
declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)

; Variants that take a pointer with no explicit address space.
declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)

; prefetchu variant; only an L1 form is declared in this file.
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)

; Issues the L1 and then the L2 evictnormal prefetch on one addrspace(5)
; pointer. The expected PTX loads the parameter once into %rd1 and uses that
; register as the address of both prefetch.local instructions, in call order.
define void @prefetch_local(ptr addrspace(5) %local_ptr) {
; CHECK-PTX64-LABEL: prefetch_local(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch_local_param_0];
; CHECK-PTX64-NEXT:    prefetch.local.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.local.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
  tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
  ret void
}

; Issues four prefetches on one addrspace(1) pointer: L1 and L2 with
; evictnormal, then L1 and L2 with evictlast. The expected PTX loads the
; parameter once into %rd1 and emits the four prefetch.global instructions in
; the same order as the calls.
;
; NOTE(review): the PTX ISA appears to spell eviction priority as
; ".L2::evict_last" / ".L2::evict_normal" and to offer it at the L2 level only;
; confirm that the mnemonics expected below are accepted by ptxas (the guarded
; ptxas verification line only runs when ptxas is available).
define void @prefetch_global(ptr addrspace(1) %global_ptr) {
; CHECK-PTX64-LABEL: prefetch_global(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch_global_param_0];
; CHECK-PTX64-NEXT:    prefetch.global.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.global.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.global.L1.evictlast [%rd1];
; CHECK-PTX64-NEXT:    prefetch.global.L2.evictlast [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
  tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
  tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
  tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
  ret void
}

; Issues the L1 and then the L2 evictnormal prefetch on a pointer that carries
; no explicit address space. The expected PTX uses the space-less "prefetch"
; mnemonic for both, addressed through the single loaded parameter %rd1.
define void @prefetch_(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetch_(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetch__param_0];
; CHECK-PTX64-NEXT:    prefetch.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    prefetch.L2.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
  tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
  ret void
}

; Issues a single prefetchu L1 evictnormal on a pointer that carries no
; explicit address space. The expected PTX is one parameter load into %rd1
; followed by one prefetchu instruction addressed through it.
define void @prefetchu_l1(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetchu_l1(
; CHECK-PTX64:       {
; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT:  // %bb.0:
; CHECK-PTX64-NEXT:    ld.param.u64 %rd1, [prefetchu_l1_param_0];
; CHECK-PTX64-NEXT:    prefetchu.L1.evictnormal [%rd1];
; CHECK-PTX64-NEXT:    ret;
  tail call void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
  ret void
}