1
- ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64
2
- ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64
3
- ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify %}
4
- ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 --nvptx-short-ptr | %ptxas-verify %}
1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
+ ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s -check-prefixes=NOPTRCONV
3
+ ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | FileCheck %s -check-prefixes=PTRCONV
4
+ ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify %}
5
+ ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | %ptxas-verify %}
5
6
6
7
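; NOTE(review): the RUN lines above that use -check-prefixes=NOPTRCONV and
; -check-prefixes=PTRCONV do not enable the ALL prefix, so the directive below
; looks inert under them -- confirm and consider removing it.
; ALL-LABEL: conv_shared_cluster_to_generic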
7
8
define i32 @conv_shared_cluster_to_generic (ptr addrspace (7 ) %ptr ) {
8
9
; CLS32: cvta.shared::cluster.u32
9
- ; PTRCONV: cvt.u64.u32
10
- ; NOPTRCONV-NOT: cvt.u64.u32
11
- ; CLS64: cvta.shared::cluster.u64
12
- ; ALL: ld.u32
10
+ ; NOPTRCONV-LABEL: conv_shared_cluster_to_generic(
11
+ ; NOPTRCONV: {
12
+ ; NOPTRCONV-NEXT: .reg .b32 %r<2>;
13
+ ; NOPTRCONV-NEXT: .reg .b64 %rd<3>;
14
+ ; NOPTRCONV-EMPTY:
15
+ ; NOPTRCONV-NEXT: // %bb.0:
16
+ ; NOPTRCONV-NEXT: ld.param.u64 %rd1, [conv_shared_cluster_to_generic_param_0];
17
+ ; NOPTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
18
+ ; NOPTRCONV-NEXT: ld.u32 %r1, [%rd2];
19
+ ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
20
+ ; NOPTRCONV-NEXT: ret;
21
+ ;
22
+ ; PTRCONV-LABEL: conv_shared_cluster_to_generic(
23
+ ; PTRCONV: {
24
+ ; PTRCONV-NEXT: .reg .b32 %r<3>;
25
+ ; PTRCONV-NEXT: .reg .b64 %rd<3>;
26
+ ; PTRCONV-EMPTY:
27
+ ; PTRCONV-NEXT: // %bb.0:
28
+ ; PTRCONV-NEXT: ld.param.u32 %r1, [conv_shared_cluster_to_generic_param_0];
29
+ ; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1;
30
+ ; PTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
31
+ ; PTRCONV-NEXT: ld.u32 %r2, [%rd2];
32
+ ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r2;
33
+ ; PTRCONV-NEXT: ret;
13
34
%genptr = addrspacecast ptr addrspace (7 ) %ptr to ptr
14
35
%val = load i32 , ptr %genptr
15
36
ret i32 %val
@@ -18,31 +39,99 @@ define i32 @conv_shared_cluster_to_generic(ptr addrspace(7) %ptr) {
18
39
; Test: addrspacecast of a generic pointer to addrspace(7), followed by an i32
; load through the cast pointer; the loaded value is returned.
; The NOPTRCONV checks expect cvta.to.shared::cluster.u64 followed by
; ld.shared::cluster.u32. The PTRCONV checks (run with --nvptx-short-ptr)
; additionally expect a cvt.u32.u64 between the cvta and the load, and the load
; address is held in a 32-bit register.
; NOTE(review): no RUN line visible in this file enables the CLS32 prefix, and
; the RUN lines using -check-prefixes=NOPTRCONV / PTRCONV do not enable ALL, so
; the ALL-LABEL and CLS32 directives below look inert -- confirm and consider
; removing them.
; ALL-LABEL: conv_generic_to_shared_cluster
19
40
define i32 @conv_generic_to_shared_cluster (ptr %ptr ) {
20
41
; CLS32: cvta.to.shared::cluster.u32
21
- ; CLS64: cvta.to.shared::cluster.u64
22
- ; PTRCONV: cvt.u32.u64
23
- ; NOPTRCONV-NOT: cvt.u32.u64
24
- ; ALL: ld.shared::cluster.u32
42
+ ; NOPTRCONV-LABEL: conv_generic_to_shared_cluster(
43
+ ; NOPTRCONV: {
44
+ ; NOPTRCONV-NEXT: .reg .b32 %r<2>;
45
+ ; NOPTRCONV-NEXT: .reg .b64 %rd<3>;
46
+ ; NOPTRCONV-EMPTY:
47
+ ; NOPTRCONV-NEXT: // %bb.0:
48
+ ; NOPTRCONV-NEXT: ld.param.u64 %rd1, [conv_generic_to_shared_cluster_param_0];
49
+ ; NOPTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd2, %rd1;
50
+ ; NOPTRCONV-NEXT: ld.shared::cluster.u32 %r1, [%rd2];
51
+ ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
52
+ ; NOPTRCONV-NEXT: ret;
53
+ ;
54
+ ; PTRCONV-LABEL: conv_generic_to_shared_cluster(
55
+ ; PTRCONV: {
56
+ ; PTRCONV-NEXT: .reg .b32 %r<3>;
57
+ ; PTRCONV-NEXT: .reg .b64 %rd<3>;
58
+ ; PTRCONV-EMPTY:
59
+ ; PTRCONV-NEXT: // %bb.0:
60
+ ; PTRCONV-NEXT: ld.param.u64 %rd1, [conv_generic_to_shared_cluster_param_0];
61
+ ; PTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd2, %rd1;
62
+ ; PTRCONV-NEXT: cvt.u32.u64 %r1, %rd2;
63
+ ; PTRCONV-NEXT: ld.shared::cluster.u32 %r2, [%r1];
64
+ ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r2;
65
+ ; PTRCONV-NEXT: ret;
25
66
%specptr = addrspacecast ptr %ptr to ptr addrspace (7 )
26
67
%val = load i32 , ptr addrspace (7 ) %specptr
27
68
ret i32 %val
28
69
}
29
70
30
71
; Test: addrspacecast of an addrspace(3) pointer to addrspace(7), followed by
; an i32 load through the cast pointer; the loaded value is returned.
; Both check sets expect the cast to be emitted as two steps, cvta.shared.u64
; then cvta.to.shared::cluster.u64, followed by ld.shared::cluster.u32. The
; PTRCONV checks (run with --nvptx-short-ptr) additionally expect a cvt.u64.u32
; before the first cvta and a cvt.u32.u64 after the second one.
; NOTE(review): the RUN lines using -check-prefixes=NOPTRCONV / PTRCONV do not
; enable the ALL prefix, so the ALL-LABEL directive below looks inert under
; them -- confirm and consider removing it.
; ALL-LABEL: conv_shared_to_shared_cluster
31
72
define i32 @conv_shared_to_shared_cluster (ptr addrspace (3 ) %ptr ) {
32
- ; CLS64: cvta.shared.u64
33
- ; CLS64: cvta.to.shared::cluster.u64
34
- ; ALL: ld.shared::cluster.u32
73
+ ; NOPTRCONV-LABEL: conv_shared_to_shared_cluster(
74
+ ; NOPTRCONV: {
75
+ ; NOPTRCONV-NEXT: .reg .b32 %r<2>;
76
+ ; NOPTRCONV-NEXT: .reg .b64 %rd<4>;
77
+ ; NOPTRCONV-EMPTY:
78
+ ; NOPTRCONV-NEXT: // %bb.0:
79
+ ; NOPTRCONV-NEXT: ld.param.u64 %rd1, [conv_shared_to_shared_cluster_param_0];
80
+ ; NOPTRCONV-NEXT: cvta.shared.u64 %rd2, %rd1;
81
+ ; NOPTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd3, %rd2;
82
+ ; NOPTRCONV-NEXT: ld.shared::cluster.u32 %r1, [%rd3];
83
+ ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
84
+ ; NOPTRCONV-NEXT: ret;
85
+ ;
86
+ ; PTRCONV-LABEL: conv_shared_to_shared_cluster(
87
+ ; PTRCONV: {
88
+ ; PTRCONV-NEXT: .reg .b32 %r<4>;
89
+ ; PTRCONV-NEXT: .reg .b64 %rd<4>;
90
+ ; PTRCONV-EMPTY:
91
+ ; PTRCONV-NEXT: // %bb.0:
92
+ ; PTRCONV-NEXT: ld.param.u32 %r1, [conv_shared_to_shared_cluster_param_0];
93
+ ; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1;
94
+ ; PTRCONV-NEXT: cvta.shared.u64 %rd2, %rd1;
95
+ ; PTRCONV-NEXT: cvta.to.shared::cluster.u64 %rd3, %rd2;
96
+ ; PTRCONV-NEXT: cvt.u32.u64 %r2, %rd3;
97
+ ; PTRCONV-NEXT: ld.shared::cluster.u32 %r3, [%r2];
98
+ ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r3;
99
+ ; PTRCONV-NEXT: ret;
35
100
%specptr = addrspacecast ptr addrspace (3 ) %ptr to ptr addrspace (7 )
36
101
%val = load i32 , ptr addrspace (7 ) %specptr
37
102
ret i32 %val
38
103
}
39
104
40
105
; Test: addrspacecast of an addrspace(7) pointer to addrspace(3), followed by
; an i32 load through the cast pointer; the loaded value is returned.
; Both check sets expect the cast to be emitted as two steps,
; cvta.shared::cluster.u64 then cvta.to.shared.u64, followed by ld.shared.u32.
; The PTRCONV checks (run with --nvptx-short-ptr) additionally expect a
; cvt.u64.u32 before the first cvta and a cvt.u32.u64 after the second one.
; NOTE(review): the RUN lines using -check-prefixes=NOPTRCONV / PTRCONV do not
; enable the ALL prefix, so the ALL-LABEL directive below looks inert under
; them -- confirm and consider removing it.
; ALL-LABEL: conv_shared_cluster_to_shared
41
106
define i32 @conv_shared_cluster_to_shared (ptr addrspace (7 ) %ptr ) {
42
- ; CLS64: cvta.shared::cluster.u64
43
- ; CLS64: cvta.to.shared.u64
44
- ; ALL: ld.shared.u32
107
+ ; NOPTRCONV-LABEL: conv_shared_cluster_to_shared(
108
+ ; NOPTRCONV: {
109
+ ; NOPTRCONV-NEXT: .reg .b32 %r<2>;
110
+ ; NOPTRCONV-NEXT: .reg .b64 %rd<4>;
111
+ ; NOPTRCONV-EMPTY:
112
+ ; NOPTRCONV-NEXT: // %bb.0:
113
+ ; NOPTRCONV-NEXT: ld.param.u64 %rd1, [conv_shared_cluster_to_shared_param_0];
114
+ ; NOPTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
115
+ ; NOPTRCONV-NEXT: cvta.to.shared.u64 %rd3, %rd2;
116
+ ; NOPTRCONV-NEXT: ld.shared.u32 %r1, [%rd3];
117
+ ; NOPTRCONV-NEXT: st.param.b32 [func_retval0], %r1;
118
+ ; NOPTRCONV-NEXT: ret;
119
+ ;
120
+ ; PTRCONV-LABEL: conv_shared_cluster_to_shared(
121
+ ; PTRCONV: {
122
+ ; PTRCONV-NEXT: .reg .b32 %r<4>;
123
+ ; PTRCONV-NEXT: .reg .b64 %rd<4>;
124
+ ; PTRCONV-EMPTY:
125
+ ; PTRCONV-NEXT: // %bb.0:
126
+ ; PTRCONV-NEXT: ld.param.u32 %r1, [conv_shared_cluster_to_shared_param_0];
127
+ ; PTRCONV-NEXT: cvt.u64.u32 %rd1, %r1;
128
+ ; PTRCONV-NEXT: cvta.shared::cluster.u64 %rd2, %rd1;
129
+ ; PTRCONV-NEXT: cvta.to.shared.u64 %rd3, %rd2;
130
+ ; PTRCONV-NEXT: cvt.u32.u64 %r2, %rd3;
131
+ ; PTRCONV-NEXT: ld.shared.u32 %r3, [%r2];
132
+ ; PTRCONV-NEXT: st.param.b32 [func_retval0], %r3;
133
+ ; PTRCONV-NEXT: ret;
45
134
%specptr = addrspacecast ptr addrspace (7 ) %ptr to ptr addrspace (3 )
46
135
%val = load i32 , ptr addrspace (3 ) %specptr
47
136
ret i32 %val
48
- }
137
+ }
0 commit comments