Skip to content

Commit 5f80afe

Browse files
committed
pre-commit tests -- use update_llc_test_checks.py
1 parent 4485d91 commit 5f80afe

File tree

1 file changed

+204
-64
lines changed

1 file changed

+204
-64
lines changed

llvm/test/CodeGen/NVPTX/lower-args.ll

Lines changed: 204 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,IR,IRC
2-
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
3-
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
4-
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes IR,IRC
3+
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes IR,IRO
4+
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX,PTXC
5+
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes PTX,PTXO
56
; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %}
67

78
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
@@ -12,12 +13,60 @@ target triple = "nvptx64-nvidia-cuda"
1213
%class.padded = type { i8, i32 }
1314

1415
; Check that nvptx-lower-args preserves arg alignment
15-
; COMMON-LABEL: load_alignment
1616
define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %arg) {
17+
; IR-LABEL: define void @load_alignment(
18+
; IR-SAME: ptr readonly byval([[CLASS_OUTER:%.*]]) align 8 captures(none) [[ARG:%.*]]) {
19+
; IR-NEXT: [[ENTRY:.*:]]
20+
; IR-NEXT: [[ARG1:%.*]] = alloca [[CLASS_OUTER]], align 8
21+
; IR-NEXT: [[ARG2:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(101)
22+
; IR-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[ARG1]], ptr addrspace(101) align 8 [[ARG2]], i64 24, i1 false)
23+
; IR-NEXT: [[ARG_IDX_VAL:%.*]] = load ptr, ptr [[ARG1]], align 8
24+
; IR-NEXT: [[ARG_IDX1:%.*]] = getelementptr [[CLASS_OUTER]], ptr [[ARG1]], i64 0, i32 0, i32 1
25+
; IR-NEXT: [[ARG_IDX1_VAL:%.*]] = load ptr, ptr [[ARG_IDX1]], align 8
26+
; IR-NEXT: [[ARG_IDX2:%.*]] = getelementptr [[CLASS_OUTER]], ptr [[ARG1]], i64 0, i32 1
27+
; IR-NEXT: [[ARG_IDX2_VAL:%.*]] = load i32, ptr [[ARG_IDX2]], align 8
28+
; IR-NEXT: [[ARG_IDX_VAL_VAL:%.*]] = load i32, ptr [[ARG_IDX_VAL]], align 4
29+
; IR-NEXT: [[ADD_I:%.*]] = add nsw i32 [[ARG_IDX_VAL_VAL]], [[ARG_IDX2_VAL]]
30+
; IR-NEXT: store i32 [[ADD_I]], ptr [[ARG_IDX1_VAL]], align 4
31+
; IR-NEXT: [[TMP:%.*]] = call ptr @escape(ptr nonnull [[ARG_IDX2]])
32+
; IR-NEXT: ret void
33+
;
34+
; PTX-LABEL: load_alignment(
35+
; PTX: {
36+
; PTX-NEXT: .local .align 8 .b8 __local_depot0[24];
37+
; PTX-NEXT: .reg .b64 %SP;
38+
; PTX-NEXT: .reg .b64 %SPL;
39+
; PTX-NEXT: .reg .b32 %r<4>;
40+
; PTX-NEXT: .reg .b64 %rd<10>;
41+
; PTX-EMPTY:
42+
; PTX-NEXT: // %bb.0: // %entry
43+
; PTX-NEXT: mov.u64 %SPL, __local_depot0;
44+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
45+
; PTX-NEXT: ld.param.u64 %rd3, [load_alignment_param_0+16];
46+
; PTX-NEXT: st.local.u64 [%rd2+16], %rd3;
47+
; PTX-NEXT: ld.param.u64 %rd4, [load_alignment_param_0+8];
48+
; PTX-NEXT: st.local.u64 [%rd2+8], %rd4;
49+
; PTX-NEXT: ld.param.u64 %rd5, [load_alignment_param_0];
50+
; PTX-NEXT: st.local.u64 [%rd2], %rd5;
51+
; PTX-NEXT: add.s64 %rd6, %rd2, 16;
52+
; PTX-NEXT: cvta.local.u64 %rd7, %rd6;
53+
; PTX-NEXT: cvt.u32.u64 %r1, %rd3;
54+
; PTX-NEXT: ld.u32 %r2, [%rd5];
55+
; PTX-NEXT: add.s32 %r3, %r2, %r1;
56+
; PTX-NEXT: st.u32 [%rd4], %r3;
57+
; PTX-NEXT: { // callseq 0, 0
58+
; PTX-NEXT: .param .b64 param0;
59+
; PTX-NEXT: st.param.b64 [param0], %rd7;
60+
; PTX-NEXT: .param .b64 retval0;
61+
; PTX-NEXT: call.uni (retval0),
62+
; PTX-NEXT: escape,
63+
; PTX-NEXT: (
64+
; PTX-NEXT: param0
65+
; PTX-NEXT: );
66+
; PTX-NEXT: ld.param.b64 %rd8, [retval0];
67+
; PTX-NEXT: } // callseq 0
68+
; PTX-NEXT: ret;
1769
entry:
18-
; IR: call void @llvm.memcpy.p0.p101.i64(ptr align 8
19-
; PTX: ld.param.u64
20-
; PTX-NOT: ld.param.u8
2170
%arg.idx.val = load ptr, ptr %arg, align 8
2271
%arg.idx1 = getelementptr %class.outer, ptr %arg, i64 0, i32 0, i32 1
2372
%arg.idx1.val = load ptr, ptr %arg.idx1, align 8
@@ -34,8 +83,16 @@ entry:
3483
}
3584

3685
; Check that nvptx-lower-args copies padding as the struct may have been a union
37-
; COMMON-LABEL: load_padding
3886
define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
87+
; IR-LABEL: define void @load_padding(
88+
; IR-SAME: ptr readonly byval([[CLASS_PADDED:%.*]]) captures(none) [[ARG:%.*]]) {
89+
; IR-NEXT: [[ARG1:%.*]] = alloca [[CLASS_PADDED]], align 8
90+
; IR-NEXT: [[ARG2:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(101)
91+
; IR-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[ARG1]], ptr addrspace(101) align 8 [[ARG2]], i64 8, i1 false)
92+
; IR-NEXT: [[TMP:%.*]] = call ptr @escape(ptr nonnull align 16 [[ARG1]])
93+
; IR-NEXT: ret void
94+
;
95+
; PTX-LABEL: load_padding(
3996
; PTX: {
4097
; PTX-NEXT: .local .align 8 .b8 __local_depot1[8];
4198
; PTX-NEXT: .reg .b64 %SP;
@@ -45,8 +102,8 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
45102
; PTX-NEXT: // %bb.0:
46103
; PTX-NEXT: mov.u64 %SPL, __local_depot1;
47104
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
48-
; PTX-NEXT: add.u64 %rd1, %SP, 0;
49-
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
105+
; PTX-NEXT: add.u64 %rd1, %SP, 0;
106+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
50107
; PTX-NEXT: ld.param.u64 %rd3, [load_padding_param_0];
51108
; PTX-NEXT: st.local.u64 [%rd2], %rd3;
52109
; PTX-NEXT: { // callseq 1, 0
@@ -65,85 +122,168 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
65122
ret void
66123
}
67124

68-
; COMMON-LABEL: ptr_generic
69-
define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
70-
; IRC: %in3 = addrspacecast ptr %in to ptr addrspace(1)
71-
; IRC: %in4 = addrspacecast ptr addrspace(1) %in3 to ptr
72-
; IRC: %out1 = addrspacecast ptr %out to ptr addrspace(1)
73-
; IRC: %out2 = addrspacecast ptr addrspace(1) %out1 to ptr
74-
; PTXC: cvta.to.global.u64
75-
; PTXC: cvta.to.global.u64
76-
; PTXC: ld.global.u32
77-
; PTXC: st.global.u32
78-
79125
; OpenCL can't make assumptions about incoming pointer, so we should generate
80126
; generic pointers load/store.
81-
; IRO-NOT: addrspacecast
82-
; PTXO-NOT: cvta.to.global
83-
; PTXO: ld.u32
84-
; PTXO: st.u32
127+
define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
128+
; IRC-LABEL: define ptx_kernel void @ptr_generic(
129+
; IRC-SAME: ptr [[OUT:%.*]], ptr [[IN:%.*]]) {
130+
; IRC-NEXT: [[IN3:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
131+
; IRC-NEXT: [[IN4:%.*]] = addrspacecast ptr addrspace(1) [[IN3]] to ptr
132+
; IRC-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
133+
; IRC-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
134+
; IRC-NEXT: [[V:%.*]] = load i32, ptr [[IN4]], align 4
135+
; IRC-NEXT: store i32 [[V]], ptr [[OUT2]], align 4
136+
; IRC-NEXT: ret void
137+
;
138+
; IRO-LABEL: define ptx_kernel void @ptr_generic(
139+
; IRO-SAME: ptr [[OUT:%.*]], ptr [[IN:%.*]]) {
140+
; IRO-NEXT: [[V:%.*]] = load i32, ptr [[IN]], align 4
141+
; IRO-NEXT: store i32 [[V]], ptr [[OUT]], align 4
142+
; IRO-NEXT: ret void
143+
;
144+
; PTXC-LABEL: ptr_generic(
145+
; PTXC: {
146+
; PTXC-NEXT: .reg .b32 %r<2>;
147+
; PTXC-NEXT: .reg .b64 %rd<5>;
148+
; PTXC-EMPTY:
149+
; PTXC-NEXT: // %bb.0:
150+
; PTXC-NEXT: ld.param.u64 %rd1, [ptr_generic_param_0];
151+
; PTXC-NEXT: ld.param.u64 %rd2, [ptr_generic_param_1];
152+
; PTXC-NEXT: cvta.to.global.u64 %rd3, %rd2;
153+
; PTXC-NEXT: cvta.to.global.u64 %rd4, %rd1;
154+
; PTXC-NEXT: ld.global.u32 %r1, [%rd3];
155+
; PTXC-NEXT: st.global.u32 [%rd4], %r1;
156+
; PTXC-NEXT: ret;
157+
;
158+
; PTXO-LABEL: ptr_generic(
159+
; PTXO: {
160+
; PTXO-NEXT: .reg .b32 %r<2>;
161+
; PTXO-NEXT: .reg .b64 %rd<3>;
162+
; PTXO-EMPTY:
163+
; PTXO-NEXT: // %bb.0:
164+
; PTXO-NEXT: ld.param.u64 %rd1, [ptr_generic_param_0];
165+
; PTXO-NEXT: ld.param.u64 %rd2, [ptr_generic_param_1];
166+
; PTXO-NEXT: ld.u32 %r1, [%rd2];
167+
; PTXO-NEXT: st.u32 [%rd1], %r1;
168+
; PTXO-NEXT: ret;
85169
%v = load i32, ptr %in, align 4
86170
store i32 %v, ptr %out, align 4
87171
ret void
88172
}
89173

90-
; COMMON-LABEL: ptr_nongeneric
91174
define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
92-
; IR-NOT: addrspacecast
93-
; PTX-NOT: cvta.to.global
94-
; PTX: ld.shared.u32
95-
; PTX st.global.u32
175+
; IR-LABEL: define ptx_kernel void @ptr_nongeneric(
176+
; IR-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) [[IN:%.*]]) {
177+
; IR-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) [[IN]], align 4
178+
; IR-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
179+
; IR-NEXT: ret void
180+
;
181+
; PTX-LABEL: ptr_nongeneric(
182+
; PTX: {
183+
; PTX-NEXT: .reg .b32 %r<2>;
184+
; PTX-NEXT: .reg .b64 %rd<3>;
185+
; PTX-EMPTY:
186+
; PTX-NEXT: // %bb.0:
187+
; PTX-NEXT: ld.param.u64 %rd1, [ptr_nongeneric_param_0];
188+
; PTX-NEXT: ld.param.u64 %rd2, [ptr_nongeneric_param_1];
189+
; PTX-NEXT: ld.shared.u32 %r1, [%rd2];
190+
; PTX-NEXT: st.global.u32 [%rd1], %r1;
191+
; PTX-NEXT: ret;
96192
%v = load i32, ptr addrspace(3) %in, align 4
97193
store i32 %v, ptr addrspace(1) %out, align 4
98194
ret void
99195
}
100196

101-
; COMMON-LABEL: ptr_as_int
102-
define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
103-
; IR: [[P:%.*]] = inttoptr i64 %i to ptr
104-
; IRC: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
105-
; IRC: addrspacecast ptr addrspace(1) [[P1]] to ptr
106-
; IRO-NOT: addrspacecast
107-
108-
; PTXC-DAG: ld.param.u64 [[I:%rd.*]], [ptr_as_int_param_0];
109-
; PTXC-DAG: ld.param.u32 [[V:%r.*]], [ptr_as_int_param_1];
110-
; PTXC: cvta.to.global.u64 %[[P:rd.*]], [[I]];
111-
; PTXC: st.global.u32 [%[[P]]], [[V]];
112-
113-
; PTXO-DAG: ld.param.u64 %[[P:rd.*]], [ptr_as_int_param_0];
114-
; PTXO-DAG: ld.param.u32 [[V:%r.*]], [ptr_as_int_param_1];
115-
; PTXO: st.u32 [%[[P]]], [[V]];
116-
197+
define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
198+
; IRC-LABEL: define ptx_kernel void @ptr_as_int(
199+
; IRC-SAME: i64 noundef [[I:%.*]], i32 noundef [[V:%.*]]) {
200+
; IRC-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
201+
; IRC-NEXT: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
202+
; IRC-NEXT: [[P2:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
203+
; IRC-NEXT: store i32 [[V]], ptr [[P2]], align 4
204+
; IRC-NEXT: ret void
205+
;
206+
; IRO-LABEL: define ptx_kernel void @ptr_as_int(
207+
; IRO-SAME: i64 noundef [[I:%.*]], i32 noundef [[V:%.*]]) {
208+
; IRO-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
209+
; IRO-NEXT: store i32 [[V]], ptr [[P]], align 4
210+
; IRO-NEXT: ret void
211+
;
212+
; PTXC-LABEL: ptr_as_int(
213+
; PTXC: {
214+
; PTXC-NEXT: .reg .b32 %r<2>;
215+
; PTXC-NEXT: .reg .b64 %rd<3>;
216+
; PTXC-EMPTY:
217+
; PTXC-NEXT: // %bb.0:
218+
; PTXC-NEXT: ld.param.u64 %rd1, [ptr_as_int_param_0];
219+
; PTXC-NEXT: ld.param.u32 %r1, [ptr_as_int_param_1];
220+
; PTXC-NEXT: cvta.to.global.u64 %rd2, %rd1;
221+
; PTXC-NEXT: st.global.u32 [%rd2], %r1;
222+
; PTXC-NEXT: ret;
223+
;
224+
; PTXO-LABEL: ptr_as_int(
225+
; PTXO: {
226+
; PTXO-NEXT: .reg .b32 %r<2>;
227+
; PTXO-NEXT: .reg .b64 %rd<2>;
228+
; PTXO-EMPTY:
229+
; PTXO-NEXT: // %bb.0:
230+
; PTXO-NEXT: ld.param.u64 %rd1, [ptr_as_int_param_0];
231+
; PTXO-NEXT: ld.param.u32 %r1, [ptr_as_int_param_1];
232+
; PTXO-NEXT: st.u32 [%rd1], %r1;
233+
; PTXO-NEXT: ret;
117234
%p = inttoptr i64 %i to ptr
118235
store i32 %v, ptr %p, align 4
119236
ret void
120237
}
121238

122239
%struct.S = type { i64 }
123240

124-
; COMMON-LABEL: ptr_as_int_aggr
125241
define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
126-
; IR: [[S:%.*]] = addrspacecast ptr %s to ptr addrspace(101)
127-
; IR: [[I:%.*]] = load i64, ptr addrspace(101) [[S]], align 8
128-
; IR: [[P0:%.*]] = inttoptr i64 [[I]] to ptr
129-
; IRC: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
130-
; IRC: [[P:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
131-
; IRO-NOT: addrspacecast
132-
133-
; PTXC-DAG: ld.param.u64 [[I:%rd.*]], [ptr_as_int_aggr_param_0];
134-
; PTXC-DAG: ld.param.u32 [[V:%r.*]], [ptr_as_int_aggr_param_1];
135-
; PTXC: cvta.to.global.u64 %[[P:rd.*]], [[I]];
136-
; PTXC: st.global.u32 [%[[P]]], [[V]];
137-
138-
; PTXO-DAG: ld.param.u64 %[[P:rd.*]], [ptr_as_int_aggr_param_0];
139-
; PTXO-DAG: ld.param.u32 [[V:%r.*]], [ptr_as_int_aggr_param_1];
140-
; PTXO: st.u32 [%[[P]]], [[V]];
242+
; IRC-LABEL: define ptx_kernel void @ptr_as_int_aggr(
243+
; IRC-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
244+
; IRC-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
245+
; IRC-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S3]], align 8
246+
; IRC-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
247+
; IRC-NEXT: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
248+
; IRC-NEXT: [[P2:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
249+
; IRC-NEXT: store i32 [[V]], ptr [[P2]], align 4
250+
; IRC-NEXT: ret void
251+
;
252+
; IRO-LABEL: define ptx_kernel void @ptr_as_int_aggr(
253+
; IRO-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
254+
; IRO-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
255+
; IRO-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S1]], align 8
256+
; IRO-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
257+
; IRO-NEXT: store i32 [[V]], ptr [[P]], align 4
258+
; IRO-NEXT: ret void
259+
;
260+
; PTXC-LABEL: ptr_as_int_aggr(
261+
; PTXC: {
262+
; PTXC-NEXT: .reg .b32 %r<2>;
263+
; PTXC-NEXT: .reg .b64 %rd<3>;
264+
; PTXC-EMPTY:
265+
; PTXC-NEXT: // %bb.0:
266+
; PTXC-NEXT: ld.param.u32 %r1, [ptr_as_int_aggr_param_1];
267+
; PTXC-NEXT: ld.param.u64 %rd1, [ptr_as_int_aggr_param_0];
268+
; PTXC-NEXT: cvta.to.global.u64 %rd2, %rd1;
269+
; PTXC-NEXT: st.global.u32 [%rd2], %r1;
270+
; PTXC-NEXT: ret;
271+
;
272+
; PTXO-LABEL: ptr_as_int_aggr(
273+
; PTXO: {
274+
; PTXO-NEXT: .reg .b32 %r<2>;
275+
; PTXO-NEXT: .reg .b64 %rd<2>;
276+
; PTXO-EMPTY:
277+
; PTXO-NEXT: // %bb.0:
278+
; PTXO-NEXT: ld.param.u32 %r1, [ptr_as_int_aggr_param_1];
279+
; PTXO-NEXT: ld.param.u64 %rd1, [ptr_as_int_aggr_param_0];
280+
; PTXO-NEXT: st.u32 [%rd1], %r1;
281+
; PTXO-NEXT: ret;
141282
%i = load i64, ptr %s, align 8
142283
%p = inttoptr i64 %i to ptr
143284
store i32 %v, ptr %p, align 4
144285
ret void
145286
}
146287

147-
148288
; Function Attrs: convergent nounwind
149289
declare dso_local ptr @escape(ptr) local_unnamed_addr

0 commit comments

Comments
 (0)