Skip to content

Commit a4572c2

Browse files
committed
Update test to account for indirect call conversion
1 parent 8afab99 commit a4572c2

File tree

1 file changed

+61
-49
lines changed

1 file changed

+61
-49
lines changed

llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll

Lines changed: 61 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -72,21 +72,24 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
7272
; PTX-LABEL: grid_const_escape(
7373
; PTX: {
7474
; PTX-NEXT: .reg .b32 %r<3>;
75-
; PTX-NEXT: .reg .b64 %rd<4>;
75+
; PTX-NEXT: .reg .b64 %rd<5>;
7676
; PTX-EMPTY:
7777
; PTX-NEXT: // %bb.0:
78-
; PTX-NEXT: mov.b64 %rd1, grid_const_escape_param_0;
79-
; PTX-NEXT: mov.u64 %rd2, %rd1;
80-
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
78+
; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
79+
; PTX-NEXT: mov.u64 %rd3, %rd2;
80+
; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
81+
; PTX-NEXT: mov.u64 %rd1, escape;
8182
; PTX-NEXT: { // callseq 0, 0
8283
; PTX-NEXT: .param .b64 param0;
83-
; PTX-NEXT: st.param.b64 [param0+0], %rd3;
84+
; PTX-NEXT: st.param.b64 [param0+0], %rd4;
8485
; PTX-NEXT: .param .b32 retval0;
85-
; PTX-NEXT: call.uni (retval0),
86-
; PTX-NEXT: escape,
86+
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
87+
; PTX-NEXT: call (retval0),
88+
; PTX-NEXT: %rd1,
8789
; PTX-NEXT: (
8890
; PTX-NEXT: param0
89-
; PTX-NEXT: );
91+
; PTX-NEXT: )
92+
; PTX-NEXT: , prototype_0;
9093
; PTX-NEXT: ld.param.b32 %r1, [retval0+0];
9194
; PTX-NEXT: } // callseq 0
9295
; PTX-NEXT: ret;
@@ -107,36 +110,39 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
107110
; PTX-NEXT: .reg .b64 %SP;
108111
; PTX-NEXT: .reg .b64 %SPL;
109112
; PTX-NEXT: .reg .b32 %r<4>;
110-
; PTX-NEXT: .reg .b64 %rd<9>;
113+
; PTX-NEXT: .reg .b64 %rd<10>;
111114
; PTX-EMPTY:
112115
; PTX-NEXT: // %bb.0:
113116
; PTX-NEXT: mov.u64 %SPL, __local_depot3;
114117
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
115-
; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0;
116-
; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2;
117-
; PTX-NEXT: mov.u64 %rd3, %rd2;
118+
; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0;
119+
; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2;
120+
; PTX-NEXT: mov.u64 %rd4, %rd3;
118121
; PTX-NEXT: ld.param.u32 %r1, [multiple_grid_const_escape_param_1];
119-
; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
120-
; PTX-NEXT: mov.u64 %rd5, %rd1;
121-
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
122-
; PTX-NEXT: add.u64 %rd7, %SP, 0;
123-
; PTX-NEXT: add.u64 %rd8, %SPL, 0;
124-
; PTX-NEXT: st.local.u32 [%rd8], %r1;
122+
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
123+
; PTX-NEXT: mov.u64 %rd6, %rd2;
124+
; PTX-NEXT: cvta.param.u64 %rd7, %rd6;
125+
; PTX-NEXT: add.u64 %rd8, %SP, 0;
126+
; PTX-NEXT: add.u64 %rd9, %SPL, 0;
127+
; PTX-NEXT: st.local.u32 [%rd9], %r1;
128+
; PTX-NEXT: mov.u64 %rd1, escape3;
125129
; PTX-NEXT: { // callseq 1, 0
126130
; PTX-NEXT: .param .b64 param0;
127-
; PTX-NEXT: st.param.b64 [param0+0], %rd6;
131+
; PTX-NEXT: st.param.b64 [param0+0], %rd7;
128132
; PTX-NEXT: .param .b64 param1;
129-
; PTX-NEXT: st.param.b64 [param1+0], %rd7;
133+
; PTX-NEXT: st.param.b64 [param1+0], %rd8;
130134
; PTX-NEXT: .param .b64 param2;
131-
; PTX-NEXT: st.param.b64 [param2+0], %rd4;
135+
; PTX-NEXT: st.param.b64 [param2+0], %rd5;
132136
; PTX-NEXT: .param .b32 retval0;
133-
; PTX-NEXT: call.uni (retval0),
134-
; PTX-NEXT: escape3,
137+
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
138+
; PTX-NEXT: call (retval0),
139+
; PTX-NEXT: %rd1,
135140
; PTX-NEXT: (
136141
; PTX-NEXT: param0,
137142
; PTX-NEXT: param1,
138143
; PTX-NEXT: param2
139-
; PTX-NEXT: );
144+
; PTX-NEXT: )
145+
; PTX-NEXT: , prototype_1;
140146
; PTX-NEXT: ld.param.b32 %r2, [retval0+0];
141147
; PTX-NEXT: } // callseq 1
142148
; PTX-NEXT: ret;
@@ -221,26 +227,29 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
221227
; PTX-LABEL: grid_const_partial_escape(
222228
; PTX: {
223229
; PTX-NEXT: .reg .b32 %r<5>;
224-
; PTX-NEXT: .reg .b64 %rd<6>;
230+
; PTX-NEXT: .reg .b64 %rd<7>;
225231
; PTX-EMPTY:
226232
; PTX-NEXT: // %bb.0:
227-
; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escape_param_0;
228-
; PTX-NEXT: ld.param.u64 %rd2, [grid_const_partial_escape_param_1];
229-
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
230-
; PTX-NEXT: mov.u64 %rd4, %rd1;
231-
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
232-
; PTX-NEXT: ld.u32 %r1, [%rd5];
233+
; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0;
234+
; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escape_param_1];
235+
; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
236+
; PTX-NEXT: mov.u64 %rd5, %rd2;
237+
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
238+
; PTX-NEXT: ld.u32 %r1, [%rd6];
233239
; PTX-NEXT: add.s32 %r2, %r1, %r1;
234-
; PTX-NEXT: st.global.u32 [%rd3], %r2;
240+
; PTX-NEXT: st.global.u32 [%rd4], %r2;
241+
; PTX-NEXT: mov.u64 %rd1, escape;
235242
; PTX-NEXT: { // callseq 2, 0
236243
; PTX-NEXT: .param .b64 param0;
237-
; PTX-NEXT: st.param.b64 [param0+0], %rd5;
244+
; PTX-NEXT: st.param.b64 [param0+0], %rd6;
238245
; PTX-NEXT: .param .b32 retval0;
239-
; PTX-NEXT: call.uni (retval0),
240-
; PTX-NEXT: escape,
246+
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
247+
; PTX-NEXT: call (retval0),
248+
; PTX-NEXT: %rd1,
241249
; PTX-NEXT: (
242250
; PTX-NEXT: param0
243-
; PTX-NEXT: );
251+
; PTX-NEXT: )
252+
; PTX-NEXT: , prototype_2;
244253
; PTX-NEXT: ld.param.b32 %r3, [retval0+0];
245254
; PTX-NEXT: } // callseq 2
246255
; PTX-NEXT: ret;
@@ -266,27 +275,30 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
266275
; PTX-LABEL: grid_const_partial_escapemem(
267276
; PTX: {
268277
; PTX-NEXT: .reg .b32 %r<6>;
269-
; PTX-NEXT: .reg .b64 %rd<6>;
278+
; PTX-NEXT: .reg .b64 %rd<7>;
270279
; PTX-EMPTY:
271280
; PTX-NEXT: // %bb.0:
272-
; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escapemem_param_0;
273-
; PTX-NEXT: ld.param.u64 %rd2, [grid_const_partial_escapemem_param_1];
274-
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
275-
; PTX-NEXT: mov.u64 %rd4, %rd1;
276-
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
277-
; PTX-NEXT: ld.u32 %r1, [%rd5];
278-
; PTX-NEXT: ld.u32 %r2, [%rd5+4];
279-
; PTX-NEXT: st.global.u64 [%rd3], %rd5;
281+
; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0;
282+
; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escapemem_param_1];
283+
; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
284+
; PTX-NEXT: mov.u64 %rd5, %rd2;
285+
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
286+
; PTX-NEXT: ld.u32 %r1, [%rd6];
287+
; PTX-NEXT: ld.u32 %r2, [%rd6+4];
288+
; PTX-NEXT: st.global.u64 [%rd4], %rd6;
280289
; PTX-NEXT: add.s32 %r3, %r1, %r2;
290+
; PTX-NEXT: mov.u64 %rd1, escape;
281291
; PTX-NEXT: { // callseq 3, 0
282292
; PTX-NEXT: .param .b64 param0;
283-
; PTX-NEXT: st.param.b64 [param0+0], %rd5;
293+
; PTX-NEXT: st.param.b64 [param0+0], %rd6;
284294
; PTX-NEXT: .param .b32 retval0;
285-
; PTX-NEXT: call.uni (retval0),
286-
; PTX-NEXT: escape,
295+
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
296+
; PTX-NEXT: call (retval0),
297+
; PTX-NEXT: %rd1,
287298
; PTX-NEXT: (
288299
; PTX-NEXT: param0
289-
; PTX-NEXT: );
300+
; PTX-NEXT: )
301+
; PTX-NEXT: , prototype_3;
290302
; PTX-NEXT: ld.param.b32 %r4, [retval0+0];
291303
; PTX-NEXT: } // callseq 3
292304
; PTX-NEXT: st.param.b32 [func_retval0+0], %r3;

0 commit comments

Comments
 (0)