@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
12
12
; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
13
13
; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
14
14
; OPT-NEXT: [[ENTRY:.*:]]
15
- ; OPT-NEXT: [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
16
- ; OPT-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
17
- ; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
15
+ ; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
18
16
; OPT-NEXT: [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
19
17
; OPT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
20
18
; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
74
72
; PTX-NEXT: ret;
75
73
; OPT-LABEL: define ptx_kernel void @grid_const_int(
76
74
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
77
- ; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
78
- ; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
79
75
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
80
76
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
81
77
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
82
- ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
78
+ ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
83
79
; OPT-NEXT: ret void
84
80
%tmp = load i32 , ptr %input1 , align 4
85
81
%add = add i32 %tmp , %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
105
101
; PTX-NEXT: ret;
106
102
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
107
103
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
108
- ; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
109
- ; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
110
104
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
111
105
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
112
106
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
113
107
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
114
108
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
115
109
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
116
- ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
110
+ ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
117
111
; OPT-NEXT: ret void
118
112
%gep1 = getelementptr inbounds %struct.s , ptr %input , i32 0 , i32 0
119
113
%gep2 = getelementptr inbounds %struct.s , ptr %input , i32 0 , i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
233
227
; PTX-NEXT: ret;
234
228
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
235
229
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
236
- ; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
237
- ; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
238
230
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
239
231
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
240
- ; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
232
+ ; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
241
233
; OPT-NEXT: ret void
242
234
store ptr %input , ptr %addr , align 8
243
235
ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
263
255
; PTX-NOT: .local
264
256
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
265
257
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
266
- ; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
267
- ; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
268
258
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
269
259
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
270
260
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
271
261
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
272
262
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
273
- ; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
263
+ ; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8
274
264
; OPT-NEXT: ret void
275
265
%tmpptr1 = getelementptr inbounds %struct.s , ptr %input , i32 0 , i32 0
276
266
%tmpptr2 = getelementptr inbounds %struct.s , ptr %input , i32 0 , i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
311
301
; PTX-NEXT: ret;
312
302
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
313
303
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
314
- ; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
315
- ; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
316
304
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
317
305
; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
318
306
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
319
307
; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
320
- ; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
308
+ ; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4
321
309
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
322
310
; OPT-NEXT: ret void
323
311
%val = load i32 , ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
361
349
; PTX-NEXT: ret;
362
350
; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
363
351
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
364
- ; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
365
- ; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
366
352
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
367
353
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
368
354
; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
369
355
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
370
356
; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
371
357
; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
372
- ; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
358
+ ; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
373
359
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
374
360
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
375
361
; OPT-NEXT: ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
407
393
; PTX-NEXT: ret;
408
394
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
409
395
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
410
- ; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
411
- ; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
412
396
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
413
397
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
414
- ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
398
+ ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
415
399
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
416
400
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
417
401
; OPT: [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
423
407
; OPT: [[MERGE]]:
424
408
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
425
409
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
426
- ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
410
+ ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
427
411
; OPT-NEXT: ret void
428
412
429
413
%val = load i32 , ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
470
454
; PTX-NEXT: ret;
471
455
; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
472
456
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
473
- ; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
474
- ; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
475
457
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
476
458
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
477
459
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
478
460
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
479
- ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
461
+ ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
480
462
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
481
463
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
482
464
; OPT: [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
488
470
; OPT: [[MERGE]]:
489
471
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
490
472
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
491
- ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
473
+ ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
492
474
; OPT-NEXT: ret void
493
475
%val = load i32 , ptr %inout
494
476
%less = icmp slt i32 %val , 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
531
513
; PTX-NEXT: ret;
532
514
; OPT-LABEL: define ptx_kernel void @grid_const_select(
533
515
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
534
- ; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
535
- ; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
536
516
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
537
517
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
538
518
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
539
519
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
540
- ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
520
+ ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
541
521
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
542
522
; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
543
523
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
544
- ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
524
+ ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
545
525
; OPT-NEXT: ret void
546
526
%val = load i32 , ptr %inout
547
527
%less = icmp slt i32 %val , 0
0 commit comments