@@ -111,63 +111,16 @@ entry:
111
111
; store i32 %i, ptr %out, align 4
112
112
; ret void
113
113
; }
114
-
115
- ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
116
- define dso_local ptx_kernel void @read_only_gep_asc0 (ptr nocapture noundef writeonly %out , ptr nocapture noundef readonly byval (%struct.S ) align 4 %s ) local_unnamed_addr #0 {
117
- ; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
118
- ; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
119
- ; SM_60-NEXT: [[ENTRY:.*:]]
120
- ; SM_60-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
121
- ; SM_60-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
122
- ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
123
- ; SM_60-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
124
- ; SM_60-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
125
- ; SM_60-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
126
- ; SM_60-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4
127
- ; SM_60-NEXT: store i32 [[I]], ptr [[OUT]], align 4
128
- ; SM_60-NEXT: ret void
129
- ;
130
- ; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
131
- ; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
132
- ; SM_70-NEXT: [[ENTRY:.*:]]
133
- ; SM_70-NEXT: [[S_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
134
- ; SM_70-NEXT: [[S_GEN:%.*]] = addrspacecast ptr addrspace(101) [[S_PARAM]] to ptr
135
- ; SM_70-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S_GEN]], i64 4
136
- ; SM_70-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
137
- ; SM_70-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
138
- ; SM_70-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4
139
- ; SM_70-NEXT: store i32 [[I]], ptr [[OUT]], align 4
140
- ; SM_70-NEXT: ret void
141
- ;
142
- ; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
143
- ; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
144
- ; COPY-NEXT: [[ENTRY:.*:]]
145
- ; COPY-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4
146
- ; COPY-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
147
- ; COPY-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
148
- ; COPY-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4
149
- ; COPY-NEXT: store i32 [[I]], ptr [[OUT]], align 4
150
- ; COPY-NEXT: ret void
151
114
;
152
- ; PTX-LABEL: read_only_gep_asc0(
153
- ; PTX: {
154
- ; PTX-NEXT: .reg .b32 %r<2>;
155
- ; PTX-NEXT: .reg .b64 %rd<3>;
156
- ; PTX-EMPTY:
157
- ; PTX-NEXT: // %bb.0: // %entry
158
- ; PTX-NEXT: ld.param.u64 %rd1, [read_only_gep_asc0_param_0];
159
- ; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
160
- ; PTX-NEXT: ld.param.u32 %r1, [read_only_gep_asc0_param_1+4];
161
- ; PTX-NEXT: st.global.u32 [%rd2], %r1;
162
- ; PTX-NEXT: ret;
163
- entry:
164
- %b = getelementptr inbounds nuw i8 , ptr %s , i64 4
165
- %asc = addrspacecast ptr %b to ptr addrspace (101 )
166
- %asc0 = addrspacecast ptr addrspace (101 ) %asc to ptr
167
- %i = load i32 , ptr %asc0 , align 4
168
- store i32 %i , ptr %out , align 4
169
- ret void
170
- }
115
+ ; define dso_local ptx_kernel void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
116
+ ; entry:
117
+ ; %b = getelementptr inbounds nuw i8, ptr %s, i64 4
118
+ ; %asc = addrspacecast ptr %b to ptr addrspace(101)
119
+ ; %asc0 = addrspacecast ptr addrspace(101) %asc to ptr
120
+ ; %i = load i32, ptr %asc0, align 4
121
+ ; store i32 %i, ptr %out, align 4
122
+ ; ret void
123
+ ; }
171
124
172
125
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
173
126
define dso_local ptx_kernel void @escape_ptr (ptr nocapture noundef readnone %out , ptr noundef byval (%struct.S ) align 4 %s ) local_unnamed_addr #0 {
@@ -182,14 +135,14 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
182
135
;
183
136
; PTX-LABEL: escape_ptr(
184
137
; PTX: {
185
- ; PTX-NEXT: .local .align 4 .b8 __local_depot4 [8];
138
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot2 [8];
186
139
; PTX-NEXT: .reg .b64 %SP;
187
140
; PTX-NEXT: .reg .b64 %SPL;
188
141
; PTX-NEXT: .reg .b32 %r<3>;
189
142
; PTX-NEXT: .reg .b64 %rd<3>;
190
143
; PTX-EMPTY:
191
144
; PTX-NEXT: // %bb.0: // %entry
192
- ; PTX-NEXT: mov.b64 %SPL, __local_depot4 ;
145
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot2 ;
193
146
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
194
147
; PTX-NEXT: add.u64 %rd1, %SP, 0;
195
148
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
@@ -226,14 +179,14 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
226
179
;
227
180
; PTX-LABEL: escape_ptr_gep(
228
181
; PTX: {
229
- ; PTX-NEXT: .local .align 4 .b8 __local_depot5 [8];
182
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot3 [8];
230
183
; PTX-NEXT: .reg .b64 %SP;
231
184
; PTX-NEXT: .reg .b64 %SPL;
232
185
; PTX-NEXT: .reg .b32 %r<3>;
233
186
; PTX-NEXT: .reg .b64 %rd<4>;
234
187
; PTX-EMPTY:
235
188
; PTX-NEXT: // %bb.0: // %entry
236
- ; PTX-NEXT: mov.b64 %SPL, __local_depot5 ;
189
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot3 ;
237
190
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
238
191
; PTX-NEXT: add.u64 %rd1, %SP, 0;
239
192
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
@@ -271,14 +224,14 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
271
224
;
272
225
; PTX-LABEL: escape_ptr_store(
273
226
; PTX: {
274
- ; PTX-NEXT: .local .align 4 .b8 __local_depot6 [8];
227
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot4 [8];
275
228
; PTX-NEXT: .reg .b64 %SP;
276
229
; PTX-NEXT: .reg .b64 %SPL;
277
230
; PTX-NEXT: .reg .b32 %r<3>;
278
231
; PTX-NEXT: .reg .b64 %rd<5>;
279
232
; PTX-EMPTY:
280
233
; PTX-NEXT: // %bb.0: // %entry
281
- ; PTX-NEXT: mov.b64 %SPL, __local_depot6 ;
234
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot4 ;
282
235
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
283
236
; PTX-NEXT: ld.param.u64 %rd1, [escape_ptr_store_param_0];
284
237
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
@@ -309,14 +262,14 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
309
262
;
310
263
; PTX-LABEL: escape_ptr_gep_store(
311
264
; PTX: {
312
- ; PTX-NEXT: .local .align 4 .b8 __local_depot7 [8];
265
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot5 [8];
313
266
; PTX-NEXT: .reg .b64 %SP;
314
267
; PTX-NEXT: .reg .b64 %SPL;
315
268
; PTX-NEXT: .reg .b32 %r<3>;
316
269
; PTX-NEXT: .reg .b64 %rd<6>;
317
270
; PTX-EMPTY:
318
271
; PTX-NEXT: // %bb.0: // %entry
319
- ; PTX-NEXT: mov.b64 %SPL, __local_depot7 ;
272
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot5 ;
320
273
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
321
274
; PTX-NEXT: ld.param.u64 %rd1, [escape_ptr_gep_store_param_0];
322
275
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
@@ -349,14 +302,14 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
349
302
;
350
303
; PTX-LABEL: escape_ptrtoint(
351
304
; PTX: {
352
- ; PTX-NEXT: .local .align 4 .b8 __local_depot8 [8];
305
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot6 [8];
353
306
; PTX-NEXT: .reg .b64 %SP;
354
307
; PTX-NEXT: .reg .b64 %SPL;
355
308
; PTX-NEXT: .reg .b32 %r<3>;
356
309
; PTX-NEXT: .reg .b64 %rd<5>;
357
310
; PTX-EMPTY:
358
311
; PTX-NEXT: // %bb.0: // %entry
359
- ; PTX-NEXT: mov.b64 %SPL, __local_depot8 ;
312
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot6 ;
360
313
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
361
314
; PTX-NEXT: ld.param.u64 %rd1, [escape_ptrtoint_param_0];
362
315
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
@@ -507,14 +460,14 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
507
460
;
508
461
; PTX-LABEL: memcpy_to_param(
509
462
; PTX: {
510
- ; PTX-NEXT: .local .align 8 .b8 __local_depot11 [8];
463
+ ; PTX-NEXT: .local .align 8 .b8 __local_depot9 [8];
511
464
; PTX-NEXT: .reg .b64 %SP;
512
465
; PTX-NEXT: .reg .b64 %SPL;
513
466
; PTX-NEXT: .reg .b32 %r<3>;
514
467
; PTX-NEXT: .reg .b64 %rd<48>;
515
468
; PTX-EMPTY:
516
469
; PTX-NEXT: // %bb.0: // %entry
517
- ; PTX-NEXT: mov.b64 %SPL, __local_depot11 ;
470
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot9 ;
518
471
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
519
472
; PTX-NEXT: ld.param.u64 %rd1, [memcpy_to_param_param_0];
520
473
; PTX-NEXT: add.u64 %rd3, %SPL, 0;
@@ -700,7 +653,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
700
653
;
701
654
; PTX-LABEL: test_select_write(
702
655
; PTX: {
703
- ; PTX-NEXT: .local .align 4 .b8 __local_depot14 [8];
656
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot12 [8];
704
657
; PTX-NEXT: .reg .b64 %SP;
705
658
; PTX-NEXT: .reg .b64 %SPL;
706
659
; PTX-NEXT: .reg .pred %p<2>;
@@ -709,7 +662,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
709
662
; PTX-NEXT: .reg .b64 %rd<6>;
710
663
; PTX-EMPTY:
711
664
; PTX-NEXT: // %bb.0: // %bb
712
- ; PTX-NEXT: mov.b64 %SPL, __local_depot14 ;
665
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot12 ;
713
666
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
714
667
; PTX-NEXT: ld.param.u8 %rs1, [test_select_write_param_3];
715
668
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
@@ -809,10 +762,10 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
809
762
; PTX_60-NEXT: ld.param.u64 %rd2, [test_phi_param_2];
810
763
; PTX_60-NEXT: cvta.to.global.u64 %rd1, %rd2;
811
764
; PTX_60-NEXT: ld.param.u32 %r4, [test_phi_param_0];
812
- ; PTX_60-NEXT: @%p1 bra $L__BB15_2 ;
765
+ ; PTX_60-NEXT: @%p1 bra $L__BB13_2 ;
813
766
; PTX_60-NEXT: // %bb.1: // %second
814
767
; PTX_60-NEXT: ld.param.u32 %r4, [test_phi_param_1+4];
815
- ; PTX_60-NEXT: $L__BB15_2 : // %merge
768
+ ; PTX_60-NEXT: $L__BB13_2 : // %merge
816
769
; PTX_60-NEXT: st.global.u32 [%rd1], %r4;
817
770
; PTX_60-NEXT: ret;
818
771
;
@@ -830,11 +783,11 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
830
783
; PTX_70-NEXT: mov.b64 %rd7, test_phi_param_0;
831
784
; PTX_70-NEXT: ld.param.u64 %rd6, [test_phi_param_2];
832
785
; PTX_70-NEXT: cvta.to.global.u64 %rd1, %rd6;
833
- ; PTX_70-NEXT: @%p1 bra $L__BB15_2 ;
786
+ ; PTX_70-NEXT: @%p1 bra $L__BB13_2 ;
834
787
; PTX_70-NEXT: // %bb.1: // %second
835
788
; PTX_70-NEXT: mov.b64 %rd2, test_phi_param_1;
836
789
; PTX_70-NEXT: add.s64 %rd7, %rd2, 4;
837
- ; PTX_70-NEXT: $L__BB15_2 : // %merge
790
+ ; PTX_70-NEXT: $L__BB13_2 : // %merge
838
791
; PTX_70-NEXT: ld.param.u32 %r1, [%rd7];
839
792
; PTX_70-NEXT: st.global.u32 [%rd1], %r1;
840
793
; PTX_70-NEXT: ret;
@@ -880,7 +833,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
880
833
;
881
834
; PTX-LABEL: test_phi_write(
882
835
; PTX: {
883
- ; PTX-NEXT: .local .align 4 .b8 __local_depot16 [8];
836
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot14 [8];
884
837
; PTX-NEXT: .reg .b64 %SP;
885
838
; PTX-NEXT: .reg .b64 %SPL;
886
839
; PTX-NEXT: .reg .pred %p<2>;
@@ -889,7 +842,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
889
842
; PTX-NEXT: .reg .b64 %rd<7>;
890
843
; PTX-EMPTY:
891
844
; PTX-NEXT: // %bb.0: // %bb
892
- ; PTX-NEXT: mov.b64 %SPL, __local_depot16 ;
845
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot14 ;
893
846
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
894
847
; PTX-NEXT: ld.param.u8 %rs1, [test_phi_write_param_2];
895
848
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
@@ -900,10 +853,10 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
900
853
; PTX-NEXT: add.u64 %rd6, %SPL, 4;
901
854
; PTX-NEXT: ld.param.u32 %r2, [test_phi_write_param_0];
902
855
; PTX-NEXT: st.u32 [%SP+4], %r2;
903
- ; PTX-NEXT: @%p1 bra $L__BB16_2 ;
856
+ ; PTX-NEXT: @%p1 bra $L__BB14_2 ;
904
857
; PTX-NEXT: // %bb.1: // %second
905
858
; PTX-NEXT: mov.b64 %rd6, %rd1;
906
- ; PTX-NEXT: $L__BB16_2 : // %merge
859
+ ; PTX-NEXT: $L__BB14_2 : // %merge
907
860
; PTX-NEXT: mov.b32 %r3, 1;
908
861
; PTX-NEXT: st.local.u32 [%rd6], %r3;
909
862
; PTX-NEXT: ret;
@@ -935,14 +888,14 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
935
888
;
936
889
; PTX-LABEL: test_forward_byval_arg(
937
890
; PTX: {
938
- ; PTX-NEXT: .local .align 4 .b8 __local_depot17 [4];
891
+ ; PTX-NEXT: .local .align 4 .b8 __local_depot15 [4];
939
892
; PTX-NEXT: .reg .b64 %SP;
940
893
; PTX-NEXT: .reg .b64 %SPL;
941
894
; PTX-NEXT: .reg .b32 %r<2>;
942
895
; PTX-NEXT: .reg .b64 %rd<3>;
943
896
; PTX-EMPTY:
944
897
; PTX-NEXT: // %bb.0:
945
- ; PTX-NEXT: mov.b64 %SPL, __local_depot17 ;
898
+ ; PTX-NEXT: mov.b64 %SPL, __local_depot15 ;
946
899
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
947
900
; PTX-NEXT: ld.param.u32 %r1, [test_forward_byval_arg_param_0];
948
901
; PTX-NEXT: st.local.u32 [%rd2], %r1;
0 commit comments