Skip to content

Commit d1081f9

Browse files
committed
pre-commit tests
1 parent 209d8c8 commit d1081f9

File tree

2 files changed

+107
-1
lines changed

2 files changed

+107
-1
lines changed

llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,13 +560,46 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
560560
ret i32 %keepalive
561561
}
562562

563+
; Declared here without a body: a void function taking one pointer argument
; passed byval(i32) with 4-byte alignment. It is the callee used by
; @test_forward_byval_arg.
declare void @device_func(ptr byval(i32) align 4)
564+
565+
; Kernel that passes its byval(i32) argument %input straight through to
; @device_func as a byval(i32) argument, then returns.
; The function is referenced by a "grid_constant" entry in the metadata (!24).
; OPT-* lines expect the call operand to be the result of an addrspacecast to
; addrspace(101) followed by @llvm.nvvm.ptr.param.to.gen; PTX-* lines give the
; expected PTX for the same function.
define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
566+
; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg(
567+
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
568+
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
569+
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
570+
; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]])
571+
; OPT-NEXT: ret void
572+
;
573+
; PTX-LABEL: test_forward_byval_arg(
574+
; PTX: {
575+
; PTX-NEXT: .reg .b32 %r<2>;
576+
; PTX-NEXT: .reg .b64 %rd<4>;
577+
; PTX-EMPTY:
578+
; PTX-NEXT: // %bb.0:
579+
; PTX-NEXT: mov.b64 %rd1, test_forward_byval_arg_param_0;
580+
; PTX-NEXT: mov.b64 %rd2, %rd1;
581+
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
582+
; PTX-NEXT: ld.u32 %r1, [%rd3];
583+
; PTX-NEXT: { // callseq 4, 0
584+
; PTX-NEXT: .param .align 4 .b8 param0[4];
585+
; PTX-NEXT: st.param.b32 [param0], %r1;
586+
; PTX-NEXT: call.uni
587+
; PTX-NEXT: device_func,
588+
; PTX-NEXT: (
589+
; PTX-NEXT: param0
590+
; PTX-NEXT: );
591+
; PTX-NEXT: } // callseq 4
592+
; PTX-NEXT: ret;
593+
call void @device_func(ptr byval(i32) align 4 %input)
594+
ret void
595+
}
563596

564597

565598
declare dso_local void @dummy() local_unnamed_addr
566599
declare dso_local ptr @escape(ptr) local_unnamed_addr
567600
declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr
568601

569-
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23}
602+
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24}
570603

571604
!0 = !{ptr @grid_const_int, !"grid_constant", !1}
572605
!1 = !{i32 1}
@@ -604,4 +637,6 @@ declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr
604637
!22 = !{ptr @grid_const_ptrtoint, !"grid_constant", !23}
605638
!23 = !{i32 1}
606639

640+
!24 = !{ptr @test_forward_byval_arg, !"grid_constant", !25}
641+
!25 = !{i32 1}
607642

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,77 @@ merge: ; preds = %second, %first
988988
ret void
989989
}
990990

991+
; Kernel that passes its byval(i32) argument %input straight through to
; @device_func as a byval(i32) argument, then returns.
; COMMON-* lines expect the argument to be copied first: an alloca of i32, an
; addrspacecast of the argument to addrspace(101), and a 4-byte memcpy into the
; alloca, which is then the call operand. PTX-* lines give the expected PTX,
; including a store of the loaded value to a local depot slot.
define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
992+
; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg(
993+
; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
994+
; COMMON-NEXT: [[INPUT1:%.*]] = alloca i32, align 4
995+
; COMMON-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
996+
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false)
997+
; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]])
998+
; COMMON-NEXT: ret void
999+
;
1000+
; PTX-LABEL: test_forward_byval_arg(
1001+
; PTX: {
1002+
; PTX-NEXT: .local .align 4 .b8 __local_depot17[4];
1003+
; PTX-NEXT: .reg .b64 %SP;
1004+
; PTX-NEXT: .reg .b64 %SPL;
1005+
; PTX-NEXT: .reg .b32 %r<2>;
1006+
; PTX-NEXT: .reg .b64 %rd<3>;
1007+
; PTX-EMPTY:
1008+
; PTX-NEXT: // %bb.0:
1009+
; PTX-NEXT: mov.b64 %SPL, __local_depot17;
1010+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
1011+
; PTX-NEXT: ld.param.u32 %r1, [test_forward_byval_arg_param_0];
1012+
; PTX-NEXT: st.local.u32 [%rd2], %r1;
1013+
; PTX-NEXT: { // callseq 2, 0
1014+
; PTX-NEXT: .param .align 4 .b8 param0[4];
1015+
; PTX-NEXT: st.param.b32 [param0], %r1;
1016+
; PTX-NEXT: call.uni
1017+
; PTX-NEXT: device_func,
1018+
; PTX-NEXT: (
1019+
; PTX-NEXT: param0
1020+
; PTX-NEXT: );
1021+
; PTX-NEXT: } // callseq 2
1022+
; PTX-NEXT: ret;
1023+
call void @device_func(ptr byval(i32) align 4 %input)
1024+
ret void
1025+
}
1026+
1027+
; Function defined without the ptx_kernel calling convention that takes a
; byval(i32) pointer, calls itself with that same argument, and returns.
; LOWER-ARGS-* lines expect the call to use the incoming argument directly;
; COPY-* lines expect an alloca plus a 4-byte memcpy from addrspace(101), with
; the alloca as the call operand. PTX-* lines give the expected PTX.
; NOTE(review): the self-call is unconditional, so this would recurse forever
; if executed; presumably the test only compiles it -- confirm.
define void @device_func(ptr byval(i32) align 4 %input) {
1028+
; LOWER-ARGS-LABEL: define void @device_func(
1029+
; LOWER-ARGS-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
1030+
; LOWER-ARGS-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT]])
1031+
; LOWER-ARGS-NEXT: ret void
1032+
;
1033+
; COPY-LABEL: define void @device_func(
1034+
; COPY-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
1035+
; COPY-NEXT: [[INPUT1:%.*]] = alloca i32, align 4
1036+
; COPY-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
1037+
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false)
1038+
; COPY-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]])
1039+
; COPY-NEXT: ret void
1040+
;
1041+
; PTX-LABEL: device_func(
1042+
; PTX: {
1043+
; PTX-NEXT: .reg .b32 %r<2>;
1044+
; PTX-NEXT: .reg .b64 %rd<3>;
1045+
; PTX-EMPTY:
1046+
; PTX-NEXT: // %bb.0:
1047+
; PTX-NEXT: ld.param.u32 %r1, [device_func_param_0];
1048+
; PTX-NEXT: { // callseq 3, 0
1049+
; PTX-NEXT: .param .align 4 .b8 param0[4];
1050+
; PTX-NEXT: st.param.b32 [param0], %r1;
1051+
; PTX-NEXT: call.uni
1052+
; PTX-NEXT: device_func,
1053+
; PTX-NEXT: (
1054+
; PTX-NEXT: param0
1055+
; PTX-NEXT: );
1056+
; PTX-NEXT: } // callseq 3
1057+
; PTX-NEXT: ret;
1058+
call void @device_func(ptr byval(i32) align 4 %input)
1059+
ret void
1060+
}
1061+
9911062
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size"="true" }
9921063
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
9931064
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }

0 commit comments

Comments
 (0)