@@ -586,3 +586,214 @@ define amdgpu_kernel void @readlane_expression(ptr addrspace(1) %out) {
586
586
ret void
587
587
}
588
588
589
; @ballot_i32: truncates kernel argument %v to i1, passes it to
; llvm.amdgcn.ballot.i32, and stores whether the ballot result is non-zero.
; Expectations below: the CURRENT prefix keeps both the ballot call and the
; icmp; the PASS and DCE prefixes expect neither, with the truncated bit
; stored straight to %out.
define amdgpu_kernel void @ballot_i32(i32 %v, ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @ballot_i32(
; CURRENT-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 1)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CURRENT-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1
; CURRENT-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[C]])
; CURRENT-CHECK-NEXT: [[BALLOT_NE_ZERO:%.*]] = icmp ne i32 [[BALLOT]], 0
; CURRENT-CHECK-NEXT: store i1 [[BALLOT_NE_ZERO]], ptr addrspace(1) [[OUT]], align 1
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @ballot_i32(
; PASS-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1
; PASS-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @ballot_i32(
; DCE-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1
; DCE-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1
; DCE-CHECK-NEXT: ret void
;
  %c = trunc i32 %v to i1
  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
  %ballot_ne_zero = icmp ne i32 %ballot, 0
  store i1 %ballot_ne_zero, ptr addrspace(1) %out
  ret void
}
616
+
617
; @ballot_i64: same shape as the i32 ballot test, but the input IR calls
; llvm.amdgcn.ballot.i64 and compares the i64 result against zero.
; Expectations below: the CURRENT prefix expects a ballot.i32 call with an
; i32 compare; the PASS and DCE prefixes expect no ballot or icmp at all and
; store the truncated bit straight to %out.
define amdgpu_kernel void @ballot_i64(i32 %v, ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @ballot_i64(
; CURRENT-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 1)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CURRENT-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1
; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[C]])
; CURRENT-CHECK-NEXT: [[BALLOT_NE_ZERO:%.*]] = icmp ne i32 [[TMP1]], 0
; CURRENT-CHECK-NEXT: store i1 [[BALLOT_NE_ZERO]], ptr addrspace(1) [[OUT]], align 1
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @ballot_i64(
; PASS-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1
; PASS-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @ballot_i64(
; DCE-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1
; DCE-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1
; DCE-CHECK-NEXT: ret void
;
  %c = trunc i32 %v to i1
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  %ballot_ne_zero = icmp ne i64 %ballot, 0
  store i1 %ballot_ne_zero, ptr addrspace(1) %out
  ret void
}
644
+
645
; @test_readlane_i16: llvm.amdgcn.readlane.i16 applied to two kernel
; arguments, with the result consumed by an "s"-constrained inline asm.
; Expectations below: the CURRENT prefix keeps the readlane call; the PASS
; and DCE prefixes expect the asm call to follow the function header
; directly, i.e. no readlane call remains.
define amdgpu_kernel void @test_readlane_i16(i16 %src0, i32 %src1) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i16(
; CURRENT-CHECK-SAME: i16 [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
; CURRENT-CHECK-NEXT: [[READLANE:%.*]] = tail call i16 @llvm.amdgcn.readlane.i16(i16 [[SRC0]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i16(
; PASS-CHECK-SAME: i16 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i16(
; DCE-CHECK-SAME: i16 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %readlane = call i16 @llvm.amdgcn.readlane.i16(i16 %src0, i32 %src1)
  call void asm sideeffect "; use $0", "s"(i16 %readlane)
  ret void
}
666
+
667
; @test_readlane_i64: llvm.amdgcn.readlane.i64 applied to two kernel
; arguments, with the result consumed by an "s"-constrained inline asm.
; Expectations below: the CURRENT prefix keeps the readlane call; the PASS
; and DCE prefixes expect the asm call to follow the function header
; directly, i.e. no readlane call remains.
define amdgpu_kernel void @test_readlane_i64(i64 %src0, i32 %src1) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i64(
; CURRENT-CHECK-SAME: i64 [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] {
; CURRENT-CHECK-NEXT: [[READLANE:%.*]] = tail call i64 @llvm.amdgcn.readlane.i64(i64 [[SRC0]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i64(
; PASS-CHECK-SAME: i64 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i64(
; DCE-CHECK-SAME: i64 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %readlane = call i64 @llvm.amdgcn.readlane.i64(i64 %src0, i32 %src1)
  call void asm sideeffect "; use $0", "s"(i64 %readlane)
  ret void
}
688
+
689
; @test_readlane_bf16: llvm.amdgcn.readlane.bf16 applied to a bfloat kernel
; argument and an i32 lane argument, with the result consumed by an
; "s"-constrained inline asm.
; Expectations below: the CURRENT prefix keeps the readlane call; the PASS
; and DCE prefixes expect the asm call to follow the function header
; directly, i.e. no readlane call remains.
define amdgpu_kernel void @test_readlane_bf16(bfloat %src0, i32 %src1) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_bf16(
; CURRENT-CHECK-SAME: bfloat [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] {
; CURRENT-CHECK-NEXT: [[READLANE:%.*]] = tail call bfloat @llvm.amdgcn.readlane.bf16(bfloat [[SRC0]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_bf16(
; PASS-CHECK-SAME: bfloat [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_bf16(
; DCE-CHECK-SAME: bfloat [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %readlane = call bfloat @llvm.amdgcn.readlane.bf16(bfloat %src0, i32 %src1)
  call void asm sideeffect "; use $0", "s"(bfloat %readlane)
  ret void
}
710
+
711
; @test_readlane_f16: llvm.amdgcn.readlane.f16 applied to a half kernel
; argument and an i32 lane argument, with the result consumed by an
; "s"-constrained inline asm.
; Expectations below: the CURRENT prefix keeps the readlane call; the PASS
; and DCE prefixes expect the asm call to follow the function header
; directly, i.e. no readlane call remains.
define amdgpu_kernel void @test_readlane_f16(half %src0, i32 %src1) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f16(
; CURRENT-CHECK-SAME: half [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] {
; CURRENT-CHECK-NEXT: [[READLANE:%.*]] = tail call half @llvm.amdgcn.readlane.f16(half [[SRC0]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f16(
; PASS-CHECK-SAME: half [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f16(
; DCE-CHECK-SAME: half [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %readlane = call half @llvm.amdgcn.readlane.f16(half %src0, i32 %src1)
  call void asm sideeffect "; use $0", "s"(half %readlane)
  ret void
}
732
+
733
; @test_readlane_f32: llvm.amdgcn.readlane.f32 applied to a float kernel
; argument and an i32 lane argument, with the result consumed by an
; "s"-constrained inline asm.
; Expectations below: the CURRENT prefix keeps the readlane call; the PASS
; and DCE prefixes expect the asm call to follow the function header
; directly, i.e. no readlane call remains.
define amdgpu_kernel void @test_readlane_f32(float %src0, i32 %src1) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f32(
; CURRENT-CHECK-SAME: float [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] {
; CURRENT-CHECK-NEXT: [[READLANE:%.*]] = tail call float @llvm.amdgcn.readlane.f32(float [[SRC0]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f32(
; PASS-CHECK-SAME: float [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f32(
; DCE-CHECK-SAME: float [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %readlane = call float @llvm.amdgcn.readlane.f32(float %src0, i32 %src1)
  call void asm sideeffect "; use $0", "s"(float %readlane)
  ret void
}
754
+
755
; @test_readlane_f64: llvm.amdgcn.readlane.f64 applied to a double kernel
; argument and an i32 lane argument, with the result consumed by an
; "s"-constrained inline asm.
; Expectations below: the CURRENT prefix keeps the readlane call; the PASS
; and DCE prefixes expect the asm call to follow the function header
; directly, i.e. no readlane call remains.
define amdgpu_kernel void @test_readlane_f64(double %src0, i32 %src1) {
; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f64(
; CURRENT-CHECK-SAME: double [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] {
; CURRENT-CHECK-NEXT: [[READLANE:%.*]] = tail call double @llvm.amdgcn.readlane.f64(double [[SRC0]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f64(
; PASS-CHECK-SAME: double [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f64(
; DCE-CHECK-SAME: double [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %readlane = call double @llvm.amdgcn.readlane.f64(double %src0, i32 %src1)
  call void asm sideeffect "; use $0", "s"(double %readlane)
  ret void
}
776
+ ; All such cases can be optimised, given a generic way to query getDeclarationIfExists().
777
; @test_readlane_v8i16: llvm.amdgcn.readlane.v8i16 on an <8 x i16> argument,
; with the result consumed by an "s"-constrained inline asm. Unlike the
; scalar readlane tests this is a plain `define void` function rather than
; an amdgpu_kernel, and %out is not referenced in the body.
; Expectations below: all three prefixes (CURRENT, PASS and DCE) expect the
; readlane call to remain in the output.
define void @test_readlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src1) {
; CURRENT-CHECK-LABEL: define void @test_readlane_v8i16(
; CURRENT-CHECK-SAME: ptr addrspace(1) readnone captures(none) [[OUT:%.*]], <8 x i16> [[SRC:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] {
; CURRENT-CHECK-NEXT: [[X:%.*]] = tail call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> [[SRC]], i32 [[SRC1]])
; CURRENT-CHECK-NEXT: tail call void asm sideeffect "
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define void @test_readlane_v8i16(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <8 x i16> [[SRC:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[X:%.*]] = call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> [[SRC]], i32 [[SRC1]])
; PASS-CHECK-NEXT: call void asm sideeffect "
; PASS-CHECK-NEXT: ret void
;
; DCE-CHECK-LABEL: define void @test_readlane_v8i16(
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <8 x i16> [[SRC:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] {
; DCE-CHECK-NEXT: [[X:%.*]] = call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> [[SRC]], i32 [[SRC1]])
; DCE-CHECK-NEXT: call void asm sideeffect "
; DCE-CHECK-NEXT: ret void
;
  %x = call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> %src, i32 %src1)
  call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
  ret void
}
0 commit comments