@@ -550,3 +550,313 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
   %d = or i64 %b, %c
   ret i64 %d
 }
+
+; Test that we're able to remove a mask on the rotate amount that has more than
+; one use.
+define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
+; RV32I-LABEL: rotl_32_mask_shared:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sll a3, a0, a2
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: srl a0, a0, a4
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotl_32_mask_shared:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sllw a3, a0, a2
+; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: srlw a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: sllw a1, a1, a2
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotl_32_mask_shared:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a3, a2, 31
+; RV32ZBB-NEXT: rol a0, a0, a3
+; RV32ZBB-NEXT: sll a1, a1, a2
+; RV32ZBB-NEXT: add a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotl_32_mask_shared:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: rolw a0, a0, a2
+; RV64ZBB-NEXT: sllw a1, a1, a2
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i32 %amt, 31
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
+  %2 = shl i32 %b, %maskedamt
+  %3 = add i32 %1, %2
+  ret i32 %3
+}
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 signext %amt) nounwind {
+; RV32I-LABEL: rotl_64_mask_shared:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a5, a4, 26
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: mv a7, a0
+; RV32I-NEXT: bnez a5, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a7, a1
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: andi a6, a4, 63
+; RV32I-NEXT: sll t0, a7, a4
+; RV32I-NEXT: bnez a5, .LBB9_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB9_4:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: not t1, a4
+; RV32I-NEXT: srl a0, a0, t1
+; RV32I-NEXT: or a5, t0, a0
+; RV32I-NEXT: sll a1, a1, a4
+; RV32I-NEXT: srli a0, a7, 1
+; RV32I-NEXT: srl a7, a0, t1
+; RV32I-NEXT: addi a0, a6, -32
+; RV32I-NEXT: or a1, a1, a7
+; RV32I-NEXT: bltz a0, .LBB9_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: sll a3, a2, a0
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: j .LBB9_7
+; RV32I-NEXT: .LBB9_6:
+; RV32I-NEXT: sll a0, a3, a4
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: xori a6, a6, 31
+; RV32I-NEXT: srl a3, a3, a6
+; RV32I-NEXT: or a3, a0, a3
+; RV32I-NEXT: sll a0, a2, a4
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: .LBB9_7:
+; RV32I-NEXT: sltu a1, a0, a1
+; RV32I-NEXT: add a2, a5, a3
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotl_64_mask_shared:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sll a3, a0, a2
+; RV64I-NEXT: neg a4, a2
+; RV64I-NEXT: srl a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: sll a1, a1, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotl_64_mask_shared:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: slli a5, a4, 26
+; RV32ZBB-NEXT: srli a5, a5, 31
+; RV32ZBB-NEXT: mv a7, a0
+; RV32ZBB-NEXT: bnez a5, .LBB9_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: mv a7, a1
+; RV32ZBB-NEXT: .LBB9_2:
+; RV32ZBB-NEXT: andi a6, a4, 63
+; RV32ZBB-NEXT: sll t0, a7, a4
+; RV32ZBB-NEXT: bnez a5, .LBB9_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: .LBB9_4:
+; RV32ZBB-NEXT: srli a0, a1, 1
+; RV32ZBB-NEXT: not t1, a4
+; RV32ZBB-NEXT: srl a0, a0, t1
+; RV32ZBB-NEXT: or a5, t0, a0
+; RV32ZBB-NEXT: sll a1, a1, a4
+; RV32ZBB-NEXT: srli a0, a7, 1
+; RV32ZBB-NEXT: srl a7, a0, t1
+; RV32ZBB-NEXT: addi a0, a6, -32
+; RV32ZBB-NEXT: or a1, a1, a7
+; RV32ZBB-NEXT: bltz a0, .LBB9_6
+; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: sll a3, a2, a0
+; RV32ZBB-NEXT: mv a0, a1
+; RV32ZBB-NEXT: j .LBB9_7
+; RV32ZBB-NEXT: .LBB9_6:
+; RV32ZBB-NEXT: sll a0, a3, a4
+; RV32ZBB-NEXT: srli a3, a2, 1
+; RV32ZBB-NEXT: xori a6, a6, 31
+; RV32ZBB-NEXT: srl a3, a3, a6
+; RV32ZBB-NEXT: or a3, a0, a3
+; RV32ZBB-NEXT: sll a0, a2, a4
+; RV32ZBB-NEXT: add a0, a1, a0
+; RV32ZBB-NEXT: .LBB9_7:
+; RV32ZBB-NEXT: sltu a1, a0, a1
+; RV32ZBB-NEXT: add a2, a5, a3
+; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotl_64_mask_shared:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a3, a2, 63
+; RV64ZBB-NEXT: rol a0, a0, a3
+; RV64ZBB-NEXT: sll a1, a1, a2
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i64 %amt, 63
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
+  %2 = shl i64 %b, %maskedamt
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
+; RV32I-LABEL: rotr_32_mask_shared:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srl a3, a0, a2
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: sll a0, a0, a4
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotr_32_mask_shared:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srlw a3, a0, a2
+; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: sllw a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: sllw a1, a1, a2
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotr_32_mask_shared:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a3, a2, 31
+; RV32ZBB-NEXT: ror a0, a0, a3
+; RV32ZBB-NEXT: sll a1, a1, a2
+; RV32ZBB-NEXT: add a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotr_32_mask_shared:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: rorw a0, a0, a2
+; RV64ZBB-NEXT: sllw a1, a1, a2
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i32 %amt, 31
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
+  %2 = shl i32 %b, %maskedamt
+  %3 = add i32 %1, %2
+  ret i32 %3
+}
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 signext %amt) nounwind {
+; RV32I-LABEL: rotr_64_mask_shared:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a7, a4, 32
+; RV32I-NEXT: mv a6, a1
+; RV32I-NEXT: beqz a7, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: andi a5, a4, 63
+; RV32I-NEXT: srl t0, a6, a4
+; RV32I-NEXT: beqz a7, .LBB11_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB11_4:
+; RV32I-NEXT: slli a1, a0, 1
+; RV32I-NEXT: not a7, a4
+; RV32I-NEXT: sll a1, a1, a7
+; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: srl t0, a0, a4
+; RV32I-NEXT: slli a0, a6, 1
+; RV32I-NEXT: sll a6, a0, a7
+; RV32I-NEXT: addi a0, a5, -32
+; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: bltz a0, .LBB11_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: sll a3, a2, a0
+; RV32I-NEXT: mv a0, a6
+; RV32I-NEXT: j .LBB11_7
+; RV32I-NEXT: .LBB11_6:
+; RV32I-NEXT: sll a0, a3, a4
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: xori a5, a5, 31
+; RV32I-NEXT: srl a3, a3, a5
+; RV32I-NEXT: or a3, a0, a3
+; RV32I-NEXT: sll a0, a2, a4
+; RV32I-NEXT: add a0, a6, a0
+; RV32I-NEXT: .LBB11_7:
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a3
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotr_64_mask_shared:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srl a3, a0, a2
+; RV64I-NEXT: neg a4, a2
+; RV64I-NEXT: sll a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: sll a1, a1, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotr_64_mask_shared:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a7, a4, 32
+; RV32ZBB-NEXT: mv a6, a1
+; RV32ZBB-NEXT: beqz a7, .LBB11_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: mv a6, a0
+; RV32ZBB-NEXT: .LBB11_2:
+; RV32ZBB-NEXT: andi a5, a4, 63
+; RV32ZBB-NEXT: srl t0, a6, a4
+; RV32ZBB-NEXT: beqz a7, .LBB11_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: mv a0, a1
+; RV32ZBB-NEXT: .LBB11_4:
+; RV32ZBB-NEXT: slli a1, a0, 1
+; RV32ZBB-NEXT: not a7, a4
+; RV32ZBB-NEXT: sll a1, a1, a7
+; RV32ZBB-NEXT: or a1, a1, t0
+; RV32ZBB-NEXT: srl t0, a0, a4
+; RV32ZBB-NEXT: slli a0, a6, 1
+; RV32ZBB-NEXT: sll a6, a0, a7
+; RV32ZBB-NEXT: addi a0, a5, -32
+; RV32ZBB-NEXT: or a6, a6, t0
+; RV32ZBB-NEXT: bltz a0, .LBB11_6
+; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: sll a3, a2, a0
+; RV32ZBB-NEXT: mv a0, a6
+; RV32ZBB-NEXT: j .LBB11_7
+; RV32ZBB-NEXT: .LBB11_6:
+; RV32ZBB-NEXT: sll a0, a3, a4
+; RV32ZBB-NEXT: srli a3, a2, 1
+; RV32ZBB-NEXT: xori a5, a5, 31
+; RV32ZBB-NEXT: srl a3, a3, a5
+; RV32ZBB-NEXT: or a3, a0, a3
+; RV32ZBB-NEXT: sll a0, a2, a4
+; RV32ZBB-NEXT: add a0, a6, a0
+; RV32ZBB-NEXT: .LBB11_7:
+; RV32ZBB-NEXT: sltu a2, a0, a6
+; RV32ZBB-NEXT: add a1, a1, a3
+; RV32ZBB-NEXT: add a1, a1, a2
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotr_64_mask_shared:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a3, a2, 63
+; RV64ZBB-NEXT: ror a0, a0, a3
+; RV64ZBB-NEXT: sll a1, a1, a2
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i64 %amt, 63
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
+  %2 = shl i64 %b, %maskedamt
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+declare i64 @llvm.fshr.i64(i64, i64, i64)
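Note on the pattern exercised above: the mask on the rotate amount is removable only in the funnel-shift operand, because llvm.fshl/llvm.fshr take their shift amount modulo the bit width, while the second user of the masked value (the plain shl) still requires it. A minimal sketch of that equivalence follows; it is illustrative only and not part of the commit, and the function name is hypothetical.

; Sketch only: since the fshl amount is taken modulo 32, passing %maskedamt or
; %amt to the intrinsic yields the same rotate, so a backend may legally select
; rol/rolw without the andi even though %maskedamt has another user in the shl.
define i32 @rotl_32_mask_shared_sketch(i32 %a, i32 %b, i32 %amt) {
  %maskedamt = and i32 %amt, 31
  %rot = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) ; mask droppable here
  %shifted = shl i32 %b, %maskedamt                              ; mask still required here
  %res = add i32 %rot, %shifted
  ret i32 %res
}
declare i32 @llvm.fshl.i32(i32, i32, i32)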