@@ -550,3 +550,333 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 %d = or i64 %b, %c
 ret i64 %d
}
+
+ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
+ ; RV32I-LABEL: rotl_32_mask_multiple:
+ ; RV32I: # %bb.0:
+ ; RV32I-NEXT: sll a3, a0, a2
+ ; RV32I-NEXT: neg a4, a2
+ ; RV32I-NEXT: srl a0, a0, a4
+ ; RV32I-NEXT: or a0, a3, a0
+ ; RV32I-NEXT: sll a2, a1, a2
+ ; RV32I-NEXT: srl a1, a1, a4
+ ; RV32I-NEXT: or a1, a2, a1
+ ; RV32I-NEXT: add a0, a0, a1
+ ; RV32I-NEXT: ret
+ ;
+ ; RV64I-LABEL: rotl_32_mask_multiple:
+ ; RV64I: # %bb.0:
+ ; RV64I-NEXT: sllw a3, a0, a2
+ ; RV64I-NEXT: negw a4, a2
+ ; RV64I-NEXT: srlw a0, a0, a4
+ ; RV64I-NEXT: or a0, a3, a0
+ ; RV64I-NEXT: sllw a2, a1, a2
+ ; RV64I-NEXT: srlw a1, a1, a4
+ ; RV64I-NEXT: or a1, a2, a1
+ ; RV64I-NEXT: addw a0, a0, a1
+ ; RV64I-NEXT: ret
+ ;
+ ; RV32ZBB-LABEL: rotl_32_mask_multiple:
+ ; RV32ZBB: # %bb.0:
+ ; RV32ZBB-NEXT: andi a2, a2, 31
+ ; RV32ZBB-NEXT: rol a0, a0, a2
+ ; RV32ZBB-NEXT: rol a1, a1, a2
+ ; RV32ZBB-NEXT: add a0, a0, a1
+ ; RV32ZBB-NEXT: ret
+ ;
+ ; RV64ZBB-LABEL: rotl_32_mask_multiple:
+ ; RV64ZBB: # %bb.0:
+ ; RV64ZBB-NEXT: andi a2, a2, 31
+ ; RV64ZBB-NEXT: rolw a0, a0, a2
+ ; RV64ZBB-NEXT: rolw a1, a1, a2
+ ; RV64ZBB-NEXT: addw a0, a0, a1
+ ; RV64ZBB-NEXT: ret
+ %maskedamt = and i32 %amt, 31
+ %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
+ %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt)
+ %3 = add i32 %1, %2
+ ret i32 %3
+ }
+ declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
+ ; RV32I-LABEL: rotl_64_mask_multiple:
+ ; RV32I: # %bb.0:
+ ; RV32I-NEXT: slli a5, a4, 26
+ ; RV32I-NEXT: srli a5, a5, 31
+ ; RV32I-NEXT: mv a6, a1
+ ; RV32I-NEXT: bnez a5, .LBB9_2
+ ; RV32I-NEXT: # %bb.1:
+ ; RV32I-NEXT: mv a6, a0
+ ; RV32I-NEXT: .LBB9_2:
+ ; RV32I-NEXT: bnez a5, .LBB9_4
+ ; RV32I-NEXT: # %bb.3:
+ ; RV32I-NEXT: mv a0, a1
+ ; RV32I-NEXT: .LBB9_4:
+ ; RV32I-NEXT: sll a7, a6, a4
+ ; RV32I-NEXT: srli t0, a0, 1
+ ; RV32I-NEXT: not a1, a4
+ ; RV32I-NEXT: srl t0, t0, a1
+ ; RV32I-NEXT: sll t1, a0, a4
+ ; RV32I-NEXT: srli a0, a6, 1
+ ; RV32I-NEXT: srl t2, a0, a1
+ ; RV32I-NEXT: mv a0, a3
+ ; RV32I-NEXT: bnez a5, .LBB9_6
+ ; RV32I-NEXT: # %bb.5:
+ ; RV32I-NEXT: mv a0, a2
+ ; RV32I-NEXT: .LBB9_6:
+ ; RV32I-NEXT: or a6, a7, t0
+ ; RV32I-NEXT: or a7, t1, t2
+ ; RV32I-NEXT: sll t0, a0, a4
+ ; RV32I-NEXT: bnez a5, .LBB9_8
+ ; RV32I-NEXT: # %bb.7:
+ ; RV32I-NEXT: mv a2, a3
+ ; RV32I-NEXT: .LBB9_8:
+ ; RV32I-NEXT: srli a3, a2, 1
+ ; RV32I-NEXT: srl a3, a3, a1
+ ; RV32I-NEXT: or a3, t0, a3
+ ; RV32I-NEXT: sll a2, a2, a4
+ ; RV32I-NEXT: srli a0, a0, 1
+ ; RV32I-NEXT: srl a0, a0, a1
+ ; RV32I-NEXT: or a0, a2, a0
+ ; RV32I-NEXT: add a1, a7, a0
+ ; RV32I-NEXT: add a0, a6, a3
+ ; RV32I-NEXT: sltu a2, a0, a6
+ ; RV32I-NEXT: add a1, a1, a2
+ ; RV32I-NEXT: ret
+ ;
+ ; RV64I-LABEL: rotl_64_mask_multiple:
+ ; RV64I: # %bb.0:
+ ; RV64I-NEXT: sll a3, a0, a2
+ ; RV64I-NEXT: neg a4, a2
+ ; RV64I-NEXT: srl a0, a0, a4
+ ; RV64I-NEXT: or a0, a3, a0
+ ; RV64I-NEXT: sll a2, a1, a2
+ ; RV64I-NEXT: srl a1, a1, a4
+ ; RV64I-NEXT: or a1, a2, a1
+ ; RV64I-NEXT: add a0, a0, a1
+ ; RV64I-NEXT: ret
+ ;
+ ; RV32ZBB-LABEL: rotl_64_mask_multiple:
+ ; RV32ZBB: # %bb.0:
+ ; RV32ZBB-NEXT: slli a5, a4, 26
+ ; RV32ZBB-NEXT: srli a5, a5, 31
+ ; RV32ZBB-NEXT: mv a6, a1
+ ; RV32ZBB-NEXT: bnez a5, .LBB9_2
+ ; RV32ZBB-NEXT: # %bb.1:
+ ; RV32ZBB-NEXT: mv a6, a0
+ ; RV32ZBB-NEXT: .LBB9_2:
+ ; RV32ZBB-NEXT: bnez a5, .LBB9_4
+ ; RV32ZBB-NEXT: # %bb.3:
+ ; RV32ZBB-NEXT: mv a0, a1
+ ; RV32ZBB-NEXT: .LBB9_4:
+ ; RV32ZBB-NEXT: sll a7, a6, a4
+ ; RV32ZBB-NEXT: srli t0, a0, 1
+ ; RV32ZBB-NEXT: not a1, a4
+ ; RV32ZBB-NEXT: srl t0, t0, a1
+ ; RV32ZBB-NEXT: sll t1, a0, a4
+ ; RV32ZBB-NEXT: srli a0, a6, 1
+ ; RV32ZBB-NEXT: srl t2, a0, a1
+ ; RV32ZBB-NEXT: mv a0, a3
+ ; RV32ZBB-NEXT: bnez a5, .LBB9_6
+ ; RV32ZBB-NEXT: # %bb.5:
+ ; RV32ZBB-NEXT: mv a0, a2
+ ; RV32ZBB-NEXT: .LBB9_6:
+ ; RV32ZBB-NEXT: or a6, a7, t0
+ ; RV32ZBB-NEXT: or a7, t1, t2
+ ; RV32ZBB-NEXT: sll t0, a0, a4
+ ; RV32ZBB-NEXT: bnez a5, .LBB9_8
+ ; RV32ZBB-NEXT: # %bb.7:
+ ; RV32ZBB-NEXT: mv a2, a3
+ ; RV32ZBB-NEXT: .LBB9_8:
+ ; RV32ZBB-NEXT: srli a3, a2, 1
+ ; RV32ZBB-NEXT: srl a3, a3, a1
+ ; RV32ZBB-NEXT: or a3, t0, a3
+ ; RV32ZBB-NEXT: sll a2, a2, a4
+ ; RV32ZBB-NEXT: srli a0, a0, 1
+ ; RV32ZBB-NEXT: srl a0, a0, a1
+ ; RV32ZBB-NEXT: or a0, a2, a0
+ ; RV32ZBB-NEXT: add a1, a7, a0
+ ; RV32ZBB-NEXT: add a0, a6, a3
+ ; RV32ZBB-NEXT: sltu a2, a0, a6
+ ; RV32ZBB-NEXT: add a1, a1, a2
+ ; RV32ZBB-NEXT: ret
+ ;
+ ; RV64ZBB-LABEL: rotl_64_mask_multiple:
+ ; RV64ZBB: # %bb.0:
+ ; RV64ZBB-NEXT: andi a2, a2, 63
+ ; RV64ZBB-NEXT: rol a0, a0, a2
+ ; RV64ZBB-NEXT: rol a1, a1, a2
+ ; RV64ZBB-NEXT: add a0, a0, a1
+ ; RV64ZBB-NEXT: ret
+ %maskedamt = and i64 %amt, 63
+ %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
+ %2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt)
+ %3 = add i64 %1, %2
+ ret i64 %3
+ }
+ declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
+ ; RV32I-LABEL: rotr_32_mask_multiple:
+ ; RV32I: # %bb.0:
+ ; RV32I-NEXT: srl a3, a0, a2
+ ; RV32I-NEXT: neg a4, a2
+ ; RV32I-NEXT: sll a0, a0, a4
+ ; RV32I-NEXT: or a0, a3, a0
+ ; RV32I-NEXT: srl a2, a1, a2
+ ; RV32I-NEXT: sll a1, a1, a4
+ ; RV32I-NEXT: or a1, a2, a1
+ ; RV32I-NEXT: add a0, a0, a1
+ ; RV32I-NEXT: ret
+ ;
+ ; RV64I-LABEL: rotr_32_mask_multiple:
+ ; RV64I: # %bb.0:
+ ; RV64I-NEXT: srlw a3, a0, a2
+ ; RV64I-NEXT: negw a4, a2
+ ; RV64I-NEXT: sllw a0, a0, a4
+ ; RV64I-NEXT: or a0, a3, a0
+ ; RV64I-NEXT: srlw a2, a1, a2
+ ; RV64I-NEXT: sllw a1, a1, a4
+ ; RV64I-NEXT: or a1, a2, a1
+ ; RV64I-NEXT: addw a0, a0, a1
+ ; RV64I-NEXT: ret
+ ;
+ ; RV32ZBB-LABEL: rotr_32_mask_multiple:
+ ; RV32ZBB: # %bb.0:
+ ; RV32ZBB-NEXT: andi a2, a2, 31
+ ; RV32ZBB-NEXT: ror a0, a0, a2
+ ; RV32ZBB-NEXT: ror a1, a1, a2
+ ; RV32ZBB-NEXT: add a0, a0, a1
+ ; RV32ZBB-NEXT: ret
+ ;
+ ; RV64ZBB-LABEL: rotr_32_mask_multiple:
+ ; RV64ZBB: # %bb.0:
+ ; RV64ZBB-NEXT: andi a2, a2, 31
+ ; RV64ZBB-NEXT: rorw a0, a0, a2
+ ; RV64ZBB-NEXT: rorw a1, a1, a2
+ ; RV64ZBB-NEXT: addw a0, a0, a1
+ ; RV64ZBB-NEXT: ret
+ %maskedamt = and i32 %amt, 31
+ %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
+ %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt)
+ %3 = add i32 %1, %2
+ ret i32 %3
+ }
+ declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
+ ; RV32I-LABEL: rotr_64_mask_multiple:
+ ; RV32I: # %bb.0:
+ ; RV32I-NEXT: andi a5, a4, 32
+ ; RV32I-NEXT: mv a6, a0
+ ; RV32I-NEXT: beqz a5, .LBB11_2
+ ; RV32I-NEXT: # %bb.1:
+ ; RV32I-NEXT: mv a6, a1
+ ; RV32I-NEXT: .LBB11_2:
+ ; RV32I-NEXT: beqz a5, .LBB11_4
+ ; RV32I-NEXT: # %bb.3:
+ ; RV32I-NEXT: mv a1, a0
+ ; RV32I-NEXT: .LBB11_4:
+ ; RV32I-NEXT: srl a7, a6, a4
+ ; RV32I-NEXT: slli t0, a1, 1
+ ; RV32I-NEXT: not a0, a4
+ ; RV32I-NEXT: sll t0, t0, a0
+ ; RV32I-NEXT: srl t1, a1, a4
+ ; RV32I-NEXT: slli a1, a6, 1
+ ; RV32I-NEXT: sll t2, a1, a0
+ ; RV32I-NEXT: mv a6, a2
+ ; RV32I-NEXT: beqz a5, .LBB11_6
+ ; RV32I-NEXT: # %bb.5:
+ ; RV32I-NEXT: mv a6, a3
+ ; RV32I-NEXT: .LBB11_6:
+ ; RV32I-NEXT: or a1, t0, a7
+ ; RV32I-NEXT: or a7, t2, t1
+ ; RV32I-NEXT: srl t0, a6, a4
+ ; RV32I-NEXT: beqz a5, .LBB11_8
+ ; RV32I-NEXT: # %bb.7:
+ ; RV32I-NEXT: mv a3, a2
+ ; RV32I-NEXT: .LBB11_8:
+ ; RV32I-NEXT: slli a2, a3, 1
+ ; RV32I-NEXT: sll a2, a2, a0
+ ; RV32I-NEXT: or a2, a2, t0
+ ; RV32I-NEXT: srl a3, a3, a4
+ ; RV32I-NEXT: slli a4, a6, 1
+ ; RV32I-NEXT: sll a0, a4, a0
+ ; RV32I-NEXT: or a0, a0, a3
+ ; RV32I-NEXT: add a3, a7, a0
+ ; RV32I-NEXT: add a0, a1, a2
+ ; RV32I-NEXT: sltu a1, a0, a1
+ ; RV32I-NEXT: add a1, a3, a1
+ ; RV32I-NEXT: ret
+ ;
+ ; RV64I-LABEL: rotr_64_mask_multiple:
+ ; RV64I: # %bb.0:
+ ; RV64I-NEXT: srl a3, a0, a2
+ ; RV64I-NEXT: neg a4, a2
+ ; RV64I-NEXT: sll a0, a0, a4
+ ; RV64I-NEXT: or a0, a3, a0
+ ; RV64I-NEXT: srl a2, a1, a2
+ ; RV64I-NEXT: sll a1, a1, a4
+ ; RV64I-NEXT: or a1, a2, a1
+ ; RV64I-NEXT: add a0, a0, a1
+ ; RV64I-NEXT: ret
+ ;
+ ; RV32ZBB-LABEL: rotr_64_mask_multiple:
+ ; RV32ZBB: # %bb.0:
+ ; RV32ZBB-NEXT: andi a5, a4, 32
+ ; RV32ZBB-NEXT: mv a6, a0
+ ; RV32ZBB-NEXT: beqz a5, .LBB11_2
+ ; RV32ZBB-NEXT: # %bb.1:
+ ; RV32ZBB-NEXT: mv a6, a1
+ ; RV32ZBB-NEXT: .LBB11_2:
+ ; RV32ZBB-NEXT: beqz a5, .LBB11_4
+ ; RV32ZBB-NEXT: # %bb.3:
+ ; RV32ZBB-NEXT: mv a1, a0
+ ; RV32ZBB-NEXT: .LBB11_4:
+ ; RV32ZBB-NEXT: srl a7, a6, a4
+ ; RV32ZBB-NEXT: slli t0, a1, 1
+ ; RV32ZBB-NEXT: not a0, a4
+ ; RV32ZBB-NEXT: sll t0, t0, a0
+ ; RV32ZBB-NEXT: srl t1, a1, a4
+ ; RV32ZBB-NEXT: slli a1, a6, 1
+ ; RV32ZBB-NEXT: sll t2, a1, a0
+ ; RV32ZBB-NEXT: mv a6, a2
+ ; RV32ZBB-NEXT: beqz a5, .LBB11_6
+ ; RV32ZBB-NEXT: # %bb.5:
+ ; RV32ZBB-NEXT: mv a6, a3
+ ; RV32ZBB-NEXT: .LBB11_6:
+ ; RV32ZBB-NEXT: or a1, t0, a7
+ ; RV32ZBB-NEXT: or a7, t2, t1
+ ; RV32ZBB-NEXT: srl t0, a6, a4
+ ; RV32ZBB-NEXT: beqz a5, .LBB11_8
+ ; RV32ZBB-NEXT: # %bb.7:
+ ; RV32ZBB-NEXT: mv a3, a2
+ ; RV32ZBB-NEXT: .LBB11_8:
+ ; RV32ZBB-NEXT: slli a2, a3, 1
+ ; RV32ZBB-NEXT: sll a2, a2, a0
+ ; RV32ZBB-NEXT: or a2, a2, t0
+ ; RV32ZBB-NEXT: srl a3, a3, a4
+ ; RV32ZBB-NEXT: slli a4, a6, 1
+ ; RV32ZBB-NEXT: sll a0, a4, a0
+ ; RV32ZBB-NEXT: or a0, a0, a3
+ ; RV32ZBB-NEXT: add a3, a7, a0
+ ; RV32ZBB-NEXT: add a0, a1, a2
+ ; RV32ZBB-NEXT: sltu a1, a0, a1
+ ; RV32ZBB-NEXT: add a1, a3, a1
+ ; RV32ZBB-NEXT: ret
+ ;
+ ; RV64ZBB-LABEL: rotr_64_mask_multiple:
+ ; RV64ZBB: # %bb.0:
+ ; RV64ZBB-NEXT: andi a2, a2, 63
+ ; RV64ZBB-NEXT: ror a0, a0, a2
+ ; RV64ZBB-NEXT: ror a1, a1, a2
+ ; RV64ZBB-NEXT: add a0, a0, a1
+ ; RV64ZBB-NEXT: ret
+ %maskedamt = and i64 %amt, 63
+ %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
+ %2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt)
+ %3 = add i64 %1, %2
+ ret i64 %3
+ }
+ declare i64 @llvm.fshr.i64(i64, i64, i64)