Commit e2f410f

[RISCV] Add test cases showing failure to remove mask on rotate amounts.
If the masking AND has multiple users, we fail to remove it.
1 parent 4957518 commit e2f410f
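
For context on the redundancy being targeted: a rotate amount is interpreted modulo the bit width, so an explicit AND with bitwidth-1 carries no information, and when that AND has a single user the backend already folds it away (the existing single-use mask tests earlier in this file cover that case). The tests added here reuse one masked amount in two rotates. Below is a minimal single-use sketch, not part of this commit and with a hypothetical function name, illustrating the pattern that does fold:

; Hypothetical single-use counterpart (not in the commit). With one user of the
; AND, the mask is expected to fold away, so RV32ZBB would emit a bare `rol`
; with no `andi`.
define signext i32 @rotl_32_mask_single(i32 signext %x, i32 signext %amt) nounwind {
  %maskedamt = and i32 %amt, 31
  %r = tail call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %maskedamt)
  ret i32 %r
}
declare i32 @llvm.fshl.i32(i32, i32, i32)

The *_mask_multiple tests added in this commit feed the masked amount into two rotates, and the diff below shows the `andi` surviving in the ZBB output.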

File tree

1 file changed: +330 -0 lines changed


llvm/test/CodeGen/RISCV/rotl-rotr.ll

Lines changed: 330 additions & 0 deletions
@@ -550,3 +550,333 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
   %d = or i64 %b, %c
   ret i64 %d
 }
+
+define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
+; RV32I-LABEL: rotl_32_mask_multiple:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sll a3, a0, a2
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: srl a0, a0, a4
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: sll a2, a1, a2
+; RV32I-NEXT: srl a1, a1, a4
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotl_32_mask_multiple:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sllw a3, a0, a2
+; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: srlw a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: sllw a2, a1, a2
+; RV64I-NEXT: srlw a1, a1, a4
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotl_32_mask_multiple:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a2, a2, 31
+; RV32ZBB-NEXT: rol a0, a0, a2
+; RV32ZBB-NEXT: rol a1, a1, a2
+; RV32ZBB-NEXT: add a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotl_32_mask_multiple:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a2, a2, 31
+; RV64ZBB-NEXT: rolw a0, a0, a2
+; RV64ZBB-NEXT: rolw a1, a1, a2
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i32 %amt, 31
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
+  %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt)
+  %3 = add i32 %1, %2
+  ret i32 %3
+}
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
+; RV32I-LABEL: rotl_64_mask_multiple:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a5, a4, 26
+; RV32I-NEXT: srli a5, a5, 31
+; RV32I-NEXT: mv a6, a1
+; RV32I-NEXT: bnez a5, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: bnez a5, .LBB9_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB9_4:
+; RV32I-NEXT: sll a7, a6, a4
+; RV32I-NEXT: srli t0, a0, 1
+; RV32I-NEXT: not a1, a4
+; RV32I-NEXT: srl t0, t0, a1
+; RV32I-NEXT: sll t1, a0, a4
+; RV32I-NEXT: srli a0, a6, 1
+; RV32I-NEXT: srl t2, a0, a1
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: bnez a5, .LBB9_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: .LBB9_6:
+; RV32I-NEXT: or a6, a7, t0
+; RV32I-NEXT: or a7, t1, t2
+; RV32I-NEXT: sll t0, a0, a4
+; RV32I-NEXT: bnez a5, .LBB9_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv a2, a3
+; RV32I-NEXT: .LBB9_8:
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: srl a3, a3, a1
+; RV32I-NEXT: or a3, t0, a3
+; RV32I-NEXT: sll a2, a2, a4
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: add a1, a7, a0
+; RV32I-NEXT: add a0, a6, a3
+; RV32I-NEXT: sltu a2, a0, a6
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotl_64_mask_multiple:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sll a3, a0, a2
+; RV64I-NEXT: neg a4, a2
+; RV64I-NEXT: srl a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: sll a2, a1, a2
+; RV64I-NEXT: srl a1, a1, a4
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotl_64_mask_multiple:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: slli a5, a4, 26
+; RV32ZBB-NEXT: srli a5, a5, 31
+; RV32ZBB-NEXT: mv a6, a1
+; RV32ZBB-NEXT: bnez a5, .LBB9_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: mv a6, a0
+; RV32ZBB-NEXT: .LBB9_2:
+; RV32ZBB-NEXT: bnez a5, .LBB9_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: mv a0, a1
+; RV32ZBB-NEXT: .LBB9_4:
+; RV32ZBB-NEXT: sll a7, a6, a4
+; RV32ZBB-NEXT: srli t0, a0, 1
+; RV32ZBB-NEXT: not a1, a4
+; RV32ZBB-NEXT: srl t0, t0, a1
+; RV32ZBB-NEXT: sll t1, a0, a4
+; RV32ZBB-NEXT: srli a0, a6, 1
+; RV32ZBB-NEXT: srl t2, a0, a1
+; RV32ZBB-NEXT: mv a0, a3
+; RV32ZBB-NEXT: bnez a5, .LBB9_6
+; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: mv a0, a2
+; RV32ZBB-NEXT: .LBB9_6:
+; RV32ZBB-NEXT: or a6, a7, t0
+; RV32ZBB-NEXT: or a7, t1, t2
+; RV32ZBB-NEXT: sll t0, a0, a4
+; RV32ZBB-NEXT: bnez a5, .LBB9_8
+; RV32ZBB-NEXT: # %bb.7:
+; RV32ZBB-NEXT: mv a2, a3
+; RV32ZBB-NEXT: .LBB9_8:
+; RV32ZBB-NEXT: srli a3, a2, 1
+; RV32ZBB-NEXT: srl a3, a3, a1
+; RV32ZBB-NEXT: or a3, t0, a3
+; RV32ZBB-NEXT: sll a2, a2, a4
+; RV32ZBB-NEXT: srli a0, a0, 1
+; RV32ZBB-NEXT: srl a0, a0, a1
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: add a1, a7, a0
+; RV32ZBB-NEXT: add a0, a6, a3
+; RV32ZBB-NEXT: sltu a2, a0, a6
+; RV32ZBB-NEXT: add a1, a1, a2
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotl_64_mask_multiple:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a2, a2, 63
+; RV64ZBB-NEXT: rol a0, a0, a2
+; RV64ZBB-NEXT: rol a1, a1, a2
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i64 %amt, 63
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
+  %2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt)
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
+; RV32I-LABEL: rotr_32_mask_multiple:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srl a3, a0, a2
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: sll a0, a0, a4
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srl a2, a1, a2
+; RV32I-NEXT: sll a1, a1, a4
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotr_32_mask_multiple:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srlw a3, a0, a2
+; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: sllw a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: srlw a2, a1, a2
+; RV64I-NEXT: sllw a1, a1, a4
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotr_32_mask_multiple:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a2, a2, 31
+; RV32ZBB-NEXT: ror a0, a0, a2
+; RV32ZBB-NEXT: ror a1, a1, a2
+; RV32ZBB-NEXT: add a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotr_32_mask_multiple:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a2, a2, 31
+; RV64ZBB-NEXT: rorw a0, a0, a2
+; RV64ZBB-NEXT: rorw a1, a1, a2
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i32 %amt, 31
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
+  %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt)
+  %3 = add i32 %1, %2
+  ret i32 %3
+}
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
+; RV32I-LABEL: rotr_64_mask_multiple:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a5, a4, 32
+; RV32I-NEXT: mv a6, a0
+; RV32I-NEXT: beqz a5, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a6, a1
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: beqz a5, .LBB11_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB11_4:
+; RV32I-NEXT: srl a7, a6, a4
+; RV32I-NEXT: slli t0, a1, 1
+; RV32I-NEXT: not a0, a4
+; RV32I-NEXT: sll t0, t0, a0
+; RV32I-NEXT: srl t1, a1, a4
+; RV32I-NEXT: slli a1, a6, 1
+; RV32I-NEXT: sll t2, a1, a0
+; RV32I-NEXT: mv a6, a2
+; RV32I-NEXT: beqz a5, .LBB11_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: .LBB11_6:
+; RV32I-NEXT: or a1, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: srl t0, a6, a4
+; RV32I-NEXT: beqz a5, .LBB11_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv a3, a2
+; RV32I-NEXT: .LBB11_8:
+; RV32I-NEXT: slli a2, a3, 1
+; RV32I-NEXT: sll a2, a2, a0
+; RV32I-NEXT: or a2, a2, t0
+; RV32I-NEXT: srl a3, a3, a4
+; RV32I-NEXT: slli a4, a6, 1
+; RV32I-NEXT: sll a0, a4, a0
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: add a3, a7, a0
+; RV32I-NEXT: add a0, a1, a2
+; RV32I-NEXT: sltu a1, a0, a1
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: rotr_64_mask_multiple:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srl a3, a0, a2
+; RV64I-NEXT: neg a4, a2
+; RV64I-NEXT: sll a0, a0, a4
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: srl a2, a1, a2
+; RV64I-NEXT: sll a1, a1, a4
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBB-LABEL: rotr_64_mask_multiple:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a5, a4, 32
+; RV32ZBB-NEXT: mv a6, a0
+; RV32ZBB-NEXT: beqz a5, .LBB11_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: mv a6, a1
+; RV32ZBB-NEXT: .LBB11_2:
+; RV32ZBB-NEXT: beqz a5, .LBB11_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: .LBB11_4:
+; RV32ZBB-NEXT: srl a7, a6, a4
+; RV32ZBB-NEXT: slli t0, a1, 1
+; RV32ZBB-NEXT: not a0, a4
+; RV32ZBB-NEXT: sll t0, t0, a0
+; RV32ZBB-NEXT: srl t1, a1, a4
+; RV32ZBB-NEXT: slli a1, a6, 1
+; RV32ZBB-NEXT: sll t2, a1, a0
+; RV32ZBB-NEXT: mv a6, a2
+; RV32ZBB-NEXT: beqz a5, .LBB11_6
+; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: mv a6, a3
+; RV32ZBB-NEXT: .LBB11_6:
+; RV32ZBB-NEXT: or a1, t0, a7
+; RV32ZBB-NEXT: or a7, t2, t1
+; RV32ZBB-NEXT: srl t0, a6, a4
+; RV32ZBB-NEXT: beqz a5, .LBB11_8
+; RV32ZBB-NEXT: # %bb.7:
+; RV32ZBB-NEXT: mv a3, a2
+; RV32ZBB-NEXT: .LBB11_8:
+; RV32ZBB-NEXT: slli a2, a3, 1
+; RV32ZBB-NEXT: sll a2, a2, a0
+; RV32ZBB-NEXT: or a2, a2, t0
+; RV32ZBB-NEXT: srl a3, a3, a4
+; RV32ZBB-NEXT: slli a4, a6, 1
+; RV32ZBB-NEXT: sll a0, a4, a0
+; RV32ZBB-NEXT: or a0, a0, a3
+; RV32ZBB-NEXT: add a3, a7, a0
+; RV32ZBB-NEXT: add a0, a1, a2
+; RV32ZBB-NEXT: sltu a1, a0, a1
+; RV32ZBB-NEXT: add a1, a3, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64ZBB-LABEL: rotr_64_mask_multiple:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a2, a2, 63
+; RV64ZBB-NEXT: ror a0, a0, a2
+; RV64ZBB-NEXT: ror a1, a1, a2
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: ret
+  %maskedamt = and i64 %amt, 63
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
+  %2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt)
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+declare i64 @llvm.fshr.i64(i64, i64, i64)
