Commit cddeb78

[RISCV] Add test cases showing failure to remove mask on rotate amounts.
This is similar to tests I added in e2f410f that had to be reverted. I've modified them to avoid the bug that is being fixed by D126036.
1 parent 1b976f2 commit cddeb78
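
For context, the pattern these tests exercise looks roughly like the following C sketch (illustrative only, not part of the commit; the function and variable names are hypothetical): the rotate amount is masked to the bit width, and the masked value is also reused by a second shift, so the mask has more than one use. Because a 32-bit rotate already consumes its amount modulo 32, the mask on the rotate itself should be removable even though it is still needed for the shift.

// Hypothetical C analogue of the rotl_32_mask_shared test; names are illustrative.
unsigned rotl_32_mask_shared(unsigned a, unsigned b, unsigned amt) {
    unsigned m = amt & 31;                              // masked amount with two uses
    unsigned rot = (a << m) | (a >> ((32 - m) & 31));   // 32-bit rotate left by m
    return rot + (b << m);                              // second use of the masked amount
}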

File tree

1 file changed: +310, -0 lines changed

llvm/test/CodeGen/RISCV/rotl-rotr.ll

Lines changed: 310 additions & 0 deletions
@@ -550,3 +550,313 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
  %d = or i64 %b, %c
  ret i64 %d
}

; Test that we're able to remove a mask on the rotate amount that has more than
; one use.
define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
; RV32I-LABEL: rotl_32_mask_shared:
; RV32I: # %bb.0:
; RV32I-NEXT: sll a3, a0, a2
; RV32I-NEXT: neg a4, a2
; RV32I-NEXT: srl a0, a0, a4
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: sll a1, a1, a2
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotl_32_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a3, a0, a2
; RV64I-NEXT: negw a4, a2
; RV64I-NEXT: srlw a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sllw a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: rotl_32_mask_shared:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: andi a3, a2, 31
; RV32ZBB-NEXT: rol a0, a0, a3
; RV32ZBB-NEXT: sll a1, a1, a2
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotl_32_mask_shared:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rolw a0, a0, a2
; RV64ZBB-NEXT: sllw a1, a1, a2
; RV64ZBB-NEXT: addw a0, a0, a1
; RV64ZBB-NEXT: ret
  %maskedamt = and i32 %amt, 31
  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
  %2 = shl i32 %b, %maskedamt
  %3 = add i32 %1, %2
  ret i32 %3
}
declare i32 @llvm.fshl.i32(i32, i32, i32)

define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 signext %amt) nounwind {
; RV32I-LABEL: rotl_64_mask_shared:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a5, a4, 26
; RV32I-NEXT: srli a5, a5, 31
; RV32I-NEXT: mv a7, a0
; RV32I-NEXT: bnez a5, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a7, a1
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: andi a6, a4, 63
; RV32I-NEXT: sll t0, a7, a4
; RV32I-NEXT: bnez a5, .LBB9_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: .LBB9_4:
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: not t1, a4
; RV32I-NEXT: srl a0, a0, t1
; RV32I-NEXT: or a5, t0, a0
; RV32I-NEXT: sll a1, a1, a4
; RV32I-NEXT: srli a0, a7, 1
; RV32I-NEXT: srl a7, a0, t1
; RV32I-NEXT: addi a0, a6, -32
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: bltz a0, .LBB9_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: sll a3, a2, a0
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: j .LBB9_7
; RV32I-NEXT: .LBB9_6:
; RV32I-NEXT: sll a0, a3, a4
; RV32I-NEXT: srli a3, a2, 1
; RV32I-NEXT: xori a6, a6, 31
; RV32I-NEXT: srl a3, a3, a6
; RV32I-NEXT: or a3, a0, a3
; RV32I-NEXT: sll a0, a2, a4
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: .LBB9_7:
; RV32I-NEXT: sltu a1, a0, a1
; RV32I-NEXT: add a2, a5, a3
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotl_64_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: sll a3, a0, a2
; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: srl a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sll a1, a1, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: rotl_64_mask_shared:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: slli a5, a4, 26
; RV32ZBB-NEXT: srli a5, a5, 31
; RV32ZBB-NEXT: mv a7, a0
; RV32ZBB-NEXT: bnez a5, .LBB9_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: mv a7, a1
; RV32ZBB-NEXT: .LBB9_2:
; RV32ZBB-NEXT: andi a6, a4, 63
; RV32ZBB-NEXT: sll t0, a7, a4
; RV32ZBB-NEXT: bnez a5, .LBB9_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv a1, a0
; RV32ZBB-NEXT: .LBB9_4:
; RV32ZBB-NEXT: srli a0, a1, 1
; RV32ZBB-NEXT: not t1, a4
; RV32ZBB-NEXT: srl a0, a0, t1
; RV32ZBB-NEXT: or a5, t0, a0
; RV32ZBB-NEXT: sll a1, a1, a4
; RV32ZBB-NEXT: srli a0, a7, 1
; RV32ZBB-NEXT: srl a7, a0, t1
; RV32ZBB-NEXT: addi a0, a6, -32
; RV32ZBB-NEXT: or a1, a1, a7
; RV32ZBB-NEXT: bltz a0, .LBB9_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: sll a3, a2, a0
; RV32ZBB-NEXT: mv a0, a1
; RV32ZBB-NEXT: j .LBB9_7
; RV32ZBB-NEXT: .LBB9_6:
; RV32ZBB-NEXT: sll a0, a3, a4
; RV32ZBB-NEXT: srli a3, a2, 1
; RV32ZBB-NEXT: xori a6, a6, 31
; RV32ZBB-NEXT: srl a3, a3, a6
; RV32ZBB-NEXT: or a3, a0, a3
; RV32ZBB-NEXT: sll a0, a2, a4
; RV32ZBB-NEXT: add a0, a1, a0
; RV32ZBB-NEXT: .LBB9_7:
; RV32ZBB-NEXT: sltu a1, a0, a1
; RV32ZBB-NEXT: add a2, a5, a3
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotl_64_mask_shared:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: andi a3, a2, 63
; RV64ZBB-NEXT: rol a0, a0, a3
; RV64ZBB-NEXT: sll a1, a1, a2
; RV64ZBB-NEXT: add a0, a0, a1
; RV64ZBB-NEXT: ret
  %maskedamt = and i64 %amt, 63
  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
  %2 = shl i64 %b, %maskedamt
  %3 = add i64 %1, %2
  ret i64 %3
}
declare i64 @llvm.fshl.i64(i64, i64, i64)

define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind {
; RV32I-LABEL: rotr_32_mask_shared:
; RV32I: # %bb.0:
; RV32I-NEXT: srl a3, a0, a2
; RV32I-NEXT: neg a4, a2
; RV32I-NEXT: sll a0, a0, a4
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: sll a1, a1, a2
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotr_32_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a3, a0, a2
; RV64I-NEXT: negw a4, a2
; RV64I-NEXT: sllw a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sllw a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: rotr_32_mask_shared:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: andi a3, a2, 31
; RV32ZBB-NEXT: ror a0, a0, a3
; RV32ZBB-NEXT: sll a1, a1, a2
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotr_32_mask_shared:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: rorw a0, a0, a2
; RV64ZBB-NEXT: sllw a1, a1, a2
; RV64ZBB-NEXT: addw a0, a0, a1
; RV64ZBB-NEXT: ret
  %maskedamt = and i32 %amt, 31
  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
  %2 = shl i32 %b, %maskedamt
  %3 = add i32 %1, %2
  ret i32 %3
}
declare i32 @llvm.fshr.i32(i32, i32, i32)

define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 signext %amt) nounwind {
; RV32I-LABEL: rotr_64_mask_shared:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a7, a4, 32
; RV32I-NEXT: mv a6, a1
; RV32I-NEXT: beqz a7, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a6, a0
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: andi a5, a4, 63
; RV32I-NEXT: srl t0, a6, a4
; RV32I-NEXT: beqz a7, .LBB11_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB11_4:
; RV32I-NEXT: slli a1, a0, 1
; RV32I-NEXT: not a7, a4
; RV32I-NEXT: sll a1, a1, a7
; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: srl t0, a0, a4
; RV32I-NEXT: slli a0, a6, 1
; RV32I-NEXT: sll a6, a0, a7
; RV32I-NEXT: addi a0, a5, -32
; RV32I-NEXT: or a6, a6, t0
; RV32I-NEXT: bltz a0, .LBB11_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: sll a3, a2, a0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: j .LBB11_7
; RV32I-NEXT: .LBB11_6:
; RV32I-NEXT: sll a0, a3, a4
; RV32I-NEXT: srli a3, a2, 1
; RV32I-NEXT: xori a5, a5, 31
; RV32I-NEXT: srl a3, a3, a5
; RV32I-NEXT: or a3, a0, a3
; RV32I-NEXT: sll a0, a2, a4
; RV32I-NEXT: add a0, a6, a0
; RV32I-NEXT: .LBB11_7:
; RV32I-NEXT: sltu a2, a0, a6
; RV32I-NEXT: add a1, a1, a3
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: rotr_64_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a3, a0, a2
; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: sll a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sll a1, a1, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: rotr_64_mask_shared:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: andi a7, a4, 32
; RV32ZBB-NEXT: mv a6, a1
; RV32ZBB-NEXT: beqz a7, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: mv a6, a0
; RV32ZBB-NEXT: .LBB11_2:
; RV32ZBB-NEXT: andi a5, a4, 63
; RV32ZBB-NEXT: srl t0, a6, a4
; RV32ZBB-NEXT: beqz a7, .LBB11_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv a0, a1
; RV32ZBB-NEXT: .LBB11_4:
; RV32ZBB-NEXT: slli a1, a0, 1
; RV32ZBB-NEXT: not a7, a4
; RV32ZBB-NEXT: sll a1, a1, a7
; RV32ZBB-NEXT: or a1, a1, t0
; RV32ZBB-NEXT: srl t0, a0, a4
; RV32ZBB-NEXT: slli a0, a6, 1
; RV32ZBB-NEXT: sll a6, a0, a7
; RV32ZBB-NEXT: addi a0, a5, -32
; RV32ZBB-NEXT: or a6, a6, t0
; RV32ZBB-NEXT: bltz a0, .LBB11_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: sll a3, a2, a0
; RV32ZBB-NEXT: mv a0, a6
; RV32ZBB-NEXT: j .LBB11_7
; RV32ZBB-NEXT: .LBB11_6:
; RV32ZBB-NEXT: sll a0, a3, a4
; RV32ZBB-NEXT: srli a3, a2, 1
; RV32ZBB-NEXT: xori a5, a5, 31
; RV32ZBB-NEXT: srl a3, a3, a5
; RV32ZBB-NEXT: or a3, a0, a3
; RV32ZBB-NEXT: sll a0, a2, a4
; RV32ZBB-NEXT: add a0, a6, a0
; RV32ZBB-NEXT: .LBB11_7:
; RV32ZBB-NEXT: sltu a2, a0, a6
; RV32ZBB-NEXT: add a1, a1, a3
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: rotr_64_mask_shared:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: andi a3, a2, 63
; RV64ZBB-NEXT: ror a0, a0, a3
; RV64ZBB-NEXT: sll a1, a1, a2
; RV64ZBB-NEXT: add a0, a0, a1
; RV64ZBB-NEXT: ret
  %maskedamt = and i64 %amt, 63
  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
  %2 = shl i64 %b, %maskedamt
  %3 = add i64 %1, %2
  ret i64 %3
}
declare i64 @llvm.fshr.i64(i64, i64, i64)
