Skip to content

Commit b8c7cdc

Browse files
committed
[SelectionDAG][RISCV] Teach getNode to fold bswap(bswap(x))->x.
This can show up when bitreverse is expanded into a bswap followed by a swap of the bits within each byte. If the input is already a bswap, we should cancel the two bswaps out before transforming them further in a way that makes the redundancy harder to see. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D118007
1 parent cd2a9ff commit b8c7cdc

File tree

2 files changed

+53
-206
lines changed

2 files changed

+53
-206
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5119,6 +5119,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
51195119
"BSWAP types must be a multiple of 16 bits!");
51205120
if (OpOpcode == ISD::UNDEF)
51215121
return getUNDEF(VT);
5122+
// bswap(bswap(X)) -> X.
5123+
if (OpOpcode == ISD::BSWAP)
5124+
return Operand.getOperand(0);
51225125
break;
51235126
case ISD::BITREVERSE:
51245127
assert(VT.isInteger() && VT == Operand.getValueType() &&

llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll

Lines changed: 50 additions & 206 deletions
Original file line number | Diff line number | Diff line change
@@ -694,13 +694,6 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
694694
define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
695695
; RV32I-LABEL: test_bswap_bitreverse_i16:
696696
; RV32I: # %bb.0:
697-
; RV32I-NEXT: slli a1, a0, 8
698-
; RV32I-NEXT: slli a2, a0, 16
699-
; RV32I-NEXT: srli a2, a2, 24
700-
; RV32I-NEXT: or a1, a1, a2
701-
; RV32I-NEXT: slli a1, a1, 8
702-
; RV32I-NEXT: andi a0, a0, 255
703-
; RV32I-NEXT: or a0, a1, a0
704697
; RV32I-NEXT: srli a1, a0, 4
705698
; RV32I-NEXT: lui a2, 1
706699
; RV32I-NEXT: addi a2, a2, -241
@@ -726,13 +719,6 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
726719
;
727720
; RV64I-LABEL: test_bswap_bitreverse_i16:
728721
; RV64I: # %bb.0:
729-
; RV64I-NEXT: slli a1, a0, 8
730-
; RV64I-NEXT: slli a2, a0, 48
731-
; RV64I-NEXT: srli a2, a2, 56
732-
; RV64I-NEXT: or a1, a1, a2
733-
; RV64I-NEXT: slli a1, a1, 8
734-
; RV64I-NEXT: andi a0, a0, 255
735-
; RV64I-NEXT: or a0, a1, a0
736722
; RV64I-NEXT: srli a1, a0, 4
737723
; RV64I-NEXT: lui a2, 1
738724
; RV64I-NEXT: addiw a2, a2, -241
@@ -758,16 +744,13 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
758744
;
759745
; RV32ZBB-LABEL: test_bswap_bitreverse_i16:
760746
; RV32ZBB: # %bb.0:
761-
; RV32ZBB-NEXT: rev8 a0, a0
762-
; RV32ZBB-NEXT: srli a0, a0, 16
763-
; RV32ZBB-NEXT: rev8 a0, a0
764-
; RV32ZBB-NEXT: srli a1, a0, 12
765-
; RV32ZBB-NEXT: lui a2, 15
766-
; RV32ZBB-NEXT: addi a2, a2, 240
747+
; RV32ZBB-NEXT: srli a1, a0, 4
748+
; RV32ZBB-NEXT: lui a2, 1
749+
; RV32ZBB-NEXT: addi a2, a2, -241
767750
; RV32ZBB-NEXT: and a1, a1, a2
768-
; RV32ZBB-NEXT: srli a0, a0, 20
769-
; RV32ZBB-NEXT: andi a0, a0, -241
770-
; RV32ZBB-NEXT: or a0, a0, a1
751+
; RV32ZBB-NEXT: and a0, a0, a2
752+
; RV32ZBB-NEXT: slli a0, a0, 4
753+
; RV32ZBB-NEXT: or a0, a1, a0
771754
; RV32ZBB-NEXT: srli a1, a0, 2
772755
; RV32ZBB-NEXT: lui a2, 3
773756
; RV32ZBB-NEXT: addi a2, a2, 819
@@ -786,16 +769,13 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
786769
;
787770
; RV64ZBB-LABEL: test_bswap_bitreverse_i16:
788771
; RV64ZBB: # %bb.0:
789-
; RV64ZBB-NEXT: rev8 a0, a0
790-
; RV64ZBB-NEXT: srli a0, a0, 48
791-
; RV64ZBB-NEXT: rev8 a0, a0
792-
; RV64ZBB-NEXT: srli a1, a0, 44
793-
; RV64ZBB-NEXT: lui a2, 15
794-
; RV64ZBB-NEXT: addiw a2, a2, 240
772+
; RV64ZBB-NEXT: srli a1, a0, 4
773+
; RV64ZBB-NEXT: lui a2, 1
774+
; RV64ZBB-NEXT: addiw a2, a2, -241
795775
; RV64ZBB-NEXT: and a1, a1, a2
796-
; RV64ZBB-NEXT: srli a0, a0, 52
797-
; RV64ZBB-NEXT: andi a0, a0, -241
798-
; RV64ZBB-NEXT: or a0, a0, a1
776+
; RV64ZBB-NEXT: and a0, a0, a2
777+
; RV64ZBB-NEXT: slli a0, a0, 4
778+
; RV64ZBB-NEXT: or a0, a1, a0
799779
; RV64ZBB-NEXT: srli a1, a0, 2
800780
; RV64ZBB-NEXT: lui a2, 3
801781
; RV64ZBB-NEXT: addiw a2, a2, 819
@@ -819,27 +799,6 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
819799
define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
820800
; RV32I-LABEL: test_bswap_bitreverse_i32:
821801
; RV32I: # %bb.0:
822-
; RV32I-NEXT: srli a1, a0, 8
823-
; RV32I-NEXT: lui a2, 16
824-
; RV32I-NEXT: addi a2, a2, -256
825-
; RV32I-NEXT: and a1, a1, a2
826-
; RV32I-NEXT: srli a3, a0, 24
827-
; RV32I-NEXT: or a1, a1, a3
828-
; RV32I-NEXT: slli a3, a0, 8
829-
; RV32I-NEXT: lui a4, 4080
830-
; RV32I-NEXT: and a3, a3, a4
831-
; RV32I-NEXT: slli a0, a0, 24
832-
; RV32I-NEXT: or a0, a0, a3
833-
; RV32I-NEXT: or a0, a0, a1
834-
; RV32I-NEXT: srli a1, a0, 8
835-
; RV32I-NEXT: and a1, a1, a2
836-
; RV32I-NEXT: srli a2, a0, 24
837-
; RV32I-NEXT: or a1, a1, a2
838-
; RV32I-NEXT: slli a2, a0, 8
839-
; RV32I-NEXT: and a2, a2, a4
840-
; RV32I-NEXT: slli a0, a0, 24
841-
; RV32I-NEXT: or a0, a0, a2
842-
; RV32I-NEXT: or a0, a0, a1
843802
; RV32I-NEXT: srli a1, a0, 4
844803
; RV32I-NEXT: lui a2, 61681
845804
; RV32I-NEXT: addi a2, a2, -241
@@ -865,27 +824,6 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
865824
;
866825
; RV64I-LABEL: test_bswap_bitreverse_i32:
867826
; RV64I: # %bb.0:
868-
; RV64I-NEXT: srliw a1, a0, 8
869-
; RV64I-NEXT: lui a2, 16
870-
; RV64I-NEXT: addiw a2, a2, -256
871-
; RV64I-NEXT: and a1, a1, a2
872-
; RV64I-NEXT: srliw a3, a0, 24
873-
; RV64I-NEXT: or a1, a1, a3
874-
; RV64I-NEXT: slli a3, a0, 8
875-
; RV64I-NEXT: lui a4, 4080
876-
; RV64I-NEXT: and a3, a3, a4
877-
; RV64I-NEXT: slliw a0, a0, 24
878-
; RV64I-NEXT: or a0, a0, a3
879-
; RV64I-NEXT: or a0, a0, a1
880-
; RV64I-NEXT: srliw a1, a0, 8
881-
; RV64I-NEXT: and a1, a1, a2
882-
; RV64I-NEXT: srliw a2, a0, 24
883-
; RV64I-NEXT: or a1, a1, a2
884-
; RV64I-NEXT: slli a2, a0, 8
885-
; RV64I-NEXT: and a2, a2, a4
886-
; RV64I-NEXT: slliw a0, a0, 24
887-
; RV64I-NEXT: or a0, a0, a2
888-
; RV64I-NEXT: or a0, a0, a1
889827
; RV64I-NEXT: srli a1, a0, 4
890828
; RV64I-NEXT: lui a2, 61681
891829
; RV64I-NEXT: addiw a2, a2, -241
@@ -936,18 +874,12 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
936874
;
937875
; RV64ZBB-LABEL: test_bswap_bitreverse_i32:
938876
; RV64ZBB: # %bb.0:
939-
; RV64ZBB-NEXT: rev8 a0, a0
940-
; RV64ZBB-NEXT: srli a0, a0, 32
941-
; RV64ZBB-NEXT: rev8 a0, a0
942-
; RV64ZBB-NEXT: srli a1, a0, 36
877+
; RV64ZBB-NEXT: srli a1, a0, 4
943878
; RV64ZBB-NEXT: lui a2, 61681
944879
; RV64ZBB-NEXT: addiw a2, a2, -241
945880
; RV64ZBB-NEXT: and a1, a1, a2
946-
; RV64ZBB-NEXT: srli a0, a0, 28
947-
; RV64ZBB-NEXT: lui a2, 986895
948-
; RV64ZBB-NEXT: addiw a2, a2, 240
949881
; RV64ZBB-NEXT: and a0, a0, a2
950-
; RV64ZBB-NEXT: sext.w a0, a0
882+
; RV64ZBB-NEXT: slliw a0, a0, 4
951883
; RV64ZBB-NEXT: or a0, a1, a0
952884
; RV64ZBB-NEXT: srli a1, a0, 2
953885
; RV64ZBB-NEXT: lui a2, 209715
@@ -972,155 +904,67 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
972904
define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
973905
; RV32I-LABEL: test_bswap_bitreverse_i64:
974906
; RV32I: # %bb.0:
975-
; RV32I-NEXT: srli a2, a1, 8
976-
; RV32I-NEXT: lui a3, 16
977-
; RV32I-NEXT: addi a3, a3, -256
978-
; RV32I-NEXT: and a2, a2, a3
979-
; RV32I-NEXT: srli a4, a1, 24
980-
; RV32I-NEXT: or a2, a2, a4
981-
; RV32I-NEXT: slli a4, a1, 8
982-
; RV32I-NEXT: lui a5, 4080
983-
; RV32I-NEXT: and a4, a4, a5
984-
; RV32I-NEXT: slli a1, a1, 24
985-
; RV32I-NEXT: or a1, a1, a4
986-
; RV32I-NEXT: or a1, a1, a2
987-
; RV32I-NEXT: srli a2, a0, 8
988-
; RV32I-NEXT: and a2, a2, a3
989-
; RV32I-NEXT: srli a4, a0, 24
990-
; RV32I-NEXT: or a2, a2, a4
991-
; RV32I-NEXT: slli a4, a0, 8
992-
; RV32I-NEXT: and a4, a4, a5
993-
; RV32I-NEXT: slli a0, a0, 24
994-
; RV32I-NEXT: or a0, a0, a4
995-
; RV32I-NEXT: or a0, a0, a2
996-
; RV32I-NEXT: srli a2, a0, 8
997-
; RV32I-NEXT: and a2, a2, a3
998-
; RV32I-NEXT: srli a4, a0, 24
999-
; RV32I-NEXT: or a2, a2, a4
1000-
; RV32I-NEXT: slli a4, a0, 8
1001-
; RV32I-NEXT: and a4, a4, a5
1002-
; RV32I-NEXT: slli a0, a0, 24
1003-
; RV32I-NEXT: or a0, a0, a4
1004-
; RV32I-NEXT: or a0, a0, a2
1005907
; RV32I-NEXT: srli a2, a0, 4
1006-
; RV32I-NEXT: lui a4, 61681
1007-
; RV32I-NEXT: addi a4, a4, -241
1008-
; RV32I-NEXT: and a2, a2, a4
1009-
; RV32I-NEXT: and a0, a0, a4
908+
; RV32I-NEXT: lui a3, 61681
909+
; RV32I-NEXT: addi a3, a3, -241
910+
; RV32I-NEXT: and a2, a2, a3
911+
; RV32I-NEXT: and a0, a0, a3
1010912
; RV32I-NEXT: slli a0, a0, 4
1011913
; RV32I-NEXT: or a0, a2, a0
1012914
; RV32I-NEXT: srli a2, a0, 2
1013-
; RV32I-NEXT: lui a6, 209715
1014-
; RV32I-NEXT: addi a6, a6, 819
1015-
; RV32I-NEXT: and a2, a2, a6
1016-
; RV32I-NEXT: and a0, a0, a6
915+
; RV32I-NEXT: lui a4, 209715
916+
; RV32I-NEXT: addi a4, a4, 819
917+
; RV32I-NEXT: and a2, a2, a4
918+
; RV32I-NEXT: and a0, a0, a4
1017919
; RV32I-NEXT: slli a0, a0, 2
1018920
; RV32I-NEXT: or a0, a2, a0
1019921
; RV32I-NEXT: srli a2, a0, 1
1020-
; RV32I-NEXT: lui a7, 349525
1021-
; RV32I-NEXT: addi a7, a7, 1365
1022-
; RV32I-NEXT: and a2, a2, a7
1023-
; RV32I-NEXT: and a0, a0, a7
922+
; RV32I-NEXT: lui a5, 349525
923+
; RV32I-NEXT: addi a5, a5, 1365
924+
; RV32I-NEXT: and a2, a2, a5
925+
; RV32I-NEXT: and a0, a0, a5
1024926
; RV32I-NEXT: slli a0, a0, 1
1025927
; RV32I-NEXT: or a0, a2, a0
1026-
; RV32I-NEXT: srli a2, a1, 8
1027-
; RV32I-NEXT: and a2, a2, a3
1028-
; RV32I-NEXT: srli a3, a1, 24
1029-
; RV32I-NEXT: or a2, a2, a3
1030-
; RV32I-NEXT: slli a3, a1, 8
1031-
; RV32I-NEXT: and a3, a3, a5
1032-
; RV32I-NEXT: slli a1, a1, 24
1033-
; RV32I-NEXT: or a1, a1, a3
1034-
; RV32I-NEXT: or a1, a1, a2
1035928
; RV32I-NEXT: srli a2, a1, 4
1036-
; RV32I-NEXT: and a2, a2, a4
1037-
; RV32I-NEXT: and a1, a1, a4
929+
; RV32I-NEXT: and a2, a2, a3
930+
; RV32I-NEXT: and a1, a1, a3
1038931
; RV32I-NEXT: slli a1, a1, 4
1039932
; RV32I-NEXT: or a1, a2, a1
1040933
; RV32I-NEXT: srli a2, a1, 2
1041-
; RV32I-NEXT: and a2, a2, a6
1042-
; RV32I-NEXT: and a1, a1, a6
934+
; RV32I-NEXT: and a2, a2, a4
935+
; RV32I-NEXT: and a1, a1, a4
1043936
; RV32I-NEXT: slli a1, a1, 2
1044937
; RV32I-NEXT: or a1, a2, a1
1045938
; RV32I-NEXT: srli a2, a1, 1
1046-
; RV32I-NEXT: and a2, a2, a7
1047-
; RV32I-NEXT: and a1, a1, a7
939+
; RV32I-NEXT: and a2, a2, a5
940+
; RV32I-NEXT: and a1, a1, a5
1048941
; RV32I-NEXT: slli a1, a1, 1
1049942
; RV32I-NEXT: or a1, a2, a1
1050943
; RV32I-NEXT: ret
1051944
;
1052945
; RV64I-LABEL: test_bswap_bitreverse_i64:
1053946
; RV64I: # %bb.0:
1054-
; RV64I-NEXT: srli a1, a0, 24
1055-
; RV64I-NEXT: lui a2, 4080
1056-
; RV64I-NEXT: and a1, a1, a2
1057-
; RV64I-NEXT: srli a3, a0, 8
1058-
; RV64I-NEXT: li a4, 255
1059-
; RV64I-NEXT: slli a5, a4, 24
1060-
; RV64I-NEXT: and a3, a3, a5
1061-
; RV64I-NEXT: or a1, a3, a1
1062-
; RV64I-NEXT: srli a3, a0, 40
1063-
; RV64I-NEXT: lui a6, 16
1064-
; RV64I-NEXT: addiw a6, a6, -256
1065-
; RV64I-NEXT: and a3, a3, a6
1066-
; RV64I-NEXT: srli a7, a0, 56
1067-
; RV64I-NEXT: or a3, a3, a7
1068-
; RV64I-NEXT: or a1, a1, a3
1069-
; RV64I-NEXT: slli a3, a0, 24
1070-
; RV64I-NEXT: slli a7, a4, 40
1071-
; RV64I-NEXT: and a3, a3, a7
1072-
; RV64I-NEXT: srliw t0, a0, 24
1073-
; RV64I-NEXT: slli t0, t0, 32
1074-
; RV64I-NEXT: or a3, a3, t0
1075-
; RV64I-NEXT: slli t0, a0, 40
1076-
; RV64I-NEXT: slli a4, a4, 48
1077-
; RV64I-NEXT: and t0, t0, a4
1078-
; RV64I-NEXT: slli a0, a0, 56
1079-
; RV64I-NEXT: or a0, a0, t0
1080-
; RV64I-NEXT: or a0, a0, a3
1081-
; RV64I-NEXT: or a0, a0, a1
1082-
; RV64I-NEXT: srli a1, a0, 40
1083-
; RV64I-NEXT: and a1, a1, a6
1084-
; RV64I-NEXT: srli a3, a0, 56
1085-
; RV64I-NEXT: or a1, a1, a3
1086-
; RV64I-NEXT: srli a3, a0, 24
1087-
; RV64I-NEXT: and a2, a3, a2
1088-
; RV64I-NEXT: srli a3, a0, 8
1089-
; RV64I-NEXT: and a3, a3, a5
1090-
; RV64I-NEXT: or a2, a3, a2
1091-
; RV64I-NEXT: or a1, a2, a1
1092-
; RV64I-NEXT: slli a2, a0, 24
1093-
; RV64I-NEXT: and a2, a2, a7
1094-
; RV64I-NEXT: srliw a3, a0, 24
1095-
; RV64I-NEXT: slli a3, a3, 32
1096-
; RV64I-NEXT: or a2, a2, a3
1097-
; RV64I-NEXT: slli a3, a0, 40
1098-
; RV64I-NEXT: and a3, a3, a4
1099-
; RV64I-NEXT: slli a0, a0, 56
1100-
; RV64I-NEXT: or a0, a0, a3
1101-
; RV64I-NEXT: lui a3, %hi(.LCPI9_0)
1102-
; RV64I-NEXT: ld a3, %lo(.LCPI9_0)(a3)
1103-
; RV64I-NEXT: or a0, a0, a2
1104-
; RV64I-NEXT: or a0, a0, a1
1105-
; RV64I-NEXT: srli a1, a0, 4
1106-
; RV64I-NEXT: and a1, a1, a3
1107-
; RV64I-NEXT: and a0, a0, a3
1108-
; RV64I-NEXT: lui a2, %hi(.LCPI9_1)
1109-
; RV64I-NEXT: ld a2, %lo(.LCPI9_1)(a2)
947+
; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
948+
; RV64I-NEXT: ld a1, %lo(.LCPI9_0)(a1)
949+
; RV64I-NEXT: srli a2, a0, 4
950+
; RV64I-NEXT: and a2, a2, a1
951+
; RV64I-NEXT: and a0, a0, a1
952+
; RV64I-NEXT: lui a1, %hi(.LCPI9_1)
953+
; RV64I-NEXT: ld a1, %lo(.LCPI9_1)(a1)
1110954
; RV64I-NEXT: slli a0, a0, 4
1111-
; RV64I-NEXT: or a0, a1, a0
1112-
; RV64I-NEXT: srli a1, a0, 2
1113-
; RV64I-NEXT: and a1, a1, a2
1114-
; RV64I-NEXT: and a0, a0, a2
1115-
; RV64I-NEXT: lui a2, %hi(.LCPI9_2)
1116-
; RV64I-NEXT: ld a2, %lo(.LCPI9_2)(a2)
955+
; RV64I-NEXT: or a0, a2, a0
956+
; RV64I-NEXT: srli a2, a0, 2
957+
; RV64I-NEXT: and a2, a2, a1
958+
; RV64I-NEXT: and a0, a0, a1
959+
; RV64I-NEXT: lui a1, %hi(.LCPI9_2)
960+
; RV64I-NEXT: ld a1, %lo(.LCPI9_2)(a1)
1117961
; RV64I-NEXT: slli a0, a0, 2
1118-
; RV64I-NEXT: or a0, a1, a0
1119-
; RV64I-NEXT: srli a1, a0, 1
1120-
; RV64I-NEXT: and a1, a1, a2
1121-
; RV64I-NEXT: and a0, a0, a2
962+
; RV64I-NEXT: or a0, a2, a0
963+
; RV64I-NEXT: srli a2, a0, 1
964+
; RV64I-NEXT: and a2, a2, a1
965+
; RV64I-NEXT: and a0, a0, a1
1122966
; RV64I-NEXT: slli a0, a0, 1
1123-
; RV64I-NEXT: or a0, a1, a0
967+
; RV64I-NEXT: or a0, a2, a0
1124968
; RV64I-NEXT: ret
1125969
;
1126970
; RV32ZBB-LABEL: test_bswap_bitreverse_i64:

0 commit comments

Comments
 (0)