Skip to content

Commit 4477500

Browse files
committed
[RISCV] ISel (and (shift X, C1), C2) to shift pair in more cases
Previously, these isel optimizations were disabled if the AND could be selected as an ANDI instruction. This patch disables the optimizations only if the immediate is valid for C.ANDI. If we can't use C.ANDI, we might be able to compress the shift instructions instead. I'm not checking for the C extension since we have relatively poor test coverage of the C extension. Without the C extension, the code size should be equal. My only concern would be if the shift+andi sequence had better latency/throughput on a particular CPU. I did have to add a peephole to match SRLIW if the input is zexti32 to prevent a regression in rv64zbp.ll. Reviewed By: luismarques Differential Revision: https://reviews.llvm.org/D122701
1 parent 5fd0925 commit 4477500

File tree

6 files changed

+44
-33
lines changed

6 files changed

+44
-33
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -724,8 +724,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
724724

725725
uint64_t C1 = N1C->getZExtValue();
726726

727-
// Keep track of whether this is an andi.
728-
bool IsANDI = isInt<12>(N1C->getSExtValue());
727+
// Keep track of whether this is a c.andi. If we can't use c.andi, the
728+
// shift pair might offer more compression opportunities.
729+
// TODO: We could check for C extension here, but we don't have many lit
730+
// tests with the C extension enabled so not checking gets better coverage.
731+
// TODO: What if ANDI is faster than the shift pair?
732+
bool IsCANDI = isInt<6>(N1C->getSExtValue());
729733

730734
// Clear irrelevant bits in the mask.
731735
if (LeftShift)
@@ -776,7 +780,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
776780
bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
777781
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
778782
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
779-
if (OneUseOrZExtW && !IsANDI && !Skip) {
783+
if (OneUseOrZExtW && !IsCANDI && !Skip) {
780784
SDNode *SLLI = CurDAG->getMachineNode(
781785
RISCV::SLLI, DL, XLenVT, X,
782786
CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
@@ -806,7 +810,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
806810
}
807811

808812
// (srli (slli c2+c3), c3)
809-
if (OneUseOrZExtW && !IsANDI) {
813+
if (OneUseOrZExtW && !IsCANDI) {
810814
SDNode *SLLI = CurDAG->getMachineNode(
811815
RISCV::SLLI, DL, XLenVT, X,
812816
CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
@@ -824,9 +828,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
824828
if (!LeftShift && isShiftedMask_64(C1)) {
825829
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
826830
uint64_t C3 = countTrailingZeros(C1);
827-
if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsANDI) {
831+
if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
832+
unsigned SrliOpc = RISCV::SRLI;
833+
// If the input is zexti32 we should use SRLIW.
834+
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
835+
X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
836+
SrliOpc = RISCV::SRLIW;
837+
X = X.getOperand(0);
838+
}
828839
SDNode *SRLI = CurDAG->getMachineNode(
829-
RISCV::SRLI, DL, XLenVT, X,
840+
SrliOpc, DL, XLenVT, X,
830841
CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
831842
SDNode *SLLI =
832843
CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
@@ -836,7 +847,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
836847
}
837848
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
838849
if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
839-
OneUseOrZExtW && !IsANDI) {
850+
OneUseOrZExtW && !IsCANDI) {
840851
SDNode *SRLIW = CurDAG->getMachineNode(
841852
RISCV::SRLIW, DL, XLenVT, X,
842853
CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
@@ -853,7 +864,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
853864
if (LeftShift && isShiftedMask_64(C1)) {
854865
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
855866
uint64_t C3 = countTrailingZeros(C1);
856-
if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsANDI) {
867+
if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
857868
SDNode *SRLI = CurDAG->getMachineNode(
858869
RISCV::SRLI, DL, XLenVT, X,
859870
CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
@@ -864,7 +875,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
864875
return;
865876
}
866877
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
867-
if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsANDI) {
878+
if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
868879
SDNode *SRLIW = CurDAG->getMachineNode(
869880
RISCV::SRLIW, DL, XLenVT, X,
870881
CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));

llvm/test/CodeGen/RISCV/bitreverse-shift.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
120120
; RV32ZBKB: # %bb.0:
121121
; RV32ZBKB-NEXT: rev8 a0, a0
122122
; RV32ZBKB-NEXT: brev8 a0, a0
123-
; RV32ZBKB-NEXT: srli a0, a0, 21
124-
; RV32ZBKB-NEXT: andi a0, a0, 2040
123+
; RV32ZBKB-NEXT: srli a0, a0, 24
124+
; RV32ZBKB-NEXT: slli a0, a0, 3
125125
; RV32ZBKB-NEXT: rev8 a0, a0
126126
; RV32ZBKB-NEXT: brev8 a0, a0
127127
; RV32ZBKB-NEXT: srli a0, a0, 24
@@ -131,8 +131,8 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
131131
; RV64ZBKB: # %bb.0:
132132
; RV64ZBKB-NEXT: rev8 a0, a0
133133
; RV64ZBKB-NEXT: brev8 a0, a0
134-
; RV64ZBKB-NEXT: srli a0, a0, 53
135-
; RV64ZBKB-NEXT: andi a0, a0, 2040
134+
; RV64ZBKB-NEXT: srli a0, a0, 56
135+
; RV64ZBKB-NEXT: slli a0, a0, 3
136136
; RV64ZBKB-NEXT: rev8 a0, a0
137137
; RV64ZBKB-NEXT: brev8 a0, a0
138138
; RV64ZBKB-NEXT: srli a0, a0, 56
@@ -148,8 +148,8 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
148148
; RV32ZBKB: # %bb.0:
149149
; RV32ZBKB-NEXT: rev8 a0, a0
150150
; RV32ZBKB-NEXT: brev8 a0, a0
151-
; RV32ZBKB-NEXT: srli a0, a0, 9
152-
; RV32ZBKB-NEXT: andi a0, a0, -128
151+
; RV32ZBKB-NEXT: srli a0, a0, 16
152+
; RV32ZBKB-NEXT: slli a0, a0, 7
153153
; RV32ZBKB-NEXT: rev8 a0, a0
154154
; RV32ZBKB-NEXT: brev8 a0, a0
155155
; RV32ZBKB-NEXT: srli a0, a0, 16
@@ -159,8 +159,8 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
159159
; RV64ZBKB: # %bb.0:
160160
; RV64ZBKB-NEXT: rev8 a0, a0
161161
; RV64ZBKB-NEXT: brev8 a0, a0
162-
; RV64ZBKB-NEXT: srli a0, a0, 41
163-
; RV64ZBKB-NEXT: andi a0, a0, -128
162+
; RV64ZBKB-NEXT: srli a0, a0, 48
163+
; RV64ZBKB-NEXT: slli a0, a0, 7
164164
; RV64ZBKB-NEXT: rev8 a0, a0
165165
; RV64ZBKB-NEXT: brev8 a0, a0
166166
; RV64ZBKB-NEXT: srli a0, a0, 48

llvm/test/CodeGen/RISCV/bswap-shift.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,17 +123,17 @@ define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {
123123
; RV32ZB-LABEL: test_bswap_shli_7_bswap_i16:
124124
; RV32ZB: # %bb.0:
125125
; RV32ZB-NEXT: rev8 a0, a0
126-
; RV32ZB-NEXT: srli a0, a0, 9
127-
; RV32ZB-NEXT: andi a0, a0, -128
126+
; RV32ZB-NEXT: srli a0, a0, 16
127+
; RV32ZB-NEXT: slli a0, a0, 7
128128
; RV32ZB-NEXT: rev8 a0, a0
129129
; RV32ZB-NEXT: srli a0, a0, 16
130130
; RV32ZB-NEXT: ret
131131
;
132132
; RV64ZB-LABEL: test_bswap_shli_7_bswap_i16:
133133
; RV64ZB: # %bb.0:
134134
; RV64ZB-NEXT: rev8 a0, a0
135-
; RV64ZB-NEXT: srli a0, a0, 41
136-
; RV64ZB-NEXT: andi a0, a0, -128
135+
; RV64ZB-NEXT: srli a0, a0, 48
136+
; RV64ZB-NEXT: slli a0, a0, 7
137137
; RV64ZB-NEXT: rev8 a0, a0
138138
; RV64ZB-NEXT: srli a0, a0, 48
139139
; RV64ZB-NEXT: ret

llvm/test/CodeGen/RISCV/rv64zbp.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2758,8 +2758,8 @@ define i32 @bswap_rotr_i32(i32 %a) {
27582758
; RV64I-NEXT: slli a2, a0, 24
27592759
; RV64I-NEXT: or a1, a2, a1
27602760
; RV64I-NEXT: srliw a2, a0, 24
2761-
; RV64I-NEXT: srliw a0, a0, 8
2762-
; RV64I-NEXT: andi a0, a0, -256
2761+
; RV64I-NEXT: srliw a0, a0, 16
2762+
; RV64I-NEXT: slli a0, a0, 8
27632763
; RV64I-NEXT: or a0, a0, a2
27642764
; RV64I-NEXT: slliw a0, a0, 16
27652765
; RV64I-NEXT: srliw a1, a1, 16
@@ -2779,8 +2779,8 @@ define i32 @bswap_rotl_i32(i32 %a) {
27792779
; RV64I-LABEL: bswap_rotl_i32:
27802780
; RV64I: # %bb.0:
27812781
; RV64I-NEXT: srliw a1, a0, 24
2782-
; RV64I-NEXT: srliw a2, a0, 8
2783-
; RV64I-NEXT: andi a2, a2, -256
2782+
; RV64I-NEXT: srliw a2, a0, 16
2783+
; RV64I-NEXT: slli a2, a2, 8
27842784
; RV64I-NEXT: or a1, a2, a1
27852785
; RV64I-NEXT: slli a2, a0, 8
27862786
; RV64I-NEXT: lui a3, 4080

llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ define i32 @neg_sel_constants(i32 signext %a) {
3131
define i32 @neg_sel_special_constant(i32 signext %a) {
3232
; RV32-LABEL: neg_sel_special_constant:
3333
; RV32: # %bb.0:
34-
; RV32-NEXT: srli a0, a0, 22
35-
; RV32-NEXT: andi a0, a0, 512
34+
; RV32-NEXT: srli a0, a0, 31
35+
; RV32-NEXT: slli a0, a0, 9
3636
; RV32-NEXT: ret
3737
;
3838
; RV64-LABEL: neg_sel_special_constant:
@@ -100,8 +100,8 @@ define i32 @pos_sel_special_constant(i32 signext %a) {
100100
; RV32-LABEL: pos_sel_special_constant:
101101
; RV32: # %bb.0:
102102
; RV32-NEXT: not a0, a0
103-
; RV32-NEXT: srli a0, a0, 22
104-
; RV32-NEXT: andi a0, a0, 512
103+
; RV32-NEXT: srli a0, a0, 31
104+
; RV32-NEXT: slli a0, a0, 9
105105
; RV32-NEXT: ret
106106
;
107107
; RV64-LABEL: pos_sel_special_constant:

llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -535,8 +535,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
535535
; RV32MV-NEXT: lw a1, 0(a0)
536536
; RV32MV-NEXT: andi a2, a1, 2047
537537
; RV32MV-NEXT: sh a2, 8(sp)
538-
; RV32MV-NEXT: srli a2, a1, 11
539-
; RV32MV-NEXT: andi a2, a2, 2047
538+
; RV32MV-NEXT: slli a2, a1, 10
539+
; RV32MV-NEXT: srli a2, a2, 21
540540
; RV32MV-NEXT: sh a2, 10(sp)
541541
; RV32MV-NEXT: lb a2, 4(a0)
542542
; RV32MV-NEXT: slli a2, a2, 10
@@ -606,8 +606,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
606606
; RV64MV-NEXT: sh a2, 12(sp)
607607
; RV64MV-NEXT: andi a2, a1, 2047
608608
; RV64MV-NEXT: sh a2, 8(sp)
609-
; RV64MV-NEXT: srli a1, a1, 11
610-
; RV64MV-NEXT: andi a1, a1, 2047
609+
; RV64MV-NEXT: slli a1, a1, 42
610+
; RV64MV-NEXT: srli a1, a1, 53
611611
; RV64MV-NEXT: sh a1, 10(sp)
612612
; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
613613
; RV64MV-NEXT: addi a1, sp, 8

0 commit comments

Comments
 (0)