Skip to content

Commit d9ba1a9

Browse files
committed
[RISCV] Teach isel to select ADDW/SUBW/MULW/SLLIW when only the lower 32-bits are used.
We normally select these when the root node is a sext_inreg, but SimplifyDemandedBits can sometimes bypass the sext_inreg for some users. This can create a situation where sext_inreg+add/sub/mul/shl is selected to a W instruction, and then the add/sub/mul/shl is separately selected to a non-W instruction with the same inputs. This patch tries to detect when it would still be ok to use a W instruction without the sext_inreg by checking the direct users. This can allow the W instruction to CSE with one created for a sext_inreg+add/sub/mul/shl. To minimize complexity and cost of checking, we make no attempt to determine if the CSE will happen and just always use a W instruction when we can. Differential Revision: https://reviews.llvm.org/D107658
1 parent 6cc1109 commit d9ba1a9

33 files changed

+692
-610
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,6 +1496,88 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
14961496
return false;
14971497
}
14981498

1499+
// Return true if all users of this SDNode* only consume the lower \p Bits.
1500+
// This can be used to form W instructions for add/sub/mul/shl even when the
1501+
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
1502+
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
1503+
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
1504+
// the add/sub/mul/shl to become non-W instructions. By checking the users we
1505+
// may be able to use a W instruction and CSE with the other instruction if
1506+
// this has happened. We could try to detect that the CSE opportunity exists
1507+
// before doing this, but that would be more complicated.
1508+
// TODO: Does this need to look through AND/OR/XOR to their users to find more
1509+
// opportunities?
1510+
// Returns true only if every use of \p Node is by an already-selected machine
// instruction from the list below and the per-opcode check for that
// instruction passes for \p Bits. Any other user makes the result false.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
1511+
// Only these four generic opcodes are expected as the queried node.
assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
1512+
Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL) &&
1513+
"Unexpected opcode");
1514+
1515+
// Walk every use of Node. The use iterator is kept (rather than just the
// user) because some cases below ask it which operand slot refers to Node.
for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
1516+
SDNode *User = *UI;
1517+
// Users of this node should have already been instruction selected
1518+
// A user that is not yet a machine node cannot be classified, so give up.
if (!User->isMachineOpcode())
1519+
return false;
1520+
1521+
// TODO: Add more opcodes?
1522+
switch (User->getMachineOpcode()) {
1523+
default:
1524+
// Unlisted user: conservatively report that not all users qualify.
return false;
1525+
// For the opcodes in this group the operand position is not checked; the
// user is accepted whenever the caller allows at least 32 bits.
case RISCV::ADDW:
1526+
case RISCV::ADDIW:
1527+
case RISCV::SUBW:
1528+
case RISCV::MULW:
1529+
case RISCV::SLLW:
1530+
case RISCV::SLLIW:
1531+
case RISCV::SRAW:
1532+
case RISCV::SRAIW:
1533+
case RISCV::SRLW:
1534+
case RISCV::SRLIW:
1535+
case RISCV::DIVW:
1536+
case RISCV::DIVUW:
1537+
case RISCV::REMW:
1538+
case RISCV::REMUW:
1539+
case RISCV::ROLW:
1540+
case RISCV::RORW:
1541+
case RISCV::RORIW:
1542+
case RISCV::CLZW:
1543+
case RISCV::CTZW:
1544+
case RISCV::CPOPW:
1545+
case RISCV::SLLIUW:
1546+
if (Bits < 32)
1547+
return false;
1548+
break;
1549+
case RISCV::SLLI:
1550+
// SLLI only uses the lower (XLen - ShAmt) bits.
1551+
// Operand 1 of the user is read as a constant and used as ShAmt here.
if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
1552+
return false;
1553+
break;
1554+
case RISCV::ADDUW:
1555+
case RISCV::SH1ADDUW:
1556+
case RISCV::SH2ADDUW:
1557+
case RISCV::SH3ADDUW:
1558+
// The first operand to add.uw/shXadd.uw is implicitly zero extended from
1559+
// 32 bits.
1560+
// A use in any other operand slot is rejected.
if (UI.getOperandNo() != 0 || Bits < 32)
1561+
return false;
1562+
break;
1563+
// Stores: only a use in operand slot 0 is accepted, with a width threshold
// of 8/16/32 for SB/SH/SW. Presumably operand 0 is the value being stored
// and the remaining operands form the address -- confirm against the
// instruction definitions.
case RISCV::SB:
1564+
if (UI.getOperandNo() != 0 || Bits < 8)
1565+
return false;
1566+
break;
1567+
case RISCV::SH:
1568+
if (UI.getOperandNo() != 0 || Bits < 16)
1569+
return false;
1570+
break;
1571+
case RISCV::SW:
1572+
if (UI.getOperandNo() != 0 || Bits < 32)
1573+
return false;
1574+
break;
1575+
}
1576+
}
1577+
1578+
// No use was rejected (this is also reached when Node has no uses).
return true;
1579+
}
1580+
14991581
// Select VL as a 5 bit immediate or a value that will become a register. This
15001582
// allows us to choose between VSETIVLI or VSETVLI later.
15011583
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
5858
bool selectSExti32(SDValue N, SDValue &Val);
5959
bool selectZExti32(SDValue N, SDValue &Val);
6060

61+
// Returns true if all users of Node only consume its lower Bits bits.
bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
62+
// Convenience wrapper: hasAllNBitUsers with Bits fixed to 32.
bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
63+
6164
bool selectVLOp(SDValue N, SDValue &VL);
6265

6366
bool selectVSplat(SDValue N, SDValue &SplatVal);

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,14 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
12541254
(SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
12551255
}
12561256

1257+
// PatFrag to allow ADDW/ADDIW/SUBW/MULW/SLLIW to be selected from i64
1258+
// add/sub/mul/shl if only the lower 32 bits of their result are used.
1259+
// The fragment matches a two-operand `operator` node and accepts it only when
// the C++ predicate below (hasAllWUsers on the matched node) returns true.
class overflowingbinopw<SDPatternOperator operator>
1260+
: PatFrag<(ops node:$lhs, node:$rhs),
1261+
(operator node:$lhs, node:$rhs), [{
1262+
return hasAllWUsers(Node);
1263+
}]>;
1264+
12571265
let Predicates = [IsRV64] in {
12581266

12591267
/// sext and zext
@@ -1283,6 +1291,13 @@ def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
12831291
def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
12841292
def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
12851293

1294+
// Select W instructions without sext_inreg if only the lower 32 bits of the
1295+
// result are used.
1296+
def : PatGprGpr<overflowingbinopw<add>, ADDW>;
1297+
def : PatGprSimm12<overflowingbinopw<add>, ADDIW>;
1298+
def : PatGprGpr<overflowingbinopw<sub>, SUBW>;
1299+
def : PatGprImm<overflowingbinopw<shl>, SLLIW, uimm5>;
1300+
12861301
/// Loads
12871302

12881303
defm : LdPat<sextloadi32, LW, i64>;

llvm/lib/Target/RISCV/RISCVInstrInfoM.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ let Predicates = [HasStdExtM, IsRV64] in {
7575
def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
7676
(MULW GPR:$rs1, GPR:$rs2)>;
7777

78+
// Select W instructions without sext_inreg if only the lower 32-bits of the
79+
// result are used.
80+
def : PatGprGpr<overflowingbinopw<mul>, MULW>;
81+
7882
def : PatGprGpr<riscv_divw, DIVW>;
7983
def : PatGprGpr<riscv_divuw, DIVUW>;
8084
def : PatGprGpr<riscv_remuw, REMUW>;

llvm/test/CodeGen/RISCV/add-before-shl.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
2121
;
2222
; RV64I-LABEL: add_small_const:
2323
; RV64I: # %bb.0:
24-
; RV64I-NEXT: addi a0, a0, 1
24+
; RV64I-NEXT: addiw a0, a0, 1
2525
; RV64I-NEXT: slli a0, a0, 56
2626
; RV64I-NEXT: srai a0, a0, 56
2727
; RV64I-NEXT: jalr zero, 0(ra)
@@ -35,7 +35,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
3535
;
3636
; RV64C-LABEL: add_small_const:
3737
; RV64C: # %bb.0:
38-
; RV64C-NEXT: c.addi a0, 1
38+
; RV64C-NEXT: c.addiw a0, 1
3939
; RV64C-NEXT: c.slli a0, 56
4040
; RV64C-NEXT: c.srai a0, 56
4141
; RV64C-NEXT: c.jr ra
@@ -75,7 +75,7 @@ define signext i32 @add_large_const(i32 signext %a) nounwind {
7575
; RV64C: # %bb.0:
7676
; RV64C-NEXT: c.lui a1, 1
7777
; RV64C-NEXT: c.addiw a1, -1
78-
; RV64C-NEXT: c.add a0, a1
78+
; RV64C-NEXT: c.addw a0, a1
7979
; RV64C-NEXT: c.slli a0, 48
8080
; RV64C-NEXT: c.srai a0, 48
8181
; RV64C-NEXT: c.jr ra
@@ -115,7 +115,7 @@ define signext i32 @add_huge_const(i32 signext %a) nounwind {
115115
; RV64C: # %bb.0:
116116
; RV64C-NEXT: c.lui a1, 8
117117
; RV64C-NEXT: c.addiw a1, -1
118-
; RV64C-NEXT: c.add a0, a1
118+
; RV64C-NEXT: c.addw a0, a1
119119
; RV64C-NEXT: c.slli a0, 48
120120
; RV64C-NEXT: c.srai a0, 48
121121
; RV64C-NEXT: c.jr ra
@@ -135,7 +135,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
135135
;
136136
; RV64I-LABEL: add_non_machine_type:
137137
; RV64I: # %bb.0:
138-
; RV64I-NEXT: addi a0, a0, 256
138+
; RV64I-NEXT: addiw a0, a0, 256
139139
; RV64I-NEXT: slli a0, a0, 52
140140
; RV64I-NEXT: srai a0, a0, 40
141141
; RV64I-NEXT: jalr zero, 0(ra)
@@ -149,7 +149,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
149149
;
150150
; RV64C-LABEL: add_non_machine_type:
151151
; RV64C: # %bb.0:
152-
; RV64C-NEXT: addi a0, a0, 256
152+
; RV64C-NEXT: addiw a0, a0, 256
153153
; RV64C-NEXT: c.slli a0, 52
154154
; RV64C-NEXT: c.srai a0, 40
155155
; RV64C-NEXT: c.jr ra

llvm/test/CodeGen/RISCV/add-imm.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,9 @@ define signext i32 @add32_sext_reject_on_rv64(i32 signext %a) nounwind {
180180
; RV64I: # %bb.0:
181181
; RV64I-NEXT: lui a1, 1
182182
; RV64I-NEXT: addiw a1, a1, -1096
183-
; RV64I-NEXT: add a2, a0, a1
184-
; RV64I-NEXT: lui a3, %hi(gv0)
185183
; RV64I-NEXT: addw a0, a0, a1
186-
; RV64I-NEXT: sw a2, %lo(gv0)(a3)
184+
; RV64I-NEXT: lui a1, %hi(gv0)
185+
; RV64I-NEXT: sw a0, %lo(gv0)(a1)
187186
; RV64I-NEXT: ret
188187
%b = add nsw i32 %a, 3000
189188
store i32 %b, i32* @gv0, align 4
@@ -234,8 +233,8 @@ define void @add32_reject() nounwind {
234233
; RV64I-NEXT: lw a3, %lo(gb)(a2)
235234
; RV64I-NEXT: lui a4, 1
236235
; RV64I-NEXT: addiw a4, a4, -1096
237-
; RV64I-NEXT: add a1, a1, a4
238-
; RV64I-NEXT: add a3, a3, a4
236+
; RV64I-NEXT: addw a1, a1, a4
237+
; RV64I-NEXT: addw a3, a3, a4
239238
; RV64I-NEXT: sw a1, %lo(ga)(a0)
240239
; RV64I-NEXT: sw a3, %lo(gb)(a2)
241240
; RV64I-NEXT: ret

llvm/test/CodeGen/RISCV/addimm-mulimm.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define signext i32 @add_mul_trans_accept_1(i32 %x) {
1818
; RV64IM-LABEL: add_mul_trans_accept_1:
1919
; RV64IM: # %bb.0:
2020
; RV64IM-NEXT: addi a1, zero, 11
21-
; RV64IM-NEXT: mul a0, a0, a1
21+
; RV64IM-NEXT: mulw a0, a0, a1
2222
; RV64IM-NEXT: addiw a0, a0, 407
2323
; RV64IM-NEXT: ret
2424
%tmp0 = add i32 %x, 37
@@ -39,7 +39,7 @@ define signext i32 @add_mul_trans_accept_2(i32 %x) {
3939
; RV64IM-LABEL: add_mul_trans_accept_2:
4040
; RV64IM: # %bb.0:
4141
; RV64IM-NEXT: addi a1, zero, 13
42-
; RV64IM-NEXT: mul a0, a0, a1
42+
; RV64IM-NEXT: mulw a0, a0, a1
4343
; RV64IM-NEXT: lui a1, 28
4444
; RV64IM-NEXT: addiw a1, a1, 1701
4545
; RV64IM-NEXT: addw a0, a0, a1
@@ -62,7 +62,7 @@ define signext i32 @add_mul_trans_reject_1(i32 %x) {
6262
; RV64IM-LABEL: add_mul_trans_reject_1:
6363
; RV64IM: # %bb.0:
6464
; RV64IM-NEXT: addi a1, zero, 19
65-
; RV64IM-NEXT: mul a0, a0, a1
65+
; RV64IM-NEXT: mulw a0, a0, a1
6666
; RV64IM-NEXT: lui a1, 9
6767
; RV64IM-NEXT: addiw a1, a1, 585
6868
; RV64IM-NEXT: addw a0, a0, a1
@@ -87,7 +87,7 @@ define signext i32 @add_mul_trans_reject_2(i32 %x) {
8787
; RV64IM: # %bb.0:
8888
; RV64IM-NEXT: lui a1, 792
8989
; RV64IM-NEXT: addiw a1, a1, -1709
90-
; RV64IM-NEXT: mul a0, a0, a1
90+
; RV64IM-NEXT: mulw a0, a0, a1
9191
; RV64IM-NEXT: lui a1, 1014660
9292
; RV64IM-NEXT: addiw a1, a1, -1891
9393
; RV64IM-NEXT: addw a0, a0, a1

0 commit comments

Comments
 (0)