Skip to content

Commit 847235b

Browse files
authored
[RISCV] Add DAG combine to turn (sub (shl X, 8), X) into orc.b (#96680)
The combine applies when only bits 0, 8, 16, 24, etc. of X (the least-significant bit of each byte) can be non-zero. This is the form that (mul X, 255) is decomposed into; the decomposition happens early, before the RISC-V DAG combine runs. This patch does not support types larger than XLen, so i64 on rv32 fails to generate two orc.b instructions. It might have worked if the mul had not been decomposed before it was expanded. Partial fix for #96595.
1 parent e214ed9 commit 847235b

File tree

3 files changed

+163
-2
lines changed

3 files changed

+163
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12502,12 +12502,15 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1250212502
}
1250312503
break;
1250412504
}
12505-
case RISCVISD::BREV8: {
12505+
case RISCVISD::BREV8:
12506+
case RISCVISD::ORC_B: {
1250612507
MVT VT = N->getSimpleValueType(0);
1250712508
MVT XLenVT = Subtarget.getXLenVT();
1250812509
assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1250912510
"Unexpected custom legalisation");
12510-
assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12511+
assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12512+
(N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12513+
"Unexpected extension");
1251112514
SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
1251212515
SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
1251312516
// ReplaceNodeResults requires we maintain the same type for the return
@@ -13345,6 +13348,35 @@ static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
1334513348
return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
1334613349
}
1334713350

13351+
// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13352+
// non-zero. Replace with orc.b.
13353+
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13354+
const RISCVSubtarget &Subtarget) {
13355+
if (!Subtarget.hasStdExtZbb())
13356+
return SDValue();
13357+
13358+
EVT VT = N->getValueType(0);
13359+
13360+
if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13361+
return SDValue();
13362+
13363+
SDValue N0 = N->getOperand(0);
13364+
SDValue N1 = N->getOperand(1);
13365+
13366+
if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13367+
return SDValue();
13368+
13369+
auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13370+
if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13371+
return SDValue();
13372+
13373+
APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13374+
if (!DAG.MaskedValueIsZero(N1, Mask))
13375+
return SDValue();
13376+
13377+
return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13378+
}
13379+
1334813380
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
1334913381
const RISCVSubtarget &Subtarget) {
1335013382
if (SDValue V = combineSubOfBoolean(N, DAG))
@@ -13367,6 +13399,8 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
1336713399

1336813400
if (SDValue V = combineBinOpOfZExt(N, DAG))
1336913401
return V;
13402+
if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13403+
return V;
1337013404

1337113405
// fold (sub x, (select lhs, rhs, cc, 0, y)) ->
1337213406
// (select lhs, rhs, cc, x, (sub x, y))

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,3 +1356,64 @@ define i64 @bswap_i64(i64 %a) {
13561356
%1 = call i64 @llvm.bswap.i64(i64 %a)
13571357
ret i64 %1
13581358
}
1359+
1360+
; (mul nuw X, 255) on i16 with X masked to the low bit of each byte
; (257 = 0x0101). The RV32ZBB checks expect a single orc.b after the mask; the
; RV32I checks expect the slli + sub sequence instead.
define i16 @orc_b_i16(i16 %a) {
1361+
; RV32I-LABEL: orc_b_i16:
1362+
; RV32I: # %bb.0:
1363+
; RV32I-NEXT: andi a0, a0, 257
1364+
; RV32I-NEXT: slli a1, a0, 8
1365+
; RV32I-NEXT: sub a0, a1, a0
1366+
; RV32I-NEXT: ret
1367+
;
1368+
; RV32ZBB-LABEL: orc_b_i16:
1369+
; RV32ZBB: # %bb.0:
1370+
; RV32ZBB-NEXT: andi a0, a0, 257
1371+
; RV32ZBB-NEXT: orc.b a0, a0
1372+
; RV32ZBB-NEXT: ret
1373+
%1 = and i16 %a, 257
1374+
%2 = mul nuw i16 %1, 255
1375+
ret i16 %2
1376+
}
1377+
1378+
; (mul nuw X, 255) on i32 with X masked to the low bit of each byte
; (16843009 = 0x01010101). The RV32ZBB checks expect a single orc.b after the
; mask; the RV32I checks expect the slli + sub sequence instead.
define i32 @orc_b_i32(i32 %a) {
1379+
; RV32I-LABEL: orc_b_i32:
1380+
; RV32I: # %bb.0:
1381+
; RV32I-NEXT: lui a1, 4112
1382+
; RV32I-NEXT: addi a1, a1, 257
1383+
; RV32I-NEXT: and a0, a0, a1
1384+
; RV32I-NEXT: slli a1, a0, 8
1385+
; RV32I-NEXT: sub a0, a1, a0
1386+
; RV32I-NEXT: ret
1387+
;
1388+
; RV32ZBB-LABEL: orc_b_i32:
1389+
; RV32ZBB: # %bb.0:
1390+
; RV32ZBB-NEXT: lui a1, 4112
1391+
; RV32ZBB-NEXT: addi a1, a1, 257
1392+
; RV32ZBB-NEXT: and a0, a0, a1
1393+
; RV32ZBB-NEXT: orc.b a0, a0
1394+
; RV32ZBB-NEXT: ret
1395+
%1 = and i32 %a, 16843009
1396+
%2 = mul nuw i32 %1, 255
1397+
ret i32 %2
1398+
}
1399+
1400+
; (mul nuw X, 255) on i64 with X masked to the low bit of each byte
; (72340172838076673 = 0x0101010101010101). i64 is wider than the 32-bit XLen,
; which the orc.b combine does not support, so the checks (shared CHECK prefix)
; expect the expanded two-register shift/sub sequence and no orc.b.
define i64 @orc_b_i64(i64 %a) {
1401+
; CHECK-LABEL: orc_b_i64:
1402+
; CHECK: # %bb.0:
1403+
; CHECK-NEXT: lui a2, 4112
1404+
; CHECK-NEXT: addi a2, a2, 257
1405+
; CHECK-NEXT: and a1, a1, a2
1406+
; CHECK-NEXT: and a0, a0, a2
1407+
; CHECK-NEXT: slli a2, a0, 8
1408+
; CHECK-NEXT: sltu a3, a2, a0
1409+
; CHECK-NEXT: srli a4, a0, 24
1410+
; CHECK-NEXT: slli a5, a1, 8
1411+
; CHECK-NEXT: or a4, a5, a4
1412+
; CHECK-NEXT: sub a1, a4, a1
1413+
; CHECK-NEXT: sub a1, a1, a3
1414+
; CHECK-NEXT: sub a0, a2, a0
1415+
; CHECK-NEXT: ret
1416+
%1 = and i64 %a, 72340172838076673
1417+
%2 = mul nuw i64 %1, 255
1418+
ret i64 %2
1419+
}

llvm/test/CodeGen/RISCV/rv64zbb.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1494,3 +1494,69 @@ define i64 @bswap_i64(i64 %a) {
14941494
%1 = call i64 @llvm.bswap.i64(i64 %a)
14951495
ret i64 %1
14961496
}
1497+
1498+
; (mul nuw X, 255) on i16 with X masked to the low bit of each byte
; (257 = 0x0101). The RV64ZBB checks expect a single orc.b after the mask; the
; RV64I checks expect the slli + sub sequence instead.
define i16 @orc_b_i16(i16 %a) {
1499+
; RV64I-LABEL: orc_b_i16:
1500+
; RV64I: # %bb.0:
1501+
; RV64I-NEXT: andi a0, a0, 257
1502+
; RV64I-NEXT: slli a1, a0, 8
1503+
; RV64I-NEXT: sub a0, a1, a0
1504+
; RV64I-NEXT: ret
1505+
;
1506+
; RV64ZBB-LABEL: orc_b_i16:
1507+
; RV64ZBB: # %bb.0:
1508+
; RV64ZBB-NEXT: andi a0, a0, 257
1509+
; RV64ZBB-NEXT: orc.b a0, a0
1510+
; RV64ZBB-NEXT: ret
1511+
%1 = and i16 %a, 257
1512+
%2 = mul nuw i16 %1, 255
1513+
ret i16 %2
1514+
}
1515+
1516+
; (mul nuw X, 255) on i32 with X masked to the low bit of each byte
; (16843009 = 0x01010101). The RV64ZBB checks expect a single orc.b after the
; mask; the RV64I checks expect the slli + subw sequence instead.
define i32 @orc_b_i32(i32 %a) {
1517+
; RV64I-LABEL: orc_b_i32:
1518+
; RV64I: # %bb.0:
1519+
; RV64I-NEXT: lui a1, 4112
1520+
; RV64I-NEXT: addi a1, a1, 257
1521+
; RV64I-NEXT: and a0, a0, a1
1522+
; RV64I-NEXT: slli a1, a0, 8
1523+
; RV64I-NEXT: subw a0, a1, a0
1524+
; RV64I-NEXT: ret
1525+
;
1526+
; RV64ZBB-LABEL: orc_b_i32:
1527+
; RV64ZBB: # %bb.0:
1528+
; RV64ZBB-NEXT: lui a1, 4112
1529+
; RV64ZBB-NEXT: addiw a1, a1, 257
1530+
; RV64ZBB-NEXT: and a0, a0, a1
1531+
; RV64ZBB-NEXT: orc.b a0, a0
1532+
; RV64ZBB-NEXT: ret
1533+
%1 = and i32 %a, 16843009
1534+
%2 = mul nuw i32 %1, 255
1535+
ret i32 %2
1536+
}
1537+
1538+
; (mul nuw X, 255) on i64 with X masked to the low bit of each byte
; (72340172838076673 = 0x0101010101010101). The RV64ZBB checks expect a single
; orc.b after the mask; the RV64I checks expect the slli + sub sequence instead.
define i64 @orc_b_i64(i64 %a) {
1539+
; RV64I-LABEL: orc_b_i64:
1540+
; RV64I: # %bb.0:
1541+
; RV64I-NEXT: lui a1, 4112
1542+
; RV64I-NEXT: addiw a1, a1, 257
1543+
; RV64I-NEXT: slli a2, a1, 32
1544+
; RV64I-NEXT: add a1, a1, a2
1545+
; RV64I-NEXT: and a0, a0, a1
1546+
; RV64I-NEXT: slli a1, a0, 8
1547+
; RV64I-NEXT: sub a0, a1, a0
1548+
; RV64I-NEXT: ret
1549+
;
1550+
; RV64ZBB-LABEL: orc_b_i64:
1551+
; RV64ZBB: # %bb.0:
1552+
; RV64ZBB-NEXT: lui a1, 4112
1553+
; RV64ZBB-NEXT: addiw a1, a1, 257
1554+
; RV64ZBB-NEXT: slli a2, a1, 32
1555+
; RV64ZBB-NEXT: add a1, a1, a2
1556+
; RV64ZBB-NEXT: and a0, a0, a1
1557+
; RV64ZBB-NEXT: orc.b a0, a0
1558+
; RV64ZBB-NEXT: ret
1559+
%1 = and i64 %a, 72340172838076673
1560+
%2 = mul nuw i64 %1, 255
1561+
ret i64 %2
1562+
}

0 commit comments

Comments
 (0)