Skip to content

Commit 2b06ff5

Browse files
authored
[RISCV] Expand mul to shNadd x, (slli x, c) in DAGCombine (#88524)
This expansion is directly inspired by the analogous code in the x86 backend for LEA. shXadd and (this sub-case of) LEA are largely equivalent. This is an alternative to #87105. This expansion is also supported via the decomposeMulByConstant callback, but it is restricted there because of interactions with other combines, since that code runs before legalization. As discussed in the other review, my original plan had been to support post-legalization expansion through the same interface, but that ended up being more complicated than seems justified. Instead, let's go ahead and do the general expansion post-legalize. Other targets use the combine approach, and matching that structure makes it easier for us to adapt ideas from other targets to RISCV.
1 parent c50f7e9 commit 2b06ff5

File tree

5 files changed

+153
-52
lines changed

5 files changed

+153
-52
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13363,10 +13363,56 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1336313363
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
1336413364
}
1336513365

13366-
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
13366+
// Try to expand a scalar multiply to a faster sequence.
13367+
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13368+
TargetLowering::DAGCombinerInfo &DCI,
13369+
const RISCVSubtarget &Subtarget) {
13370+
1336713371
EVT VT = N->getValueType(0);
13368-
if (!VT.isVector())
13372+
13373+
// LI + MUL is usually smaller than the alternative sequence.
13374+
if (DAG.getMachineFunction().getFunction().hasMinSize())
13375+
return SDValue();
13376+
13377+
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13378+
return SDValue();
13379+
13380+
if (VT != Subtarget.getXLenVT())
13381+
return SDValue();
13382+
13383+
if (!Subtarget.hasStdExtZba())
13384+
return SDValue();
13385+
13386+
ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13387+
if (!CNode)
1336913388
return SDValue();
13389+
uint64_t MulAmt = CNode->getZExtValue();
13390+
13391+
// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13392+
// shXadd. First check if this is a sum of two powers of 2 because that's
13393+
// easy. Then count how many zeros there are below the lowest set bit.
13394+
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13395+
unsigned ScaleShift = llvm::countr_zero(MulAmt);
13396+
if (ScaleShift >= 1 && ScaleShift < 4) {
13397+
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13398+
SDLoc DL(N);
13399+
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13400+
DAG.getConstant(ShiftAmt, DL, VT));
13401+
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13402+
DAG.getConstant(ScaleShift, DL, VT));
13403+
return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
13404+
}
13405+
}
13406+
return SDValue();
13407+
}
13408+
13409+
13410+
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13411+
TargetLowering::DAGCombinerInfo &DCI,
13412+
const RISCVSubtarget &Subtarget) {
13413+
EVT VT = N->getValueType(0);
13414+
if (!VT.isVector())
13415+
return expandMul(N, DAG, DCI, Subtarget);
1337013416

1337113417
SDLoc DL(N);
1337213418
SDValue N0 = N->getOperand(0);
@@ -15913,7 +15959,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1591315959
case ISD::MUL:
1591415960
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
1591515961
return V;
15916-
return performMULCombine(N, DAG);
15962+
return performMULCombine(N, DAG, DCI, Subtarget);
1591715963
case ISD::SDIV:
1591815964
case ISD::UDIV:
1591915965
case ISD::SREM:

llvm/test/CodeGen/RISCV/addimm-mulimm.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -551,8 +551,9 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
551551
; RV32IMB-NEXT: sh3add a1, a1, a2
552552
; RV32IMB-NEXT: sh1add a0, a0, a0
553553
; RV32IMB-NEXT: slli a2, a0, 3
554-
; RV32IMB-NEXT: addi a0, a2, 2047
555-
; RV32IMB-NEXT: addi a0, a0, 1
554+
; RV32IMB-NEXT: li a3, 1
555+
; RV32IMB-NEXT: slli a3, a3, 11
556+
; RV32IMB-NEXT: sh3add a0, a0, a3
556557
; RV32IMB-NEXT: sltu a2, a0, a2
557558
; RV32IMB-NEXT: add a1, a1, a2
558559
; RV32IMB-NEXT: ret
@@ -561,8 +562,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
561562
; RV64IMB: # %bb.0:
562563
; RV64IMB-NEXT: addi a0, a0, 86
563564
; RV64IMB-NEXT: sh1add a0, a0, a0
564-
; RV64IMB-NEXT: li a1, -16
565-
; RV64IMB-NEXT: sh3add a0, a0, a1
565+
; RV64IMB-NEXT: slli a0, a0, 3
566+
; RV64IMB-NEXT: addi a0, a0, -16
566567
; RV64IMB-NEXT: ret
567568
%tmp0 = mul i64 %x, 24
568569
%tmp1 = add i64 %tmp0, 2048

llvm/test/CodeGen/RISCV/rv32zba.ll

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -271,31 +271,49 @@ define i32 @mul288(i32 %a) {
271271
}
272272

273273
define i32 @mul258(i32 %a) {
274-
; CHECK-LABEL: mul258:
275-
; CHECK: # %bb.0:
276-
; CHECK-NEXT: li a1, 258
277-
; CHECK-NEXT: mul a0, a0, a1
278-
; CHECK-NEXT: ret
274+
; RV32I-LABEL: mul258:
275+
; RV32I: # %bb.0:
276+
; RV32I-NEXT: li a1, 258
277+
; RV32I-NEXT: mul a0, a0, a1
278+
; RV32I-NEXT: ret
279+
;
280+
; RV32ZBA-LABEL: mul258:
281+
; RV32ZBA: # %bb.0:
282+
; RV32ZBA-NEXT: slli a1, a0, 8
283+
; RV32ZBA-NEXT: sh1add a0, a0, a1
284+
; RV32ZBA-NEXT: ret
279285
%c = mul i32 %a, 258
280286
ret i32 %c
281287
}
282288

283289
define i32 @mul260(i32 %a) {
284-
; CHECK-LABEL: mul260:
285-
; CHECK: # %bb.0:
286-
; CHECK-NEXT: li a1, 260
287-
; CHECK-NEXT: mul a0, a0, a1
288-
; CHECK-NEXT: ret
290+
; RV32I-LABEL: mul260:
291+
; RV32I: # %bb.0:
292+
; RV32I-NEXT: li a1, 260
293+
; RV32I-NEXT: mul a0, a0, a1
294+
; RV32I-NEXT: ret
295+
;
296+
; RV32ZBA-LABEL: mul260:
297+
; RV32ZBA: # %bb.0:
298+
; RV32ZBA-NEXT: slli a1, a0, 8
299+
; RV32ZBA-NEXT: sh2add a0, a0, a1
300+
; RV32ZBA-NEXT: ret
289301
%c = mul i32 %a, 260
290302
ret i32 %c
291303
}
292304

293305
define i32 @mul264(i32 %a) {
294-
; CHECK-LABEL: mul264:
295-
; CHECK: # %bb.0:
296-
; CHECK-NEXT: li a1, 264
297-
; CHECK-NEXT: mul a0, a0, a1
298-
; CHECK-NEXT: ret
306+
; RV32I-LABEL: mul264:
307+
; RV32I: # %bb.0:
308+
; RV32I-NEXT: li a1, 264
309+
; RV32I-NEXT: mul a0, a0, a1
310+
; RV32I-NEXT: ret
311+
;
312+
; RV32ZBA-LABEL: mul264:
313+
; RV32ZBA: # %bb.0:
314+
; RV32ZBA-NEXT: slli a1, a0, 8
315+
; RV32ZBA-NEXT: sh3add a0, a0, a1
316+
; RV32ZBA-NEXT: ret
299317
%c = mul i32 %a, 264
300318
ret i32 %c
301319
}

llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -811,31 +811,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
811811
}
812812

813813
define i64 @mul258(i64 %a) {
814-
; CHECK-LABEL: mul258:
815-
; CHECK: # %bb.0:
816-
; CHECK-NEXT: li a1, 258
817-
; CHECK-NEXT: mul a0, a0, a1
818-
; CHECK-NEXT: ret
814+
; RV64I-LABEL: mul258:
815+
; RV64I: # %bb.0:
816+
; RV64I-NEXT: li a1, 258
817+
; RV64I-NEXT: mul a0, a0, a1
818+
; RV64I-NEXT: ret
819+
;
820+
; RV64ZBA-LABEL: mul258:
821+
; RV64ZBA: # %bb.0:
822+
; RV64ZBA-NEXT: slli a1, a0, 8
823+
; RV64ZBA-NEXT: sh1add a0, a0, a1
824+
; RV64ZBA-NEXT: ret
819825
%c = mul i64 %a, 258
820826
ret i64 %c
821827
}
822828

823829
define i64 @mul260(i64 %a) {
824-
; CHECK-LABEL: mul260:
825-
; CHECK: # %bb.0:
826-
; CHECK-NEXT: li a1, 260
827-
; CHECK-NEXT: mul a0, a0, a1
828-
; CHECK-NEXT: ret
830+
; RV64I-LABEL: mul260:
831+
; RV64I: # %bb.0:
832+
; RV64I-NEXT: li a1, 260
833+
; RV64I-NEXT: mul a0, a0, a1
834+
; RV64I-NEXT: ret
835+
;
836+
; RV64ZBA-LABEL: mul260:
837+
; RV64ZBA: # %bb.0:
838+
; RV64ZBA-NEXT: slli a1, a0, 8
839+
; RV64ZBA-NEXT: sh2add a0, a0, a1
840+
; RV64ZBA-NEXT: ret
829841
%c = mul i64 %a, 260
830842
ret i64 %c
831843
}
832844

833845
define i64 @mul264(i64 %a) {
834-
; CHECK-LABEL: mul264:
835-
; CHECK: # %bb.0:
836-
; CHECK-NEXT: li a1, 264
837-
; CHECK-NEXT: mul a0, a0, a1
838-
; CHECK-NEXT: ret
846+
; RV64I-LABEL: mul264:
847+
; RV64I: # %bb.0:
848+
; RV64I-NEXT: li a1, 264
849+
; RV64I-NEXT: mul a0, a0, a1
850+
; RV64I-NEXT: ret
851+
;
852+
; RV64ZBA-LABEL: mul264:
853+
; RV64ZBA: # %bb.0:
854+
; RV64ZBA-NEXT: slli a1, a0, 8
855+
; RV64ZBA-NEXT: sh3add a0, a0, a1
856+
; RV64ZBA-NEXT: ret
839857
%c = mul i64 %a, 264
840858
ret i64 %c
841859
}

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -834,31 +834,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
834834
}
835835

836836
define i64 @mul258(i64 %a) {
837-
; CHECK-LABEL: mul258:
838-
; CHECK: # %bb.0:
839-
; CHECK-NEXT: li a1, 258
840-
; CHECK-NEXT: mul a0, a0, a1
841-
; CHECK-NEXT: ret
837+
; RV64I-LABEL: mul258:
838+
; RV64I: # %bb.0:
839+
; RV64I-NEXT: li a1, 258
840+
; RV64I-NEXT: mul a0, a0, a1
841+
; RV64I-NEXT: ret
842+
;
843+
; RV64ZBA-LABEL: mul258:
844+
; RV64ZBA: # %bb.0:
845+
; RV64ZBA-NEXT: slli a1, a0, 8
846+
; RV64ZBA-NEXT: sh1add a0, a0, a1
847+
; RV64ZBA-NEXT: ret
842848
%c = mul i64 %a, 258
843849
ret i64 %c
844850
}
845851

846852
define i64 @mul260(i64 %a) {
847-
; CHECK-LABEL: mul260:
848-
; CHECK: # %bb.0:
849-
; CHECK-NEXT: li a1, 260
850-
; CHECK-NEXT: mul a0, a0, a1
851-
; CHECK-NEXT: ret
853+
; RV64I-LABEL: mul260:
854+
; RV64I: # %bb.0:
855+
; RV64I-NEXT: li a1, 260
856+
; RV64I-NEXT: mul a0, a0, a1
857+
; RV64I-NEXT: ret
858+
;
859+
; RV64ZBA-LABEL: mul260:
860+
; RV64ZBA: # %bb.0:
861+
; RV64ZBA-NEXT: slli a1, a0, 8
862+
; RV64ZBA-NEXT: sh2add a0, a0, a1
863+
; RV64ZBA-NEXT: ret
852864
%c = mul i64 %a, 260
853865
ret i64 %c
854866
}
855867

856868
define i64 @mul264(i64 %a) {
857-
; CHECK-LABEL: mul264:
858-
; CHECK: # %bb.0:
859-
; CHECK-NEXT: li a1, 264
860-
; CHECK-NEXT: mul a0, a0, a1
861-
; CHECK-NEXT: ret
869+
; RV64I-LABEL: mul264:
870+
; RV64I: # %bb.0:
871+
; RV64I-NEXT: li a1, 264
872+
; RV64I-NEXT: mul a0, a0, a1
873+
; RV64I-NEXT: ret
874+
;
875+
; RV64ZBA-LABEL: mul264:
876+
; RV64ZBA: # %bb.0:
877+
; RV64ZBA-NEXT: slli a1, a0, 8
878+
; RV64ZBA-NEXT: sh3add a0, a0, a1
879+
; RV64ZBA-NEXT: ret
862880
%c = mul i64 %a, 264
863881
ret i64 %c
864882
}

0 commit comments

Comments
 (0)