Skip to content

Commit 4bcd818

Browse files
authored
[TargetLowering] Pull similar code out of the forceExpandWideMUL into a helper. NFC (#124371)
These functions have similar code. One of them calculates the 2x width full product from 2 sources. The other calculates the product from 2 sources that have low and high halves. This patch introduces a new function that takes HiLHS and HiRHS as optional values. If they are not null, they will be used in the calculation of the Hi half. The Signed flag can only be set when HiLHS/HiRHS are null.
1 parent 2655ae5 commit 4bcd818

File tree

2 files changed

+108
-87
lines changed

2 files changed

+108
-87
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5503,6 +5503,15 @@ class TargetLowering : public TargetLoweringBase {
55035503
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
55045504
SelectionDAG &DAG) const;
55055505

5506+
/// Calculate the product twice the width of LHS and RHS. If HiLHS/HiRHS are
5507+
/// non-null they will be included in the multiplication. The expansion works
5508+
/// by splitting the 2 inputs into 4 pieces that we can multiply and add
5509+
/// together without needing MULH or MUL_LOHI.
5510+
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5511+
SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS,
5512+
SDValue HiLHS = SDValue(),
5513+
SDValue HiRHS = SDValue()) const;
5514+
55065515
/// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or
55075516
/// brute force via a wide multiplication. The expansion works by
55085517
/// attempting to do a multiplication on a wider type twice the size of the

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 99 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -10857,6 +10857,64 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
1085710857
return DAG.getSelect(dl, VT, Cond, SatVal, Result);
1085810858
}
1085910859

10860+
void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
10861+
bool Signed, SDValue &Lo, SDValue &Hi,
10862+
SDValue LHS, SDValue RHS,
10863+
SDValue HiLHS, SDValue HiRHS) const {
10864+
EVT VT = LHS.getValueType();
10865+
assert(RHS.getValueType() == VT && "Mismatching operand types");
10866+
10867+
assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
10868+
assert((!Signed || !HiLHS) &&
10869+
"Signed flag should only be set when HiLHS and RiRHS are null");
10870+
10871+
// We'll expand the multiplication by brute force because we have no other
10872+
// options. This is a trivially-generalized version of the code from
10873+
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10874+
// 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
10875+
// sign bits while calculating the Hi half.
10876+
unsigned Bits = VT.getSizeInBits();
10877+
unsigned HalfBits = Bits / 2;
10878+
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10879+
SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
10880+
SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
10881+
10882+
SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
10883+
SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10884+
10885+
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10886+
// This is always an unsigned shift.
10887+
SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10888+
10889+
unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
10890+
SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
10891+
SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
10892+
10893+
SDValue U =
10894+
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
10895+
SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10896+
SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
10897+
10898+
SDValue V =
10899+
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
10900+
SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
10901+
10902+
Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10903+
DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10904+
10905+
Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
10906+
DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10907+
10908+
// If HiLHS and HiRHS are set, multiply them by the opposite low part and add
10909+
// the products to Hi.
10910+
if (HiLHS) {
10911+
Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
10912+
DAG.getNode(ISD::ADD, dl, VT,
10913+
DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
10914+
DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
10915+
}
10916+
}
10917+
1086010918
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
1086110919
bool Signed, EVT WideVT,
1086210920
const SDValue LL, const SDValue LH,
@@ -10877,45 +10935,7 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
1087710935
LC = RTLIB::MUL_I128;
1087810936

1087910937
if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10880-
// We'll expand the multiplication by brute force because we have no other
10881-
// options. This is a trivially-generalized version of the code from
10882-
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10883-
// 4.3.1).
10884-
EVT VT = LL.getValueType();
10885-
unsigned Bits = VT.getSizeInBits();
10886-
unsigned HalfBits = Bits >> 1;
10887-
SDValue Mask =
10888-
DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10889-
SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10890-
SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10891-
10892-
SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10893-
SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10894-
10895-
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10896-
SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10897-
SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10898-
SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10899-
10900-
SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10901-
DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10902-
SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10903-
SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10904-
10905-
SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10906-
DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10907-
SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10908-
10909-
SDValue W =
10910-
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10911-
DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10912-
Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10913-
DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10914-
10915-
Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10916-
DAG.getNode(ISD::ADD, dl, VT,
10917-
DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10918-
DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10938+
forceExpandMultiply(DAG, dl, /*Signed=*/false, Lo, Hi, LL, RL, LH, RH);
1091910939
} else {
1092010940
// Attempt a libcall.
1092110941
SDValue Ret;
@@ -10965,58 +10985,50 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
1096510985
else if (WideVT == MVT::i128)
1096610986
LC = RTLIB::MUL_I128;
1096710987

10968-
if (LC != RTLIB::UNKNOWN_LIBCALL && getLibcallName(LC)) {
10969-
SDValue HiLHS, HiRHS;
10970-
if (Signed) {
10971-
// The high part is obtained by SRA'ing all but one of the bits of low
10972-
// part.
10973-
unsigned LoSize = VT.getFixedSizeInBits();
10974-
SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
10975-
HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
10976-
HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
10977-
} else {
10978-
HiLHS = DAG.getConstant(0, dl, VT);
10979-
HiRHS = DAG.getConstant(0, dl, VT);
10980-
}
10981-
forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10988+
if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10989+
forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
1098210990
return;
1098310991
}
1098410992

10985-
// Expand the multiplication by brute force. This is a generalized-version of
10986-
// the code from Hacker's Delight (itself derived from Knuth's Algorithm M
10987-
// from section 4.3.1) combined with the Hacker's delight code
10988-
// for calculating mulhs.
10989-
unsigned Bits = VT.getSizeInBits();
10990-
unsigned HalfBits = Bits / 2;
10991-
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10992-
SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
10993-
SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
10994-
10995-
SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
10996-
SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10997-
10998-
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10999-
// This is always an unsigned shift.
11000-
SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11001-
11002-
unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11003-
SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11004-
SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11005-
11006-
SDValue U =
11007-
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11008-
SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11009-
SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11010-
11011-
SDValue V =
11012-
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11013-
SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11014-
11015-
Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11016-
DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10993+
SDValue HiLHS, HiRHS;
10994+
if (Signed) {
10995+
// The high part is obtained by SRA'ing all but one of the bits of low
10996+
// part.
10997+
unsigned LoSize = VT.getFixedSizeInBits();
10998+
SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
10999+
HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11000+
HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11001+
} else {
11002+
HiLHS = DAG.getConstant(0, dl, VT);
11003+
HiRHS = DAG.getConstant(0, dl, VT);
11004+
}
1101711005

11018-
Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11019-
DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11006+
// Attempt a libcall.
11007+
SDValue Ret;
11008+
TargetLowering::MakeLibCallOptions CallOptions;
11009+
CallOptions.setIsSigned(Signed);
11010+
CallOptions.setIsPostTypeLegalization(true);
11011+
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
11012+
// Halves of WideVT are packed into registers in different order
11013+
// depending on platform endianness. This is usually handled by
11014+
// the C calling convention, but we can't defer to it in
11015+
// the legalizer.
11016+
SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11017+
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11018+
} else {
11019+
SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11020+
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11021+
}
11022+
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11023+
"Ret value is a collection of constituent nodes holding result.");
11024+
if (DAG.getDataLayout().isLittleEndian()) {
11025+
// Same as above.
11026+
Lo = Ret.getOperand(0);
11027+
Hi = Ret.getOperand(1);
11028+
} else {
11029+
Lo = Ret.getOperand(1);
11030+
Hi = Ret.getOperand(0);
11031+
}
1102011032
}
1102111033

1102211034
SDValue

0 commit comments

Comments
 (0)