Skip to content

Commit 6ab984f

Browse files
committed
[SelectionDAG] Expand fixed point multiplication into libcall
32-bit ARMv6 with Thumb doesn't support MULHS/MUL_LOHI as legal/custom nodes during expansion, which causes fixed-point multiplication of _Accum types to fail when fixed-point arithmetic is used. Prior to this patch, fixed-point multiplication only worked because it was used on platforms that happen to support MULHS/MUL_LOHI. This patch checks whether the multiplication can instead be done via libcalls, which are provided by the ARM runtime. These libcall attempts are already made elsewhere, so this patch refactors that libcall logic into its own functions, which the fixed-point expansion then calls and reuses.
1 parent a551703 commit 6ab984f

File tree

7 files changed

+2031
-104
lines changed

7 files changed

+2031
-104
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5287,6 +5287,23 @@ class TargetLowering : public TargetLoweringBase {
52875287
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
52885288
SelectionDAG &DAG) const;
52895289

5290+
/// ForceExpandMUL - Unconditionally expand a MUL into either a libcall or
5291+
/// brute force involving many multiplications. The expansion works by
5292+
/// attempting to do a multiplication on a wider type twice the size of the
5293+
/// original operands. LL and LH represent the lower and upper halves of the
5294+
/// first operand. RL and RH represent the lower and upper halves of the
5295+
/// second operand. The lower and upper halves of the result are stored in Lo
5296+
/// and Hi.
5297+
void ForceExpandMUL(SelectionDAG &DAG, SDLoc dl, bool Signed, EVT WideVT,
5298+
const SDValue LL, const SDValue LH, const SDValue RL,
5299+
const SDValue RH, SDValue &Lo, SDValue &Hi) const;
5300+
5301+
/// Same as above, but creates the upper half of each operand by
5302+
/// sign/zero-extending the operands.
5303+
void ForceExpandMUL(SelectionDAG &DAG, SDLoc dl, bool Signed,
5304+
const SDValue LHS, const SDValue RHS, SDValue &Lo,
5305+
SDValue &Hi) const;
5306+
52905307
/// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
52915308
/// only the first Count elements of the vector are used.
52925309
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4008,44 +4008,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
40084008
LC = RTLIB::MUL_I128;
40094009

40104010
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
4011-
// We'll expand the multiplication by brute force because we have no other
4012-
// options. This is a trivially-generalized version of the code from
4013-
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
4014-
// 4.3.1).
4015-
unsigned Bits = NVT.getSizeInBits();
4016-
unsigned HalfBits = Bits >> 1;
4017-
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
4018-
NVT);
4019-
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
4020-
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
4021-
4022-
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
4023-
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
4024-
4025-
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
4026-
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
4027-
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
4028-
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
4029-
4030-
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
4031-
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
4032-
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
4033-
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
4034-
4035-
SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
4036-
DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
4037-
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
4038-
4039-
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
4040-
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
4041-
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
4042-
Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
4043-
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
4044-
4045-
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
4046-
DAG.getNode(ISD::ADD, dl, NVT,
4047-
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
4048-
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
4011+
TLI.ForceExpandMUL(DAG, dl, /*Signed=*/true, VT, LL, LH, RL, RH, Lo, Hi);
40494012
return;
40504013
}
40514014

@@ -4146,9 +4109,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
41464109
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
41474110
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
41484111
LL, LH, RL, RH)) {
4149-
report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
4150-
return;
4112+
Result.clear();
4113+
Result.resize(4);
4114+
4115+
SDValue LoTmp, HiTmp;
4116+
TLI.ForceExpandMUL(DAG, dl, Signed, LHS, RHS, LoTmp, HiTmp);
4117+
SplitInteger(LoTmp, Result[0], Result[1]);
4118+
SplitInteger(HiTmp, Result[2], Result[3]);
41514119
}
4120+
assert(Result.size() == 4 && "Unexpected number of partlets in the result");
41524121

41534122
unsigned NVTSize = NVT.getScalarSizeInBits();
41544123
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 117 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -10149,6 +10149,121 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
1014910149
return DAG.getSelect(dl, VT, Cond, SatVal, Result);
1015010150
}
1015110151

10152+
void TargetLowering::ForceExpandMUL(SelectionDAG &DAG, SDLoc dl, bool Signed,
10153+
EVT WideVT, const SDValue LL,
10154+
const SDValue LH, const SDValue RL,
10155+
const SDValue RH, SDValue &Lo,
10156+
SDValue &Hi) const {
10157+
// We can fall back to a libcall with an illegal type for the MUL if we
10158+
// have a libcall big enough.
10159+
// Also, we can fall back to a division in some cases, but that's a big
10160+
// performance hit in the general case.
10161+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10162+
if (WideVT == MVT::i16)
10163+
LC = RTLIB::MUL_I16;
10164+
else if (WideVT == MVT::i32)
10165+
LC = RTLIB::MUL_I32;
10166+
else if (WideVT == MVT::i64)
10167+
LC = RTLIB::MUL_I64;
10168+
else if (WideVT == MVT::i128)
10169+
LC = RTLIB::MUL_I128;
10170+
10171+
if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10172+
// We'll expand the multiplication by brute force because we have no other
10173+
// options. This is a trivially-generalized version of the code from
10174+
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10175+
// 4.3.1).
10176+
EVT VT = LL.getValueType();
10177+
unsigned Bits = VT.getSizeInBits();
10178+
unsigned HalfBits = Bits >> 1;
10179+
SDValue Mask =
10180+
DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10181+
SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10182+
SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10183+
10184+
SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10185+
SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10186+
10187+
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10188+
SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10189+
SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10190+
SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10191+
10192+
SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10193+
DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10194+
SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10195+
SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10196+
10197+
SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10198+
DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10199+
SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10200+
10201+
SDValue W =
10202+
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10203+
DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10204+
Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10205+
DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10206+
10207+
Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10208+
DAG.getNode(ISD::ADD, dl, VT,
10209+
DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10210+
DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10211+
} else {
10212+
// Attempt a libcall.
10213+
SDValue Ret;
10214+
TargetLowering::MakeLibCallOptions CallOptions;
10215+
CallOptions.setSExt(Signed);
10216+
CallOptions.setIsPostTypeLegalization(true);
10217+
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10218+
// Halves of WideVT are packed into registers in different order
10219+
// depending on platform endianness. This is usually handled by
10220+
// the C calling convention, but we can't defer to it in
10221+
// the legalizer.
10222+
SDValue Args[] = {LL, LH, RL, RH};
10223+
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10224+
} else {
10225+
SDValue Args[] = {LH, LL, RH, RL};
10226+
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10227+
}
10228+
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10229+
"Ret value is a collection of constituent nodes holding result.");
10230+
if (DAG.getDataLayout().isLittleEndian()) {
10231+
// As with the arguments above, the order of the result halves depends on endianness.
10232+
Lo = Ret.getOperand(0);
10233+
Hi = Ret.getOperand(1);
10234+
} else {
10235+
Lo = Ret.getOperand(1);
10236+
Hi = Ret.getOperand(0);
10237+
}
10238+
}
10239+
}
10240+
10241+
void TargetLowering::ForceExpandMUL(SelectionDAG &DAG, SDLoc dl, bool Signed,
10242+
const SDValue LHS, const SDValue RHS,
10243+
SDValue &Lo, SDValue &Hi) const {
10244+
EVT VT = LHS.getValueType();
10245+
assert(RHS.getValueType() == VT && "Mismatching operand types");
10246+
10247+
SDValue HiLHS;
10248+
SDValue HiRHS;
10249+
if (Signed) {
10250+
// The high part is the sign extension of the low part: shift the low part
10251+
// right arithmetically by all but one of its bits (LoSize - 1).
10252+
unsigned LoSize = VT.getFixedSizeInBits();
10253+
HiLHS = DAG.getNode(
10254+
ISD::SRA, dl, VT, LHS,
10255+
DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10256+
HiRHS = DAG.getNode(
10257+
ISD::SRA, dl, VT, RHS,
10258+
DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10259+
} else {
10260+
HiLHS = DAG.getConstant(0, dl, VT);
10261+
HiRHS = DAG.getConstant(0, dl, VT);
10262+
}
10263+
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10264+
ForceExpandMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10265+
}
10266+
1015210267
SDValue
1015310268
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
1015410269
assert((Node->getOpcode() == ISD::SMULFIX ||
@@ -10223,7 +10338,7 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
1022310338
} else if (VT.isVector()) {
1022410339
return SDValue();
1022510340
} else {
10226-
report_fatal_error("Unable to expand fixed point multiplication.");
10341+
ForceExpandMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
1022710342
}
1022810343

1022910344
if (Scale == VTSize)
@@ -10522,69 +10637,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
1052210637
if (VT.isVector())
1052310638
return false;
1052410639

10525-
// We can fall back to a libcall with an illegal type for the MUL if we
10526-
// have a libcall big enough.
10527-
// Also, we can fall back to a division in some cases, but that's a big
10528-
// performance hit in the general case.
10529-
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10530-
if (WideVT == MVT::i16)
10531-
LC = RTLIB::MUL_I16;
10532-
else if (WideVT == MVT::i32)
10533-
LC = RTLIB::MUL_I32;
10534-
else if (WideVT == MVT::i64)
10535-
LC = RTLIB::MUL_I64;
10536-
else if (WideVT == MVT::i128)
10537-
LC = RTLIB::MUL_I128;
10538-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
10539-
10540-
SDValue HiLHS;
10541-
SDValue HiRHS;
10542-
if (isSigned) {
10543-
// The high part is obtained by SRA'ing all but one of the bits of low
10544-
// part.
10545-
unsigned LoSize = VT.getFixedSizeInBits();
10546-
HiLHS =
10547-
DAG.getNode(ISD::SRA, dl, VT, LHS,
10548-
DAG.getConstant(LoSize - 1, dl,
10549-
getPointerTy(DAG.getDataLayout())));
10550-
HiRHS =
10551-
DAG.getNode(ISD::SRA, dl, VT, RHS,
10552-
DAG.getConstant(LoSize - 1, dl,
10553-
getPointerTy(DAG.getDataLayout())));
10554-
} else {
10555-
HiLHS = DAG.getConstant(0, dl, VT);
10556-
HiRHS = DAG.getConstant(0, dl, VT);
10557-
}
10558-
10559-
// Here we're passing the 2 arguments explicitly as 4 arguments that are
10560-
// pre-lowered to the correct types. This all depends upon WideVT not
10561-
// being a legal type for the architecture and thus has to be split to
10562-
// two arguments.
10563-
SDValue Ret;
10564-
TargetLowering::MakeLibCallOptions CallOptions;
10565-
CallOptions.setSExt(isSigned);
10566-
CallOptions.setIsPostTypeLegalization(true);
10567-
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10568-
// Halves of WideVT are packed into registers in different order
10569-
// depending on platform endianness. This is usually handled by
10570-
// the C calling convention, but we can't defer to it in
10571-
// the legalizer.
10572-
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
10573-
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10574-
} else {
10575-
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
10576-
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10577-
}
10578-
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10579-
"Ret value is a collection of constituent nodes holding result.");
10580-
if (DAG.getDataLayout().isLittleEndian()) {
10581-
// Same as above.
10582-
BottomHalf = Ret.getOperand(0);
10583-
TopHalf = Ret.getOperand(1);
10584-
} else {
10585-
BottomHalf = Ret.getOperand(1);
10586-
TopHalf = Ret.getOperand(0);
10587-
}
10640+
ForceExpandMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
1058810641
}
1058910642

1059010643
Result = BottomHalf;

0 commit comments

Comments
 (0)