-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Extend combinei64TruncSrlAdd
to handle patterns with or
and xor
#128435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fe8151f
5574dce
55189cc
d2f27bf
1ba8e32
696f3a5
7caceb1
4388e73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53733,36 +53733,35 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, | |
return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src); | ||
} | ||
|
||
// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to | ||
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able | ||
// to avoid generating code with MOVABS and large constants in certain cases. | ||
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG, | ||
const SDLoc &DL) { | ||
using namespace llvm::SDPatternMatch; | ||
// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to | ||
// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we | ||
// are able to avoid generating code with MOVABS and large constants in certain | ||
// cases. | ||
static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG, | ||
const SDLoc &DL) { | ||
|
||
SDValue AddLhs; | ||
APInt AddConst, SrlConst; | ||
if (VT != MVT::i32 || | ||
!sd_match(N, m_AllOf(m_SpecificVT(MVT::i64), | ||
m_Srl(m_OneUse(m_Add(m_Value(AddLhs), | ||
m_ConstInt(AddConst))), | ||
m_ConstInt(SrlConst))))) | ||
return SDValue(); | ||
SDValue Op = N.getOperand(0); | ||
APInt OpConst = Op.getConstantOperandAPInt(1); | ||
APInt SrlConst = N.getConstantOperandAPInt(1); | ||
uint64_t SrlConstVal = SrlConst.getZExtValue(); | ||
unsigned Opcode = Op.getOpcode(); | ||
|
||
if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue()) | ||
if (SrlConst.ule(32) || | ||
(Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal)) | ||
return SDValue(); | ||
|
||
SDValue AddLHSSrl = | ||
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1)); | ||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl); | ||
SDValue OpLhsSrl = | ||
DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1)); | ||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl); | ||
|
||
APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits()); | ||
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT); | ||
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst); | ||
APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added test cases that cover the scenario you mentioned for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right. I mistook it with |
||
SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT); | ||
SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst); | ||
EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal); | ||
|
||
EVT CleanUpVT = | ||
EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); | ||
return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT); | ||
if (Opcode == ISD::ADD) | ||
return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT); | ||
return NewOpNode; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. APInt::extractBits ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I'm not mistaken, by using |
||
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify | ||
|
@@ -53810,11 +53809,21 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, | |
if (!Src.hasOneUse()) | ||
return SDValue(); | ||
|
||
if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL)) | ||
return R; | ||
if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL && | ||
isa<ConstantSDNode>(Src.getOperand(1))) { | ||
|
||
unsigned SrcOpOpcode = Src.getOperand(0).getOpcode(); | ||
if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR && | ||
SrcOpOpcode != ISD::XOR) || | ||
!isa<ConstantSDNode>(Src.getOperand(0).getOperand(1))) | ||
return SDValue(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it work for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We also have other Binop like min/max etc., so we should add switch check like below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This exact fold doesn't work for |
||
|
||
if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL)) | ||
return R; | ||
|
||
return SDValue(); | ||
} | ||
|
||
// Only support vector truncation for now. | ||
// TODO: i64 scalar math would benefit as well. | ||
if (!VT.isVector()) | ||
return SDValue(); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need to use switch since we have limit to ADD/OR/XOR in
combineTruncatedArithmetic
.