-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Generate `kmov` for masking integers
#120593
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
822ae48
Generate `kmov` for masking integers
abhishek-kaushik22 95c8864
Merge branch 'main' into kmov
abhishek-kaushik22 3f39f65
Review Changes
abhishek-kaushik22 47e9a51
Merge branch 'main' into kmov
abhishek-kaushik22 57c4aa0
Update tests
abhishek-kaushik22 85b9945
Combine to KMOV
abhishek-kaushik22 389c871
Merge branch 'main' into kmov
abhishek-kaushik22 1ae4114
Use getTargetConstantBitsFromNode
abhishek-kaushik22 76e904b
Merge branch 'main' into kmov
abhishek-kaushik22 ca6c246
Update test
abhishek-kaushik22 606d7f6
Update X86ISelLowering.cpp
abhishek-kaushik22 a51f6cb
Merge branch 'main' into kmov
abhishek-kaushik22 abd6a4d
Update X86ISelLowering.cpp
abhishek-kaushik22 1fef3b3
fix reviews
abhishek-kaushik22 cb51265
fix reviews
abhishek-kaushik22 11f9dbb
Update X86ISelLowering.cpp
abhishek-kaushik22 bfc963b
Update X86ISelLowering.cpp
abhishek-kaushik22 e1a9e35
Address review comments
abhishek-kaushik22 e209af1
Merge branch 'main' into kmov
abhishek-kaushik22 b76c86d
Fix tests
abhishek-kaushik22 0feb0a1
Remove basic block name from tests
abhishek-kaushik22 250fc9c
Use getVectorIdxConstant instead of getConstant
abhishek-kaushik22 a8ba133
Merge branch 'main' into kmov
abhishek-kaushik22 60a5c70
Use DAG.getNOT
abhishek-kaushik22 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55447,6 +55447,95 @@ static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS, | |
return SDValue(); | ||
} | ||
|
||
// The pattern (setcc (and (broadcast x), (2^n, 2^{n+1}, ...)), (0, 0, ...),
// eq/ne) is generated when using an integer as a mask. Instead of generating a
// broadcast + vptest, we can directly move the integer to a mask register.
//
// \p VT is the result type of the compare, \p Op0 is the non-zero compare
// operand (expected to be the AND of the broadcast and the constant table) and
// \p CC is the condition code; only SETEQ and SETNE are handled. The caller
// invokes this only when its other compare operand was recognised as zero.
//
// Returns the replacement mask value, or an empty SDValue if the pattern does
// not match.
static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
                                        const SDLoc &DL, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  if (CC != ISD::SETNE && CC != ISD::SETEQ)
    return SDValue();

  if (!Subtarget.hasAVX512())
    return SDValue();

  if (Op0.getOpcode() != ISD::AND)
    return SDValue();

  SDValue Broadcast = Op0.getOperand(0);
  if (Broadcast.getOpcode() != X86ISD::VBROADCAST &&
      Broadcast.getOpcode() != X86ISD::VBROADCAST_LOAD)
    return SDValue();

  SDValue Load = Op0.getOperand(1);
  EVT LoadVT = Load.getSimpleValueType();

  APInt UndefElts;
  SmallVector<APInt, 32> EltBits;
  if (!getTargetConstantBitsFromNode(Load, LoadVT.getScalarSizeInBits(),
                                     UndefElts, EltBits,
                                     /*AllowWholeUndefs*/ true,
                                     /*AllowPartialUndefs*/ false))
    return SDValue();

  // The result below is always materialised as a v16i1 and then narrowed with
  // EXTRACT_SUBVECTOR, so a constant table with more than 16 lanes cannot be
  // handled here.
  unsigned Len = UndefElts.getBitWidth();
  if (Len > 16)
    return SDValue();

  if (UndefElts[0] || !EltBits[0].isPowerOf2())
    return SDValue();

  // Check if the constant pool contains only powers of 2 starting from some
  // 2^N. The table may also contain undefs because of widening of vector
  // operands.
  unsigned N = EltBits[0].logBase2();
  for (unsigned I = 1; I != Len; ++I) {
    if (UndefElts[I]) {
      // Once an undef lane is seen, every remaining lane must be undef too.
      if (!UndefElts.extractBits(Len - (I + 1), I + 1).isAllOnes())
        return SDValue();
      break;
    }

    // Lane I must be exactly 2^(N + I), and that bit must exist in the lane.
    if (EltBits[I].getBitWidth() <= N + I || !EltBits[I].isOneBitSet(N + I))
      return SDValue();
  }

  MVT BroadcastOpVT = Broadcast.getSimpleValueType().getVectorElementType();
  SDValue BroadcastOp;
  if (Broadcast.getOpcode() != X86ISD::VBROADCAST) {
    // VBROADCAST_LOAD has no scalar operand to reuse, so take lane 0 of the
    // broadcast result instead.
    BroadcastOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, BroadcastOpVT,
                              Broadcast, DAG.getVectorIdxConstant(0, DL));
  } else {
    BroadcastOp = Broadcast.getOperand(0);
    if (BroadcastOp.getValueType().isVector())
      return SDValue();
  }

  SDValue Masked = BroadcastOp;
  if (N != 0) {
    // Move bit N down to bit 0 and keep only the bits that map to a lane.
    // Build the mask as an APInt (clamped to the scalar width) so that no
    // host-integer shift can overflow.
    unsigned ScalarBits = BroadcastOpVT.getSizeInBits();
    unsigned MaskBits = Len < ScalarBits ? Len : ScalarBits;
    APInt Mask = APInt::getLowBitsSet(ScalarBits, MaskBits);
    SDValue ShiftedValue = DAG.getNode(ISD::SRL, DL, BroadcastOpVT, BroadcastOp,
                                       DAG.getConstant(N, DL, BroadcastOpVT));
    Masked = DAG.getNode(ISD::AND, DL, BroadcastOpVT, ShiftedValue,
                         DAG.getConstant(Mask, DL, BroadcastOpVT));
  }

  // We can't extract more than 16 bits using this pattern, because 2^{17} will
  // not fit in an i16 and a vXi32 where X > 16 is more than 512 bits.
  SDValue Trunc = DAG.getAnyExtOrTrunc(Masked, DL, MVT::i16);
  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, MVT::v16i1, Trunc);

  // For SETEQ a lane is true when the tested bit is clear, so invert the mask.
  if (CC == ISD::SETEQ)
    Bitcast = DAG.getNOT(DL, Bitcast, MVT::v16i1);

  if (VT != MVT::v16i1)
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Bitcast,
                       DAG.getVectorIdxConstant(0, DL));

  return Bitcast;
}
|
||
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, | ||
TargetLowering::DAGCombinerInfo &DCI, | ||
const X86Subtarget &Subtarget) { | ||
|
@@ -55579,6 +55668,11 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, | |
"Unexpected condition code!"); | ||
return Op0.getOperand(0); | ||
} | ||
|
||
if (IsVZero1) | ||
if (SDValue V = | ||
combineAVX512SetCCToKMOV(VT, Op0, TmpCC, DL, DAG, Subtarget)) | ||
return V; | ||
} | ||
|
||
// Try and make unsigned vector comparison signed. On pre AVX512 targets there | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.