Skip to content

Commit b20c1ff

Browse files
committed
[X86] combinePTESTCC - remove unnecessary legal vector type assertion
Most of these folds bitcast to the PTEST operand type anyway, and it's only relevant for the PTEST->MOVMSK fold, which I'm looking at expanding to attempt to fold to PTEST->TESTP as well. Noticed while beginning triage of Issue llvm#60007.
1 parent ebb0f1d commit b20c1ff

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -47321,8 +47321,6 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
4732147321
if (Op0 == Op1) {
4732247322
SDValue BC = peekThroughBitcasts(Op0);
4732347323
EVT BCVT = BC.getValueType();
47324-
assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
47325-
"Unexpected vector type");
4732647324

4732747325
// TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y)
4732847326
if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
@@ -47342,29 +47340,32 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
4734247340
// If every element is an all-sign value, see if we can use MOVMSK to
4734347341
// more efficiently extract the sign bits and compare that.
4734447342
// TODO: Handle TESTC with comparison inversion.
47343+
// TODO: When can we use TESTPS/TESTPD instead?
4734547344
// TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
4734647345
// MOVMSK combines to make sure its never worse than PTEST?
47347-
unsigned EltBits = BCVT.getScalarSizeInBits();
47348-
if (DAG.ComputeNumSignBits(BC) == EltBits) {
47349-
assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
47350-
APInt SignMask = APInt::getSignMask(EltBits);
47351-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
47352-
if (SDValue Res =
47353-
TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
47354-
// For vXi16 cases we need to use pmovmksb and extract every other
47355-
// sign bit.
47356-
SDLoc DL(EFLAGS);
47357-
if (EltBits == 16) {
47358-
MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
47359-
Res = DAG.getBitcast(MovmskVT, Res);
47360-
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
47361-
Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
47362-
DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
47363-
} else {
47364-
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
47346+
if (BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
47347+
unsigned EltBits = BCVT.getScalarSizeInBits();
47348+
if (DAG.ComputeNumSignBits(BC) == EltBits) {
47349+
assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
47350+
APInt SignMask = APInt::getSignMask(EltBits);
47351+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
47352+
if (SDValue Res =
47353+
TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
47354+
// For vXi16 cases we need to use pmovmksb and extract every other
47355+
// sign bit.
47356+
SDLoc DL(EFLAGS);
47357+
if (EltBits == 16) {
47358+
MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
47359+
Res = DAG.getBitcast(MovmskVT, Res);
47360+
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
47361+
Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
47362+
DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
47363+
} else {
47364+
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
47365+
}
47366+
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
47367+
DAG.getConstant(0, DL, MVT::i32));
4736547368
}
47366-
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
47367-
DAG.getConstant(0, DL, MVT::i32));
4736847369
}
4736947370
}
4737047371
}

0 commit comments

Comments
 (0)