Skip to content

Commit 1bca77a

Browse files
RKSimon authored and DanielCChen committed
[X86] combineAndnp - fold ANDN(SEXT(SETCC()),X) -> SELECT(NOT(SETCC()),X,0) on AVX512 targets
Reverse the generic foldVSelectToSignBitSplatMask fold on AVX512 targets where we can use the SETCC result directly in predicated moves/instructions. Fixes llvm#109272
1 parent 5ceb78d commit 1bca77a

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54021,6 +54021,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
5402154021
MVT VT = N->getSimpleValueType(0);
5402254022
int NumElts = VT.getVectorNumElements();
5402354023
unsigned EltSizeInBits = VT.getScalarSizeInBits();
54024+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5402454025
SDLoc DL(N);
5402554026

5402654027
// ANDNP(undef, x) -> 0
@@ -54044,6 +54045,18 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
5404454045
if (SDValue Not = IsNOT(N0, DAG))
5404554046
return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
5404654047

54048+
// On AVX512 targets, attempt to reverse foldVSelectToSignBitSplatMask
54049+
// to make use of predicated selects.
54050+
// ANDN(SEXT(SETCC()),X) -> SELECT(NOT(SETCC()),X,0)
54051+
if (DCI.isAfterLegalizeDAG() && N0.getOpcode() == ISD::SIGN_EXTEND) {
54052+
SDValue Src = N0.getOperand(0);
54053+
EVT SrcVT = Src.getValueType();
54054+
if (Src.getOpcode() == ISD::SETCC && SrcVT.getScalarType() == MVT::i1 &&
54055+
TLI.isTypeLegal(SrcVT) && N0.hasOneUse() && Src.hasOneUse())
54056+
return DAG.getSelect(DL, VT, DAG.getNOT(DL, Src, SrcVT), N1,
54057+
getZeroVector(VT, Subtarget, DAG, DL));
54058+
}
54059+
5404754060
// Constant Folding
5404854061
APInt Undefs0, Undefs1;
5404954062
SmallVector<APInt> EltBits0, EltBits1;
@@ -54110,7 +54123,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
5411054123
std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
5411154124
std::tie(Bits1, Elts1) = GetDemandedMasks(N0, true);
5411254125

54113-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5411454126
if (TLI.SimplifyDemandedVectorElts(N0, Elts0, DCI) ||
5411554127
TLI.SimplifyDemandedVectorElts(N1, Elts1, DCI) ||
5411654128
TLI.SimplifyDemandedBits(N0, Bits0, Elts0, DCI) ||

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,8 @@ define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a
153153
; CHECK: # %bb.0:
154154
; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0
155155
; CHECK-NEXT: vpmovb2m %zmm1, %k0
156-
; CHECK-NEXT: vpmovm2b %k0, %zmm1
157-
; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0
156+
; CHECK-NEXT: knotq %k0, %k1
157+
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
158158
; CHECK-NEXT: ret{{[l|q]}}
159159
%perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2)
160160
%cmp = icmp slt <64 x i8> %a1, zeroinitializer

0 commit comments

Comments
 (0)