@@ -37082,6 +37082,33 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
37082
37082
Known = Known.zext(64);
37083
37083
}
37084
37084
37085
+ static void computeKnownBitsForPMADDWD(SDValue LHS, SDValue RHS,
37086
+ KnownBits &Known,
37087
+ const APInt &DemandedElts,
37088
+ const SelectionDAG &DAG,
37089
+ unsigned Depth) {
37090
+ unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
37091
+
37092
+ // Multiply signed i16 elements to create i32 values and add Lo/Hi pairs.
37093
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
37094
+ APInt DemandedLoElts =
37095
+ DemandedSrcElts & APInt::getSplat(NumSrcElts, APInt(2, 0b01));
37096
+ APInt DemandedHiElts =
37097
+ DemandedSrcElts & APInt::getSplat(NumSrcElts, APInt(2, 0b10));
37098
+ KnownBits LHSLo =
37099
+ DAG.computeKnownBits(LHS, DemandedLoElts, Depth + 1).sext(32);
37100
+ KnownBits LHSHi =
37101
+ DAG.computeKnownBits(LHS, DemandedHiElts, Depth + 1).sext(32);
37102
+ KnownBits RHSLo =
37103
+ DAG.computeKnownBits(RHS, DemandedLoElts, Depth + 1).sext(32);
37104
+ KnownBits RHSHi =
37105
+ DAG.computeKnownBits(RHS, DemandedHiElts, Depth + 1).sext(32);
37106
+ KnownBits Lo = KnownBits::mul(LHSLo, RHSLo);
37107
+ KnownBits Hi = KnownBits::mul(LHSHi, RHSHi);
37108
+ Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/true,
37109
+ /*NUW=*/false, Lo, Hi);
37110
+ }
37111
+
37085
37112
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37086
37113
KnownBits &Known,
37087
37114
const APInt &DemandedElts,
@@ -37257,6 +37284,16 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37257
37284
}
37258
37285
break;
37259
37286
}
37287
+ case X86ISD::VPMADDWD: {
37288
+ SDValue LHS = Op.getOperand(0);
37289
+ SDValue RHS = Op.getOperand(1);
37290
+ assert(VT.getVectorElementType() == MVT::i32 &&
37291
+ LHS.getValueType() == RHS.getValueType() &&
37292
+ LHS.getValueType().getVectorElementType() == MVT::i16 &&
37293
+ "Unexpected PMADDWD types");
37294
+ computeKnownBitsForPMADDWD(LHS, RHS, Known, DemandedElts, DAG, Depth);
37295
+ break;
37296
+ }
37260
37297
case X86ISD::PMULUDQ: {
37261
37298
KnownBits Known2;
37262
37299
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -37393,6 +37430,18 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37393
37430
}
37394
37431
case ISD::INTRINSIC_WO_CHAIN: {
37395
37432
switch (Op->getConstantOperandVal(0)) {
37433
+ case Intrinsic::x86_sse2_pmadd_wd:
37434
+ case Intrinsic::x86_avx2_pmadd_wd:
37435
+ case Intrinsic::x86_avx512_pmaddw_d_512: {
37436
+ SDValue LHS = Op.getOperand(1);
37437
+ SDValue RHS = Op.getOperand(2);
37438
+ assert(VT.getScalarType() == MVT::i32 &&
37439
+ LHS.getValueType() == RHS.getValueType() &&
37440
+ LHS.getValueType().getScalarType() == MVT::i16 &&
37441
+ "Unexpected PMADDWD types");
37442
+ computeKnownBitsForPMADDWD(LHS, RHS, Known, DemandedElts, DAG, Depth);
37443
+ break;
37444
+ }
37396
37445
case Intrinsic::x86_sse2_psad_bw:
37397
37446
case Intrinsic::x86_avx2_psad_bw:
37398
37447
case Intrinsic::x86_avx512_psad_bw_512: {
0 commit comments