@@ -5180,29 +5180,10 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
5180
5180
// Split the demanded elts of a HADD/HSUB node between its operands.
5181
5181
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
5182
5182
APInt &DemandedLHS, APInt &DemandedRHS) {
5183
- int NumLanes = VT.getSizeInBits() / 128;
5184
- int NumElts = DemandedElts.getBitWidth();
5185
- int NumEltsPerLane = NumElts / NumLanes;
5186
- int HalfEltsPerLane = NumEltsPerLane / 2;
5187
-
5188
- DemandedLHS = APInt::getZero(NumElts);
5189
- DemandedRHS = APInt::getZero(NumElts);
5190
-
5191
- // Map DemandedElts to the horizontal operands.
5192
- for (int Idx = 0; Idx != NumElts; ++Idx) {
5193
- if (!DemandedElts[Idx])
5194
- continue;
5195
- int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
5196
- int LocalIdx = Idx % NumEltsPerLane;
5197
- if (LocalIdx < HalfEltsPerLane) {
5198
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5199
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5200
- } else {
5201
- LocalIdx -= HalfEltsPerLane;
5202
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5203
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5204
- }
5205
- }
5183
+ getHorizDemandedEltsForFirstOperand(VT.getSizeInBits(), DemandedElts,
5184
+ DemandedLHS, DemandedRHS);
5185
+ DemandedLHS |= DemandedLHS << 1;
5186
+ DemandedRHS |= DemandedRHS << 1;
5206
5187
}
5207
5188
5208
5189
/// Calculates the shuffle mask corresponding to the target-specific opcode.
@@ -36953,6 +36934,41 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
36953
36934
Known = Known.zext(64);
36954
36935
}
36955
36936
36937
+ static KnownBits computeKnownBitsForHorizontalOperation(
36938
+ const SDValue Op, const APInt &DemandedElts, unsigned Depth,
36939
+ unsigned OpIndexStart, const SelectionDAG &DAG,
36940
+ const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
36941
+ KnownBitsFunc) {
36942
+ APInt DemandedEltsLHS, DemandedEltsRHS;
36943
+ getHorizDemandedEltsForFirstOperand(Op.getValueType().getSizeInBits(),
36944
+ DemandedElts, DemandedEltsLHS,
36945
+ DemandedEltsRHS);
36946
+
36947
+ const auto ComputeForSingleOpFunc =
36948
+ [&DAG, Depth](const SDValue &Op, APInt &DemandedEltsOp,
36949
+ std::array<KnownBits, 2> &Known) {
36950
+ for (unsigned Index = 0; Index < Known.size(); ++Index) {
36951
+ if (!DemandedEltsOp.isZero()) {
36952
+ Known[Index] = DAG.computeKnownBits(Op, DemandedEltsOp, Depth + 1);
36953
+ } else {
36954
+ Known[Index] = KnownBits(Op.getScalarValueSizeInBits());
36955
+ Known[Index].setAllZero();
36956
+ }
36957
+ DemandedEltsOp <<= 1;
36958
+ }
36959
+ };
36960
+
36961
+ std::array<KnownBits, 2> KnownLHS;
36962
+ ComputeForSingleOpFunc(Op.getOperand(OpIndexStart), DemandedEltsLHS,
36963
+ KnownLHS);
36964
+ std::array<KnownBits, 2> KnownRHS;
36965
+ ComputeForSingleOpFunc(Op.getOperand(OpIndexStart + 1), DemandedEltsRHS,
36966
+ KnownRHS);
36967
+
36968
+ return KnownBitsFunc(KnownLHS[0], KnownLHS[1])
36969
+ .intersectWith(KnownBitsFunc(KnownRHS[0], KnownRHS[1]));
36970
+ }
36971
+
36956
36972
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
36957
36973
KnownBits &Known,
36958
36974
const APInt &DemandedElts,
@@ -37262,6 +37278,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37262
37278
}
37263
37279
break;
37264
37280
}
37281
+ case X86ISD::HADD:
37282
+ case X86ISD::HSUB: {
37283
+ Known = computeKnownBitsForHorizontalOperation(
37284
+ Op, DemandedElts, Depth, /*OpIndexStart=*/0, DAG,
37285
+ [Opc](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37286
+ return KnownBits::computeForAddSub(
37287
+ /*Add=*/Opc == X86ISD::HADD, /*NSW=*/false, /*NUW=*/false,
37288
+ KnownLHS, KnownRHS);
37289
+ });
37290
+ break;
37291
+ }
37265
37292
case ISD::INTRINSIC_WO_CHAIN: {
37266
37293
switch (Op->getConstantOperandVal(0)) {
37267
37294
case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37303,55 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37276
37303
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
37277
37304
break;
37278
37305
}
37306
+ case Intrinsic::x86_ssse3_phadd_d:
37307
+ case Intrinsic::x86_ssse3_phadd_w:
37308
+ case Intrinsic::x86_ssse3_phadd_d_128:
37309
+ case Intrinsic::x86_ssse3_phadd_w_128:
37310
+ case Intrinsic::x86_avx2_phadd_d:
37311
+ case Intrinsic::x86_avx2_phadd_w: {
37312
+ Known = computeKnownBitsForHorizontalOperation(
37313
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37314
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37315
+ return KnownBits::computeForAddSub(
37316
+ /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownLHS, KnownRHS);
37317
+ });
37318
+ break;
37319
+ }
37320
+ case Intrinsic::x86_ssse3_phadd_sw:
37321
+ case Intrinsic::x86_ssse3_phadd_sw_128:
37322
+ case Intrinsic::x86_avx2_phadd_sw: {
37323
+ Known = computeKnownBitsForHorizontalOperation(
37324
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37325
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37326
+ return KnownBits::sadd_sat(KnownLHS, KnownRHS);
37327
+ });
37328
+ break;
37329
+ }
37330
+ case Intrinsic::x86_ssse3_phsub_d:
37331
+ case Intrinsic::x86_ssse3_phsub_w:
37332
+ case Intrinsic::x86_ssse3_phsub_d_128:
37333
+ case Intrinsic::x86_ssse3_phsub_w_128:
37334
+ case Intrinsic::x86_avx2_phsub_d:
37335
+ case Intrinsic::x86_avx2_phsub_w: {
37336
+ Known = computeKnownBitsForHorizontalOperation(
37337
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37338
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37339
+ return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
37340
+ /*NUW=*/false, KnownLHS,
37341
+ KnownRHS);
37342
+ });
37343
+ break;
37344
+ }
37345
+ case Intrinsic::x86_ssse3_phsub_sw:
37346
+ case Intrinsic::x86_ssse3_phsub_sw_128:
37347
+ case Intrinsic::x86_avx2_phsub_sw: {
37348
+ Known = computeKnownBitsForHorizontalOperation(
37349
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37350
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37351
+ return KnownBits::ssub_sat(KnownLHS, KnownRHS);
37352
+ });
37353
+ break;
37354
+ }
37279
37355
}
37280
37356
break;
37281
37357
}
0 commit comments