@@ -5180,29 +5180,10 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
5180
5180
// Split the demanded elts of a HADD/HSUB node between its operands.
5181
5181
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
5182
5182
APInt &DemandedLHS, APInt &DemandedRHS) {
5183
- int NumLanes = VT.getSizeInBits() / 128;
5184
- int NumElts = DemandedElts.getBitWidth();
5185
- int NumEltsPerLane = NumElts / NumLanes;
5186
- int HalfEltsPerLane = NumEltsPerLane / 2;
5187
-
5188
- DemandedLHS = APInt::getZero(NumElts);
5189
- DemandedRHS = APInt::getZero(NumElts);
5190
-
5191
- // Map DemandedElts to the horizontal operands.
5192
- for (int Idx = 0; Idx != NumElts; ++Idx) {
5193
- if (!DemandedElts[Idx])
5194
- continue;
5195
- int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
5196
- int LocalIdx = Idx % NumEltsPerLane;
5197
- if (LocalIdx < HalfEltsPerLane) {
5198
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5199
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5200
- } else {
5201
- LocalIdx -= HalfEltsPerLane;
5202
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5203
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5204
- }
5205
- }
5183
+ getHorizDemandedEltsForFirstOperand(VT.getSizeInBits(), DemandedElts,
5184
+ DemandedLHS, DemandedRHS);
5185
+ DemandedLHS |= DemandedLHS << 1;
5186
+ DemandedRHS |= DemandedRHS << 1;
5206
5187
}
5207
5188
5208
5189
/// Calculates the shuffle mask corresponding to the target-specific opcode.
@@ -36953,6 +36934,37 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
36953
36934
Known = Known.zext(64);
36954
36935
}
36955
36936
36937
+ static KnownBits computeKnownBitsForHorizontalOperation(
36938
+ const SDValue Op, const APInt &DemandedElts, unsigned Depth,
36939
+ unsigned OpIndexStart, const SelectionDAG &DAG,
36940
+ const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
36941
+ KnownBitsFunc) {
36942
+ APInt DemandedEltsLHS, DemandedEltsRHS;
36943
+ getHorizDemandedEltsForFirstOperand(Op.getValueType().getSizeInBits(),
36944
+ DemandedElts, DemandedEltsLHS,
36945
+ DemandedEltsRHS);
36946
+
36947
+ const auto ComputeForSingleOpFunc =
36948
+ [&DAG, Depth, KnownBitsFunc](const SDValue &Op, APInt &DemandedEltsOp) {
36949
+ std::array<KnownBits, 2> Known;
36950
+ for (unsigned Index = 0; Index < Known.size(); ++Index) {
36951
+ Known[Index] = DAG.computeKnownBits(Op, DemandedEltsOp, Depth + 1);
36952
+ DemandedEltsOp <<= 1;
36953
+ }
36954
+ return KnownBitsFunc(Known[0], Known[1]);
36955
+ };
36956
+
36957
+ if (!DemandedEltsLHS.isZero() && !DemandedEltsRHS.isZero()) {
36958
+ return ComputeForSingleOpFunc(Op.getOperand(OpIndexStart), DemandedEltsLHS)
36959
+ .intersectWith(ComputeForSingleOpFunc(Op.getOperand(OpIndexStart + 1),
36960
+ DemandedEltsRHS));
36961
+ }
36962
+ if (!DemandedEltsLHS.isZero()) {
36963
+ return ComputeForSingleOpFunc(Op.getOperand(OpIndexStart), DemandedEltsLHS);
36964
+ }
36965
+ return ComputeForSingleOpFunc(Op.getOperand(OpIndexStart + 1), DemandedEltsRHS);
36966
+ }
36967
+
36956
36968
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
36957
36969
KnownBits &Known,
36958
36970
const APInt &DemandedElts,
@@ -37262,6 +37274,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37262
37274
}
37263
37275
break;
37264
37276
}
37277
+ case X86ISD::HADD:
37278
+ case X86ISD::HSUB: {
37279
+ Known = computeKnownBitsForHorizontalOperation(
37280
+ Op, DemandedElts, Depth, /*OpIndexStart=*/0, DAG,
37281
+ [Opc](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37282
+ return KnownBits::computeForAddSub(
37283
+ /*Add=*/Opc == X86ISD::HADD, /*NSW=*/false, /*NUW=*/false,
37284
+ KnownLHS, KnownRHS);
37285
+ });
37286
+ break;
37287
+ }
37265
37288
case ISD::INTRINSIC_WO_CHAIN: {
37266
37289
switch (Op->getConstantOperandVal(0)) {
37267
37290
case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37299,55 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37276
37299
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
37277
37300
break;
37278
37301
}
37302
+ case Intrinsic::x86_ssse3_phadd_d:
37303
+ case Intrinsic::x86_ssse3_phadd_w:
37304
+ case Intrinsic::x86_ssse3_phadd_d_128:
37305
+ case Intrinsic::x86_ssse3_phadd_w_128:
37306
+ case Intrinsic::x86_avx2_phadd_d:
37307
+ case Intrinsic::x86_avx2_phadd_w: {
37308
+ Known = computeKnownBitsForHorizontalOperation(
37309
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37310
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37311
+ return KnownBits::computeForAddSub(
37312
+ /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownLHS, KnownRHS);
37313
+ });
37314
+ break;
37315
+ }
37316
+ case Intrinsic::x86_ssse3_phadd_sw:
37317
+ case Intrinsic::x86_ssse3_phadd_sw_128:
37318
+ case Intrinsic::x86_avx2_phadd_sw: {
37319
+ Known = computeKnownBitsForHorizontalOperation(
37320
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37321
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37322
+ return KnownBits::sadd_sat(KnownLHS, KnownRHS);
37323
+ });
37324
+ break;
37325
+ }
37326
+ case Intrinsic::x86_ssse3_phsub_d:
37327
+ case Intrinsic::x86_ssse3_phsub_w:
37328
+ case Intrinsic::x86_ssse3_phsub_d_128:
37329
+ case Intrinsic::x86_ssse3_phsub_w_128:
37330
+ case Intrinsic::x86_avx2_phsub_d:
37331
+ case Intrinsic::x86_avx2_phsub_w: {
37332
+ Known = computeKnownBitsForHorizontalOperation(
37333
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37334
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37335
+ return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
37336
+ /*NUW=*/false, KnownLHS,
37337
+ KnownRHS);
37338
+ });
37339
+ break;
37340
+ }
37341
+ case Intrinsic::x86_ssse3_phsub_sw:
37342
+ case Intrinsic::x86_ssse3_phsub_sw_128:
37343
+ case Intrinsic::x86_avx2_phsub_sw: {
37344
+ Known = computeKnownBitsForHorizontalOperation(
37345
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37346
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37347
+ return KnownBits::ssub_sat(KnownLHS, KnownRHS);
37348
+ });
37349
+ break;
37350
+ }
37279
37351
}
37280
37352
break;
37281
37353
}
0 commit comments