@@ -5180,29 +5180,10 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
5180
5180
// Split the demanded elts of a HADD/HSUB node between its operands.
5181
5181
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
5182
5182
APInt &DemandedLHS, APInt &DemandedRHS) {
5183
- int NumLanes = VT.getSizeInBits() / 128;
5184
- int NumElts = DemandedElts.getBitWidth();
5185
- int NumEltsPerLane = NumElts / NumLanes;
5186
- int HalfEltsPerLane = NumEltsPerLane / 2;
5187
-
5188
- DemandedLHS = APInt::getZero(NumElts);
5189
- DemandedRHS = APInt::getZero(NumElts);
5190
-
5191
- // Map DemandedElts to the horizontal operands.
5192
- for (int Idx = 0; Idx != NumElts; ++Idx) {
5193
- if (!DemandedElts[Idx])
5194
- continue;
5195
- int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
5196
- int LocalIdx = Idx % NumEltsPerLane;
5197
- if (LocalIdx < HalfEltsPerLane) {
5198
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5199
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5200
- } else {
5201
- LocalIdx -= HalfEltsPerLane;
5202
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5203
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5204
- }
5205
- }
5183
+ getHorizDemandedEltsForFirstOperand(VT.getSizeInBits(), DemandedElts,
5184
+ DemandedLHS, DemandedRHS);
5185
+ DemandedLHS |= DemandedLHS << 1;
5186
+ DemandedRHS |= DemandedRHS << 1;
5206
5187
}
5207
5188
5208
5189
/// Calculates the shuffle mask corresponding to the target-specific opcode.
@@ -36953,6 +36934,34 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
36953
36934
Known = Known.zext(64);
36954
36935
}
36955
36936
36937
+ static KnownBits computeKnownBitsForHorizontalOperation(
36938
+ const SDValue Op, const APInt &DemandedElts, unsigned Depth,
36939
+ unsigned OpIndexStart, const SelectionDAG &DAG,
36940
+ const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
36941
+ KnownBitsFunc) {
36942
+ APInt DemandedEltsLHS, DemandedEltsRHS;
36943
+ getHorizDemandedEltsForFirstOperand(Op.getValueType().getSizeInBits(),
36944
+ DemandedElts, DemandedEltsLHS,
36945
+ DemandedEltsRHS);
36946
+
36947
+ const auto ComputeForSingleOpFunc =
36948
+ [&DAG, Depth, KnownBitsFunc](const SDValue &Op, APInt &DemandedEltsOp) {
36949
+ return KnownBitsFunc(
36950
+ DAG.computeKnownBits(Op, DemandedEltsOp, Depth + 1),
36951
+ DAG.computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1));
36952
+ };
36953
+
36954
+ if (!DemandedEltsLHS.isZero() && !DemandedEltsRHS.isZero()) {
36955
+ return ComputeForSingleOpFunc(Op.getOperand(OpIndexStart), DemandedEltsLHS)
36956
+ .intersectWith(ComputeForSingleOpFunc(Op.getOperand(OpIndexStart + 1),
36957
+ DemandedEltsRHS));
36958
+ }
36959
+ if (!DemandedEltsLHS.isZero()) {
36960
+ return ComputeForSingleOpFunc(Op.getOperand(OpIndexStart), DemandedEltsLHS);
36961
+ }
36962
+ return ComputeForSingleOpFunc(Op.getOperand(OpIndexStart + 1), DemandedEltsRHS);
36963
+ }
36964
+
36956
36965
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
36957
36966
KnownBits &Known,
36958
36967
const APInt &DemandedElts,
@@ -37262,6 +37271,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37262
37271
}
37263
37272
break;
37264
37273
}
37274
+ case X86ISD::HADD:
37275
+ case X86ISD::HSUB: {
37276
+ Known = computeKnownBitsForHorizontalOperation(
37277
+ Op, DemandedElts, Depth, /*OpIndexStart=*/0, DAG,
37278
+ [Opc](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37279
+ return KnownBits::computeForAddSub(
37280
+ /*Add=*/Opc == X86ISD::HADD, /*NSW=*/false, /*NUW=*/false,
37281
+ KnownLHS, KnownRHS);
37282
+ });
37283
+ break;
37284
+ }
37265
37285
case ISD::INTRINSIC_WO_CHAIN: {
37266
37286
switch (Op->getConstantOperandVal(0)) {
37267
37287
case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37296,55 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37276
37296
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
37277
37297
break;
37278
37298
}
37299
+ case Intrinsic::x86_ssse3_phadd_d:
37300
+ case Intrinsic::x86_ssse3_phadd_w:
37301
+ case Intrinsic::x86_ssse3_phadd_d_128:
37302
+ case Intrinsic::x86_ssse3_phadd_w_128:
37303
+ case Intrinsic::x86_avx2_phadd_d:
37304
+ case Intrinsic::x86_avx2_phadd_w: {
37305
+ Known = computeKnownBitsForHorizontalOperation(
37306
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37307
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37308
+ return KnownBits::computeForAddSub(
37309
+ /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownLHS, KnownRHS);
37310
+ });
37311
+ break;
37312
+ }
37313
+ case Intrinsic::x86_ssse3_phadd_sw:
37314
+ case Intrinsic::x86_ssse3_phadd_sw_128:
37315
+ case Intrinsic::x86_avx2_phadd_sw: {
37316
+ Known = computeKnownBitsForHorizontalOperation(
37317
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37318
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37319
+ return KnownBits::sadd_sat(KnownLHS, KnownRHS);
37320
+ });
37321
+ break;
37322
+ }
37323
+ case Intrinsic::x86_ssse3_phsub_d:
37324
+ case Intrinsic::x86_ssse3_phsub_w:
37325
+ case Intrinsic::x86_ssse3_phsub_d_128:
37326
+ case Intrinsic::x86_ssse3_phsub_w_128:
37327
+ case Intrinsic::x86_avx2_phsub_d:
37328
+ case Intrinsic::x86_avx2_phsub_w: {
37329
+ Known = computeKnownBitsForHorizontalOperation(
37330
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37331
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37332
+ return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
37333
+ /*NUW=*/false, KnownLHS,
37334
+ KnownRHS);
37335
+ });
37336
+ break;
37337
+ }
37338
+ case Intrinsic::x86_ssse3_phsub_sw:
37339
+ case Intrinsic::x86_ssse3_phsub_sw_128:
37340
+ case Intrinsic::x86_avx2_phsub_sw: {
37341
+ Known = computeKnownBitsForHorizontalOperation(
37342
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37343
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37344
+ return KnownBits::ssub_sat(KnownLHS, KnownRHS);
37345
+ });
37346
+ break;
37347
+ }
37279
37348
}
37280
37349
break;
37281
37350
}
0 commit comments