@@ -5180,29 +5180,10 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
5180
5180
// Split the demanded elts of a HADD/HSUB node between its operands.
//
// For every demanded result element, the operand elements that feed it are
// the adjacent pair 2*i / 2*i+1 inside the same 128-bit lane; the lower half
// of each result lane is fed by LHS and the upper half by RHS (this is the
// mapping spelled out by the loop this change removes).
// The new body takes the first element of each pair from
// getHorizDemandedEltsForFirstOperand (not visible here - presumably it marks
// only the 2*i elements; confirm) and ORs in the mask shifted left by one to
// add the 2*i+1 elements.
5181
5181
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
5182
5182
APInt &DemandedLHS, APInt &DemandedRHS) {
5183
- int NumLanes = VT.getSizeInBits() / 128;
5184
- int NumElts = DemandedElts.getBitWidth();
5185
- int NumEltsPerLane = NumElts / NumLanes;
5186
- int HalfEltsPerLane = NumEltsPerLane / 2;
5187
-
5188
- DemandedLHS = APInt::getZero(NumElts);
5189
- DemandedRHS = APInt::getZero(NumElts);
5190
-
5191
- // Map DemandedElts to the horizontal operands.
5192
- for (int Idx = 0; Idx != NumElts; ++Idx) {
5193
- if (!DemandedElts[Idx])
5194
- continue;
5195
- int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
5196
- int LocalIdx = Idx % NumEltsPerLane;
5197
- if (LocalIdx < HalfEltsPerLane) {
5198
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5199
- DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5200
- } else {
5201
- LocalIdx -= HalfEltsPerLane;
5202
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
5203
- DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
5204
- }
5205
- }
5183
+ getHorizDemandedEltsForFirstOperand(VT.getSizeInBits(), DemandedElts,
5184
+ DemandedLHS, DemandedRHS);
5185
+ DemandedLHS |= DemandedLHS << 1;
5186
+ DemandedRHS |= DemandedRHS << 1;
5206
5187
}
5207
5188
5208
5189
/// Calculates the shuffle mask corresponding to the target-specific opcode.
@@ -36953,6 +36934,43 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
36953
36934
Known = Known.zext(64);
36954
36935
}
36955
36936
36937
+ static KnownBits computeKnownBitsForHorizontalOperation(
36938
+ const SDValue Op, const APInt &DemandedElts, unsigned Depth,
36939
+ unsigned OpIndexStart, const SelectionDAG &DAG,
36940
+ const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
36941
+ KnownBitsFunc) {
36942
+ APInt DemandedEltsLHS, DemandedEltsRHS;
36943
+ getHorizDemandedEltsForFirstOperand(Op.getValueType().getSizeInBits(),
36944
+ DemandedElts, DemandedEltsLHS,
36945
+ DemandedEltsRHS);
36946
+
36947
+ std::array<KnownBits, 2> KnownLHS;
36948
+ for (unsigned Index = 0; Index < KnownLHS.size(); ++Index) {
36949
+ if (!DemandedEltsLHS.isZero()) {
36950
+ KnownLHS[Index] = DAG.computeKnownBits(Op.getOperand(OpIndexStart),
36951
+ DemandedEltsLHS, Depth + 1);
36952
+ } else {
36953
+ KnownLHS[Index] = KnownBits(Op.getScalarValueSizeInBits());
36954
+ KnownLHS[Index].setAllZero();
36955
+ }
36956
+ DemandedEltsLHS <<= 1;
36957
+ }
36958
+ std::array<KnownBits, 2> KnownRHS;
36959
+ for (unsigned Index = 0; Index < KnownRHS.size(); ++Index) {
36960
+ if (!DemandedEltsRHS.isZero()) {
36961
+ KnownRHS[Index] = DAG.computeKnownBits(Op.getOperand(OpIndexStart + 1),
36962
+ DemandedEltsRHS, Depth + 1);
36963
+ } else {
36964
+ KnownRHS[Index] = KnownBits(Op.getScalarValueSizeInBits());
36965
+ KnownRHS[Index].setAllZero();
36966
+ }
36967
+ DemandedEltsRHS <<= 1;
36968
+ }
36969
+
36970
+ return KnownBitsFunc(KnownLHS[0], KnownLHS[1])
36971
+ .intersectWith(KnownBitsFunc(KnownRHS[0], KnownRHS[1]));
36972
+ }
36973
+
36956
36974
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
36957
36975
KnownBits &Known,
36958
36976
const APInt &DemandedElts,
@@ -37262,6 +37280,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37262
37280
}
37263
37281
break;
37264
37282
}
37283
+ case X86ISD::HADD:
37284
+ case X86ISD::HSUB: {
37285
+ Known = computeKnownBitsForHorizontalOperation(
37286
+ Op, DemandedElts, Depth, /*OpIndexStart=*/0, DAG,
37287
+ [Opc](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37288
+ return KnownBits::computeForAddSub(
37289
+ /*Add=*/Opc == X86ISD::HADD, /*NSW=*/false, /*NUW=*/false,
37290
+ KnownLHS, KnownRHS);
37291
+ });
37292
+ break;
37293
+ }
37265
37294
case ISD::INTRINSIC_WO_CHAIN: {
37266
37295
switch (Op->getConstantOperandVal(0)) {
37267
37296
case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37305,55 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
37276
37305
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
37277
37306
break;
37278
37307
}
37308
+ case Intrinsic::x86_ssse3_phadd_d:
37309
+ case Intrinsic::x86_ssse3_phadd_w:
37310
+ case Intrinsic::x86_ssse3_phadd_d_128:
37311
+ case Intrinsic::x86_ssse3_phadd_w_128:
37312
+ case Intrinsic::x86_avx2_phadd_d:
37313
+ case Intrinsic::x86_avx2_phadd_w: {
37314
+ Known = computeKnownBitsForHorizontalOperation(
37315
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37316
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37317
+ return KnownBits::computeForAddSub(
37318
+ /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownLHS, KnownRHS);
37319
+ });
37320
+ break;
37321
+ }
37322
+ case Intrinsic::x86_ssse3_phadd_sw:
37323
+ case Intrinsic::x86_ssse3_phadd_sw_128:
37324
+ case Intrinsic::x86_avx2_phadd_sw: {
37325
+ Known = computeKnownBitsForHorizontalOperation(
37326
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37327
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37328
+ return KnownBits::sadd_sat(KnownLHS, KnownRHS);
37329
+ });
37330
+ break;
37331
+ }
37332
+ case Intrinsic::x86_ssse3_phsub_d:
37333
+ case Intrinsic::x86_ssse3_phsub_w:
37334
+ case Intrinsic::x86_ssse3_phsub_d_128:
37335
+ case Intrinsic::x86_ssse3_phsub_w_128:
37336
+ case Intrinsic::x86_avx2_phsub_d:
37337
+ case Intrinsic::x86_avx2_phsub_w: {
37338
+ Known = computeKnownBitsForHorizontalOperation(
37339
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37340
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37341
+ return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
37342
+ /*NUW=*/false, KnownLHS,
37343
+ KnownRHS);
37344
+ });
37345
+ break;
37346
+ }
37347
+ case Intrinsic::x86_ssse3_phsub_sw:
37348
+ case Intrinsic::x86_ssse3_phsub_sw_128:
37349
+ case Intrinsic::x86_avx2_phsub_sw: {
37350
+ Known = computeKnownBitsForHorizontalOperation(
37351
+ Op, DemandedElts, Depth, /*OpIndexStart=*/1, DAG,
37352
+ [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
37353
+ return KnownBits::ssub_sat(KnownLHS, KnownRHS);
37354
+ });
37355
+ break;
37356
+ }
37279
37357
}
37280
37358
break;
37281
37359
}
0 commit comments