@@ -552,7 +552,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
552
552
case VPRecipeBase::VPWidenIntOrFpInductionSC:
553
553
case VPRecipeBase::VPWidenPointerInductionSC:
554
554
case VPRecipeBase::VPReductionPHISC:
555
- case VPRecipeBase::VPPartialReductionSC:
556
555
return true ;
557
556
case VPRecipeBase::VPBranchOnMaskSC:
558
557
case VPRecipeBase::VPInterleaveSC:
@@ -2182,34 +2181,37 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
2182
2181
// / Descriptor for the reduction.
2183
2182
const RecurrenceDescriptor &RdxDesc;
2184
2183
2185
- // / The phi is part of an in-loop reduction.
2186
- bool IsInLoop;
2187
-
2188
2184
// / The phi is part of an ordered reduction. Requires isInLoop() to be true.
2189
2185
bool IsOrdered;
2190
2186
2191
- // / When expanding the reduction PHI, the plan's VF element count is divided
2192
- // / by this factor to form the reduction phi's VF.
2193
- unsigned VFScaleFactor = 1 ;
2187
+ // / The scaling factor, relative to the VF, that this recipe's output is
2188
+ // / divided by.
2189
+ // / For outer-loop reductions this is equal to 1.
2190
+ // / For in-loop reductions this is equal to 0, which signals that the factor
2191
+ // / equals the VF (which may not be known yet). For partial reductions this
2192
+ // / is a value greater than 1.
2193
+ unsigned VFScaleFactor;
2194
2194
2195
2195
public:
2196
2196
// / Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2197
2197
// / RdxDesc.
2198
2198
VPReductionPHIRecipe (PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
2199
- VPValue &Start, bool IsInLoop = false ,
2200
- bool IsOrdered = false , unsigned VFScaleFactor = 1 )
2199
+ VPValue &Start, bool IsOrdered = false ,
2200
+ unsigned VFScaleFactor = 1 )
2201
2201
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2202
- RdxDesc (RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2203
- VFScaleFactor(VFScaleFactor) {
2204
- assert ((!IsOrdered || IsInLoop) && " IsOrdered requires IsInLoop" );
2202
+ RdxDesc (RdxDesc), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2203
+ assert ((!IsOrdered || isInLoop ()) &&
2204
+ " IsOrdered requires the reduction to be in-loop" );
2205
+ assert (((!isInLoop () && !IsOrdered) || isInLoop ()) &&
2206
+ " Invalid VFScaleFactor" );
2205
2207
}
2206
2208
2207
2209
~VPReductionPHIRecipe () override = default ;
2208
2210
2209
2211
VPReductionPHIRecipe *clone () override {
2210
- auto *R = new VPReductionPHIRecipe (cast<PHINode>( getUnderlyingInstr ()),
2211
- RdxDesc, * getOperand ( 0 ), IsInLoop ,
2212
- IsOrdered, VFScaleFactor);
2212
+ auto *R =
2213
+ new VPReductionPHIRecipe (cast<PHINode>( getUnderlyingInstr ()), RdxDesc ,
2214
+ * getOperand ( 0 ), IsOrdered, VFScaleFactor);
2213
2215
R->addOperand (getBackedgeValue ());
2214
2216
return R;
2215
2217
}
@@ -2235,8 +2237,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
2235
2237
// / Returns true if the phi is part of an ordered reduction.
2236
2238
bool isOrdered () const { return IsOrdered; }
2237
2239
2238
- // / Returns true, if the phi is part of an in-loop reduction.
2239
- bool isInLoop () const { return IsInLoop; }
2240
+ // / Returns true if the phi is part of an in-loop reduction.
2241
+ bool isInLoop () const { return VFScaleFactor == 0 ; }
2242
+
2243
+ bool isPartialReduction () const { return VFScaleFactor > 1 ; }
2240
2244
2241
2245
// / Returns true if the recipe only uses the first lane of operand \p Op.
2242
2246
bool onlyFirstLaneUsed (const VPValue *Op) const override {
@@ -2409,23 +2413,32 @@ class VPInterleaveRecipe : public VPRecipeBase {
2409
2413
Instruction *getInsertPos () const { return IG->getInsertPos (); }
2410
2414
};
2411
2415
2412
- // / A recipe to represent inloop reduction operations, performing a reduction on
2413
- // / a vector operand into a scalar value, and adding the result to a chain.
2414
- // / The Operands are {ChainOp, VecOp, [Condition]}.
2416
+ // / A recipe to represent in-loop, ordered or partial reduction operations. It
2417
+ // / reduces a vector operand into a scalar value (or into a vector, in the
2418
+ // / case of a partial reduction) and adds the result to a chain. The operands
2419
+ // / are {ChainOp, VecOp, [Condition]}.
2415
2420
class VPReductionRecipe : public VPRecipeWithIRFlags {
2416
2421
// / The recurrence kind for the reduction in question.
2417
2422
RecurKind RdxKind;
2418
2423
bool IsOrdered;
2419
2424
// / Whether the reduction is conditional.
2420
2425
bool IsConditional = false ;
2426
+ // / The scaling factor, relative to the VF, that this recipe's output is
2427
+ // / divided by.
2428
+ // / For outer-loop reductions this is equal to 1.
2429
+ // / For in-loop reductions this is equal to 0, which signals that the factor
2430
+ // / equals the VF (which may not be known yet).
2431
+ // / For partial reductions this is a value greater than 1.
2432
+ unsigned VFScaleFactor;
2421
2433
2422
2434
protected:
2423
2435
VPReductionRecipe (const unsigned char SC, RecurKind RdxKind,
2424
2436
FastMathFlags FMFs, Instruction *I,
2425
2437
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2426
- bool IsOrdered, DebugLoc DL)
2438
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL)
2427
2439
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2428
- IsOrdered (IsOrdered) {
2440
+ IsOrdered (IsOrdered), VFScaleFactor(VFScaleFactor) {
2441
+ assert ((!IsOrdered || VFScaleFactor == 0 ) && " Invalid scale factor" );
2429
2442
if (CondOp) {
2430
2443
IsConditional = true ;
2431
2444
addOperand (CondOp);
@@ -2436,24 +2449,24 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2436
2449
public:
2437
2450
VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
2438
2451
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2439
- bool IsOrdered, DebugLoc DL = {})
2452
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
2440
2453
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2441
2454
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2442
- IsOrdered, DL) {}
2455
+ IsOrdered, VFScaleFactor, DL) {}
2443
2456
2444
2457
VPReductionRecipe (const RecurKind RdxKind, FastMathFlags FMFs,
2445
2458
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2446
- bool IsOrdered, DebugLoc DL = {})
2459
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
2447
2460
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr ,
2448
2461
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2449
- IsOrdered, DL) {}
2462
+ IsOrdered, VFScaleFactor, DL) {}
2450
2463
2451
2464
~VPReductionRecipe () override = default ;
2452
2465
2453
2466
VPReductionRecipe *clone () override {
2454
- return new VPReductionRecipe (RdxKind, getFastMathFlags (),
2455
- getUnderlyingInstr (), getChainOp (), getVecOp (),
2456
- getCondOp (), IsOrdered, getDebugLoc ());
2467
+ return new VPReductionRecipe (
2468
+ RdxKind, getFastMathFlags (), getUnderlyingInstr (), getChainOp (),
2469
+ getVecOp (), getCondOp (), IsOrdered, VFScaleFactor , getDebugLoc ());
2457
2470
}
2458
2471
2459
2472
static inline bool classof (const VPRecipeBase *R) {
@@ -2485,6 +2498,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2485
2498
bool isOrdered () const { return IsOrdered; };
2486
2499
// / Return true if the in-loop reduction is conditional.
2487
2500
bool isConditional () const { return IsConditional; };
2501
+ // / Return true if the reduction is a partial reduction.
2502
+ bool isPartialReduction () const { return VFScaleFactor > 1 ; }
2488
2503
// / The VPValue of the scalar Chain being accumulated.
2489
2504
VPValue *getChainOp () const { return getOperand (0 ); }
2490
2505
// / The VPValue of the vector value to be reduced.
@@ -2493,65 +2508,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
2493
2508
VPValue *getCondOp () const {
2494
2509
return isConditional () ? getOperand (getNumOperands () - 1 ) : nullptr ;
2495
2510
}
2496
- };
2497
-
2498
- // / A recipe for forming partial reductions. In the loop, an accumulator and
2499
- // / vector operand are added together and passed to the next iteration as the
2500
- // / next accumulator. After the loop body, the accumulator is reduced to a
2501
- // / scalar value.
2502
- class VPPartialReductionRecipe : public VPReductionRecipe {
2503
- unsigned Opcode;
2504
-
2505
- // / The divisor by which the VF of this recipe's output should be divided
2506
- // / during execution.
2507
- unsigned VFScaleFactor;
2508
-
2509
- public:
2510
- VPPartialReductionRecipe (Instruction *ReductionInst, VPValue *Op0,
2511
- VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2512
- : VPPartialReductionRecipe(ReductionInst->getOpcode (), Op0, Op1, Cond,
2513
- VFScaleFactor, ReductionInst) {}
2514
- VPPartialReductionRecipe (unsigned Opcode, VPValue *Op0, VPValue *Op1,
2515
- VPValue *Cond, unsigned ScaleFactor,
2516
- Instruction *ReductionInst = nullptr )
2517
- : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2518
- FastMathFlags (), ReductionInst,
2519
- ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2520
- Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2521
- [[maybe_unused]] auto *AccumulatorRecipe =
2522
- getChainOp ()->getDefiningRecipe ();
2523
- assert ((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2524
- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2525
- " Unexpected operand order for partial reduction recipe" );
2526
- }
2527
- ~VPPartialReductionRecipe () override = default ;
2528
-
2529
- VPPartialReductionRecipe *clone () override {
2530
- return new VPPartialReductionRecipe (Opcode, getOperand (0 ), getOperand (1 ),
2531
- getCondOp (), VFScaleFactor,
2532
- getUnderlyingInstr ());
2533
- }
2534
-
2535
- VP_CLASSOF_IMPL (VPDef::VPPartialReductionSC)
2536
-
2537
- // / Generate the reduction in the loop.
2538
- void execute(VPTransformState &State) override ;
2539
-
2540
- // / Return the cost of this VPPartialReductionRecipe.
2541
- InstructionCost computeCost (ElementCount VF,
2542
- VPCostContext &Ctx) const override ;
2543
-
2544
- // / Get the binary op's opcode.
2545
- unsigned getOpcode () const { return Opcode; }
2546
-
2547
2511
// / Get the factor that the VF of this recipe's output should be scaled by.
2548
2512
unsigned getVFScaleFactor () const { return VFScaleFactor; }
2549
-
2550
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2551
- // / Print the recipe.
2552
- void print (raw_ostream &O, const Twine &Indent,
2553
- VPSlotTracker &SlotTracker) const override ;
2554
- #endif
2555
2513
};
2556
2514
2557
2515
// / A recipe to represent inloop reduction operations with vector-predication
@@ -2567,7 +2525,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
2567
2525
R.getFastMathFlags(),
2568
2526
cast_or_null<Instruction>(R.getUnderlyingValue()),
2569
2527
ArrayRef<VPValue *>({R.getChainOp (), R.getVecOp (), &EVL}), CondOp,
2570
- R.isOrdered(), DL) {}
2528
+ R.isOrdered(), 0 , DL) {}
2571
2529
2572
2530
~VPReductionEVLRecipe () override = default ;
2573
2531
@@ -2768,6 +2726,11 @@ class VPSingleDefBundleRecipe : public VPSingleDefRecipe {
2768
2726
VPWidenRecipe *Mul, VPReductionRecipe *Red)
2769
2727
: VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
2770
2728
{Ext0, Ext1, Mul, Red}) {}
2729
+ VPSingleDefBundleRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2730
+ VPWidenRecipe *Mul, VPWidenRecipe *Sub,
2731
+ VPReductionRecipe *Red)
2732
+ : VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
2733
+ {Ext0, Ext1, Mul, Sub, Red}) {}
2771
2734
2772
2735
~VPSingleDefBundleRecipe () override {
2773
2736
SmallPtrSet<VPRecipeBase *, 4 > Seen;
0 commit comments