@@ -111,6 +111,7 @@ class VectorCombine {
111
111
bool scalarizeLoadExtract (Instruction &I);
112
112
bool foldShuffleOfBinops (Instruction &I);
113
113
bool foldShuffleFromReductions (Instruction &I);
114
+ bool foldTruncFromReductions (Instruction &I);
114
115
bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
115
116
116
117
void replaceValue (Value &Old, Value &New) {
@@ -1526,6 +1527,67 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
1526
1527
return foldSelectShuffle (*Shuffle, true );
1527
1528
}
1528
1529
1530
+ // / Determine if its more efficient to fold:
1531
+ // / reduce(trunc(x)) -> trunc(reduce(x)).
1532
+ bool VectorCombine::foldTruncFromReductions (Instruction &I) {
1533
+ auto *II = dyn_cast<IntrinsicInst>(&I);
1534
+ if (!II)
1535
+ return false ;
1536
+
1537
+ unsigned ReductionOpc = 0 ;
1538
+ switch (II->getIntrinsicID ()) {
1539
+ case Intrinsic::vector_reduce_add:
1540
+ ReductionOpc = Instruction::Add;
1541
+ break ;
1542
+ case Intrinsic::vector_reduce_mul:
1543
+ ReductionOpc = Instruction::Mul;
1544
+ break ;
1545
+ case Intrinsic::vector_reduce_and:
1546
+ ReductionOpc = Instruction::And;
1547
+ break ;
1548
+ case Intrinsic::vector_reduce_or:
1549
+ ReductionOpc = Instruction::Or;
1550
+ break ;
1551
+ case Intrinsic::vector_reduce_xor:
1552
+ ReductionOpc = Instruction::Xor;
1553
+ break ;
1554
+ default :
1555
+ return false ;
1556
+ }
1557
+ Value *ReductionSrc = I.getOperand (0 );
1558
+
1559
+ Value *TruncSrc;
1560
+ if (!match (ReductionSrc, m_Trunc (m_OneUse (m_Value (TruncSrc)))))
1561
+ return false ;
1562
+
1563
+ auto *Trunc = cast<CastInst>(ReductionSrc);
1564
+ auto *TruncTy = cast<VectorType>(TruncSrc->getType ());
1565
+ auto *ReductionTy = cast<VectorType>(ReductionSrc->getType ());
1566
+ Type *ResultTy = I.getType ();
1567
+
1568
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1569
+ InstructionCost OldCost =
1570
+ TTI.getCastInstrCost (Instruction::Trunc, ReductionTy, TruncTy,
1571
+ TTI::CastContextHint::None, CostKind, Trunc) +
1572
+ TTI.getArithmeticReductionCost (ReductionOpc, ReductionTy, std::nullopt,
1573
+ CostKind);
1574
+ InstructionCost NewCost =
1575
+ TTI.getArithmeticReductionCost (ReductionOpc, TruncTy, std::nullopt,
1576
+ CostKind) +
1577
+ TTI.getCastInstrCost (Instruction::Trunc, ResultTy,
1578
+ ReductionTy->getScalarType (),
1579
+ TTI::CastContextHint::None, CostKind);
1580
+
1581
+ if (OldCost < NewCost || !NewCost.isValid ())
1582
+ return false ;
1583
+
1584
+ Value *NewReduction = Builder.CreateIntrinsic (
1585
+ TruncTy->getScalarType (), II->getIntrinsicID (), {TruncSrc});
1586
+ Value *NewTruncation = Builder.CreateTrunc (NewReduction, ResultTy);
1587
+ replaceValue (I, *NewTruncation);
1588
+ return true ;
1589
+ }
1590
+
1529
1591
// / This method looks for groups of shuffles acting on binops, of the form:
1530
1592
// / %x = shuffle ...
1531
1593
// / %y = shuffle ...
@@ -1917,6 +1979,7 @@ bool VectorCombine::run() {
1917
1979
switch (Opcode) {
1918
1980
case Instruction::Call:
1919
1981
MadeChange |= foldShuffleFromReductions (I);
1982
+ MadeChange |= foldTruncFromReductions (I);
1920
1983
break ;
1921
1984
case Instruction::ICmp:
1922
1985
case Instruction::FCmp:
0 commit comments