@@ -641,10 +641,6 @@ class InnerLoopVectorizer {
641
641
/// the block that was created for it.
642
642
void sinkScalarOperands (Instruction *PredInst);
643
643
644
- /// Shrinks vector element sizes to the smallest bitwidth they can be legally
645
- /// represented as.
646
- void truncateToMinimalBitwidths (VPTransformState &State);
647
-
648
644
/// Returns (and creates if needed) the trip count of the widened loop.
649
645
Value *getOrCreateVectorTripCount (BasicBlock *InsertBlock);
650
646
@@ -3429,151 +3425,8 @@ static Type *largestIntegerVectorType(Type *T1, Type *T2) {
3429
3425
return I1->getBitWidth () > I2->getBitWidth () ? T1 : T2;
3430
3426
}
3431
3427
3432
- void InnerLoopVectorizer::truncateToMinimalBitwidths (VPTransformState &State) {
3433
- // For every instruction `I` in MinBWs, truncate the operands, create a
3434
- // truncated version of `I` and reextend its result. InstCombine runs
3435
- // later and will remove any ext/trunc pairs.
3436
- SmallPtrSet<Value *, 4 > Erased;
3437
- for (const auto &KV : Cost->getMinimalBitwidths ()) {
3438
- // If the value wasn't vectorized, we must maintain the original scalar
3439
- // type. The absence of the value from State indicates that it
3440
- // wasn't vectorized.
3441
- // FIXME: Should not rely on getVPValue at this point.
3442
- VPValue *Def = State.Plan ->getVPValue (KV.first , true );
3443
- if (!State.hasAnyVectorValue (Def))
3444
- continue ;
3445
- // If the instruction is defined outside the loop, only update the first
3446
- // part; the first part will be re-used for all other parts.
3447
- unsigned UFToUse = OrigLoop->contains (KV.first ) ? UF : 1 ;
3448
- for (unsigned Part = 0 ; Part < UFToUse; ++Part) {
3449
- Value *I = State.get (Def, Part);
3450
- if (Erased.count (I) || I->use_empty () || !isa<Instruction>(I))
3451
- continue ;
3452
- Type *OriginalTy = I->getType ();
3453
- Type *ScalarTruncatedTy =
3454
- IntegerType::get (OriginalTy->getContext (), KV.second );
3455
- auto *TruncatedTy = VectorType::get (
3456
- ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getElementCount ());
3457
- if (TruncatedTy == OriginalTy)
3458
- continue ;
3459
-
3460
- IRBuilder<> B (cast<Instruction>(I));
3461
- auto ShrinkOperand = [&](Value *V) -> Value * {
3462
- if (auto *ZI = dyn_cast<ZExtInst>(V))
3463
- if (ZI->getSrcTy () == TruncatedTy)
3464
- return ZI->getOperand (0 );
3465
- return B.CreateZExtOrTrunc (V, TruncatedTy);
3466
- };
3467
-
3468
- // The actual instruction modification depends on the instruction type,
3469
- // unfortunately.
3470
- Value *NewI = nullptr ;
3471
- if (auto *BO = dyn_cast<BinaryOperator>(I)) {
3472
- Value *Op0 = ShrinkOperand (BO->getOperand (0 ));
3473
- Value *Op1 = ShrinkOperand (BO->getOperand (1 ));
3474
- NewI = B.CreateBinOp (BO->getOpcode (), Op0, Op1);
3475
-
3476
- // Any wrapping introduced by shrinking this operation shouldn't be
3477
- // considered undefined behavior. So, we can't unconditionally copy
3478
- // arithmetic wrapping flags to NewI.
3479
- cast<BinaryOperator>(NewI)->copyIRFlags (I, /* IncludeWrapFlags=*/ false );
3480
- } else if (auto *CI = dyn_cast<ICmpInst>(I)) {
3481
- Value *Op0 = ShrinkOperand (CI->getOperand (0 ));
3481
- Value *Op1 = ShrinkOperand (CI->getOperand (1 ));
3483
- NewI = B.CreateICmp (CI->getPredicate (), Op0, Op1);
3484
- } else if (auto *SI = dyn_cast<SelectInst>(I)) {
3485
- Value *TV = ShrinkOperand (SI->getTrueValue ());
3486
- Value *FV = ShrinkOperand (SI->getFalseValue ());
3487
- NewI = B.CreateSelect (SI->getCondition (), TV, FV);
3488
- } else if (auto *CI = dyn_cast<CastInst>(I)) {
3489
- switch (CI->getOpcode ()) {
3490
- default :
3491
- llvm_unreachable (" Unhandled cast!" );
3492
- case Instruction::Trunc:
3493
- NewI = ShrinkOperand (CI->getOperand (0 ));
3494
- break ;
3495
- case Instruction::SExt:
3496
- NewI = B.CreateSExtOrTrunc (
3497
- CI->getOperand (0 ),
3498
- smallestIntegerVectorType (OriginalTy, TruncatedTy));
3499
- break ;
3500
- case Instruction::ZExt:
3501
- NewI = B.CreateZExtOrTrunc (
3502
- CI->getOperand (0 ),
3503
- smallestIntegerVectorType (OriginalTy, TruncatedTy));
3504
- break ;
3505
- }
3506
- } else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
3507
- auto Elements0 =
3508
- cast<VectorType>(SI->getOperand (0 )->getType ())->getElementCount ();
3509
- auto *O0 = B.CreateZExtOrTrunc (
3510
- SI->getOperand (0 ), VectorType::get (ScalarTruncatedTy, Elements0));
3511
- auto Elements1 =
3512
- cast<VectorType>(SI->getOperand (1 )->getType ())->getElementCount ();
3513
- auto *O1 = B.CreateZExtOrTrunc (
3514
- SI->getOperand (1 ), VectorType::get (ScalarTruncatedTy, Elements1));
3515
-
3516
- NewI = B.CreateShuffleVector (O0, O1, SI->getShuffleMask ());
3517
- } else if (isa<LoadInst>(I) || isa<PHINode>(I)) {
3518
- // Don't do anything with the operands, just extend the result.
3519
- continue ;
3520
- } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
3521
- auto Elements =
3522
- cast<VectorType>(IE->getOperand (0 )->getType ())->getElementCount ();
3523
- auto *O0 = B.CreateZExtOrTrunc (
3524
- IE->getOperand (0 ), VectorType::get (ScalarTruncatedTy, Elements));
3525
- auto *O1 = B.CreateZExtOrTrunc (IE->getOperand (1 ), ScalarTruncatedTy);
3526
- NewI = B.CreateInsertElement (O0, O1, IE->getOperand (2 ));
3527
- } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
3528
- auto Elements =
3529
- cast<VectorType>(EE->getOperand (0 )->getType ())->getElementCount ();
3530
- auto *O0 = B.CreateZExtOrTrunc (
3531
- EE->getOperand (0 ), VectorType::get (ScalarTruncatedTy, Elements));
3532
- NewI = B.CreateExtractElement (O0, EE->getOperand (1 ));
3533
- } else {
3534
- // If we don't know what to do, be conservative and don't do anything.
3535
- continue ;
3536
- }
3537
-
3538
- // Lastly, extend the result.
3539
- NewI->takeName (cast<Instruction>(I));
3540
- Value *Res = B.CreateZExtOrTrunc (NewI, OriginalTy);
3541
- I->replaceAllUsesWith (Res);
3542
- cast<Instruction>(I)->eraseFromParent ();
3543
- Erased.insert (I);
3544
- State.reset (Def, Res, Part);
3545
- }
3546
- }
3547
-
3548
- // We'll have created a bunch of ZExts that now have no users. Clean up.
3549
- for (const auto &KV : Cost->getMinimalBitwidths ()) {
3550
- // If the value wasn't vectorized, we must maintain the original scalar
3551
- // type. The absence of the value from State indicates that it
3552
- // wasn't vectorized.
3553
- // FIXME: Should not rely on getVPValue at this point.
3554
- VPValue *Def = State.Plan ->getVPValue (KV.first , true );
3555
- if (!State.hasAnyVectorValue (Def))
3556
- continue ;
3557
- unsigned UFToUse = OrigLoop->contains (KV.first ) ? UF : 1 ;
3558
- for (unsigned Part = 0 ; Part < UFToUse; ++Part) {
3559
- Value *I = State.get (Def, Part);
3560
- ZExtInst *Inst = dyn_cast<ZExtInst>(I);
3561
- if (Inst && Inst->use_empty ()) {
3562
- Value *NewI = Inst->getOperand (0 );
3563
- Inst->eraseFromParent ();
3564
- State.reset (Def, NewI, Part);
3565
- }
3566
- }
3567
- }
3568
- }
3569
-
3570
3428
void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State,
3571
3429
VPlan &Plan) {
3572
- // Insert truncates and extends for any truncated instructions as hints to
3573
- // InstCombine.
3574
- if (VF.isVector ())
3575
- truncateToMinimalBitwidths (State);
3576
-
3577
3430
// Fix widened non-induction PHIs by setting up the PHI operands.
3578
3431
if (EnableVPlanNativePath)
3579
3432
fixNonInductionPHIs (Plan, State);
@@ -8741,6 +8594,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
8741
8594
VFRange SubRange = {VF, MaxVFTimes2};
8742
8595
if (auto Plan = tryToBuildVPlanWithVPRecipes (SubRange)) {
8743
8596
// Now optimize the initial VPlan.
8597
+ if (!Plan->hasVF (ElementCount::getFixed (1 )))
8598
+ VPlanTransforms::truncateToMinimalBitwidths (
8599
+ *Plan, CM.getMinimalBitwidths (), PSE.getSE ()->getContext ());
8744
8600
VPlanTransforms::optimize (*Plan, *PSE.getSE ());
8745
8601
assert (VPlanVerifier::verifyPlanIsValid (*Plan) && " VPlan is invalid" );
8746
8602
VPlans.push_back (std::move (Plan));
0 commit comments