@@ -8095,7 +8095,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
   BlockMaskCache[BB] = BlockMask;
 }
 
-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
                                   VFRange &Range) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8140,12 +8140,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
-                                              Reverse, I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 I->getDebugLoc());
 
   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenMemoryInstructionRecipe(
-      *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+  return new VPWidenStoreRecipe(*Store, Operands[0], Ptr, Mask, Consecutive,
+                                Reverse, I->getDebugLoc());
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8780,13 +8780,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
   for (const auto *IG : InterleaveGroups) {
-    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
-        RecipeBuilder.getRecipe(IG->getInsertPos()));
+    auto *Recipe =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
     SmallVector<VPValue *, 4> StoredValues;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
-        auto *StoreR =
-            cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+        auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
         StoredValues.push_back(StoreR->getStoredValue());
       }
 
@@ -9464,27 +9463,19 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
   return Call;
 }
 
-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
   // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
-  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+  auto *LI = cast<LoadInst>(&Ingredient);
 
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
-
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGatherScatter = !isConsecutive();
+  bool CreateGather = !isConsecutive();
 
   auto &Builder = State.Builder;
   InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
-  bool isMaskRequired = getMask();
-  if (isMaskRequired) {
+  bool IsMaskRequired = getMask();
+  if (IsMaskRequired) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse of
     // a null all-one mask is a null mask.
     for (unsigned Part = 0; Part < State.UF; ++Part) {
@@ -9495,56 +9486,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
     }
   }
 
-  // Handle Stores:
-  if (SI) {
-    State.setDebugLocFrom(getDebugLoc());
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      // TODO: split this into several classes for better design.
-      if (State.EVL) {
-        assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
-                                "explicit vector length.");
-        assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
-                   VPInstruction::ExplicitVectorLength &&
-               "EVL must be VPInstruction::ExplicitVectorLength.");
-        Value *EVL = State.get(State.EVL, VPIteration(0, 0));
-        // If EVL is not nullptr, then EVL must be a valid value set during plan
-        // creation, possibly default value = whole vector register length. EVL
-        // is created only if TTI prefers predicated vectorization, thus if EVL
-        // is not nullptr it also implies preference for predicated
-        // vectorization.
-        // FIXME: Support reverse store after vp_reverse is added.
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        NewSI = lowerStoreUsingVectorIntrinsics(
-            Builder, State.get(getAddr(), Part, !CreateGatherScatter),
-            StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
-      } else if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(getAddr(), Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (isReverse()) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      State.addMetadata(NewSI, SI);
-    }
-    return;
-  }
-
   // Handle loads.
   assert(LI && "Must have a load instruction");
   State.setDebugLocFrom(getDebugLoc());
@@ -9564,19 +9505,19 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       // is not nullptr it also implies preference for predicated
      // vectorization.
       // FIXME: Support reverse loading after vp_reverse is added.
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
       NewLI = lowerLoadUsingVectorIntrinsics(
-          Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
-          CreateGatherScatter, MaskPart, EVL, Alignment);
-    } else if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+          Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+          CreateGather, MaskPart, EVL, Alignment);
+    } else if (CreateGather) {
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
       Value *VectorGep = State.get(getAddr(), Part);
       NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
                                          nullptr, "wide.masked.gather");
       State.addMetadata(NewLI, LI);
     } else {
       auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-      if (isMaskRequired)
+      if (IsMaskRequired)
         NewLI = Builder.CreateMaskedLoad(
             DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
             PoisonValue::get(DataTy), "wide.masked.load");
@@ -9590,7 +9531,75 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
     }
 
-    State.set(getVPSingleValue(), NewLI, Part);
+    State.set(this, NewLI, Part);
+  }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+  bool IsMaskRequired = getMask();
+  if (IsMaskRequired) {
+    // Mask reversal is only needed for non-all-one (null) masks, as reverse of
+    // a null all-one mask is a null mask.
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      Value *Mask = State.get(getMask(), Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+      BlockInMaskParts[Part] = Mask;
+    }
+  }
+
+  State.setDebugLocFrom(getDebugLoc());
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *StoredVal = State.get(StoredValue, Part);
+    // TODO: split this into several classes for better design.
+    if (State.EVL) {
+      assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                              "explicit vector length.");
+      assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                 VPInstruction::ExplicitVectorLength &&
+             "EVL must be VPInstruction::ExplicitVectorLength.");
+      Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+      // If EVL is not nullptr, then EVL must be a valid value set during plan
+      // creation, possibly default value = whole vector register length. EVL
+      // is created only if TTI prefers predicated vectorization, thus if EVL
+      // is not nullptr it also implies preference for predicated
+      // vectorization.
+      // FIXME: Support reverse store after vp_reverse is added.
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      NewSI = lowerStoreUsingVectorIntrinsics(
+          Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+          CreateScatter, MaskPart, EVL, Alignment);
+    } else if (CreateScatter) {
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+                                          MaskPart);
+    } else {
+      if (isReverse()) {
+        // If we store to reverse consecutive memory locations, then we need
+        // to reverse the order of elements in the stored value.
+        StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+        // We don't want to update the value in the map as it might be used in
+        // another expression. So don't call resetVectorValue(StoredVal).
+      }
+      auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+      if (IsMaskRequired)
+        NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+                                          BlockInMaskParts[Part]);
+      else
+        NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+    }
+    State.addMetadata(NewSI, SI);
   }
 }
 