@@ -545,11 +545,6 @@ class InnerLoopVectorizer {
   // Return true if any runtime check is added.
   bool areSafetyChecksAdded() { return AddedSafetyChecks; }
 
-  /// A type for vectorized values in the new loop. Each value from the
-  /// original loop, when vectorized, is represented by UF vector values in the
-  /// new unrolled loop, where UF is the unroll factor.
-  using VectorParts = SmallVector<Value *, 2>;
-
   /// A helper function to scalarize a single Instruction in the innermost loop.
   /// Generates a sequence of scalar instances for each lane between \p MinLane
   /// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
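The VectorParts alias removed above loses its last user later in this patch: the rewritten execute() hunks below stop staging one mask value per unroll part in a VectorParts array and instead query the transform state on demand. A minimal sketch of the two patterns, assuming the LLVM-internal VPTransformState and VPValue types (so it compiles only inside the Vectorize sources); the demo* function names are hypothetical:

#include "VPlan.h" // LLVM-internal header declaring VPTransformState/VPValue
using namespace llvm;

// Old pattern: pre-size a VectorParts (SmallVector<Value *, 2>) with one
// slot per unroll part, fill it up front, and index into it later.
static void demoStagedMasks(VPTransformState &State, VPValue *MaskDef) {
  SmallVector<Value *, 2> MaskParts(State.UF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    MaskParts[Part] = State.get(MaskDef, Part);
  // ... MaskParts[Part] consulted in a separate loop ...
}

// New pattern: fetch each part where it is consumed; with no staging
// container left, the VectorParts alias has no remaining use.
static void demoOnDemandMasks(VPTransformState &State, VPValue *MaskDef) {
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Mask = MaskDef ? State.get(MaskDef, Part) : nullptr;
    (void)Mask; // consumed immediately in the real recipes
  }
}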
@@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
   BlockMaskCache[BB] = BlockMask;
 }
 
-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
                                   VFRange &Range) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
-                                              Reverse, I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 I->getDebugLoc());
 
   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenMemoryInstructionRecipe(
-      *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+  return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
+                                Reverse, I->getDebugLoc());
 }
 
 /// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
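For orientation, the hunk above changes tryToWidenMemory to return a common base type and to construct one of two new subclasses. A rough sketch of the assumed hierarchy, inferred from the constructor calls in this diff (the real declarations live in VPlan.h and carry more state than shown here):

// Sketch only, not the verbatim VPlan.h declarations.
class VPWidenMemoryRecipe /* : public VPRecipeBase */ {
protected:
  Instruction &Ingredient; // the scalar memory instruction being widened
  bool Consecutive;        // unit-stride access?
  bool Reverse;            // consecutive, but with decreasing addresses?
public:
  VPValue *getAddr() const;  // address operand
  VPValue *getMask() const;  // nullptr when the access is unconditional
  bool isConsecutive() const { return Consecutive; }
  bool isReverse() const { return Reverse; }
};

class VPWidenLoadRecipe : public VPWidenMemoryRecipe {
public:
  VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                    bool Consecutive, bool Reverse, DebugLoc DL);
  void execute(VPTransformState &State); // rewritten further down
};

class VPWidenStoreRecipe : public VPWidenMemoryRecipe {
public:
  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue,
                     VPValue *Mask, bool Consecutive, bool Reverse,
                     DebugLoc DL);
  VPValue *getStoredValue() const; // used by the interleave-group hunk below
  void execute(VPTransformState &State);
};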
@@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
   for (const auto *IG : InterleaveGroups) {
-    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
-        RecipeBuilder.getRecipe(IG->getInsertPos()));
+    auto *Recipe =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
     SmallVector<VPValue *, 4> StoredValues;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
-        auto *StoreR =
-            cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+        auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
         StoredValues.push_back(StoreR->getStoredValue());
       }
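For background on the hunk above: an InterleaveGroup bundles member loads and stores at fixed offsets from an insert position, and the loop collects the stored operand of every store member. The same walk can be expressed at the IR level with the real InterleaveGroup API from llvm/Analysis/VectorUtils.h; the helper name here is illustrative:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Collect the scalar stored operands of all store members of a group,
// mirroring the VPlan-level loop in the hunk above.
static void collectStoredOperands(const InterleaveGroup<Instruction> &IG,
                                  SmallVectorImpl<Value *> &Stored) {
  for (unsigned i = 0; i < IG.getFactor(); ++i)
    if (auto *SI = dyn_cast_or_null<StoreInst>(IG.getMember(i)))
      Stored.push_back(SI->getValueOperand());
}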
@@ -9368,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
   return Call;
 }
 
-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
-  // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
-  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
+  auto *LI = cast<LoadInst>(&Ingredient);
 
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
-
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGatherScatter = !isConsecutive();
+  bool CreateGather = !isConsecutive();
 
   auto &Builder = State.Builder;
-  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
-  bool isMaskRequired = getMask();
-  if (isMaskRequired) {
-    // Mask reversal is only needed for non-all-one (null) masks, as reverse of
-    // a null all-one mask is a null mask.
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Value *Mask = State.get(getMask(), Part);
+  State.setDebugLocFrom(getDebugLoc());
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Value *NewLI;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
       if (isReverse())
         Mask = Builder.CreateVectorReverse(Mask, "reverse");
-      BlockInMaskParts[Part] = Mask;
-    }
-  }
-
-  // Handle Stores:
-  if (SI) {
-    State.setDebugLocFrom(getDebugLoc());
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      // TODO: split this into several classes for better design.
-      if (State.EVL) {
-        assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
-                                "explicit vector length.");
-        assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
-                   VPInstruction::ExplicitVectorLength &&
-               "EVL must be VPInstruction::ExplicitVectorLength.");
-        Value *EVL = State.get(State.EVL, VPIteration(0, 0));
-        // If EVL is not nullptr, then EVL must be a valid value set during plan
-        // creation, possibly default value = whole vector register length. EVL
-        // is created only if TTI prefers predicated vectorization, thus if EVL
-        // is not nullptr it also implies preference for predicated
-        // vectorization.
-        // FIXME: Support reverse store after vp_reverse is added.
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        NewSI = lowerStoreUsingVectorIntrinsics(
-            Builder, State.get(getAddr(), Part, !CreateGatherScatter),
-            StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
-      } else if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(getAddr(), Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (isReverse()) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      State.addMetadata(NewSI, SI);
     }
-    return;
-  }
 
-  // Handle loads.
-  assert(LI && "Must have a load instruction");
-  State.setDebugLocFrom(getDebugLoc());
-  for (unsigned Part = 0; Part < State.UF; ++Part) {
-    Value *NewLI;
     // TODO: split this into several classes for better design.
     if (State.EVL) {
       assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
@@ -9468,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
      // is not nullptr it also implies preference for predicated
      // vectorization.
      // FIXME: Support reverse loading after vp_reverse is added.
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
       NewLI = lowerLoadUsingVectorIntrinsics(
-          Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
-          CreateGatherScatter, MaskPart, EVL, Alignment);
-    } else if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+          Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+          CreateGather, Mask, EVL, Alignment);
+    } else if (CreateGather) {
       Value *VectorGep = State.get(getAddr(), Part);
-      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask,
                                          nullptr, "wide.masked.gather");
       State.addMetadata(NewLI, LI);
     } else {
       auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-      if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(
-            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
-            PoisonValue::get(DataTy), "wide.masked.load");
+      if (Mask)
+        NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask,
+                                         PoisonValue::get(DataTy),
+                                         "wide.masked.load");
       else
         NewLI =
             Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
@@ -9494,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
     }
 
-    State.set(getVPSingleValue(), NewLI, Part);
+    State.set(this, NewLI, Part);
+  }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredVPValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+    }
+
+    Value *StoredVal = State.get(StoredVPValue, Part);
+    if (isReverse()) {
+      assert(!State.EVL && "reversing not yet implemented with EVL");
+      // If we store to reverse consecutive memory locations, then we need
+      // to reverse the order of elements in the stored value.
+      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+      // We don't want to update the value in the map as it might be used in
+      // another expression. So don't call resetVectorValue(StoredVal).
+    }
+    // TODO: split this into several classes for better design.
+    if (State.EVL) {
+      assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                              "explicit vector length.");
+      assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                 VPInstruction::ExplicitVectorLength &&
+             "EVL must be VPInstruction::ExplicitVectorLength.");
+      Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+      // If EVL is not nullptr, then EVL must be a valid value set during plan
+      // creation, possibly default value = whole vector register length. EVL
+      // is created only if TTI prefers predicated vectorization, thus if EVL
+      // is not nullptr it also implies preference for predicated
+      // vectorization.
+      // FIXME: Support reverse store after vp_reverse is added.
+      NewSI = lowerStoreUsingVectorIntrinsics(
+          Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+          CreateScatter, Mask, EVL, Alignment);
+    } else if (CreateScatter) {
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewSI =
+          Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask);
+    } else {
+      auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+      if (Mask)
+        NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask);
+      else
+        NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+    }
+    State.addMetadata(NewSI, SI);
   }
 }
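As a sanity check on the control flow above: for a consecutive access outside the EVL path, both execute methods boil down to the IRBuilder pattern below, which uses only public LLVM APIs (the helper names are hypothetical). Note that reversal applies to the mask and to the data vector; the pointer adjustment for reverse accesses happens earlier, when the recipe's address is created (the VectorPtr logic in tryToWidenMemory).

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Reverse consecutive load: reverse the mask, issue the wide load, then
// reverse the loaded vector (mirrors VPWidenLoadRecipe::execute).
static Value *makeReverseMaskedLoad(IRBuilder<> &B, Type *ScalarTy,
                                    ElementCount VF, Value *Ptr, Value *Mask,
                                    Align Alignment) {
  auto *DataTy = VectorType::get(ScalarTy, VF);
  if (Mask) // reverse of an all-one (null) mask would still be all-one
    Mask = B.CreateVectorReverse(Mask, "reverse");
  Value *Load =
      Mask ? B.CreateMaskedLoad(DataTy, Ptr, Alignment, Mask,
                                PoisonValue::get(DataTy), "wide.masked.load")
           : B.CreateAlignedLoad(DataTy, Ptr, Alignment, "wide.load");
  return B.CreateVectorReverse(Load, "reverse");
}

// Reverse consecutive store: reverse both the mask and the stored value
// before writing (mirrors VPWidenStoreRecipe::execute).
static void makeReverseMaskedStore(IRBuilder<> &B, Value *StoredVal,
                                   Value *Ptr, Value *Mask, Align Alignment) {
  if (Mask)
    Mask = B.CreateVectorReverse(Mask, "reverse");
  StoredVal = B.CreateVectorReverse(StoredVal, "reverse");
  if (Mask)
    B.CreateMaskedStore(StoredVal, Ptr, Alignment, Mask);
  else
    B.CreateAlignedStore(StoredVal, Ptr, Alignment);
}

The EVL path is deliberately left out of this sketch: lowerLoadUsingVectorIntrinsics and lowerStoreUsingVectorIntrinsics are static helpers local to LoopVectorize.cpp, and reverse accesses are not supported there yet (see the FIXMEs above).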