@@ -1143,6 +1143,7 @@ class LoopVectorizationCostModel {
     CM_Widen_Reverse, // For consecutive accesses with stride -1.
     CM_Interleave,
     CM_GatherScatter,
+    CM_Strided,
     CM_Scalarize,
     CM_VectorCall,
     CM_IntrinsicCall
@@ -6160,6 +6161,17 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
                "Expected consecutive stride.");
         InstWidening Decision =
             ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
+        // Consider using strided load/store for consecutive reverse accesses to
+        // achieve more efficient memory operations.
+        if (ConsecutiveStride == -1) {
+          const InstructionCost StridedLoadStoreCost =
+              isLegalStridedLoadStore(&I, VF) ? getStridedLoadStoreCost(&I, VF)
+                                              : InstructionCost::getInvalid();
+          if (StridedLoadStoreCost < Cost) {
+            Decision = CM_Strided;
+            Cost = StridedLoadStoreCost;
+          }
+        }
         setWideningDecision(&I, VF, Decision, Cost);
         continue;
       }
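
The hunk above only fires for consecutive reverse accesses (stride -1): when the target reports that a strided load/store is legal for the instruction and VF, and its cost beats the reverse-widened form (a unit-stride memory op followed by a vector reverse), the widening decision flips to CM_Strided. A minimal stand-alone sketch of the kind of loop this targets, using hypothetical names that are not taken from the patch:

#include <cstddef>

// Illustrative only: the load of src is consecutive with stride -1 in
// elements. A reverse-widened vector load reads the block forward and then
// reverses the lanes; a strided load with a negative byte stride can produce
// the lanes directly, which is what the cost comparison above is weighing.
void reverse_copy(int *dst, const int *src, size_t n) {
  for (size_t i = 0; i < n; ++i)
    dst[i] = src[n - 1 - i];
}
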
@@ -6805,6 +6817,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
         return TTI::CastContextHint::Normal;
 
       switch (getWideningDecision(I, VF)) {
+      // TODO: New CastContextHint for strided accesses.
+      case LoopVectorizationCostModel::CM_Strided:
       case LoopVectorizationCostModel::CM_GatherScatter:
         return TTI::CastContextHint::GatherScatter;
       case LoopVectorizationCostModel::CM_Interleave:
@@ -8356,6 +8370,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
   bool Consecutive =
       Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+  bool Strided = Decision == LoopVectorizationCostModel::CM_Strided;
 
   VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
   if (Consecutive) {
@@ -8382,12 +8397,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, false,
-                                 I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 Strided, I->getDebugLoc());
 
   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
-                                Reverse, false, I->getDebugLoc());
+                                Reverse, Strided, I->getDebugLoc());
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
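
For reference, a small stand-alone sketch (an illustration with assumed values, not part of the patch) of why a reverse widened access and a strided access with a negative stride are interchangeable here: for a given vectorization factor they cover the same per-lane addresses in the same lane order, which is what lets the cost model pick whichever form the target handles more cheaply.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const int VF = 4;           // hypothetical vectorization factor
  const intptr_t EltSize = 4; // e.g. i32 elements
  const intptr_t Lane0 = 4000; // assumed address of the lane-0 (highest) element

  // Reverse widening: load the consecutive block in ascending address order,
  // then reverse the lanes.
  std::vector<intptr_t> ReverseWiden;
  for (int L = VF - 1; L >= 0; --L)
    ReverseWiden.push_back(Lane0 - L * EltSize);
  std::reverse(ReverseWiden.begin(), ReverseWiden.end());

  // Strided access: start at the lane-0 address and step by -EltSize per lane.
  std::vector<intptr_t> Strided;
  for (int L = 0; L < VF; ++L)
    Strided.push_back(Lane0 - L * EltSize);

  assert(ReverseWiden == Strided); // same bytes, same lane order
  return 0;
}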