@@ -342,6 +342,33 @@ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
342
342
}
343
343
}
344
344
345
+ // / \returns True if in-tree use also needs extract. This refers to
346
+ // / possible scalar operand in vectorized instruction.
347
+ static bool InTreeUserNeedToExtract (Value *Scalar, Instruction *UserInst,
348
+ TargetLibraryInfo *TLI) {
349
+
350
+ unsigned Opcode = UserInst->getOpcode ();
351
+ switch (Opcode) {
352
+ case Instruction::Load: {
353
+ LoadInst *LI = cast<LoadInst>(UserInst);
354
+ return (LI->getPointerOperand () == Scalar);
355
+ }
356
+ case Instruction::Store: {
357
+ StoreInst *SI = cast<StoreInst>(UserInst);
358
+ return (SI->getPointerOperand () == Scalar);
359
+ }
360
+ case Instruction::Call: {
361
+ CallInst *CI = cast<CallInst>(UserInst);
362
+ Intrinsic::ID ID = getIntrinsicIDForCall (CI, TLI);
363
+ if (hasVectorInstrinsicScalarOpd (ID, 1 )) {
364
+ return (CI->getArgOperand (1 ) == Scalar);
365
+ }
366
+ }
367
+ default :
368
+ return false ;
369
+ }
370
+ }
371
+
345
372
/// Bottom Up SLP Vectorizer.
346
373
class BoUpSLP {
347
374
public:
@@ -864,18 +891,27 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
864
891
for (User *U : Scalar->users ()) {
865
892
DEBUG (dbgs () << " SLP: Checking user:" << *U << " .\n " );
866
893
867
- // Skip in-tree scalars that become vectors.
868
- if (ScalarToTreeEntry.count (U)) {
869
- DEBUG (dbgs () << " SLP: \t Internal user will be removed:" <<
870
- *U << " .\n " );
871
- int Idx = ScalarToTreeEntry[U]; (void ) Idx;
872
- assert (!VectorizableTree[Idx].NeedToGather && " Bad state" );
873
- continue ;
874
- }
875
894
Instruction *UserInst = dyn_cast<Instruction>(U);
876
895
if (!UserInst)
877
896
continue ;
878
897
898
+ // Skip in-tree scalars that become vectors
899
+ if (ScalarToTreeEntry.count (U)) {
900
+ int Idx = ScalarToTreeEntry[U];
901
+ TreeEntry *UseEntry = &VectorizableTree[Idx];
902
+ Value *UseScalar = UseEntry->Scalars [0 ];
903
+ // Some in-tree scalars will remain as scalar in vectorized
904
+ // instructions. If that is the case, the one in Lane 0 will
905
+ // be used.
906
+ if (UseScalar != U ||
907
+ !InTreeUserNeedToExtract (Scalar, UserInst, TLI)) {
908
+ DEBUG (dbgs () << " SLP: \t Internal user will be removed:" << *U
909
+ << " .\n " );
910
+ assert (!VectorizableTree[Idx].NeedToGather && " Bad state" );
911
+ continue ;
912
+ }
913
+ }
914
+
879
915
// Ignore users in the user ignore list.
880
916
if (std::find (UserIgnoreList.begin (), UserIgnoreList.end (), UserInst) !=
881
917
UserIgnoreList.end ())
@@ -1190,16 +1226,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
1190
1226
}
1191
1227
}
1192
1228
1193
- // We combine only GEPs with a single use.
1194
- for (unsigned j = 0 ; j < VL.size (); ++j) {
1195
- if (cast<Instruction>(VL[j])->getNumUses () > 1 ) {
1196
- DEBUG (dbgs () << " SLP: not-vectorizable GEP (multiple uses).\n " );
1197
- BS.cancelScheduling (VL);
1198
- newTreeEntry (VL, false );
1199
- return ;
1200
- }
1201
- }
1202
-
1203
1229
// We can't combine several GEPs into one vector if they operate on
1204
1230
// different types.
1205
1231
Type *Ty0 = cast<Instruction>(VL0)->getOperand (0 )->getType ();
@@ -2023,6 +2049,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
2023
2049
2024
2050
Value *VecPtr = Builder.CreateBitCast (LI->getPointerOperand (),
2025
2051
VecTy->getPointerTo (AS));
2052
+
2053
+ // The pointer operand uses an in-tree scalar so we add the new BitCast to
2054
+ // ExternalUses list to make sure that an extract will be generated in the
2055
+ // future.
2056
+ if (ScalarToTreeEntry.count (LI->getPointerOperand ()))
2057
+ ExternalUses.push_back (
2058
+ ExternalUser (LI->getPointerOperand (), cast<User>(VecPtr), 0 ));
2059
+
2026
2060
unsigned Alignment = LI->getAlignment ();
2027
2061
LI = Builder.CreateLoad (VecPtr);
2028
2062
if (!Alignment)
@@ -2047,6 +2081,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
2047
2081
Value *VecPtr = Builder.CreateBitCast (SI->getPointerOperand (),
2048
2082
VecTy->getPointerTo (AS));
2049
2083
StoreInst *S = Builder.CreateStore (VecValue, VecPtr);
2084
+
2085
+ // The pointer operand uses an in-tree scalar so we add the new BitCast to
2086
+ // ExternalUses list to make sure that an extract will be generated in the
2087
+ // future.
2088
+ if (ScalarToTreeEntry.count (SI->getPointerOperand ()))
2089
+ ExternalUses.push_back (
2090
+ ExternalUser (SI->getPointerOperand (), cast<User>(VecPtr), 0 ));
2091
+
2050
2092
if (!Alignment)
2051
2093
Alignment = DL->getABITypeAlignment (SI->getValueOperand ()->getType ());
2052
2094
S->setAlignment (Alignment);
@@ -2088,6 +2130,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
2088
2130
setInsertPointAfterBundle (E->Scalars );
2089
2131
Function *FI;
2090
2132
Intrinsic::ID IID = Intrinsic::not_intrinsic;
2133
+ Value *ScalarArg = nullptr ;
2091
2134
if (CI && (FI = CI->getCalledFunction ())) {
2092
2135
IID = (Intrinsic::ID) FI->getIntrinsicID ();
2093
2136
}
@@ -2098,6 +2141,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
2098
2141
// a scalar. This argument should not be vectorized.
2099
2142
if (hasVectorInstrinsicScalarOpd (IID, 1 ) && j == 1 ) {
2100
2143
CallInst *CEI = cast<CallInst>(E->Scalars [0 ]);
2144
+ ScalarArg = CEI->getArgOperand (j);
2101
2145
OpVecs.push_back (CEI->getArgOperand (j));
2102
2146
continue ;
2103
2147
}
@@ -2116,6 +2160,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
2116
2160
Type *Tys[] = { VectorType::get (CI->getType (), E->Scalars .size ()) };
2117
2161
Function *CF = Intrinsic::getDeclaration (M, ID, Tys);
2118
2162
Value *V = Builder.CreateCall (CF, OpVecs);
2163
+
2164
+ // The scalar argument uses an in-tree scalar so we add the new vectorized
2165
+ // call to ExternalUses list to make sure that an extract will be
2166
+ // generated in the future.
2167
+ if (ScalarArg && ScalarToTreeEntry.count (ScalarArg))
2168
+ ExternalUses.push_back (ExternalUser (ScalarArg, cast<User>(V), 0 ));
2169
+
2119
2170
E->VectorizedValue = V;
2120
2171
++NumVectorInstructions;
2121
2172
return V;
0 commit comments