@@ -12539,7 +12539,9 @@ Value *BoUpSLP::vectorizeTree(
12539
12539
DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
12540
12540
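// Maps extract Scalar to the corresponding extractelement instruction and the
12541
12541
// instruction that casts it to the scalar type (the extractelement itself if
// no cast is needed), per basic block. Only one extractelement per block should
// be emitted.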
12542
- DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
12542
+ DenseMap<Value *,
12543
+ DenseMap<BasicBlock *, std::pair<Instruction *, Instruction *>>>
12544
+ ScalarToEEs;
12543
12545
SmallDenseSet<Value *, 4> UsedInserts;
12544
12546
DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
12545
12547
SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
@@ -12568,18 +12570,23 @@ Value *BoUpSLP::vectorizeTree(
12568
12570
auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
12569
12571
if (Scalar->getType() != Vec->getType()) {
12570
12572
Value *Ex = nullptr;
12573
+ Value *ExV = nullptr;
12571
12574
auto It = ScalarToEEs.find(Scalar);
12572
12575
if (It != ScalarToEEs.end()) {
12573
12576
// No need to emit many extracts, just move the only one in the
12574
12577
// current block.
12575
12578
auto EEIt = It->second.find(Builder.GetInsertBlock());
12576
12579
if (EEIt != It->second.end()) {
12577
- Instruction *I = EEIt->second;
12580
+ Instruction *I = EEIt->second.first ;
12578
12581
if (Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
12579
- Builder.GetInsertPoint()->comesBefore(I))
12582
+ Builder.GetInsertPoint()->comesBefore(I)) {
12580
12583
I->moveBefore(*Builder.GetInsertPoint()->getParent(),
12581
12584
Builder.GetInsertPoint());
12585
+ if (auto *CI = EEIt->second.second)
12586
+ CI->moveAfter(I);
12587
+ }
12582
12588
Ex = I;
12589
+ ExV = EEIt->second.second ? EEIt->second.second : Ex;
12583
12590
}
12584
12591
}
12585
12592
if (!Ex) {
@@ -12592,21 +12599,24 @@ Value *BoUpSLP::vectorizeTree(
12592
12599
} else {
12593
12600
Ex = Builder.CreateExtractElement(Vec, Lane);
12594
12601
}
12602
+ // If necessary, sign-extend or zero-extend ScalarRoot
12603
+ // to the larger type.
12604
+ ExV = Ex;
12605
+ if (Scalar->getType() != Ex->getType())
12606
+ ExV = Builder.CreateIntCast(Ex, Scalar->getType(),
12607
+ MinBWs.find(E)->second.second);
12595
12608
if (auto *I = dyn_cast<Instruction>(Ex))
12596
- ScalarToEEs[Scalar].try_emplace(Builder.GetInsertBlock(), I);
12609
+ ScalarToEEs[Scalar].try_emplace(
12610
+ Builder.GetInsertBlock(),
12611
+ std::make_pair(I, cast<Instruction>(ExV)));
12597
12612
}
12598
12613
// The then branch of the previous if may produce constants, since 0
12599
12614
// operand might be a constant.
12600
12615
if (auto *ExI = dyn_cast<Instruction>(Ex)) {
12601
12616
GatherShuffleExtractSeq.insert(ExI);
12602
12617
CSEBlocks.insert(ExI->getParent());
12603
12618
}
12604
- // If necessary, sign-extend or zero-extend ScalarRoot
12605
- // to the larger type.
12606
- if (Scalar->getType() != Ex->getType())
12607
- return Builder.CreateIntCast(Ex, Scalar->getType(),
12608
- MinBWs.find(E)->second.second);
12609
- return Ex;
12619
+ return ExV;
12610
12620
}
12611
12621
assert(isa<FixedVectorType>(Scalar->getType()) &&
12612
12622
isa<InsertElementInst>(Scalar) &&
0 commit comments