Commit f06e332

Revert "[SLP]Improve/fix reordering of the gathered graph nodes."

This reverts commit 64d1617 to fix test instability.

1 parent: 9830518

5 files changed, 89 additions and 177 deletions

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 39 additions & 133 deletions
@@ -766,12 +766,6 @@ class BoUpSLP {
   /// Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
 
-  /// Checks if the specified gather tree entry \p TE can be represented as a
-  /// shuffled vector entry + (possibly) permutation with other gathers. It
-  /// implements the checks only for possibly ordered scalars (Loads,
-  /// ExtractElement, ExtractValue), which can be part of the graph.
-  Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
-
   /// Reorders the current graph to the most profitable order starting from the
   /// root node to the leaf nodes. The best order is chosen only from the nodes
   /// of the same size (vectorization factor). Smaller nodes are considered
@@ -2676,72 +2670,6 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
   fixupOrderingIndices(Order);
 }
 
-Optional<BoUpSLP::OrdersType>
-BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
-  assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
-  unsigned NumScalars = TE.Scalars.size();
-  OrdersType CurrentOrder(NumScalars, NumScalars);
-  SmallVector<int> Positions;
-  SmallBitVector UsedPositions(NumScalars);
-  const TreeEntry *STE = nullptr;
-  // Try to find all gathered scalars that get vectorized in another
-  // vectorized node. Here we can have only a single tree vector node to
-  // correctly identify the order of the gathered scalars.
-  for (unsigned I = 0; I < NumScalars; ++I) {
-    Value *V = TE.Scalars[I];
-    if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
-      continue;
-    if (const auto *LocalSTE = getTreeEntry(V)) {
-      if (!STE)
-        STE = LocalSTE;
-      else if (STE != LocalSTE)
-        // Take the order only from the single vector node.
-        return None;
-      unsigned Lane =
-          std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
-      if (Lane >= NumScalars)
-        return None;
-      if (CurrentOrder[Lane] != NumScalars) {
-        if (Lane != I)
-          continue;
-        UsedPositions.reset(CurrentOrder[Lane]);
-      }
-      // The partial identity (where only some elements of the gather node are
-      // in the identity order) is good.
-      CurrentOrder[Lane] = I;
-      UsedPositions.set(I);
-    }
-  }
-  // Need to keep the order if we have a vector entry and at least 2 scalars or
-  // the vectorized entry has just 2 scalars.
-  if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
-    auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
-      for (unsigned I = 0; I < NumScalars; ++I)
-        if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
-          return false;
-      return true;
-    };
-    if (IsIdentityOrder(CurrentOrder)) {
-      CurrentOrder.clear();
-      return CurrentOrder;
-    }
-    auto *It = CurrentOrder.begin();
-    for (unsigned I = 0; I < NumScalars;) {
-      if (UsedPositions.test(I)) {
-        ++I;
-        continue;
-      }
-      if (*It == NumScalars) {
-        *It = I;
-        ++I;
-      }
-      ++It;
-    }
-    return CurrentOrder;
-  }
-  return None;
-}
-
 void BoUpSLP::reorderTopToBottom() {
   // Maps VF to the graph nodes.
   DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
@@ -2761,29 +2689,19 @@ void BoUpSLP::reorderTopToBottom() {
             InsertElementInst>(TE->getMainOp()) &&
         !TE->isAltShuffle()) {
       VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
-      return;
-    }
-    if (TE->State == TreeEntry::NeedToGather) {
-      if (TE->getOpcode() == Instruction::ExtractElement &&
-          !TE->isAltShuffle() &&
-          isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
-                                   ->getVectorOperandType()) &&
-          allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
-        // Check that gather of extractelements can be represented as
-        // just a shuffle of a single vector.
-        OrdersType CurrentOrder;
-        bool Reuse =
-            canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
-        if (Reuse || !CurrentOrder.empty()) {
-          VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
-          GathersToOrders.try_emplace(TE.get(), CurrentOrder);
-          return;
-        }
-      }
-      if (Optional<OrdersType> CurrentOrder =
-              findReusedOrderedScalars(*TE.get())) {
+    } else if (TE->State == TreeEntry::NeedToGather &&
+               TE->getOpcode() == Instruction::ExtractElement &&
+               !TE->isAltShuffle() &&
+               isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+                                        ->getVectorOperandType()) &&
+               allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+      // Check that gather of extractelements can be represented as
+      // just a shuffle of a single vector.
+      OrdersType CurrentOrder;
+      bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+      if (Reuse || !CurrentOrder.empty()) {
         VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
-        GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+        GathersToOrders.try_emplace(TE.get(), CurrentOrder);
       }
     }
   });
@@ -2835,7 +2753,7 @@ void BoUpSLP::reorderTopToBottom() {
     // Choose the most used order.
     ArrayRef<unsigned> BestOrder = OrdersUses.begin()->first;
     unsigned Cnt = OrdersUses.begin()->second;
-    for (const auto &Pair : drop_begin(OrdersUses)) {
+    for (const auto &Pair : llvm::drop_begin(OrdersUses)) {
       if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
         BestOrder = Pair.first;
         Cnt = Pair.second;
@@ -2912,8 +2830,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
   for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
                               &NonVectorized](
                                  const std::unique_ptr<TreeEntry> &TE) {
-    if (TE->State != TreeEntry::Vectorize)
-      NonVectorized.push_back(TE.get());
     // No need to reorder if need to shuffle reuses, still need to shuffle the
     // node.
     if (!TE->ReuseShuffleIndices.empty())
@@ -2922,37 +2838,28 @@
         isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
         !TE->isAltShuffle()) {
       OrderedEntries.insert(TE.get());
-      return;
-    }
-    if (TE->State == TreeEntry::NeedToGather) {
-      if (TE->getOpcode() == Instruction::ExtractElement &&
-          !TE->isAltShuffle() &&
-          isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
-                                   ->getVectorOperandType()) &&
-          allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
-        // Check that gather of extractelements can be represented as
-        // just a shuffle of a single vector with a single user only.
-        OrdersType CurrentOrder;
-        bool Reuse =
-            canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
-        if ((Reuse || !CurrentOrder.empty()) &&
-            !any_of(VectorizableTree,
-                    [&TE](const std::unique_ptr<TreeEntry> &Entry) {
-                      return Entry->State == TreeEntry::NeedToGather &&
-                             Entry.get() != TE.get() &&
-                             Entry->isSame(TE->Scalars);
-                    })) {
-          OrderedEntries.insert(TE.get());
-          GathersToOrders.try_emplace(TE.get(), CurrentOrder);
-          return;
-        }
-      }
-      if (Optional<OrdersType> CurrentOrder =
-              findReusedOrderedScalars(*TE.get())) {
+    } else if (TE->State == TreeEntry::NeedToGather &&
+               TE->getOpcode() == Instruction::ExtractElement &&
+               !TE->isAltShuffle() &&
+               isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+                                        ->getVectorOperandType()) &&
+               allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+      // Check that gather of extractelements can be represented as
+      // just a shuffle of a single vector with a single user only.
+      OrdersType CurrentOrder;
+      bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+      if ((Reuse || !CurrentOrder.empty()) &&
+          !any_of(
+              VectorizableTree, [&TE](const std::unique_ptr<TreeEntry> &Entry) {
+                return Entry->State == TreeEntry::NeedToGather &&
+                       Entry.get() != TE.get() && Entry->isSame(TE->Scalars);
+              })) {
         OrderedEntries.insert(TE.get());
-        GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+        GathersToOrders.try_emplace(TE.get(), CurrentOrder);
       }
     }
+    if (TE->State != TreeEntry::Vectorize)
+      NonVectorized.push_back(TE.get());
   });
 
   // Checks if the operands of the users are reorderable and have only single
@@ -3004,7 +2911,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
     for (TreeEntry *TE : OrderedEntries) {
       if (!(TE->State == TreeEntry::Vectorize ||
            (TE->State == TreeEntry::NeedToGather &&
-             GathersToOrders.count(TE))) ||
+             TE->getOpcode() == Instruction::ExtractElement)) ||
          TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
          !all_of(drop_begin(TE->UserTreeIndices),
                  [TE](const EdgeInfo &EI) {
@@ -3082,7 +2989,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
     // Choose the best order.
     ArrayRef<unsigned> BestOrder = OrdersUses.begin()->first;
     unsigned Cnt = OrdersUses.begin()->second;
-    for (const auto &Pair : drop_begin(OrdersUses)) {
+    for (const auto &Pair : llvm::drop_begin(OrdersUses)) {
      if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
        BestOrder = Pair.first;
        Cnt = Pair.second;
@@ -3125,13 +3032,10 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
      }
      // For gathers just need to reorder its scalars.
      for (TreeEntry *Gather : GatherOps) {
+        if (!Gather->ReuseShuffleIndices.empty())
+          continue;
        assert(Gather->ReorderIndices.empty() &&
               "Unexpected reordering of gathers.");
-        if (!Gather->ReuseShuffleIndices.empty()) {
-          // Just reorder reuses indices.
-          reorderReuses(Gather->ReuseShuffleIndices, Mask);
-          continue;
-        }
        reorderScalars(Gather->Scalars, Mask);
        OrderedEntries.remove(Gather);
      }
@@ -7465,7 +7369,9 @@ struct SLPVectorizer : public FunctionPass {
     initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
   }
 
-  bool doInitialization(Module &M) override { return false; }
+  bool doInitialization(Module &M) override {
+    return false;
+  }
 
   bool runOnFunction(Function &F) override {
     if (skipFunction(F))
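
Aside: the substance of this revert is the removal of findReusedOrderedScalars above. As a reading aid, here is a minimal, self-contained sketch of the lane-matching idea that function implemented. Plain ints stand in for llvm::Value*, std::optional for llvm::Optional, and the name findReusedOrder plus every simplification (no opcode filtering, a plain "at least two matches" profitability check) are ours, not LLVM's:

#include <algorithm>
#include <cstddef>
#include <optional>
#include <vector>

using Order = std::vector<std::size_t>;

// Sketch: given the scalars of a gather node and the scalars of the single
// vectorized node they may come from, recover the permutation mapping
// vectorized lanes to gather positions. N marks an unmatched lane.
std::optional<Order> findReusedOrder(const std::vector<int> &Gather,
                                     const std::vector<int> &Vectorized) {
  const std::size_t N = Gather.size();
  Order CurrentOrder(N, N);
  std::vector<bool> UsedPositions(N, false);
  std::size_t Matched = 0;
  for (std::size_t I = 0; I < N; ++I) {
    auto It = std::find(Vectorized.begin(), Vectorized.end(), Gather[I]);
    if (It == Vectorized.end())
      continue; // This scalar is not part of the vectorized node.
    std::size_t Lane = static_cast<std::size_t>(It - Vectorized.begin());
    if (Lane >= N)
      return std::nullopt; // Lane falls outside the gather width.
    if (CurrentOrder[Lane] != N) { // Lane already claimed by an earlier match.
      if (Lane != I)
        continue; // Keep the earlier match unless this one is the identity.
      UsedPositions[CurrentOrder[Lane]] = false;
      --Matched;
    }
    CurrentOrder[Lane] = I;
    UsedPositions[I] = true;
    ++Matched;
  }
  // As in the reverted code, reuse only pays off with enough matches.
  if (Matched < 2)
    return std::nullopt;
  // Fill unmatched lanes with the still-unused positions, preserving the
  // partial identity that the matching produced.
  auto It = CurrentOrder.begin();
  for (std::size_t I = 0; I < N;) {
    if (UsedPositions[I]) {
      ++I;
      continue;
    }
    if (*It == N) {
      *It = I;
      ++I;
    }
    ++It;
  }
  return CurrentOrder;
}

For example, findReusedOrder({12, 10, 13, 11}, {10, 11, 12, 13}) matches every scalar and returns the order {1, 3, 0, 2}; the reverted LLVM version additionally restricted matching to loads and extracts and treated a (partial) identity order as "no reordering needed".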

llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll

Lines changed: 15 additions & 13 deletions
@@ -32,19 +32,21 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 
 define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
 ; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
+; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
+; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
+; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
+; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
+; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
+; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
+; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
+; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
+; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
+; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
+; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
 ; CHECK-NEXT: ret void
 ;
   %a.0 = getelementptr i64, i64* %a, i64 0

llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll

Lines changed: 15 additions & 13 deletions
@@ -32,19 +32,21 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 
 define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
 ; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
+; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
+; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
+; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
+; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
+; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
+; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
+; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
+; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
+; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
+; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
+; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
 ; CHECK-NEXT: ret void
 ;
   %a.0 = getelementptr i64, i64* %a, i64 0

llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll

Lines changed: 7 additions & 8 deletions
@@ -69,23 +69,22 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3
-; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]]
 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP12]], align 4
+; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4
 ; CHECK-NEXT: ret i32 undef
 ;
   %in.addr = getelementptr inbounds i32, i32* %in, i64 0
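
An aside on reading the masks in this diff: for a shufflevector whose second operand is poison and whose indices stay in range, output lane I simply takes input lane Mask[I], so the new mask <i32 1, i32 3, i32 2, i32 0> turns <a, b, c, d> into <b, d, c, a>. A tiny generic C++ analogue of that semantics (applyShuffleMask is our name for illustration, not an LLVM API):

#include <array>
#include <cstddef>

// Single-source shufflevector semantics: Out[I] = In[Mask[I]].
// With Mask = {1, 3, 2, 0}, {a, b, c, d} becomes {b, d, c, a}.
template <std::size_t N>
std::array<int, N> applyShuffleMask(const std::array<int, N> &In,
                                    const std::array<std::size_t, N> &Mask) {
  std::array<int, N> Out{};
  for (std::size_t I = 0; I < N; ++I)
    Out[I] = In[Mask[I]];
  return Out;
}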
