Skip to content

Commit 7d01a8f

Browse files
committed
[SLP]Fix vector factor for repeated node for bv
When adding a node vector that is already used in the shuffle for a buildvector, the vector factor must be calculated from all of the vectors, not only from this single vector, to avoid an incorrect result. Also, the detection of reused entries needs to be made more stable to avoid a mismatch between cost estimation and codegen. Fixes llvm#123639
1 parent 5e4c34a commit 7d01a8f

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13205,9 +13205,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1320513205
VTE = *MIt;
1320613206
}
1320713207
}
13208-
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
13209-
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
13210-
continue;
13208+
if (none_of(TE->CombinedEntriesWithIndices,
13209+
[&](const auto &P) { return P.first == VTE->Idx; })) {
13210+
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
13211+
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
13212+
continue;
13213+
}
1321113214
VToTEs.insert(VTE);
1321213215
}
1321313216
if (VToTEs.empty())
@@ -14497,7 +14500,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1449714500
break;
1449814501
}
1449914502
}
14500-
int VF = getVF(V1);
14503+
unsigned VF = 0;
14504+
for (Value *V : InVectors)
14505+
VF = std::max(VF, getVF(V));
1450114506
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
1450214507
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
1450314508
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);

llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; YAML-NEXT: Function: test
99
; YAML-NEXT: Args:
1010
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
11-
; YAML-NEXT: - Cost: '-38'
11+
; YAML-NEXT: - Cost: '-41'
1212
; YAML-NEXT: - String: ' and with tree size '
1313
; YAML-NEXT: - TreeSize: '7'
1414
; YAML-NEXT: ...
@@ -17,7 +17,7 @@ define i64 @test() {
1717
; CHECK-LABEL: define i64 @test(
1818
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
1919
; CHECK-NEXT: [[ENTRY:.*:]]
20-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 6
20+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 1, i32 0>, i32 0, i32 6
2121
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8)
2222
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 24)
2323
; CHECK-NEXT: [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]]

0 commit comments

Comments
 (0)