Skip to content

Commit 1e3536e

Browse files
committed
[SLP]Fix PR108620: Need to check, if the reduced value was transformed
Before trying to include the scalar into the list of ExternallyUsedValues, need to check, if it was transformed in previous iteration and use the transformed value, not the original one, to avoid compiler crash when building external uses. Fixes #108620
1 parent 03618ce commit 1e3536e

File tree

2 files changed

+96
-19
lines changed

2 files changed

+96
-19
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,11 +1283,8 @@ class BoUpSLP {
12831283
/// Vectorize the tree but with the list of externally used values \p
12841284
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
12851285
/// generated extractvalue instructions.
1286-
/// \param ReplacedExternals containd list of replaced external values
1287-
/// {scalar, replace} after emitting extractelement for external uses.
12881286
Value *
12891287
vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
1290-
SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
12911288
Instruction *ReductionRoot = nullptr);
12921289

12931290
/// \returns the cost incurred by unwanted spills and fills, caused by
@@ -14222,14 +14219,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1422214219

1422314220
Value *BoUpSLP::vectorizeTree() {
1422414221
ExtraValueToDebugLocsMap ExternallyUsedValues;
14225-
SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
14226-
return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
14222+
return vectorizeTree(ExternallyUsedValues);
1422714223
}
1422814224

14229-
Value *BoUpSLP::vectorizeTree(
14230-
const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14231-
SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
14232-
Instruction *ReductionRoot) {
14225+
Value *
14226+
BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14227+
Instruction *ReductionRoot) {
1423314228
// All blocks must be scheduled before any instructions are inserted.
1423414229
for (auto &BSIter : BlocksSchedules) {
1423514230
scheduleBlock(BSIter.second.get());
@@ -14373,6 +14368,7 @@ Value *BoUpSLP::vectorizeTree(
1437314368
SmallDenseSet<Value *, 4> UsedInserts;
1437414369
DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
1437514370
SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
14371+
SmallDenseSet<ExtractElementInst *, 4> IgnoredExtracts;
1437614372
// Extract all of the elements with the external uses.
1437714373
for (const auto &ExternalUse : ExternalUses) {
1437814374
Value *Scalar = ExternalUse.Scalar;
@@ -14426,11 +14422,16 @@ Value *BoUpSLP::vectorizeTree(
1442614422
if (ReplaceInst) {
1442714423
// Leave the instruction as is, if it cheaper extracts and all
1442814424
// operands are scalar.
14429-
auto *CloneInst = Inst->clone();
14430-
CloneInst->insertBefore(Inst);
14431-
if (Inst->hasName())
14432-
CloneInst->takeName(Inst);
14433-
Ex = CloneInst;
14425+
if (auto *EE = dyn_cast<ExtractElementInst>(Inst)) {
14426+
IgnoredExtracts.insert(EE);
14427+
Ex = EE;
14428+
} else {
14429+
auto *CloneInst = Inst->clone();
14430+
CloneInst->insertBefore(Inst);
14431+
if (Inst->hasName())
14432+
CloneInst->takeName(Inst);
14433+
Ex = CloneInst;
14434+
}
1443414435
} else if (auto *ES = dyn_cast<ExtractElementInst>(Scalar);
1443514436
ES && isa<Instruction>(Vec)) {
1443614437
Value *V = ES->getVectorOperand();
@@ -14530,8 +14531,12 @@ Value *BoUpSLP::vectorizeTree(
1453014531
}
1453114532
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
1453214533
// Required to update internally referenced instructions.
14533-
Scalar->replaceAllUsesWith(NewInst);
14534-
ReplacedExternals.emplace_back(Scalar, NewInst);
14534+
if (Scalar != NewInst) {
14535+
assert((!isa<ExtractElementInst>(Scalar) ||
14536+
!IgnoredExtracts.contains(cast<ExtractElementInst>(Scalar))) &&
14537+
"Extractelements should not be replaced.");
14538+
Scalar->replaceAllUsesWith(NewInst);
14539+
}
1453514540
continue;
1453614541
}
1453714542

@@ -14757,6 +14762,9 @@ Value *BoUpSLP::vectorizeTree(
1475714762
if (Entry->getOpcode() == Instruction::GetElementPtr &&
1475814763
!isa<GetElementPtrInst>(Scalar))
1475914764
continue;
14765+
if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
14766+
EE && IgnoredExtracts.contains(EE))
14767+
continue;
1476014768
#ifndef NDEBUG
1476114769
Type *Ty = Scalar->getType();
1476214770
if (!Ty->isVoidTy()) {
@@ -17660,7 +17668,6 @@ class HorizontalReduction {
1766017668
// because of the vectorization.
1766117669
DenseMap<Value *, WeakTrackingVH> TrackedVals(ReducedVals.size() *
1766217670
ReducedVals.front().size());
17663-
SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
1766417671

1766517672
// The compare instruction of a min/max is the insertion point for new
1766617673
// instructions and may be replaced with a new compare instruction.
@@ -17956,6 +17963,8 @@ class HorizontalReduction {
1795617963
if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
1795717964
continue;
1795817965
Value *RdxVal = Candidates[Cnt];
17966+
if (auto It = TrackedVals.find(RdxVal); It != TrackedVals.end())
17967+
RdxVal = It->second;
1795917968
if (!Visited.insert(RdxVal).second)
1796017969
continue;
1796117970
// Check if the scalar was vectorized as part of the vectorization
@@ -18024,8 +18033,8 @@ class HorizontalReduction {
1802418033
InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
1802518034

1802618035
// Vectorize a tree.
18027-
Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
18028-
ReplacedExternals, InsertPt);
18036+
Value *VectorizedRoot =
18037+
V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
1802918038

1803018039
Builder.SetInsertPoint(InsertPt);
1803118040

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(i32 %arg) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) {
7+
; CHECK-NEXT: [[BB:.*]]:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
9+
; CHECK-NEXT: br label %[[BB1:.*]]
10+
; CHECK: [[BB1]]:
11+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB1]] ]
12+
; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP6:%.*]], %[[BB1]] ]
13+
; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX4:%.*]], %[[BB1]] ]
14+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ]
15+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0>
16+
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], zeroinitializer
17+
; CHECK-NEXT: [[ADD17:%.*]] = add i32 [[PHI]], 0
18+
; CHECK-NEXT: [[ADD18:%.*]] = add i32 [[PHI2]], 0
19+
; CHECK-NEXT: [[ADD19:%.*]] = add i32 [[PHI2]], 0
20+
; CHECK-NEXT: [[ADD23:%.*]] = add i32 [[PHI2]], 0
21+
; CHECK-NEXT: [[TMP4]] = add <2 x i32> [[TMP0]], <i32 0, i32 1>
22+
; CHECK-NEXT: [[TMP5]] = extractelement <2 x i32> [[TMP4]], i32 1
23+
; CHECK-NEXT: [[TMP6]] = extractelement <2 x i32> [[TMP4]], i32 0
24+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP3]])
25+
; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP7]], [[ADD18]]
26+
; CHECK-NEXT: [[OP_RDX1:%.*]] = xor i32 [[ADD17]], [[ADD19]]
27+
; CHECK-NEXT: [[OP_RDX2:%.*]] = xor i32 [[ADD23]], [[TMP6]]
28+
; CHECK-NEXT: [[OP_RDX3:%.*]] = xor i32 [[OP_RDX]], [[OP_RDX1]]
29+
; CHECK-NEXT: [[OP_RDX4]] = xor i32 [[OP_RDX3]], [[OP_RDX2]]
30+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[TMP5]], 0
31+
; CHECK-NEXT: br label %[[BB1]]
32+
;
33+
bb:
34+
br label %bb1
35+
36+
bb1:
37+
%phi = phi i32 [ 0, %bb ], [ %add27, %bb1 ]
38+
%phi2 = phi i32 [ 0, %bb ], [ %add24, %bb1 ]
39+
%phi3 = phi i32 [ 0, %bb ], [ %xor26, %bb1 ]
40+
%add = add i32 %phi2, 0
41+
%add4 = add i32 %phi2, 0
42+
%xor = xor i32 %add, %add4
43+
%add5 = add i32 %phi, 0
44+
%add6 = add i32 %phi2, 0
45+
%add7 = add i32 %phi2, 0
46+
%xor8 = xor i32 %add6, %xor
47+
%xor9 = xor i32 %xor8, %add5
48+
%xor10 = xor i32 %xor9, %add7
49+
%add11 = add i32 %phi, 0
50+
%add12 = add i32 %phi2, 0
51+
%add13 = add i32 %phi2, 0
52+
%xor14 = xor i32 %add12, %xor10
53+
%xor15 = xor i32 %xor14, %add11
54+
%xor16 = xor i32 %xor15, %add13
55+
%add17 = add i32 %phi, 0
56+
%add18 = add i32 %phi2, 0
57+
%add19 = add i32 %phi2, 0
58+
%xor20 = xor i32 %add18, %xor16
59+
%xor21 = xor i32 %xor20, %add17
60+
%xor22 = xor i32 %xor21, %add19
61+
%add23 = add i32 %phi2, 0
62+
%add24 = add i32 %arg, 0
63+
%xor25 = xor i32 %add23, %xor22
64+
%xor26 = xor i32 %xor25, %add24
65+
%add27 = add i32 1, 0
66+
%icmp = icmp ult i32 %add27, 0
67+
br label %bb1
68+
}

0 commit comments

Comments
 (0)