Skip to content

Commit 4b5018d

Browse files
committed
[SLP] Track repeated reduced value as it might be vectorized
Changes to a repeated reduced value need to be tracked, since the value might be vectorized in the next reduction vectorization attempt; tracking it allows the code to be generated correctly and avoids a compiler crash. Fixes #111887
1 parent 1954869 commit 4b5018d

File tree

2 files changed

+49
-10
lines changed

2 files changed

+49
-10
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,8 +1293,7 @@ class BoUpSLP {
12931293
using InstrList = SmallVector<Instruction *, 16>;
12941294
using ValueSet = SmallPtrSet<Value *, 16>;
12951295
using StoreList = SmallVector<StoreInst *, 8>;
1296-
using ExtraValueToDebugLocsMap =
1297-
MapVector<Value *, SmallVector<Instruction *, 2>>;
1296+
using ExtraValueToDebugLocsMap = SmallDenseSet<Value *, 4>;
12981297
using OrdersType = SmallVector<unsigned, 4>;
12991298

13001299
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
@@ -6322,7 +6321,7 @@ void BoUpSLP::buildExternalUses(
63226321
continue;
63236322

63246323
// Check if the scalar is externally used as an extra arg.
6325-
const auto *ExtI = ExternallyUsedValues.find(Scalar);
6324+
const auto ExtI = ExternallyUsedValues.find(Scalar);
63266325
if (ExtI != ExternallyUsedValues.end()) {
63276326
int FoundLane = Entry->findLaneForValue(Scalar);
63286327
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
@@ -18820,7 +18819,7 @@ class HorizontalReduction {
1882018819
// List of the values that were reduced in other trees as part of gather
1882118820
// nodes and thus requiring extract if fully vectorized in other trees.
1882218821
SmallPtrSet<Value *, 4> RequiredExtract;
18823-
Value *VectorizedTree = nullptr;
18822+
WeakTrackingVH VectorizedTree = nullptr;
1882418823
bool CheckForReusedReductionOps = false;
1882518824
// Try to vectorize elements based on their type.
1882618825
SmallVector<InstructionsState> States;
@@ -18916,6 +18915,7 @@ class HorizontalReduction {
1891618915
bool SameScaleFactor = false;
1891718916
bool OptReusedScalars = IsSupportedHorRdxIdentityOp &&
1891818917
SameValuesCounter.size() != Candidates.size();
18918+
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
1891918919
if (OptReusedScalars) {
1892018920
SameScaleFactor =
1892118921
(RdxKind == RecurKind::Add || RdxKind == RecurKind::FAdd ||
@@ -18936,6 +18936,7 @@ class HorizontalReduction {
1893618936
emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
1893718937
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
1893818938
VectorizedVals.try_emplace(OrigV, Cnt);
18939+
ExternallyUsedValues.insert(OrigV);
1893918940
continue;
1894018941
}
1894118942
}
@@ -19015,17 +19016,18 @@ class HorizontalReduction {
1901519016
V.reorderBottomToTop(/*IgnoreReorder=*/true);
1901619017
// Keep extracted other reduction values, if they are used in the
1901719018
// vectorization trees.
19018-
BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues;
19019+
BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues(
19020+
ExternallyUsedValues);
1901919021
// The reduction root is used as the insertion point for new
1902019022
// instructions, so set it as externally used to prevent it from being
1902119023
// deleted.
19022-
LocalExternallyUsedValues[ReductionRoot];
19024+
LocalExternallyUsedValues.insert(ReductionRoot);
1902319025
for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
1902419026
if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
1902519027
continue;
1902619028
for (Value *V : ReducedVals[Cnt])
1902719029
if (isa<Instruction>(V))
19028-
LocalExternallyUsedValues[TrackedVals[V]];
19030+
LocalExternallyUsedValues.insert(TrackedVals[V]);
1902919031
}
1903019032
if (!IsSupportedHorRdxIdentityOp) {
1903119033
// Number of uses of the candidates in the vector of values.
@@ -19054,21 +19056,21 @@ class HorizontalReduction {
1905419056
// Check if the scalar was vectorized as part of the vectorization
1905519057
// tree but not the top node.
1905619058
if (!VLScalars.contains(RdxVal) && V.isVectorized(RdxVal)) {
19057-
LocalExternallyUsedValues[RdxVal];
19059+
LocalExternallyUsedValues.insert(RdxVal);
1905819060
continue;
1905919061
}
1906019062
Value *OrigV = TrackedToOrig.at(RdxVal);
1906119063
unsigned NumOps =
1906219064
VectorizedVals.lookup(OrigV) + At(SameValuesCounter, OrigV);
1906319065
if (NumOps != ReducedValsToOps.at(OrigV).size())
19064-
LocalExternallyUsedValues[RdxVal];
19066+
LocalExternallyUsedValues.insert(RdxVal);
1906519067
}
1906619068
// Do not need the list of reused scalars in regular mode anymore.
1906719069
if (!IsSupportedHorRdxIdentityOp)
1906819070
SameValuesCounter.clear();
1906919071
for (Value *RdxVal : VL)
1907019072
if (RequiredExtract.contains(RdxVal))
19071-
LocalExternallyUsedValues[RdxVal];
19073+
LocalExternallyUsedValues.insert(RdxVal);
1907219074
V.buildExternalUses(LocalExternallyUsedValues);
1907319075

1907419076
V.computeMinimumValueSizes();
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
3+
4+
; Regression test added together with the fix for issue #111887.
; The body is a chain of @llvm.smax.i16 calls whose leaf operands are the
; xor results %1, %5, %9, %13, the constant 0, and the scalar load %0.
; %0 is used as an smax operand four times (%2, %6, %10, %14), so the same
; value occurs repeatedly in the chain; presumably this is the "repeated
; reduced value" the fix tracks (confirm against the issue).
; The four loads read offsets 0, 6, 12 and 18 from a null base pointer, which
; corresponds to the single strided load (stride 6, 4 lanes) in the CHECK
; lines. The CHECK lines are autogenerated by utils/update_test_checks.py;
; regenerate them instead of editing by hand.
define void @test() {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 null, i64 6, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
9+
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr null, align 2
10+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i16> [[TMP0]], zeroinitializer
11+
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[TMP2]])
12+
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP1]], i16 [[TMP3]])
13+
; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP4]], i16 0)
14+
; CHECK-NEXT: [[TMP6:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP5]], i16 0)
15+
; CHECK-NEXT: ret void
16+
;
17+
entry:
18+
%0 = load i16, ptr null, align 2
19+
%1 = xor i16 %0, 0
20+
%2 = tail call i16 @llvm.smax.i16(i16 %1, i16 %0)
21+
%3 = tail call i16 @llvm.smax.i16(i16 0, i16 %2)
22+
%4 = load i16, ptr getelementptr inbounds (i8, ptr null, i64 6), align 2
23+
%5 = xor i16 %4, 0
24+
%6 = tail call i16 @llvm.smax.i16(i16 %5, i16 %0)
25+
%7 = tail call i16 @llvm.smax.i16(i16 %3, i16 %6)
26+
%8 = load i16, ptr getelementptr (i8, ptr null, i64 12), align 2
27+
%9 = xor i16 %8, 0
28+
%10 = tail call i16 @llvm.smax.i16(i16 %9, i16 %0)
29+
%11 = tail call i16 @llvm.smax.i16(i16 %7, i16 %10)
30+
%12 = load i16, ptr getelementptr (i8, ptr null, i64 18), align 2
31+
%13 = xor i16 %12, 0
32+
%14 = tail call i16 @llvm.smax.i16(i16 %13, i16 %0)
33+
%15 = tail call i16 @llvm.smax.i16(i16 %11, i16 %14)
34+
%16 = tail call i16 @llvm.smax.i16(i16 %15, i16 0)
35+
ret void
36+
}
37+

0 commit comments

Comments
 (0)