Skip to content

Commit dab19da

Browse files
committed
[SLP]Fix a crash for the strided nodes with reversed order and externally used pointer.
If the strided node is reversed, need to check for the last instruction, not the first one in the list of scalars, when checking if the root pointer must be extracted.
1 parent 67a9093 commit dab19da

File tree

2 files changed

+70
-6
lines changed

2 files changed

+70
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ static bool allSameType(ArrayRef<Value *> VL) {
10201020
/// possible scalar operand in vectorized instruction.
10211021
static bool doesInTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
10221022
TargetLibraryInfo *TLI) {
1023+
if (!UserInst)
1024+
return false;
10231025
unsigned Opcode = UserInst->getOpcode();
10241026
switch (Opcode) {
10251027
case Instruction::Load: {
@@ -2809,6 +2811,11 @@ class BoUpSLP {
28092811
/// \ returns the graph entry for the \p Idx operand of the \p E entry.
28102812
const TreeEntry *getOperandEntry(const TreeEntry *E, unsigned Idx) const;
28112813

2814+
/// Gets the root instruction for the given node. If the node is a strided
2815+
/// load/store node with the reverse order, the root instruction is the last
2816+
/// one.
2817+
Instruction *getRootEntryInstruction(const TreeEntry &Entry) const;
2818+
28122819
/// \returns Cast context for the given graph node.
28132820
TargetTransformInfo::CastContextHint
28142821
getCastContextHint(const TreeEntry &TE) const;
@@ -5987,6 +5994,15 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
59875994
VectorizableTree.front()->ReorderIndices.clear();
59885995
}
59895996

5997+
Instruction *BoUpSLP::getRootEntryInstruction(const TreeEntry &Entry) const {
5998+
if ((Entry.getOpcode() == Instruction::Store ||
5999+
Entry.getOpcode() == Instruction::Load) &&
6000+
Entry.State == TreeEntry::StridedVectorize &&
6001+
!Entry.ReorderIndices.empty() && isReverseOrder(Entry.ReorderIndices))
6002+
return dyn_cast<Instruction>(Entry.Scalars[Entry.ReorderIndices.front()]);
6003+
return dyn_cast<Instruction>(Entry.Scalars.front());
6004+
}
6005+
59906006
void BoUpSLP::buildExternalUses(
59916007
const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
59926008
DenseMap<Value *, unsigned> ScalarToExtUses;
@@ -6036,7 +6052,7 @@ void BoUpSLP::buildExternalUses(
60366052
// be used.
60376053
if (UseEntry->State == TreeEntry::ScatterVectorize ||
60386054
!doesInTreeUserNeedToExtract(
6039-
Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) {
6055+
Scalar, getRootEntryInstruction(*UseEntry), TLI)) {
60406056
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
60416057
<< ".\n");
60426058
assert(!UseEntry->isGather() && "Bad state");
@@ -8450,8 +8466,8 @@ void BoUpSLP::transformNodes() {
84508466
Instruction::Store, VecTy, BaseSI->getPointerOperand(),
84518467
/*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
84528468
if (StridedCost < OriginalVecCost)
8453-
// Strided load is more profitable than consecutive load + reverse -
8454-
// transform the node to strided load.
8469+
// Strided store is more profitable than reverse + consecutive store -
8470+
// transform the node to strided store.
84558471
E.State = TreeEntry::StridedVectorize;
84568472
}
84578473
break;
@@ -13776,7 +13792,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1377613792
ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
1377713793
} else {
1377813794
assert(E->State == TreeEntry::StridedVectorize &&
13779-
"Expected either strided or conseutive stores.");
13795+
"Expected either strided or consecutive stores.");
1378013796
if (!E->ReorderIndices.empty()) {
1378113797
SI = cast<StoreInst>(E->Scalars[E->ReorderIndices.front()]);
1378213798
Ptr = SI->getPointerOperand();
@@ -14380,8 +14396,7 @@ Value *BoUpSLP::vectorizeTree(
1438014396
(E->State == TreeEntry::Vectorize ||
1438114397
E->State == TreeEntry::StridedVectorize) &&
1438214398
doesInTreeUserNeedToExtract(
14383-
Scalar,
14384-
cast<Instruction>(UseEntry->Scalars.front()),
14399+
Scalar, getRootEntryInstruction(*UseEntry),
1438514400
TLI);
1438614401
})) &&
1438714402
"Scalar with nullptr User must be registered in "
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
3+
4+
define void @test(ptr %a, i64 %0) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[A:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
9+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: br label %[[BB:.*]]
11+
; CHECK: [[BB]]:
12+
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
13+
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
14+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
15+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
16+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
17+
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
18+
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
19+
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
20+
; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]]
21+
; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]]
22+
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> <i1 true, i1 true>, i32 2)
23+
; CHECK-NEXT: br label %[[BB]]
24+
;
25+
entry:
26+
br label %bb
27+
28+
bb:
29+
%indvars.iv.next239.i = add i64 0, 0
30+
%arrayidx.i.1 = getelementptr double, ptr %a, i64 %indvars.iv.next239.i
31+
%1 = load double, ptr %arrayidx.i.1, align 8
32+
%arrayidx10.i.1 = getelementptr double, ptr %a, i64 %0
33+
%2 = or disjoint i64 %0, 1
34+
%arrayidx17.i28.1 = getelementptr double, ptr %a, i64 %2
35+
%3 = load double, ptr %arrayidx17.i28.1, align 8
36+
%4 = load double, ptr %a, align 8
37+
%5 = load double, ptr %a, align 8
38+
%arrayidx38.i.1 = getelementptr double, ptr %a, i64 1
39+
%6 = load double, ptr %arrayidx38.i.1, align 8
40+
%arrayidx41.i.1 = getelementptr double, ptr %a, i64 1
41+
%7 = load double, ptr %arrayidx41.i.1, align 8
42+
%sub47.i.1 = fsub double %4, %5
43+
%sub54.i.1 = fsub double %6, %7
44+
%sub69.i.1 = fsub double %1, %sub54.i.1
45+
store double %sub69.i.1, ptr %arrayidx10.i.1, align 8
46+
%sub72.i.1 = fsub double %3, %sub47.i.1
47+
store double %sub72.i.1, ptr %arrayidx17.i28.1, align 8
48+
br label %bb
49+
}

0 commit comments

Comments
 (0)