Skip to content

Commit 9df0568

Browse files
committed
[SLP] Fix crash caused by reorderBottomToTop().
The crash is caused by incorrect order set by reorderBottomToTop(), which happens when it is reordering a TreeEntry which has a user that has already been reordered earlier. Please see the detailed description in the lit test. Differential Revision: https://reviews.llvm.org/D126099
1 parent 05527b6 commit 9df0568

File tree

2 files changed

+141
-2
lines changed

2 files changed

+141
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3858,7 +3858,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
38583858
while (!OrderedEntries.empty()) {
38593859
// 1. Filter out only reordered nodes.
38603860
// 2. If the entry has multiple uses - skip it and jump to the next node.
3861-
MapVector<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
3861+
DenseMap<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
38623862
SmallVector<TreeEntry *> Filtered;
38633863
for (TreeEntry *TE : OrderedEntries) {
38643864
if (!(TE->State == TreeEntry::Vectorize ||
@@ -3886,7 +3886,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
38863886
// Erase filtered entries.
38873887
for_each(Filtered,
38883888
[&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); });
3889-
for (auto &Data : Users) {
3889+
SmallVector<
3890+
std::pair<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>>>
3891+
UsersVec(Users.begin(), Users.end());
3892+
sort(UsersVec, [](const auto &Data1, const auto &Data2) {
3893+
return Data1.first->Idx > Data2.first->Idx;
3894+
});
3895+
for (auto &Data : UsersVec) {
38903896
// Check that operands are used only in the User node.
38913897
SmallVector<TreeEntry *> GatherOps;
38923898
if (!canReorderOperands(Data.first, Data.second, NonVectorized,
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-grtev4-linux-gnu -S | FileCheck %s
3+
4+
; This checks that reorderBottomToTop() can handle reordering of a TreeEntry
5+
; which has a user TreeEntry that has already been reordered.
6+
; Here is when the crash occurs:
7+
;
8+
; (N4)OrderB
9+
; |
10+
; (N1)OrderA (N2)OrderA (N3)NoOrder
11+
; \ | /
12+
; (Phi)NoOrder
13+
;
14+
; 1. Phi is visited along with its operands (N1,N2,N3). BestOrder is "OrderA".
15+
; 2. Phi along with all its operands (N1,N2,N3) are reordered. The result is:
16+
;
17+
; (N4)OrderB
18+
; |
19+
; (N1)NoOrder (N2)NoOrder (N3)OrderA
20+
; \ | /
21+
; (Phi)OrderA
22+
;
23+
; 3. N3 is now visited along with its operand N4. BestOrder is "OrderB".
24+
; 4. N3 and N4 are reordered. The result is this:
25+
;
26+
; (N4)NoOrder
27+
; |
28+
; (N1)NoOrder (N2)NoOrder (N3)OrderB
29+
; \ | /
30+
; (Phi)OrderA
31+
;
32+
; At this point there is a discrepancy between Phi's Operand 2 which are
33+
; reordered based on OrderA and N3's OrderB. This results in a crash in
34+
; vectorizeTree() on its way from N3 back to the Phi. The reason is that
35+
; N3->isSame(Phi's operand 2) returns false and vectorizeTree() skips N3.
36+
;
37+
; This patch changes the order in which the nodes are visited to bottom-up,
38+
; which fixes the issue.
39+
;
40+
; NOTE: The crash shows up when reorderTopToBottom() does not reorder the tree,
41+
; so to simulate this we add external store users. Alternatively one can
42+
; comment out reorderTopToBottom() and remove the stores.
43+
44+
45+
define void @reorder_crash(float* %ptr) {
46+
; CHECK-LABEL: @reorder_crash(
47+
; CHECK-NEXT: entry:
48+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
49+
; CHECK-NEXT: br i1 undef, label [[BB0:%.*]], label [[BB12:%.*]]
50+
; CHECK: bb0:
51+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
52+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
53+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
54+
; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
55+
; CHECK-NEXT: br label [[BB3:%.*]]
56+
; CHECK: bb12:
57+
; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
58+
; CHECK: bb1:
59+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
60+
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
61+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
62+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
63+
; CHECK-NEXT: br label [[BB3]]
64+
; CHECK: bb2:
65+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
66+
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
67+
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer
68+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
69+
; CHECK-NEXT: br label [[BB3]]
70+
; CHECK: bb3:
71+
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ [[TMP1]], [[BB0]] ], [ [[TMP4]], [[BB1]] ], [ [[SHUFFLE]], [[BB2]] ]
72+
; CHECK-NEXT: ret void
73+
;
74+
entry:
75+
%gep0 = getelementptr inbounds float, float* %ptr, i64 0
76+
%gep1 = getelementptr inbounds float, float* %ptr, i64 1
77+
%gep2 = getelementptr inbounds float, float* %ptr, i64 2
78+
%gep3 = getelementptr inbounds float, float* %ptr, i64 3
79+
br i1 undef, label %bb0, label %bb12
80+
81+
bb0:
82+
; Used by phi in this order: 1, 0, 2, 3
83+
%ld00 = load float, float* %gep0
84+
%ld01 = load float, float* %gep1
85+
%ld02 = load float, float* %gep2
86+
%ld03 = load float, float* %gep3
87+
88+
; External store users in natural order 0, 1, 2, 3
89+
store float %ld00, float *%gep0
90+
store float %ld01, float *%gep1
91+
store float %ld02, float *%gep2
92+
store float %ld03, float *%gep3
93+
br label %bb3
94+
95+
bb12:
96+
br i1 undef, label %bb1, label %bb2
97+
98+
bb1:
99+
; Used by phi in this order: 1, 0, 2, 3
100+
%ld10 = load float, float* %gep0
101+
%ld11 = load float, float* %gep1
102+
%ld12 = load float, float* %gep2
103+
%ld13 = load float, float* %gep3
104+
105+
; External store users in natural order 0, 1, 2, 3
106+
store float %ld10, float *%gep0
107+
store float %ld11, float *%gep1
108+
store float %ld12, float *%gep2
109+
store float %ld13, float *%gep3
110+
111+
br label %bb3
112+
113+
bb2:
114+
; Used by fadd in this order: 2, 3, 0, 1
115+
%ld20 = load float, float* %gep0
116+
%ld21 = load float, float* %gep1
117+
%ld22 = load float, float* %gep2
118+
%ld23 = load float, float* %gep3
119+
120+
; Used by phi in this order: 0, 1, 2, 3
121+
%add20 = fadd float %ld22, 0.0
122+
%add21 = fadd float %ld23, 0.0
123+
%add22 = fadd float %ld20, 0.0
124+
%add23 = fadd float %ld21, 0.0
125+
br label %bb3
126+
127+
bb3:
128+
%phi0 = phi float [ %ld01, %bb0 ], [ %ld11, %bb1 ], [ %add20, %bb2 ]
129+
%phi1 = phi float [ %ld00, %bb0 ], [ %ld10, %bb1 ], [ %add21, %bb2 ]
130+
%phi2 = phi float [ %ld02, %bb0 ], [ %ld12, %bb1 ], [ %add22, %bb2 ]
131+
%phi3 = phi float [ %ld03, %bb0 ], [ %ld13, %bb1 ], [ %add23, %bb2 ]
132+
ret void
133+
}

0 commit comments

Comments
 (0)