Skip to content

Commit 0e1a9e3

Browse files
committed
[SLP]Fix PR74607: Fix dependency between buildvector nodes with user
nodes having the same last instruction. If the user nodes have the same last instruction, which is used as the insertion point for the buildvector nodes, finding the proper dependency is crucial. Previously, the dependency was determined by the indices of the buildvector nodes themselves, but it looks like it should be determined by the indices of the user nodes, because those indices identify the vectorization order and thus properly align the buildvector nodes in terms of the def-use chain.
1 parent 9bad6cb commit 0e1a9e3

File tree

3 files changed

+51
-4
lines changed

3 files changed

+51
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9384,7 +9384,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
93849384
continue;
93859385
// If the user instruction is used for some reason in different
93869386
// vectorized nodes - make it depend on index.
9387-
if (TEUseEI.UserTE != UseEI.UserTE && TE->Idx < TEPtr->Idx)
9387+
if (TEUseEI.UserTE != UseEI.UserTE &&
9388+
TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
93889389
continue;
93899390
}
93909391

llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define void @test() {
1515
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[DOTPRE_PRE]], i32 0
1616
; CHECK-NEXT: br label [[BB1:%.*]]
1717
; CHECK: bb1:
18-
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ]
18+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ]
1919
; CHECK-NEXT: br label [[BB2]]
2020
; CHECK: bb2:
2121
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ]
@@ -29,8 +29,9 @@ define void @test() {
2929
; CHECK-NEXT: tail call void @foo(float [[MUL]])
3030
; CHECK-NEXT: [[I2:%.*]] = load float, ptr poison, align 4
3131
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
32-
; CHECK-NEXT: [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
33-
; CHECK-NEXT: [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
32+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
33+
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
34+
; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
3435
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
3536
;
3637
entry:
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s
3+
4+
define i64 @foo() {
5+
; CHECK-LABEL: define i64 @foo() {
6+
; CHECK-NEXT: bb:
7+
; CHECK-NEXT: br label [[BB3:%.*]]
8+
; CHECK: bb1:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
10+
; CHECK-NEXT: ret i64 0
11+
; CHECK: bb3:
12+
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
13+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
14+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
15+
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
16+
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
17+
; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
18+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
19+
; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
20+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
21+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
22+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
23+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
24+
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
25+
;
26+
bb:
27+
br label %bb3
28+
29+
bb1:
30+
%phi = phi i64 [ %add, %bb3 ]
31+
%phi2 = phi i64 [ %or, %bb3 ]
32+
ret i64 0
33+
34+
bb3:
35+
%phi4 = phi i64 [ 0, %bb ], [ %add7, %bb3 ]
36+
%phi5 = phi i64 [ 0, %bb ], [ 0, %bb3 ]
37+
%phi6 = phi i64 [ 0, %bb ], [ %add, %bb3 ]
38+
%add = add i64 %phi6, %phi5
39+
%add7 = add i64 0, 0
40+
%getelementptr = getelementptr i64, ptr addrspace(1) null, i64 %add7
41+
%or = or i64 %phi4, 0
42+
%icmp = icmp ult i64 %or, 0
43+
br i1 false, label %bb3, label %bb1
44+
}
45+

0 commit comments

Comments
 (0)