Skip to content

Commit 45090b3

Browse files
committed
[SLP] Check the whole def-use chain in the tree to find proper dominance if the last instruction is the same
If the insertion point (last instruction) of the user nodes is the same, the whole def-use chain in the tree must be checked to find the proper dominance; this prevents a compiler crash. Fixes #131818
1 parent 84909d7 commit 45090b3

File tree

2 files changed

+73
-2
lines changed

2 files changed

+73
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14086,11 +14086,18 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1408614086
auto CheckParentNodes = [&](const TreeEntry *User1, const TreeEntry *User2,
1408714087
unsigned EdgeIdx) {
1408814088
const TreeEntry *Ptr1 = User1;
14089+
const TreeEntry *Ptr2 = User2;
14090+
SmallDenseMap<const TreeEntry *, unsigned> PtrToIdx;
14091+
while (Ptr2) {
14092+
PtrToIdx.try_emplace(Ptr2, EdgeIdx);
14093+
EdgeIdx = Ptr2->UserTreeIndex.EdgeIdx;
14094+
Ptr2 = Ptr2->UserTreeIndex.UserTE;
14095+
}
1408914096
while (Ptr1) {
1409014097
unsigned Idx = Ptr1->UserTreeIndex.EdgeIdx;
1409114098
Ptr1 = Ptr1->UserTreeIndex.UserTE;
14092-
if (Ptr1 == User2)
14093-
return Idx < EdgeIdx;
14099+
if (auto It = PtrToIdx.find(Ptr1); It != PtrToIdx.end())
14100+
return Idx < It->second;
1409414101
}
1409514102
return false;
1409614103
};
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test(i32 %0, i1 %1) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: i32 [[TMP0:%.*]], i1 [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
8+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer
9+
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
10+
; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
11+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB7:.*]], label %[[BB9:.*]]
12+
; CHECK: [[BB7]]:
13+
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
14+
; CHECK-NEXT: br label %[[BB16:.*]]
15+
; CHECK: [[BB9]]:
16+
; CHECK-NEXT: br i1 false, label %[[BB14:.*]], label %[[BB10:.*]]
17+
; CHECK: [[BB10]]:
18+
; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP5]])
19+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
20+
; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> zeroinitializer)
21+
; CHECK-NEXT: br label %[[BB14]]
22+
; CHECK: [[BB14]]:
23+
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x double> [ [[TMP13]], %[[BB10]] ], [ zeroinitializer, %[[BB9]] ]
24+
; CHECK-NEXT: br label %[[BB16]]
25+
; CHECK: [[BB16]]:
26+
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x double> [ [[TMP15]], %[[BB14]] ], [ [[TMP8]], %[[BB7]] ]
27+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[TMP17]], i32 0
28+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP17]], i32 1
29+
; CHECK-NEXT: [[TMP20:%.*]] = fmul double [[TMP19]], [[TMP18]]
30+
; CHECK-NEXT: [[TMP21:%.*]] = fptosi double [[TMP20]] to i32
31+
; CHECK-NEXT: ret i32 [[TMP21]]
32+
;
33+
%3 = sitofp i32 %0 to double
34+
%4 = sitofp i32 %0 to double
35+
%5 = sitofp i32 %0 to double
36+
br i1 %1, label %6, label %9
37+
38+
6:
39+
%7 = tail call double @llvm.fmuladd.f64(double %4, double 0.000000e+00, double 0.000000e+00)
40+
%8 = tail call double @llvm.fmuladd.f64(double %5, double 0.000000e+00, double 0.000000e+00)
41+
br label %18
42+
43+
9:
44+
br i1 false, label %15, label %10
45+
46+
10:
47+
%11 = call double @llvm.copysign.f64(double 0.000000e+00, double %3)
48+
%12 = tail call double @llvm.fmuladd.f64(double %11, double %4, double 0.000000e+00)
49+
%13 = call double @llvm.copysign.f64(double 0.000000e+00, double %5)
50+
%14 = tail call double @llvm.fmuladd.f64(double %13, double 0.000000e+00, double 0.000000e+00)
51+
br label %15
52+
53+
15:
54+
%16 = phi double [ %12, %10 ], [ 0.000000e+00, %9 ]
55+
%17 = phi double [ %14, %10 ], [ 0.000000e+00, %9 ]
56+
br label %18
57+
58+
18:
59+
%19 = phi double [ %17, %15 ], [ %8, %6 ]
60+
%20 = phi double [ %16, %15 ], [ %7, %6 ]
61+
%21 = fmul double %20, %19
62+
%22 = fptosi double %21 to i32
63+
ret i32 %22
64+
}

0 commit comments

Comments
 (0)