Skip to content

Commit 3aecbbc

Browse files
committed
[SLP]Do not match nodes if schedulability of parent nodes is different
If one user node is non-schedulable and another one is schedulable, such nodes should not be considered matched. The selection of the actual insert point differs in this case, yet the insert points may still coincide, which may cause a compiler crash because of a broken def-use chain. Fixes #137797
1 parent 368fbc2 commit 3aecbbc

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15245,6 +15245,11 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1524515245
continue;
1524615246
}
1524715247

15248+
if (!TEUseEI.UserTE->isGather() && !UserPHI &&
15249+
doesNotNeedToSchedule(TEUseEI.UserTE->Scalars) !=
15250+
doesNotNeedToSchedule(UseEI.UserTE->Scalars) &&
15251+
is_contained(UseEI.UserTE->Scalars, TEInsertPt))
15252+
continue;
1524815253
// Check if the user node of the TE comes after user node of TEPtr,
1524915254
// otherwise TEPtr depends on TE.
1525015255
if ((TEInsertBlock != InsertPt->getParent() ||
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB5:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], %[[BB1]] ], [ zeroinitializer, %[[BB]] ]
10+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
11+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 0, i32 0
12+
; CHECK-NEXT: [[TMP3]] = or <2 x i32> [[TMP1]], [[TMP2]]
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
14+
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
15+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
16+
; CHECK-NEXT: [[OR3:%.*]] = or i32 [[TMP6]], 0
17+
; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB5]]
18+
; CHECK: [[BB5]]:
19+
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP5]], %[[BB1]] ]
20+
; CHECK-NEXT: ret void
21+
;
22+
bb:
23+
br i1 false, label %bb1, label %bb5
24+
25+
bb1:
26+
%phi = phi i32 [ %or4, %bb1 ], [ 0, %bb ]
27+
%phi2 = phi i32 [ %add, %bb1 ], [ 0, %bb ]
28+
%add = add i32 1, 0
29+
%or = or i32 0, %phi2
30+
%or3 = or i32 %or, 0
31+
%mul = mul i32 0, 0
32+
%or4 = or i32 %phi, %mul
33+
br i1 false, label %bb1, label %bb5
34+
35+
bb5:
36+
%phi6 = phi i32 [ 0, %bb ], [ %or4, %bb1 ]
37+
%phi7 = phi i32 [ 0, %bb ], [ %or, %bb1 ]
38+
ret void
39+
}

0 commit comments

Comments
 (0)