Skip to content

Commit b9d3da8

Browse files
committed
[SLP] Fix PR105904: the root node might be a gather node without users for reductions.
Before checking the user components of a gather/buildvector node, we need to check whether the node has any users at all. A root node might not have users if it is the root node of a reduction. Fixes #105904
1 parent 499e135 commit b9d3da8

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3011,7 +3011,8 @@ class BoUpSLP {
30113011
}
30123012

30133013
bool isOperandGatherNode(const EdgeInfo &UserEI) const {
3014-
return isGather() && UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx &&
3014+
return isGather() && (Idx > 0 || !UserTreeIndices.empty()) &&
3015+
UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx &&
30153016
UserTreeIndices.front().UserTE == UserEI.UserTE;
30163017
}
30173018

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvl512b < %s | FileCheck %s
3+
4+
define void @test(ptr %c) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[C]], i32 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 222, i64 228, i64 276, i64 279, i64 282, i64 285, i64 288, i64 0>
11+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> <i64 0, i64 345, i64 348, i64 351, i64 354, i64 357, i64 360, i64 363>
12+
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
13+
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
14+
; CHECK-NEXT: br label %[[FOR_COND:.*]]
15+
; CHECK: [[FOR_COND]]:
16+
; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ]
17+
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP4]], i64 0)
18+
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP5]], i64 8)
19+
; CHECK-NEXT: [[TMP8]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP7]])
20+
; CHECK-NEXT: br label %[[FOR_COND]]
21+
;
22+
entry:
23+
%arrayidx8.5.3 = getelementptr i8, ptr %c, i64 222
24+
%0 = load i8, ptr %arrayidx8.5.3, align 1
25+
%arrayidx8.7.3 = getelementptr i8, ptr %c, i64 228
26+
%1 = load i8, ptr %arrayidx8.7.3, align 1
27+
%arrayidx8.434 = getelementptr i8, ptr %c, i64 276
28+
%2 = load i8, ptr %arrayidx8.434, align 1
29+
%arrayidx8.1.4 = getelementptr i8, ptr %c, i64 279
30+
%3 = load i8, ptr %arrayidx8.1.4, align 1
31+
%arrayidx8.2.4 = getelementptr i8, ptr %c, i64 282
32+
%4 = load i8, ptr %arrayidx8.2.4, align 1
33+
%arrayidx8.3.4 = getelementptr i8, ptr %c, i64 285
34+
%5 = load i8, ptr %arrayidx8.3.4, align 1
35+
%arrayidx8.4.4 = getelementptr i8, ptr %c, i64 288
36+
%6 = load i8, ptr %arrayidx8.4.4, align 1
37+
%7 = load i8, ptr %c, align 1
38+
%8 = load i8, ptr %c, align 1
39+
%arrayidx8.536 = getelementptr i8, ptr %c, i64 345
40+
%9 = load i8, ptr %arrayidx8.536, align 1
41+
%arrayidx8.1.5 = getelementptr i8, ptr %c, i64 348
42+
%10 = load i8, ptr %arrayidx8.1.5, align 1
43+
%arrayidx8.2.5 = getelementptr i8, ptr %c, i64 351
44+
%11 = load i8, ptr %arrayidx8.2.5, align 1
45+
%arrayidx8.3.5 = getelementptr i8, ptr %c, i64 354
46+
%12 = load i8, ptr %arrayidx8.3.5, align 1
47+
%arrayidx8.4.5 = getelementptr i8, ptr %c, i64 357
48+
%13 = load i8, ptr %arrayidx8.4.5, align 1
49+
%arrayidx8.5.5 = getelementptr i8, ptr %c, i64 360
50+
%14 = load i8, ptr %arrayidx8.5.5, align 1
51+
%arrayidx8.6.5 = getelementptr i8, ptr %c, i64 363
52+
%15 = load i8, ptr %arrayidx8.6.5, align 1
53+
br label %for.cond
54+
55+
for.cond:
56+
%a.promoted2226 = phi i8 [ 0, %entry ], [ %or18.6.5, %for.cond ]
57+
%or18.7.3 = or i8 %0, %1
58+
%or18.435 = or i8 %or18.7.3, %2
59+
%or18.1.4 = or i8 %or18.435, %3
60+
%or18.2.4 = or i8 %or18.1.4, %4
61+
%or18.3.4 = or i8 %or18.2.4, %5
62+
%or18.4.4 = or i8 %or18.3.4, %6
63+
%or18.5.4 = or i8 %or18.4.4, %7
64+
%or18.6.4 = or i8 %or18.5.4, %8
65+
%or18.537 = or i8 %or18.6.4, %9
66+
%or18.1.5 = or i8 %or18.537, %10
67+
%or18.2.5 = or i8 %or18.1.5, %11
68+
%or18.3.5 = or i8 %or18.2.5, %12
69+
%or18.4.5 = or i8 %or18.3.5, %13
70+
%or18.5.5 = or i8 %or18.4.5, %14
71+
%or18.6.5 = or i8 %or18.5.5, %15
72+
br label %for.cond
73+
}
74+

0 commit comments

Comments
 (0)