Skip to content

Commit af63546

Browse files
committed
[SLP]Look for vector user when estimating the cost
When estimating the cost, we need to find the first vector node user rather than simply the very first user node. The very first user might be a gather node, vectorized as clustered, which may cause a compiler crash. Fixes #110193
1 parent 7dfdca1 commit af63546

File tree

2 files changed

+79
-5
lines changed

2 files changed

+79
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10340,13 +10340,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1034010340
InstructionCost VecCost = VectorCost(CommonCost);
1034110341
// Check if the current node must be resized, if the parent node is not
1034210342
// resized.
10343-
if (!UnaryInstruction::isCast(E->getOpcode()) && E->Idx != 0 &&
10343+
if (It != MinBWs.end() && !UnaryInstruction::isCast(E->getOpcode()) &&
10344+
E->Idx != 0 &&
1034410345
(E->getOpcode() != Instruction::Load ||
1034510346
!E->UserTreeIndices.empty())) {
10346-
const EdgeInfo &EI = E->UserTreeIndices.front();
10347-
if ((EI.UserTE->getOpcode() != Instruction::Select ||
10348-
EI.EdgeIdx != 0) &&
10349-
It != MinBWs.end()) {
10347+
const EdgeInfo &EI =
10348+
*find_if(E->UserTreeIndices, [](const EdgeInfo &EI) {
10349+
return !EI.UserTE->isGather() || EI.EdgeIdx != UINT_MAX;
10350+
});
10351+
if (EI.UserTE->getOpcode() != Instruction::Select ||
10352+
EI.EdgeIdx != 0) {
1035010353
auto UserBWIt = MinBWs.find(EI.UserTE);
1035110354
Type *UserScalarTy =
1035210355
EI.UserTE->getOperand(EI.EdgeIdx).front()->getType();
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
; Regression test added with the SLP cost-estimation fix in this commit
; (referenced issue: #110193).
; The input is an `and` reduction chain over eight `shl` values: four constant
; `shl i32 0, 0` operands and four operands built from a
; trunc/or/icmp/select/shl sequence. The reduced value is truncated to i8 and
; compared against 0.
; The CHECK lines expect the four computed sequences to stay scalar and the
; remaining operands to become a single llvm.vector.reduce.and call on a
; <4 x i32> zeroinitializer.
define i1 @test() {
5+
; CHECK-LABEL: define i1 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32
8+
; CHECK-NEXT: [[CONV85_22_I333_I_I:%.*]] = or i32 0, [[TMP0]]
9+
; CHECK-NEXT: [[CMP3_I_22_I334_I_I:%.*]] = icmp ugt i32 [[CONV85_22_I333_I_I]], 0
10+
; CHECK-NEXT: [[SHL_I111_22_I335_I_I:%.*]] = select i1 [[CMP3_I_22_I334_I_I]], i32 0, i32 0
11+
; CHECK-NEXT: [[C22_I336_I_I:%.*]] = shl i32 [[CONV85_22_I333_I_I]], [[SHL_I111_22_I335_I_I]]
12+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 0 to i32
13+
; CHECK-NEXT: [[CONV85_23_I340_I_I:%.*]] = or i32 0, [[TMP1]]
14+
; CHECK-NEXT: [[CMP3_I_23_I341_I_I:%.*]] = icmp ugt i32 [[CONV85_23_I340_I_I]], 0
15+
; CHECK-NEXT: [[SHL_I111_23_I342_I_I:%.*]] = select i1 [[CMP3_I_23_I341_I_I]], i32 0, i32 0
16+
; CHECK-NEXT: [[C23_I343_I_I:%.*]] = shl i32 [[CONV85_23_I340_I_I]], [[SHL_I111_23_I342_I_I]]
17+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32
18+
; CHECK-NEXT: [[CONV85_24_I347_I_I:%.*]] = or i32 0, [[TMP2]]
19+
; CHECK-NEXT: [[CMP3_I_24_I348_I_I:%.*]] = icmp ugt i32 [[CONV85_24_I347_I_I]], 0
20+
; CHECK-NEXT: [[SHL_I111_24_I349_I_I:%.*]] = select i1 [[CMP3_I_24_I348_I_I]], i32 0, i32 0
21+
; CHECK-NEXT: [[C24_I350_I_I:%.*]] = shl i32 [[CONV85_24_I347_I_I]], [[SHL_I111_24_I349_I_I]]
22+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 0 to i32
23+
; CHECK-NEXT: [[CONV85_25_I354_I_I:%.*]] = or i32 0, [[TMP3]]
24+
; CHECK-NEXT: [[CMP3_I_25_I355_I_I:%.*]] = icmp ugt i32 [[CONV85_25_I354_I_I]], 0
25+
; CHECK-NEXT: [[SHL_I111_25_I356_I_I:%.*]] = select i1 [[CMP3_I_25_I355_I_I]], i32 0, i32 0
26+
; CHECK-NEXT: [[C25_I357_I_I:%.*]] = shl i32 [[CONV85_25_I354_I_I]], [[SHL_I111_25_I356_I_I]]
27+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> zeroinitializer)
28+
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP4]], [[C22_I336_I_I]]
29+
; CHECK-NEXT: [[OP_RDX1:%.*]] = and i32 [[C23_I343_I_I]], [[C24_I350_I_I]]
30+
; CHECK-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX]], [[OP_RDX1]]
31+
; CHECK-NEXT: [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[C25_I357_I_I]]
32+
; CHECK-NEXT: [[CONV109_I_I:%.*]] = trunc i32 [[OP_RDX3]] to i8
33+
; CHECK-NEXT: [[CMP_I_I54_I:%.*]] = icmp eq i8 [[CONV109_I_I]], 0
34+
; CHECK-NEXT: ret i1 [[CMP_I_I54_I]]
35+
;
36+
entry:
37+
%c18.i308.i.i = shl i32 0, 0
38+
%c19.i315.i.i = shl i32 0, 0
39+
%and.19.i316.i.i = and i32 %c18.i308.i.i, %c19.i315.i.i
40+
%c20.i322.i.i = shl i32 0, 0
41+
%and.20.i323.i.i = and i32 %and.19.i316.i.i, %c20.i322.i.i
42+
%c21.i329.i.i = shl i32 0, 0
43+
%and.21.i330.i.i = and i32 %and.20.i323.i.i, %c21.i329.i.i
44+
%0 = trunc i64 0 to i32
45+
%conv85.22.i333.i.i = or i32 0, %0
46+
%cmp3.i.22.i334.i.i = icmp ugt i32 %conv85.22.i333.i.i, 0
47+
%shl.i111.22.i335.i.i = select i1 %cmp3.i.22.i334.i.i, i32 0, i32 0
48+
%c22.i336.i.i = shl i32 %conv85.22.i333.i.i, %shl.i111.22.i335.i.i
49+
%and.22.i337.i.i = and i32 %and.21.i330.i.i, %c22.i336.i.i
50+
%1 = trunc i64 0 to i32
51+
%conv85.23.i340.i.i = or i32 0, %1
52+
%cmp3.i.23.i341.i.i = icmp ugt i32 %conv85.23.i340.i.i, 0
53+
%shl.i111.23.i342.i.i = select i1 %cmp3.i.23.i341.i.i, i32 0, i32 0
54+
%c23.i343.i.i = shl i32 %conv85.23.i340.i.i, %shl.i111.23.i342.i.i
55+
%and.23.i344.i.i = and i32 %and.22.i337.i.i, %c23.i343.i.i
56+
%2 = trunc i64 0 to i32
57+
%conv85.24.i347.i.i = or i32 0, %2
58+
%cmp3.i.24.i348.i.i = icmp ugt i32 %conv85.24.i347.i.i, 0
59+
%shl.i111.24.i349.i.i = select i1 %cmp3.i.24.i348.i.i, i32 0, i32 0
60+
%c24.i350.i.i = shl i32 %conv85.24.i347.i.i, %shl.i111.24.i349.i.i
61+
%and.24.i351.i.i = and i32 %and.23.i344.i.i, %c24.i350.i.i
62+
%3 = trunc i64 0 to i32
63+
%conv85.25.i354.i.i = or i32 0, %3
64+
%cmp3.i.25.i355.i.i = icmp ugt i32 %conv85.25.i354.i.i, 0
65+
%shl.i111.25.i356.i.i = select i1 %cmp3.i.25.i355.i.i, i32 0, i32 0
66+
%c25.i357.i.i = shl i32 %conv85.25.i354.i.i, %shl.i111.25.i356.i.i
67+
%and.25.i358.i.i = and i32 %and.24.i351.i.i, %c25.i357.i.i
68+
%conv109.i.i = trunc i32 %and.25.i358.i.i to i8
69+
%cmp.i.i54.i = icmp eq i8 %conv109.i.i, 0
70+
ret i1 %cmp.i.i54.i
71+
}

0 commit comments

Comments
 (0)