Skip to content

Commit cee7d99

Browse files
committed
[SLP] Fix PR89438: Check for the same vectorized node in MinBWs, not the user.
Need to check whether the buildvector node has a perfect diamond match in the graph and whether the matched node is resized.
1 parent 0c455ee commit cee7d99

File tree

2 files changed

+108
-3
lines changed

2 files changed

+108
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13139,9 +13139,30 @@ Value *BoUpSLP::vectorizeTree(
1313913139
assert(Vec->getType()->isIntOrIntVectorTy() &&
1314013140
PrevVec->getType()->isIntOrIntVectorTy() &&
1314113141
"Expected integer vector types only.");
13142-
assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
13143-
"Expected user in MinBWs.");
13144-
bool IsSigned = MinBWs.lookup(TE->UserTreeIndices.front().UserTE).second;
13142+
std::optional<std::pair<unsigned long, bool>> Res;
13143+
if (const TreeEntry *BaseTE = getTreeEntry(TE->Scalars.front())) {
13144+
SmallVector<const TreeEntry *> BaseTEs;
13145+
if (BaseTE->isSame(TE->Scalars))
13146+
BaseTEs.push_back(BaseTE);
13147+
auto It = MultiNodeScalars.find(TE->Scalars.front());
13148+
if (It != MultiNodeScalars.end()) {
13149+
for (const TreeEntry *MNTE : It->getSecond())
13150+
if (MNTE->isSame(TE->Scalars))
13151+
BaseTEs.push_back(MNTE);
13152+
}
13153+
const auto *BaseIt = find_if(BaseTEs, [&](const TreeEntry *BaseTE) {
13154+
return MinBWs.contains(BaseTE);
13155+
});
13156+
if (BaseIt != BaseTEs.end())
13157+
Res = MinBWs.lookup(*BaseIt);
13158+
}
13159+
if (!Res) {
13160+
assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
13161+
"Expected user in MinBWs.");
13162+
Res = MinBWs.lookup(TE->UserTreeIndices.front().UserTE);
13163+
}
13164+
assert(Res && "Expected user node or perfect diamond match in MinBWs.");
13165+
bool IsSigned = Res->second;
1314513166
Vec = Builder.CreateIntCast(Vec, PrevVec->getType(), IsSigned);
1314613167
}
1314713168
PrevVec->replaceAllUsesWith(Vec);
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s
3+
4+
define i64 @test(ptr %p) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: ptr [[P:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 12
8+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> zeroinitializer, zeroinitializer
9+
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], zeroinitializer
10+
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], zeroinitializer
11+
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], zeroinitializer
12+
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP5]], zeroinitializer
13+
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], zeroinitializer
14+
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP7]], zeroinitializer
15+
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], zeroinitializer
16+
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], zeroinitializer
17+
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], zeroinitializer
18+
; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8>
19+
; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP1]], align 1
20+
; CHECK-NEXT: ret i64 0
21+
;
22+
%1 = getelementptr i8, ptr %p, i64 13
23+
%2 = getelementptr i8, ptr %p, i64 14
24+
%3 = getelementptr i8, ptr %p, i64 15
25+
%4 = getelementptr i8, ptr %p, i64 12
26+
%5 = zext i8 0 to i32
27+
%6 = and i32 %5, 0
28+
%.not866 = icmp eq i32 %6, 0
29+
%7 = select i1 %.not866, i32 0, i32 0
30+
%8 = xor i32 0, %7
31+
%9 = zext i8 0 to i32
32+
%10 = and i32 %9, 0
33+
%.not871 = icmp eq i32 %10, 0
34+
%11 = select i1 %.not871, i32 0, i32 0
35+
%12 = xor i32 0, %11
36+
%13 = xor i32 %9, 0
37+
%14 = xor i32 %13, 0
38+
%15 = xor i32 %14, 0
39+
%16 = xor i32 %15, 0
40+
%17 = xor i32 %16, 0
41+
%18 = xor i32 %17, %12
42+
%19 = xor i32 %18, 0
43+
%20 = xor i32 %19, 0
44+
%21 = xor i32 %20, 0
45+
%22 = xor i32 %21, 0
46+
%23 = trunc i32 %22 to i8
47+
store i8 %23, ptr %4, align 1
48+
%24 = xor i32 %9, 0
49+
%25 = xor i32 %24, 0
50+
%26 = xor i32 %25, 0
51+
%27 = xor i32 %26, 0
52+
%28 = xor i32 %27, 0
53+
%29 = xor i32 %28, %8
54+
%30 = xor i32 %29, 0
55+
%31 = xor i32 %30, 0
56+
%32 = xor i32 %31, 0
57+
%33 = xor i32 %32, 0
58+
%34 = trunc i32 %33 to i8
59+
store i8 %34, ptr %1, align 1
60+
%35 = xor i32 0, %5
61+
%36 = xor i32 %35, 0
62+
%37 = xor i32 %36, 0
63+
%38 = xor i32 %37, 0
64+
%39 = xor i32 %38, 0
65+
%40 = xor i32 %39, %8
66+
%41 = xor i32 %40, 0
67+
%42 = xor i32 %41, 0
68+
%43 = xor i32 %42, 0
69+
%44 = xor i32 %43, 0
70+
%45 = trunc i32 %44 to i8
71+
store i8 %45, ptr %2, align 1
72+
%46 = xor i32 %35, 0
73+
%47 = xor i32 %46, 0
74+
%48 = xor i32 %47, 0
75+
%49 = xor i32 %48, 0
76+
%50 = xor i32 %49, %8
77+
%51 = xor i32 %50, 0
78+
%52 = xor i32 %51, 0
79+
%53 = xor i32 %52, 0
80+
%54 = xor i32 %53, 0
81+
%55 = trunc i32 %54 to i8
82+
store i8 %55, ptr %3, align 1
83+
ret i64 0
84+
}

0 commit comments

Comments
 (0)