Skip to content

Commit ef1d19b

Browse files
committed
[SLP] Fix PR89438: check all tree entries for the resized value.
We need to check all possible tree entries before looking for the minbitwidth in the user node; otherwise, we may get incorrect signedness info.
1 parent 5ef5eb6 commit ef1d19b

File tree

2 files changed

+138
-22
lines changed

2 files changed

+138
-22
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 46 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -13139,31 +13139,55 @@ Value *BoUpSLP::vectorizeTree(
1313913139
assert(Vec->getType()->isIntOrIntVectorTy() &&
1314013140
PrevVec->getType()->isIntOrIntVectorTy() &&
1314113141
"Expected integer vector types only.");
13142-
std::optional<std::pair<unsigned long, bool>> Res;
13143-
if (const TreeEntry *BaseTE = getTreeEntry(TE->Scalars.front())) {
13144-
SmallVector<const TreeEntry *> BaseTEs;
13145-
if (BaseTE->isSame(TE->Scalars))
13146-
BaseTEs.push_back(BaseTE);
13147-
auto It = MultiNodeScalars.find(TE->Scalars.front());
13148-
if (It != MultiNodeScalars.end()) {
13149-
for (const TreeEntry *MNTE : It->getSecond())
13150-
if (MNTE->isSame(TE->Scalars))
13151-
BaseTEs.push_back(MNTE);
13142+
std::optional<bool> IsSigned;
13143+
for (Value *V : TE->Scalars) {
13144+
if (const TreeEntry *BaseTE = getTreeEntry(V)) {
13145+
auto It = MinBWs.find(BaseTE);
13146+
if (It != MinBWs.end()) {
13147+
IsSigned = IsSigned.value_or(false) || It->second.second;
13148+
if (*IsSigned)
13149+
break;
13150+
}
13151+
for (const TreeEntry *MNTE : MultiNodeScalars.lookup(V)) {
13152+
auto It = MinBWs.find(MNTE);
13153+
if (It != MinBWs.end()) {
13154+
IsSigned = IsSigned.value_or(false) || It->second.second;
13155+
if (*IsSigned)
13156+
break;
13157+
}
13158+
}
13159+
if (IsSigned.value_or(false))
13160+
break;
13161+
// Scan through gather nodes.
13162+
for (const TreeEntry *BVE : ValueToGatherNodes.lookup(V)) {
13163+
auto It = MinBWs.find(BVE);
13164+
if (It != MinBWs.end()) {
13165+
IsSigned = IsSigned.value_or(false) || It->second.second;
13166+
if (*IsSigned)
13167+
break;
13168+
}
13169+
}
13170+
if (IsSigned.value_or(false))
13171+
break;
13172+
if (auto *EE = dyn_cast<ExtractElementInst>(V)) {
13173+
IsSigned =
13174+
IsSigned.value_or(false) ||
13175+
!isKnownNonNegative(EE->getVectorOperand(), SimplifyQuery(*DL));
13176+
continue;
13177+
}
13178+
if (IsSigned.value_or(false))
13179+
break;
1315213180
}
13153-
const auto *BaseIt = find_if(BaseTEs, [&](const TreeEntry *BaseTE) {
13154-
return MinBWs.contains(BaseTE);
13155-
});
13156-
if (BaseIt != BaseTEs.end())
13157-
Res = MinBWs.lookup(*BaseIt);
1315813181
}
13159-
if (!Res) {
13160-
assert(MinBWs.contains(TE->UserTreeIndices.front().UserTE) &&
13161-
"Expected user in MinBWs.");
13162-
Res = MinBWs.lookup(TE->UserTreeIndices.front().UserTE);
13182+
if (IsSigned.value_or(false)) {
13183+
// Final attempt - check user node.
13184+
auto It = MinBWs.find(TE->UserTreeIndices.front().UserTE);
13185+
if (It != MinBWs.end())
13186+
IsSigned = It->second.second;
1316313187
}
13164-
assert(Res && "Expected user node or perfect diamond match in MinBWs.");
13165-
bool IsSigned = Res->second;
13166-
Vec = Builder.CreateIntCast(Vec, PrevVec->getType(), IsSigned);
13188+
assert(IsSigned &&
13189+
"Expected user node or perfect diamond match in MinBWs.");
13190+
Vec = Builder.CreateIntCast(Vec, PrevVec->getType(), *IsSigned);
1316713191
}
1316813192
PrevVec->replaceAllUsesWith(Vec);
1316913193
PostponedValues.try_emplace(Vec).first->second.push_back(TE);

llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-reduced.ll

Lines changed: 92 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,3 +82,95 @@ define i64 @test(ptr %p) {
8282
store i8 %55, ptr %3, align 1
8383
ret i64 0
8484
}
85+
86+
define i64 @test1(ptr %p) {
87+
; CHECK-LABEL: define i64 @test1(
88+
; CHECK-SAME: ptr [[P:%.*]]) {
89+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 12
90+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> zeroinitializer, zeroinitializer
91+
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], zeroinitializer
92+
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], zeroinitializer
93+
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], zeroinitializer
94+
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP5]], zeroinitializer
95+
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], zeroinitializer
96+
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP7]], zeroinitializer
97+
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], zeroinitializer
98+
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], zeroinitializer
99+
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], zeroinitializer
100+
; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8>
101+
; CHECK-NEXT: store <4 x i8> [[TMP12]], ptr [[TMP1]], align 1
102+
; CHECK-NEXT: ret i64 0
103+
;
104+
%1 = getelementptr i8, ptr %p, i64 13
105+
%2 = getelementptr i8, ptr %p, i64 14
106+
%3 = getelementptr i8, ptr %p, i64 15
107+
%4 = getelementptr i8, ptr %p, i64 12
108+
%5 = zext i8 0 to i32
109+
%6 = and i32 %5, 0
110+
%.not866 = icmp eq i32 %6, 0
111+
%7 = select i1 %.not866, i32 0, i32 0
112+
%8 = xor i32 0, %7
113+
%9 = zext i8 0 to i32
114+
%10 = and i32 %9, 0
115+
%.not869 = icmp eq i32 %10, 0
116+
%11 = select i1 %.not869, i32 0, i32 0
117+
%12 = xor i32 0, %11
118+
%13 = zext i8 0 to i32
119+
%14 = and i32 %13, 0
120+
%.not871 = icmp eq i32 %14, 0
121+
%15 = select i1 %.not871, i32 0, i32 0
122+
%16 = xor i32 0, %15
123+
%17 = zext i8 0 to i32
124+
%18 = and i32 %17, 0
125+
%.not874 = icmp eq i32 %18, 0
126+
%19 = select i1 %.not874, i32 0, i32 0
127+
%20 = xor i32 0, %19
128+
%21 = xor i32 %13, 0
129+
%22 = xor i32 %21, 0
130+
%23 = xor i32 %22, 0
131+
%24 = xor i32 %23, 0
132+
%25 = xor i32 %24, 0
133+
%26 = xor i32 %25, 0
134+
%27 = xor i32 %26, %8
135+
%28 = xor i32 %27, 0
136+
%29 = xor i32 %28, 0
137+
%30 = xor i32 %29, 0
138+
%31 = trunc i32 %30 to i8
139+
store i8 %31, ptr %4, align 1
140+
%32 = xor i32 %13, 0
141+
%33 = xor i32 %32, 0
142+
%34 = xor i32 %33, 0
143+
%35 = xor i32 %34, 0
144+
%36 = xor i32 %35, 0
145+
%37 = xor i32 %36, 0
146+
%38 = xor i32 %37, %20
147+
%39 = xor i32 %38, 0
148+
%40 = xor i32 %39, 0
149+
%41 = xor i32 %40, 0
150+
%42 = trunc i32 %41 to i8
151+
store i8 %42, ptr %1, align 1
152+
%43 = xor i32 %9, 0
153+
%44 = xor i32 %43, 0
154+
%45 = xor i32 %44, 0
155+
%46 = xor i32 %45, 0
156+
%47 = xor i32 %46, 0
157+
%48 = xor i32 %47, 0
158+
%49 = xor i32 %48, %16
159+
%50 = xor i32 %49, 0
160+
%51 = xor i32 %50, 0
161+
%52 = xor i32 %51, 0
162+
%53 = trunc i32 %52 to i8
163+
store i8 %53, ptr %2, align 1
164+
%54 = xor i32 %43, 0
165+
%55 = xor i32 %54, 0
166+
%56 = xor i32 %55, 0
167+
%57 = xor i32 %56, 0
168+
%58 = xor i32 %57, 0
169+
%59 = xor i32 %58, %12
170+
%60 = xor i32 %59, 0
171+
%61 = xor i32 %60, 0
172+
%62 = xor i32 %61, 0
173+
%63 = trunc i32 %62 to i8
174+
store i8 %63, ptr %3, align 1
175+
ret i64 0
176+
}

0 commit comments

Comments
 (0)