Skip to content

Commit 54ca1e2

Browse files
committed
[SLP]Fix PR80027: include initial trunc nodes to the demoted values.
The initial sext/zext/trunc nodes need to be included in the list of demoted root values so that the cost is calculated correctly and the vectorization is handled properly.
1 parent dfde6e8 commit 54ca1e2

File tree

3 files changed

+56
-12
lines changed

3 files changed

+56
-12
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14251,13 +14251,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1425114251
// resize to the final type.
1425214252
bool IsTruncRoot = false;
1425314253
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14254+
SmallVector<unsigned> RootDemotes;
1425414255
if (NodeIdx != 0 &&
1425514256
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1425614257
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
1425714258
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
1425814259
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
1425914260
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
1426014261
IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14262+
RootDemotes.push_back(NodeIdx);
1426114263
IsProfitableToDemoteRoot = true;
1426214264
++NodeIdx;
1426314265
}
@@ -14394,6 +14396,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1439414396
while (NodeIdx < VectorizableTree.size() &&
1439514397
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1439614398
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
14399+
RootDemotes.push_back(NodeIdx);
1439714400
++NodeIdx;
1439814401
IsTruncRoot = true;
1439914402
}
@@ -14409,14 +14412,22 @@ void BoUpSLP::computeMinimumValueSizes() {
1440914412
unsigned MaxBitWidth = ComputeMaxBitWidth(
1441014413
TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot,
1441114414
IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot);
14415+
for (unsigned Idx : RootDemotes)
14416+
ToDemote.append(VectorizableTree[Idx]->Scalars.begin(),
14417+
VectorizableTree[Idx]->Scalars.end());
14418+
RootDemotes.clear();
1441214419
IsTopRoot = false;
1441314420
IsProfitableToDemoteRoot = true;
1441414421

1441514422
if (TruncNodes.empty()) {
1441614423
NodeIdx = VectorizableTree.size();
1441714424
} else {
14418-
NodeIdx = *TruncNodes.begin() + 1;
14419-
TruncNodes.erase(TruncNodes.begin());
14425+
unsigned NewIdx = 0;
14426+
do {
14427+
NewIdx = *TruncNodes.begin() + 1;
14428+
TruncNodes.erase(TruncNodes.begin());
14429+
} while (NewIdx <= NodeIdx && !TruncNodes.empty());
14430+
NodeIdx = NewIdx;
1442014431
IsTruncRoot = true;
1442114432
}
1442214433

llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,9 @@ define void @partial_vec_invalid_cost() #0 {
77
; CHECK-LABEL: define void @partial_vec_invalid_cost(
88
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
99
; CHECK-NEXT: entry:
10-
; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i96 0, 0
11-
; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i96 0, 0
12-
; CHECK-NEXT: [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
13-
; CHECK-NEXT: [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
14-
; CHECK-NEXT: [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
15-
; CHECK-NEXT: [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
1610
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
17-
; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
18-
; CHECK-NEXT: [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
19-
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
20-
; CHECK-NEXT: [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
11+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
12+
; CHECK-NEXT: [[OP_RDX3:%.*]] = or i32 [[TMP0]], [[TMP1]]
2113
; CHECK-NEXT: [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
2214
; CHECK-NEXT: store i96 [[STORE_THIS]], ptr null, align 16
2315
; CHECK-NEXT: ret void
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz -mcpu=z15 %s | FileCheck %s
3+
4+
define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[A:%.*]], i8 [[TMP0:%.*]], i16 [[B_PROMOTED_I:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i128
8+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[B_PROMOTED_I]], i32 0
9+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i128> poison, i128 [[TMP2]], i32 0
11+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
12+
; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
13+
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
14+
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1>
15+
; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]])
16+
; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i64
17+
; CHECK-NEXT: [[OP_RDX:%.*]] = and i64 [[TMP11]], 1
18+
; CHECK-NEXT: store i64 [[OP_RDX]], ptr [[A]], align 8
19+
; CHECK-NEXT: ret void
20+
;
21+
%2 = zext i8 %0 to i128
22+
%3 = zext i16 %b.promoted.i to i128
23+
%4 = or i128 %3, %2
24+
%5 = trunc i128 %4 to i64
25+
%6 = and i64 %5, 1
26+
%7 = zext i16 %b.promoted.i to i128
27+
%8 = or i128 %7, %2
28+
%9 = trunc i128 %8 to i64
29+
%10 = and i64 %6, %9
30+
%11 = zext i16 %b.promoted.i to i128
31+
%12 = or i128 %11, %2
32+
%13 = trunc i128 %12 to i64
33+
%14 = and i64 %10, %13
34+
%15 = zext i16 %b.promoted.i to i128
35+
%16 = or i128 %15, %2
36+
%17 = trunc i128 %16 to i64
37+
%18 = and i64 %14, %17
38+
store i64 %18, ptr %a, align 8
39+
ret void
40+
}
41+

0 commit comments

Comments
 (0)