Skip to content

Commit 9cb7dff

Browse files
committed
[SLP] Fix PR80027: handle the case when the ext is not reduced but its operand is.
We need to handle the case where the resize operation itself is not reduced but its operand is. In this case, the extra analysis must be performed on the operand rather than on the instruction itself.
1 parent 4f0b5d5 commit 9cb7dff

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8791,6 +8791,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
87918791
} else if (It != MinBWs.end()) {
87928792
assert(BWSz > SrcBWSz && "Invalid cast!");
87938793
VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
8794+
} else if (SrcIt != MinBWs.end()) {
8795+
assert(BWSz > SrcBWSz && "Invalid cast!");
8796+
VecOpcode =
8797+
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
87948798
}
87958799
}
87968800
auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
@@ -12142,6 +12146,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1214212146
VecOpcode = Instruction::BitCast;
1214312147
} else if (BWSz < SrcBWSz) {
1214412148
VecOpcode = Instruction::Trunc;
12149+
} else if (It != MinBWs.end()) {
12150+
assert(BWSz > SrcBWSz && "Invalid cast!");
12151+
VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
1214512152
} else if (SrcIt != MinBWs.end()) {
1214612153
assert(BWSz > SrcBWSz && "Invalid cast!");
1214712154
VecOpcode =
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz-unknown -mcpu=z13 < %s | FileCheck %s
3+
4+
define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
8+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer
9+
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
10+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0
11+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1
12+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
13+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
14+
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
15+
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]]
16+
; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i1> [[TMP11]] to <4 x i32>
17+
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 1, i32 1, i32 1, i32 1>
18+
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP13]])
19+
; CHECK-NEXT: store i32 [[TMP14]], ptr [[A]], align 4
20+
; CHECK-NEXT: ret void
21+
;
22+
%.lobit.i.2 = lshr i64 %0, 63
23+
%3 = zext i1 %.cmp.i.2 to i64
24+
%4 = select i1 %1, i64 %.lobit.i.2, i64 %3
25+
%5 = trunc i64 %4 to i32
26+
%6 = xor i32 %5, 1
27+
%.lobit.i.3 = lshr i64 %0, 63
28+
%7 = zext i1 %.cmp.i.2 to i64
29+
%8 = select i1 %1, i64 %.lobit.i.3, i64 %7
30+
%9 = trunc i64 %8 to i32
31+
%10 = xor i32 %9, 1
32+
%11 = or i32 %10, %6
33+
%.lobit.i.4 = lshr i64 %0, 63
34+
%12 = zext i1 %1 to i64
35+
%13 = select i1 %.cmp.i.2, i64 %.lobit.i.4, i64 %12
36+
%14 = trunc i64 %13 to i32
37+
%15 = xor i32 %14, 1
38+
%16 = or i32 %15, %11
39+
%.lobit.i.5 = lshr i64 %0, 63
40+
%17 = zext i1 %.cmp.i.2 to i64
41+
%18 = select i1 %1, i64 %.lobit.i.5, i64 %17
42+
%19 = trunc i64 %18 to i32
43+
%20 = xor i32 %19, 1
44+
%21 = or i32 %20, %16
45+
store i32 %21, ptr %a, align 4
46+
ret void
47+
}
48+

0 commit comments

Comments
 (0)