Skip to content

Commit 342f7d0

Browse files
committed
[SLP]Fix PR86620: check final minbitwidth for truncs/exts before
accepting it. If the minbitwidth is deduced from the demanded elements, we need to check the final bitwidth for the trunc/ext instruction, not blindly accept the used one.
1 parent 1ad29a5 commit 342f7d0

File tree

3 files changed

+10
-10
lines changed

3 files changed

+10
-10
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14092,12 +14092,14 @@ bool BoUpSLP::collectValuesToDemote(
1409214092
MaxDepthLevel = 1;
1409314093
if (IsProfitableToDemoteRoot)
1409414094
IsProfitableToDemote = true;
14095+
(void)IsPotentiallyTruncated(V, BitWidth);
1409514096
break;
1409614097
case Instruction::ZExt:
1409714098
case Instruction::SExt:
1409814099
if (!IsTruncRoot)
1409914100
MaxDepthLevel = 1;
1410014101
IsProfitableToDemote = true;
14102+
(void)IsPotentiallyTruncated(V, BitWidth);
1410114103
break;
1410214104

1410314105
// We can demote certain binary operations if we can demote both of their

llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,10 @@ define i32 @test() {
88
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
99
; CHECK-NEXT: entry:
1010
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
11-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
12-
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
13-
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1>
14-
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
15-
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
11+
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i32>
12+
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 65535, i32 65535, i32 65535, i32 65535>
13+
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 65535, i32 65535, i32 65535, i32 65535>
14+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
1615
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1)
1716
; CHECK-NEXT: ret i32 [[TMP6]]
1817
;

llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-9 < %s | FileCheck %s
33

44
define void @t(i64 %v) {
55
; CHECK-LABEL: define void @t(
66
; CHECK-SAME: i64 [[V:%.*]]) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0
99
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16>
11-
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], <i16 5, i16 6, i16 3, i16 2>
12-
; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]])
13-
; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
10+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
11+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], <i32 5, i32 6, i32 3, i32 2>
12+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
1413
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535
1514
; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4
1615
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)