Skip to content

Commit d94dc5f

Browse files
committed
[SLP]Fix PR86763: do not truncate reductions to the demanded bits size.
Need to adjust ReductionBitWidth after the minbitwidth analysis if the demanded bits analysis shows that its size is less than the size of the vectorized value. This prevents an incorrect sign/zero extension transformation afterwards.
1 parent 742a82a commit d94dc5f

File tree

3 files changed

+11
-5
lines changed

3 files changed

+11
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14415,6 +14415,13 @@ void BoUpSLP::computeMinimumValueSizes() {
1441514415
unsigned MaxBitWidth = ComputeMaxBitWidth(
1441614416
TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot,
1441714417
IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot);
14418+
if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) {
14419+
if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth)
14420+
ReductionBitWidth = bit_ceil(MaxBitWidth);
14421+
else if (MaxBitWidth == 0)
14422+
ReductionBitWidth = 0;
14423+
}
14424+
1441814425
for (unsigned Idx : RootDemotes)
1441914426
ToDemote.append(VectorizableTree[Idx]->Scalars.begin(),
1442014427
VectorizableTree[Idx]->Scalars.end());

llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ define i32 @test(ptr %0, ptr %1) {
66
; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4
9-
; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> <i1 true, i1 true, i1 true, i1 true>)
10-
; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[TMP2]] to i32
9+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> <i8 1, i8 1, i8 1, i8 1>)
10+
; CHECK-NEXT: [[TMP3:%.*]] = sext i8 [[TMP2]] to i32
1111
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]]
1212
; CHECK-NEXT: ret i32 [[OP_RDX]]
1313
;

llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
1111
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
1212
; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
1313
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
14-
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1>
15-
; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]])
16-
; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i64
14+
; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
15+
; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP9]] to i64
1716
; CHECK-NEXT: [[OP_RDX:%.*]] = and i64 [[TMP11]], 1
1817
; CHECK-NEXT: store i64 [[OP_RDX]], ptr [[A]], align 8
1918
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)