[SLP]Fix PR86763: do not truncate reductions to the demanded bits size.

alexey-bataev · alexey-bataev · commit d94dc5f0d63b · 2024-03-27T14:34:59.000-07:00
Need to adjust ReductionBitWIdth after minbitwidth analysis, if the
demanded bits analysis sjows tht its size is less than the size of the
vectorized value. It prevents incorrect sign-zero extension
transformation after.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14415,6 +14415,13 @@ void BoUpSLP::computeMinimumValueSizes() {
     unsigned MaxBitWidth = ComputeMaxBitWidth(
         TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot,
         IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot);
+    if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) {
+      if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth)
+        ReductionBitWidth = bit_ceil(MaxBitWidth);
+      else if (MaxBitWidth == 0)
+        ReductionBitWidth = 0;
+    }
+
     for (unsigned Idx : RootDemotes)
       ToDemote.append(VectorizableTree[Idx]->Scalars.begin(),
                       VectorizableTree[Idx]->Scalars.end());
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
@@ -6,8 +6,8 @@ define i32 @test(ptr %0, ptr %1) {
 ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT:    [[TMP3:%.*]] = sext i1 [[TMP2]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> <i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = sext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]]
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
@@ -11,9 +11,8 @@ define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1>
-; CHECK-NEXT:    [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]])
-; CHECK-NEXT:    [[TMP11:%.*]] = zext i1 [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i16 [[TMP9]] to i64
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = and i64 [[TMP11]], 1
 ; CHECK-NEXT:    store i64 [[OP_RDX]], ptr [[A]], align 8
 ; CHECK-NEXT:    ret void