[SLP] Fix PR80027: handle the case when the ext is not reduced but its operand is.

alexey-bataev · alexey-bataev · commit 9cb7dffa8819 · 2024-04-02T09:32:25.000-07:00
We need to handle the case where the resize operation itself is not
reduced but its operand is. In this case, the extra analysis must be
performed on the operand, not on the instruction itself.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8791,6 +8791,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       } else if (It != MinBWs.end()) {
         assert(BWSz > SrcBWSz && "Invalid cast!");
         VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
+      } else if (SrcIt != MinBWs.end()) {
+        assert(BWSz > SrcBWSz && "Invalid cast!");
+        VecOpcode =
+            SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
       }
     }
     auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
@@ -12142,6 +12146,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           VecOpcode = Instruction::BitCast;
         } else if (BWSz < SrcBWSz) {
           VecOpcode = Instruction::Trunc;
+        } else if (It != MinBWs.end()) {
+          assert(BWSz > SrcBWSz && "Invalid cast!");
+          VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
         } else if (SrcIt != MinBWs.end()) {
           assert(BWSz > SrcBWSz && "Invalid cast!");
           VecOpcode =
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz-unknown -mcpu=z13 < %s | FileCheck %s
+
+define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
+; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]]
+; CHECK-NEXT:    [[TMP12:%.*]] = zext <4 x i1> [[TMP11]] to <4 x i32>
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP13]])
+; CHECK-NEXT:    store i32 [[TMP14]], ptr [[A]], align 4
+; CHECK-NEXT:    ret void
+;
+  %.lobit.i.2 = lshr i64 %0, 63
+  %3 = zext i1 %.cmp.i.2 to i64
+  %4 = select i1 %1, i64 %.lobit.i.2, i64 %3
+  %5 = trunc i64 %4 to i32
+  %6 = xor i32 %5, 1
+  %.lobit.i.3 = lshr i64 %0, 63
+  %7 = zext i1 %.cmp.i.2 to i64
+  %8 = select i1 %1, i64 %.lobit.i.3, i64 %7
+  %9 = trunc i64 %8 to i32
+  %10 = xor i32 %9, 1
+  %11 = or i32 %10, %6
+  %.lobit.i.4 = lshr i64 %0, 63
+  %12 = zext i1 %1 to i64
+  %13 = select i1 %.cmp.i.2, i64 %.lobit.i.4, i64 %12
+  %14 = trunc i64 %13 to i32
+  %15 = xor i32 %14, 1
+  %16 = or i32 %15, %11
+  %.lobit.i.5 = lshr i64 %0, 63
+  %17 = zext i1 %.cmp.i.2 to i64
+  %18 = select i1 %1, i64 %.lobit.i.5, i64 %17
+  %19 = trunc i64 %18 to i32
+  %20 = xor i32 %19, 1
+  %21 = or i32 %20, %16
+  store i32 %21, ptr %a, align 4
+  ret void
+}
+