[VectorCombine]Fix a crash during long vector analysis.

alexey-bataev · alexey-bataev · commit c2ae16f6a72a · 2023-10-09T14:22:37.000-07:00
If the analysis of the single vector requested, need to use original
type to avoid crash
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1473,11 +1473,11 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
   bool UsesSecondVec =
       any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
   InstructionCost OldCost = TTI.getShuffleCost(
-      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
-      Shuffle->getShuffleMask());
+      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
+      UsesSecondVec ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
   InstructionCost NewCost = TTI.getShuffleCost(
-      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
-      ConcatMask);
+      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
+      UsesSecondVec ? VecType : ShuffleInputType, ConcatMask);
 
   LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                     << "\n");
diff --git a/llvm/test/Transforms/VectorCombine/X86/reduction-two-vecs-combine.ll b/llvm/test/Transforms/VectorCombine/X86/reduction-two-vecs-combine.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S --passes=vector-combine -mtriple=x86_64-unknown-linux < %s | FileCheck %s
+
+define i16 @test_spill_mixed() {
+; CHECK-LABEL: define i16 @test_spill_mixed() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  %0 = shufflevector <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
+  %1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
+  ret i16 0
+}
+
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)