[SLP]Extra check if the instruction marked for removal must be replaced in reduction ops

alexey-bataev · alexey-bataev · commit e7080fd735d0 · 2024-10-31T09:59:35.000-07:00
If the instruction is vectorized and is part of the reduced values' gather/buildvector node, it should be properly replaced in the reduced operation instructions before removal, to avoid a compiler crash. Fixes #114371
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16127,11 +16127,13 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
       if (IE->Idx != 0 &&
           !(VectorizableTree.front()->isGather() &&
             !IE->UserTreeIndices.empty() &&
-            any_of(IE->UserTreeIndices,
-                   [&](const EdgeInfo &EI) {
-                     return EI.UserTE == VectorizableTree.front().get() &&
-                            EI.EdgeIdx == UINT_MAX;
-                   })) &&
+            (ValueToGatherNodes.lookup(I).contains(
+                 VectorizableTree.front().get()) ||
+             any_of(IE->UserTreeIndices,
+                    [&](const EdgeInfo &EI) {
+                      return EI.UserTE == VectorizableTree.front().get() &&
+                             EI.EdgeIdx == UINT_MAX;
+                    }))) &&
           !(GatheredLoadsEntriesFirst.has_value() &&
             IE->Idx >= *GatheredLoadsEntriesFirst &&
             VectorizableTree.front()->isGather() &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(ptr %c, i16 %a, i16 %0) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[C:%.*]], i16 [[A:%.*]], i16 [[TMP0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i16>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt <4 x i16> [[TMP7]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult i16 [[A]], -2
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP9]], i32 7
+; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP11]], <4 x i1> [[TMP8]], i64 0)
+; CHECK-NEXT:    [[TMP13:%.*]] = freeze <8 x i1> [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP13]])
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; CHECK-NEXT:    store i32 [[TMP15]], ptr [[C]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %tobool = icmp ne i16 %a, 0
+  %1 = zext i1 %tobool to i16
+  %cmp3 = icmp ugt i16 %0, %1
+  %2 = and i1 %tobool, %cmp3
+  %tobool.1 = icmp ne i16 %a, 0
+  %3 = zext i1 %tobool.1 to i16
+  %cmp3.1 = icmp ugt i16 %0, %3
+  %4 = and i1 %tobool.1, %cmp3.1
+  %5 = select i1 %2, i1 %4, i1 false
+  %tobool.2 = icmp ne i16 %a, 0
+  %6 = zext i1 %tobool.2 to i16
+  %cmp3.2 = icmp ugt i16 %0, %6
+  %7 = and i1 %tobool.2, %cmp3.2
+  %8 = select i1 %5, i1 %7, i1 false
+  %tobool.3 = icmp ne i16 %a, 0
+  %9 = zext i1 %tobool.3 to i16
+  %cmp3.3 = icmp ugt i16 %a, %9
+  %10 = icmp ult i16 %a, -2
+  %11 = and i1 %10, %cmp3.3
+  %12 = select i1 %8, i1 %11, i1 false
+  %13 = zext i1 %12 to i32
+  store i32 %13, ptr %c, align 4
+  ret i32 0
+}