[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector #119906

RKSimon · 2024-12-13T17:32:44Z

We have corresponding poison tests in the "-inseltpoison.ll" sibling test files.

Fixes #119900

llvmbot · 2024-12-13T17:33:19Z

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-backend-amdgpu

Author: Simon Pilgrim (RKSimon)

Changes

If we're inserting into a non-poison undef value, the shuffle mask must reference the undef value rather than just setting the mask index to poison.

We have corresponding poison tests in the "-inseltpoison.ll" sibling test files.

Fixes #119900

Full diff: https://github.com/llvm/llvm-project/pull/119906.diff

3 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+7-3)
(modified) llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll (+1-1)
(modified) llvm/test/Transforms/VectorCombine/X86/load.ll (+19-19)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 09489e24984530..af8269f1e37066 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -179,7 +179,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // Match insert into fixed vector of scalar value.
   // TODO: Handle non-zero insert index.
   Value *Scalar;
-  if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+  UndefValue *BaseVec;
+  if (!match(&I, m_InsertElt(m_UndefValue(BaseVec), m_Value(Scalar),
+                             m_ZeroInt())) ||
       !Scalar->hasOneUse())
     return false;
 
@@ -268,7 +270,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // still need a shuffle to change the vector size.
   auto *Ty = cast<FixedVectorType>(I.getType());
   unsigned OutputNumElts = Ty->getNumElements();
-  SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
+  SmallVector<int, 16> Mask(OutputNumElts, isa<PoisonValue>(BaseVec)
+                                               ? PoisonMaskElem
+                                               : MinVecNumElts);
   assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
   Mask[0] = OffsetEltIndex;
   if (OffsetEltIndex)
@@ -286,7 +290,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   Value *CastedPtr =
       Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
   Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
-  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+  VecLd = Builder.CreateShuffleVector(VecLd, PoisonValue::get(MinVecTy), Mask);
 
   replaceValue(I, *VecLd);
   ++NumVecLoad;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
index 94b8c98a80df5c..902293985255ab 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -13,7 +13,7 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r
 ; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
 ; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index bdd05a1a37c70f..d734aabd19d4f4 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -159,7 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
 define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -170,7 +170,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
 define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -183,7 +183,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
 define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -196,7 +196,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
 define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -209,7 +209,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
 define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 16
@@ -222,7 +222,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
 define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr addrspace(44) %p, align 16
@@ -236,7 +236,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -256,7 +256,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
 ;
 ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; AVX2-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -276,7 +276,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
 ;
 ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; AVX2-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -305,7 +305,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
 define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
@@ -337,7 +337,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
@@ -440,7 +440,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
 define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_align(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -464,7 +464,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
 define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <8 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -475,7 +475,7 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
 define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <8 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -486,7 +486,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
 define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v16f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <16 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -497,7 +497,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
 define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v2f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 4>
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -550,7 +550,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
 define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %l = load <2 x float>, ptr %p, align 4
@@ -562,7 +562,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
 define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %l = load <8 x float>, ptr %p, align 4
@@ -598,7 +598,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab
 ;
 ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; AVX2-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1

llvmbot · 2024-12-13T17:33:20Z

@llvm/pr-subscribers-llvm-transforms

Author: Simon Pilgrim (RKSimon)

Changes

If we're inserting into a non-poison undef value, the shuffle mask must reference the undef value rather than just setting the mask index to poison.

We have corresponding poison tests in the "-inseltpoison.ll" sibling test files.

Fixes #119900

Full diff: https://github.com/llvm/llvm-project/pull/119906.diff

3 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+7-3)
(modified) llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll (+1-1)
(modified) llvm/test/Transforms/VectorCombine/X86/load.ll (+19-19)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 09489e24984530..af8269f1e37066 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -179,7 +179,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // Match insert into fixed vector of scalar value.
   // TODO: Handle non-zero insert index.
   Value *Scalar;
-  if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+  UndefValue *BaseVec;
+  if (!match(&I, m_InsertElt(m_UndefValue(BaseVec), m_Value(Scalar),
+                             m_ZeroInt())) ||
       !Scalar->hasOneUse())
     return false;
 
@@ -268,7 +270,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // still need a shuffle to change the vector size.
   auto *Ty = cast<FixedVectorType>(I.getType());
   unsigned OutputNumElts = Ty->getNumElements();
-  SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
+  SmallVector<int, 16> Mask(OutputNumElts, isa<PoisonValue>(BaseVec)
+                                               ? PoisonMaskElem
+                                               : MinVecNumElts);
   assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
   Mask[0] = OffsetEltIndex;
   if (OffsetEltIndex)
@@ -286,7 +290,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   Value *CastedPtr =
       Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
   Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
-  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+  VecLd = Builder.CreateShuffleVector(VecLd, PoisonValue::get(MinVecTy), Mask);
 
   replaceValue(I, *VecLd);
   ++NumVecLoad;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
index 94b8c98a80df5c..902293985255ab 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -13,7 +13,7 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r
 ; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
 ; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index bdd05a1a37c70f..d734aabd19d4f4 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -159,7 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
 define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -170,7 +170,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
 define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -183,7 +183,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
 define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -196,7 +196,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
 define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -209,7 +209,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
 define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 16
@@ -222,7 +222,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
 define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr addrspace(44) %p, align 16
@@ -236,7 +236,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -256,7 +256,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
 ;
 ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; AVX2-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -276,7 +276,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
 ;
 ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; AVX2-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -305,7 +305,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
 define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
@@ -337,7 +337,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; CHECK-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
@@ -440,7 +440,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
 define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_align(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -464,7 +464,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
 define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <8 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -475,7 +475,7 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
 define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <8 x i32> [[R]]
 ;
   %s = load i32, ptr %p, align 4
@@ -486,7 +486,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
 define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v16f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <16 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -497,7 +497,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
 define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v2f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 4>
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %s = load float, ptr %p, align 4
@@ -550,7 +550,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
 define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %l = load <2 x float>, ptr %p, align 4
@@ -562,7 +562,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
 define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %l = load <8 x float>, ptr %p, align 4
@@ -598,7 +598,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab
 ;
 ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
-; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ; AVX2-NEXT:    ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

… poison vector. We have corresponding poison tests in the "-inseltpoison.ll" sibling test files. Fixes llvm#119900

github-actions · 2024-12-13T17:59:09Z

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 754499c1e9410d51a4c41e71388c304de61366a0 81e78500cb515e9a05b95ab37d0bde9634a501dd llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll llvm/test/Transforms/VectorCombine/X86/load.ll

The following files introduce new uses of undef:

llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
llvm/test/Transforms/VectorCombine/X86/load.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

nunoplopes

LGTM, thank you!

nikic

LGTM

llvm-ci · 2024-12-14T12:11:36Z

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/9805

Here is the relevant piece of the build log for reference:

Step 6 (test) failure: build (failure)
...
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/1/3 (2051 of 2060)
PASS: lldb-unit :: Utility/./UtilityTests/4/8 (2052 of 2060)
PASS: lldb-unit :: Utility/./UtilityTests/1/8 (2053 of 2060)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/2/3 (2054 of 2060)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/0/2 (2055 of 2060)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/1/2 (2056 of 2060)
PASS: lldb-unit :: Target/./TargetTests/11/14 (2057 of 2060)
PASS: lldb-unit :: Host/./HostTests/2/13 (2058 of 2060)
PASS: lldb-unit :: Process/gdb-remote/./ProcessGdbRemoteTests/8/9 (2059 of 2060)
UNRESOLVED: lldb-api :: tools/lldb-server/TestLldbGdbServer.py (2060 of 2060)
******************** TEST 'lldb-api :: tools/lldb-server/TestLldbGdbServer.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/make --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/tools/lldb-server -p TestLldbGdbServer.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 20.0.0git (https://github.com/llvm/llvm-project.git revision cc54a0ce5674b740c2136d7bd2416ffeb4a230cf)
  clang revision cc54a0ce5674b740c2136d7bd2416ffeb4a230cf
  llvm revision cc54a0ce5674b740c2136d7bd2416ffeb4a230cf
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_another_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_minus_one_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_zero_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_attach_commandline_continue_app_exits_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
Program aborted due to an unhandled Error:
Operation not permitted
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server gdbserver --attach=3419660 --reverse-connect [127.0.0.1]:54925
 #0 0x0000aaaadc099600 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0xb19600)
 #1 0x0000aaaadc097630 llvm::sys::RunSignalHandlers() (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0xb17630)
 #2 0x0000aaaadc099d10 SignalHandler(int) Signals.cpp:0:0
 #3 0x0000ffff95c617dc (linux-vdso.so.1+0x7dc)
 #4 0x0000ffff9546f200 __pthread_kill_implementation ./nptl/pthread_kill.c:44:76

RKSimon requested review from nikic and nunoplopes December 13, 2024 17:32

llvmbot added backend:AMDGPU vectorizers llvm:transforms labels Dec 13, 2024

nikic reviewed Dec 13, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/VectorCombine.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/VectorCombine.cpp Outdated Show resolved Hide resolved

[VectorCombine] vectorizeLoadInsert - only fold when inserting into a…

81e7850

… poison vector. We have corresponding poison tests in the "-inseltpoison.ll" sibling test files. Fixes llvm#119900

RKSimon force-pushed the vectorcombine-insert-undef-load branch from 0edd559 to 81e7850 Compare December 13, 2024 17:55

RKSimon changed the title from "[VectorCombine] vectorizeLoadInsert - fix shuffle when inserting into non-poison undef value" to "[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector" on Dec 13, 2024

nunoplopes approved these changes Dec 13, 2024

View reviewed changes

nikic approved these changes Dec 13, 2024

View reviewed changes

RKSimon merged commit cc54a0c into llvm:main Dec 14, 2024
7 of 8 checks passed

RKSimon deleted the vectorcombine-insert-undef-load branch December 14, 2024 11:56

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector #119906

[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector #119906

Uh oh!

RKSimon commented Dec 13, 2024 •

edited

Loading

Uh oh!

llvmbot commented Dec 13, 2024 •

edited

Loading

Uh oh!

llvmbot commented Dec 13, 2024

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Dec 13, 2024

Uh oh!

nunoplopes left a comment

Uh oh!

nikic left a comment

Uh oh!

Uh oh!

llvm-ci commented Dec 14, 2024

Uh oh!

Uh oh!

[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector #119906

[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector #119906

Uh oh!

Conversation

RKSimon commented Dec 13, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Dec 13, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Dec 13, 2024

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Dec 13, 2024

Uh oh!

nunoplopes left a comment

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Dec 14, 2024

Uh oh!

Uh oh!

RKSimon commented Dec 13, 2024 •

edited

Loading

llvmbot commented Dec 13, 2024 •

edited

Loading