-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector #119906
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-amdgpu Author: Simon Pilgrim (RKSimon) ChangesIf we're inserting into a non-poison undef value, the shuffle mask must reference the undef value and not just set the mask index to poison We have corresponding poison tests in the "-inseltpoison.ll" sibling test files. Fixes #119900 Full diff: https://github.com/llvm/llvm-project/pull/119906.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 09489e24984530..af8269f1e37066 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -179,7 +179,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
Value *Scalar;
- if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+ UndefValue *BaseVec;
+ if (!match(&I, m_InsertElt(m_UndefValue(BaseVec), m_Value(Scalar),
+ m_ZeroInt())) ||
!Scalar->hasOneUse())
return false;
@@ -268,7 +270,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// still need a shuffle to change the vector size.
auto *Ty = cast<FixedVectorType>(I.getType());
unsigned OutputNumElts = Ty->getNumElements();
- SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
+ SmallVector<int, 16> Mask(OutputNumElts, isa<PoisonValue>(BaseVec)
+ ? PoisonMaskElem
+ : MinVecNumElts);
assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
Mask[0] = OffsetEltIndex;
if (OffsetEltIndex)
@@ -286,7 +290,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
Value *CastedPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
- VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+ VecLd = Builder.CreateShuffleVector(VecLd, PoisonValue::get(MinVecTy), Mask);
replaceValue(I, *VecLd);
++NumVecLoad;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
index 94b8c98a80df5c..902293985255ab 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -13,7 +13,7 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r
; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index bdd05a1a37c70f..d734aabd19d4f4 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -159,7 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -170,7 +170,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -183,7 +183,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -196,7 +196,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -209,7 +209,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 16
@@ -222,7 +222,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr addrspace(44) %p, align 16
@@ -236,7 +236,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -256,7 +256,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
;
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AVX2-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -276,7 +276,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
;
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AVX2-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -305,7 +305,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
@@ -337,7 +337,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
@@ -440,7 +440,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_align(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -464,7 +464,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -475,7 +475,7 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -486,7 +486,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v16f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <16 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -497,7 +497,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v2f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: ret <2 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -550,7 +550,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%l = load <2 x float>, ptr %p, align 4
@@ -562,7 +562,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%l = load <8 x float>, ptr %p, align 4
@@ -598,7 +598,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab
;
; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AVX2-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1
|
@llvm/pr-subscribers-llvm-transforms Author: Simon Pilgrim (RKSimon) ChangesIf we're inserting into a non-poison undef value, the shuffle mask must reference the undef value and not just set the mask index to poison We have corresponding poison tests in the "-inseltpoison.ll" sibling test files. Fixes #119900 Full diff: https://github.com/llvm/llvm-project/pull/119906.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 09489e24984530..af8269f1e37066 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -179,7 +179,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
Value *Scalar;
- if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
+ UndefValue *BaseVec;
+ if (!match(&I, m_InsertElt(m_UndefValue(BaseVec), m_Value(Scalar),
+ m_ZeroInt())) ||
!Scalar->hasOneUse())
return false;
@@ -268,7 +270,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// still need a shuffle to change the vector size.
auto *Ty = cast<FixedVectorType>(I.getType());
unsigned OutputNumElts = Ty->getNumElements();
- SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
+ SmallVector<int, 16> Mask(OutputNumElts, isa<PoisonValue>(BaseVec)
+ ? PoisonMaskElem
+ : MinVecNumElts);
assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
Mask[0] = OffsetEltIndex;
if (OffsetEltIndex)
@@ -286,7 +290,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
Value *CastedPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
- VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+ VecLd = Builder.CreateShuffleVector(VecLd, PoisonValue::get(MinVecTy), Mask);
replaceValue(I, *VecLd);
++NumVecLoad;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
index 94b8c98a80df5c..902293985255ab 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -13,7 +13,7 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r
; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index bdd05a1a37c70f..d734aabd19d4f4 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -159,7 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -170,7 +170,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -183,7 +183,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -196,7 +196,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -209,7 +209,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 16
@@ -222,7 +222,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr addrspace(44) %p, align 16
@@ -236,7 +236,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -256,7 +256,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
;
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AVX2-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -276,7 +276,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
;
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AVX2-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -305,7 +305,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
@@ -337,7 +337,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
@@ -440,7 +440,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_align(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -464,7 +464,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -475,7 +475,7 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%s = load i32, ptr %p, align 4
@@ -486,7 +486,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v16f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <16 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -497,7 +497,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v2f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: ret <2 x float> [[R]]
;
%s = load float, ptr %p, align 4
@@ -550,7 +550,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%l = load <2 x float>, ptr %p, align 4
@@ -562,7 +562,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%l = load <8 x float>, ptr %p, align 4
@@ -598,7 +598,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab
;
; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
-; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
; AVX2-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1
|
… poison vector We have corresponding poison tests in the "-inseltpoison.ll" sibling test files. Fixes llvm#119900
0edd559
to
81e7850
Compare
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 754499c1e9410d51a4c41e71388c304de61366a0 81e78500cb515e9a05b95ab37d0bde9634a501dd llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll llvm/test/Transforms/VectorCombine/X86/load.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thank you!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/9805 Here is the relevant piece of the build log for the reference
|
We have corresponding poison tests in the "-inseltpoison.ll" sibling test files.
Fixes #119900