-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VectorCombine] Allow shuffling between vectors of the same type but different element sizes #121216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: hanbeom (ParkHanbum) Changes
This commit allows combining extract/insert for vectors of the same type but with different sizes by converting the length of the vectors. Proof: https://alive2.llvm.org/ce/z/ELNLr7 Patch is 32.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121216.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ecbc13d489eb37..d6e4e53cca7692 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3018,24 +3018,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
return false;
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
- if (!VecTy || SrcVec->getType() != VecTy)
+ auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
+ // We can try combining vectors with different element sizes.
+ if (!VecTy || !SrcVecTy ||
+ SrcVecTy->getElementType() != VecTy->getElementType())
return false;
unsigned NumElts = VecTy->getNumElements();
- if (ExtIdx >= NumElts || InsIdx >= NumElts)
+ unsigned NumSrcElts = SrcVecTy->getNumElements();
+ if (InsIdx >= NumElts || NumElts == 1)
return false;
// Insertion into poison is a cheaper single operand shuffle.
TargetTransformInfo::ShuffleKind SK;
SmallVector<int> Mask(NumElts, PoisonMaskElem);
- if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
+
+ bool NeedExpOrNarrow = NumSrcElts != NumElts;
+ bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
+ if (NeedDstSrcSwap) {
SK = TargetTransformInfo::SK_PermuteSingleSrc;
- Mask[InsIdx] = ExtIdx;
+ if (!NeedExpOrNarrow)
+ Mask[InsIdx] = ExtIdx;
+ else
+ Mask[InsIdx] = 0;
std::swap(DstVec, SrcVec);
} else {
SK = TargetTransformInfo::SK_PermuteTwoSrc;
std::iota(Mask.begin(), Mask.end(), 0);
- Mask[InsIdx] = ExtIdx + NumElts;
+ if (!NeedExpOrNarrow)
+ Mask[InsIdx] = ExtIdx + NumElts;
+ else
+ Mask[InsIdx] = NumElts;
}
// Cost
@@ -3047,8 +3060,23 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
InstructionCost OldCost = ExtCost + InsCost;
- InstructionCost NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0,
- nullptr, {DstVec, SrcVec});
+ InstructionCost NewCost = 0;
+ SmallVector<int> ExtToVecMask;
+ if (!NeedExpOrNarrow) {
+ NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
+ {DstVec, SrcVec});
+ } else {
+ // When creating length-changing-vector, always create with a Mask whose
+ // first element has an ExtIdx, so that the first element of the vector
+ // being created is always the target to be extracted.
+ ExtToVecMask.assign(NumElts, PoisonMaskElem);
+ ExtToVecMask[0] = ExtIdx;
+ // Add cost for expanding or narrowing
+ NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTy, ExtToVecMask, CostKind);
+ NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
+ }
+
if (!Ext->hasOneUse())
NewCost += ExtCost;
@@ -3059,6 +3087,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
if (OldCost < NewCost)
return false;
+ if (NeedExpOrNarrow) {
+ if (!NeedDstSrcSwap)
+ SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
+ else
+ DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
+ }
+
// Canonicalize undef param to RHS to help further folds.
if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
new file mode 100644
index 00000000000000..fe303438f9588d
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
new file mode 100644
index 00000000000000..6051e6ff512fe7
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; AVX-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, ...
[truncated]
|
✅ With the latest revision this PR passed the undef deprecator. |
@RKSimon can you help? |
I'll take a look in the next few days, |
@RKSimon Please review when you have time. I'd also appreciate it if you could let me know how I should respond to comments made about undef. |
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType()); | ||
// We can try combining vectors with different element sizes. | ||
if (!VecTy || !SrcVecTy || | ||
SrcVecTy->getElementType() != VecTy->getElementType()) | ||
return false; | ||
|
||
unsigned NumElts = VecTy->getNumElements(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(pedantic) Rename this NumDstElts?
return false; | ||
|
||
unsigned NumElts = VecTy->getNumElements(); | ||
if (ExtIdx >= NumElts || InsIdx >= NumElts) | ||
unsigned NumSrcElts = SrcVecTy->getNumElements(); | ||
if (InsIdx >= NumElts || NumElts == 1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add ExtIdx >= NumSrcElts check
if (!NeedDstSrcSwap) | ||
SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask); | ||
else | ||
DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Haven't you already swapped the SrcVec/DstVec?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I wrote the code this way because the position of SrcVec differs depending on whether the operands were swapped or not.
; CHECK-NEXT: ret <4 x double> [[INS]] | ||
; | ||
%ext = extractelement <2 x double> %b, i32 0 | ||
%ins = insertelement <4 x double> undef, double %ext, i32 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
don't bother with undef insertions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we can remove extract-insert-undef.ll entirely
@RKSimon can I ask something? In this patch, I always place the extracted element at index zero of the length-changed vector. Would placing it at the same index as the insert index instead help backend optimization? |
47941d9
to
3b75045
Compare
; CHECK-NEXT: ret <4 x double> [[INS]] | ||
; | ||
%ext = extractelement <2 x double> %b, i32 0 | ||
%ins = insertelement <4 x double> undef, double %ext, i32 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we can remove extract-insert-undef.ll entirely
…ferent element sizes `foldInsExtVectorToShuffle` function combines the extract/insert of a vector into a vector through a shuffle. However, we only supported coupling between vectors of the same size. This commit allows combining extract/insert for vectors of the same type but with different sizes by converting the length of the vectors. Proof: https://alive2.llvm.org/ce/z/EWFmfL Fixed llvm#120772
3b75045
to
e6525d9
Compare
if (!NeedExpOrNarrow) { | ||
// Ignore 'free' identity insertion shuffle. | ||
// TODO: getShuffleCost should return TCC_Free for Identity shuffles. | ||
if (!ShuffleVectorInst::isIdentityMask(Mask, NumDstElts)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You need NumSrcElts here - isIdentityMask needs to be told the src element count (it knows the dst count from Mask.size()).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/15362 Here is the relevant piece of the build log for the reference
|
… but different element sizes (llvm#121216)" This reverts commit 8c1dbac.
@RKSimon I'm very sorry for the trouble my mistake caused you. |
Don't worry - it was an easy fix. There's always going to be things that miss the existing tests. |
…ferent element sizes (llvm#121216) `foldInsExtVectorToShuffle` function combines the extract/insert of a vector into a vector through a shuffle. However, we only supported coupling between vectors of the same size. This commit allows combining extract/insert for vectors of the same type but with different sizes by converting the length of the vectors. Proof: https://alive2.llvm.org/ce/z/ELNLr7 Fixed llvm#120772
…Cost with the input operand type, not the result Typo in llvm#121216 Fixes llvm#126085
foldInsExtVectorToShuffle
function combines the extract/insert of a vector into a vector through a shuffle. However, we only
supported coupling between vectors of the same size.
This commit allows combining extract/insert for vectors of
the same type but with different sizes by converting
the length of the vectors.
Proof: https://alive2.llvm.org/ce/z/ELNLr7
Fixed #120772