Revert "[VectorCombine] Combine scalar fneg with insert/extract to vector fneg when length is different" #120422
Conversation
Revert "[VectorCombine] Combine scalar fneg with insert/extract to vector fneg when length is different"

This reverts commit b7a8d95.
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-vectorizers

Author: Simon Pilgrim (RKSimon)

Changes: Reverts llvm/llvm-project#115209 - investigating a reported regression.

Full diff: https://github.com/llvm/llvm-project/pull/120422.diff

2 Files Affected:
- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
- llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
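For context, the reverted patch extended foldInsExtFNeg to handle a source vector whose length differs from the destination's. A minimal before/after sketch in LLVM IR, lifted from the ext2_v2f32v4f32 test that this revert removes (value names as in the test):

; scalar fneg sandwiched between a narrow extract and a wide insert
%e = extractelement <2 x float> %x, i32 2
%n = fneg float %e
%r = insertelement <4 x float> %y, float %n, i32 2

; what #115209 turned it into: a vector fneg plus a length-changing shuffle
%tmp1 = fneg <2 x float> %x
%tmp2 = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
%r = shufflevector <4 x float> %y, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>

Note that the length-change shuffle is built from %x rather than from the fneg result %tmp1, so the negation is dropped on the mixed-length path; that is visible in the removed CreateShuffleVector(SrcVec, SrcMask) call below and is a plausible source of the reported regression.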
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5254fab1cdc9143..791006c48b5ddf5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -666,10 +666,9 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
return false;
+ // TODO: We could handle this with a length-changing shuffle.
auto *VecTy = cast<FixedVectorType>(I.getType());
- auto *ScalarTy = VecTy->getScalarType();
- auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
- if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
+ if (SrcVec->getType() != VecTy)
return false;
// Ignore bogus insert/extract index.
@@ -683,6 +682,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
SmallVector<int> Mask(NumElts);
std::iota(Mask.begin(), Mask.end(), 0);
Mask[Index] = Index + NumElts;
+
+ Type *ScalarTy = VecTy->getScalarType();
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy, CostKind) +
TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
@@ -697,33 +698,14 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy, CostKind) +
TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, CostKind);
- bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
- // If the lengths of the two vectors are not equal,
- // we need to add a length-change vector. Add this cost.
- SmallVector<int> SrcMask;
- if (NeedLenChg) {
- SrcMask.assign(NumElts, PoisonMaskElem);
- SrcMask[Index] = Index;
- NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- SrcVecTy, SrcMask, CostKind);
- }
-
if (NewCost > OldCost)
return false;
- Value *NewShuf;
- // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
+ // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -->
+ // shuffle DestVec, (fneg SrcVec), Mask
Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
- if (NeedLenChg) {
- // shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
- Value *LenChgShuf = Builder.CreateShuffleVector(SrcVec, SrcMask);
- NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask);
- } else {
- // shuffle DestVec, (fneg SrcVec), Mask
- NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
- }
-
- replaceValue(I, *NewShuf);
+ Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
+ replaceValue(I, *Shuf);
return true;
}
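After the revert, the fold only fires when SrcVec has the same type as the destination vector. A minimal sketch of the surviving rewrite (illustrative values; per the code above, the mask is iota over NumElts with Mask[Index] = Index + NumElts, so for Index = 2 in a 4-wide vector it is <0, 1, 6, 3>):

; before: insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
%e = extractelement <4 x float> %x, i32 2
%n = fneg float %e
%r = insertelement <4 x float> %y, float %n, i32 2

; after: shuffle DestVec, (fneg SrcVec), Mask
%neg = fneg <4 x float> %x
%r = shufflevector <4 x float> %y, <4 x float> %neg, <4 x i32> <i32 0, i32 1, i32 6, i32 3>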
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
index 83f94ba46a072f6..df5fcdb7beb6566 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
@@ -18,19 +18,6 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
-define <4 x float> @ext0_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext0_v2f32v4f32(
-; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
-; CHECK-NEXT: ret <4 x float> [[R]]
-;
- %e = extractelement <2 x float> %x, i32 0
- %n = fneg float %e
- %r = insertelement <4 x float> %y, float %n, i32 0
- ret <4 x float> %r
-}
-
; Eliminating extract/insert is profitable.
define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -45,19 +32,6 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
-define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext2_v2f32v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: ret <4 x float> [[R]]
-;
- %e = extractelement <2 x float> %x, i32 2
- %n = fneg float %e
- %r = insertelement <4 x float> %y, float %n, i32 2
- ret <4 x float> %r
-}
-
; Eliminating extract/insert is still profitable. Flags propagate.
define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
@@ -72,25 +46,6 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
-; SSE-LABEL: @ext1_v2f64v4f64(
-; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
-; SSE-NEXT: ret <4 x double> [[R]]
-;
-; AVX-LABEL: @ext1_v2f64v4f64(
-; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
-; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; AVX-NEXT: ret <4 x double> [[R]]
-;
- %e = extractelement <2 x double> %x, i32 1
- %n = fneg nsz double %e
- %r = insertelement <4 x double> %y, double %n, i32 1
- ret <4 x double> %r
-}
-
; The vector fneg would cost twice as much as the scalar op with SSE,
; so we don't transform there (the shuffle would also be more expensive).
@@ -112,19 +67,6 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
-define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
-; CHECK-NEXT: ret <8 x float> [[R]]
-;
- %e = extractelement <4 x float> %x, i32 3
- %n = fneg float %e
- %r = insertelement <8 x float> %y, float %n, i32 7
- ret <8 x float> %r
-}
-
; Same as above with an extra use of the extracted element.
define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
@@ -149,21 +91,6 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
-define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32_use1(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT: call void @use(float [[E]])
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
-; CHECK-NEXT: ret <8 x float> [[R]]
-;
- %e = extractelement <4 x float> %x, i32 3
- call void @use(float %e)
- %n = fneg float %e
- %r = insertelement <8 x float> %y, float %n, i32 3
- ret <8 x float> %r
-}
-
; Negative test - the transform is likely not profitable if the fneg has another use.
define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
@@ -181,21 +108,6 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
-define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32_use2(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: call void @use(float [[N]])
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
-; CHECK-NEXT: ret <8 x float> [[R]]
-;
- %e = extractelement <4 x float> %x, i32 3
- %n = fneg float %e
- call void @use(float %n)
- %r = insertelement <8 x float> %y, float %n, i32 3
- ret <8 x float> %r
-}
-
; Negative test - can't convert variable index to a shuffle.
define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
@@ -211,19 +123,6 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
ret <2 x double> %r
}
-define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y, i32 %index) {
-; CHECK-LABEL: @ext_index_var_v2f64v4f64(
-; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
-; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
-; CHECK-NEXT: ret <4 x double> [[R]]
-;
- %e = extractelement <2 x double> %x, i32 %index
- %n = fneg nsz double %e
- %r = insertelement <4 x double> %y, double %n, i32 %index
- ret <4 x double> %r
-}
-
; Negative test - require same extract/insert index for simple shuffle.
; TODO: We could handle this by adjusting the cost calculation.
@@ -240,33 +139,6 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-; Negative test - extract from an index greater than the vector width of the destination
-define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
-; CHECK-LABEL: @ext3_v4f64v2f64(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3
-; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
-; CHECK-NEXT: ret <2 x double> [[R]]
-;
- %e = extractelement <4 x double> %x, i32 3
- %n = fneg nsz double %e
- %r = insertelement <2 x double> %y, double %n, i32 1
- ret <2 x double> %r
-}
-
-define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) {
-; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
-; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
-; CHECK-NEXT: ret <4 x double> [[R]]
-;
- %e = extractelement <2 x double> %x, i32 1
- %n = fneg nsz double %e
- %r = insertelement <4 x double> %y, double %n, i32 0
- ret <4 x double> %r
-}
-
; Negative test - avoid changing poison ops
define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -282,19 +154,6 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
-define <4 x float> @ext12_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext12_v2f32v4f32(
-; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
-; CHECK-NEXT: ret <4 x float> [[R]]
-;
- %e = extractelement <2 x float> %x, i32 6
- %n = fneg float %e
- %r = insertelement <4 x float> %y, float %n, i32 12
- ret <4 x float> %r
-}
-
; This used to crash because we assumed matching a true, unary fneg instruction.
define <2 x float> @ext1_v2f32_fsub(<2 x float> %x) {
@@ -322,16 +181,3 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
%r = insertelement <2 x float> %y, float %s, i32 1
ret <2 x float> %r
}
-
-define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
-; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x float> [[R]]
-;
- %e = extractelement <2 x float> %x, i32 1
- %s = fsub nsz nnan float 0.0, %e
- %r = insertelement <4 x float> %y, float %s, i32 1
- ret <4 x float> %r
-}
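The test file's RUN lines are outside this diff, but given the SSE and AVX check prefixes, it can presumably be exercised per subtarget with something like the following (an assumption about the harness, not shown in the patch):

opt -passes=vector-combine -S -mtriple=x86_64-- -mattr=+sse2 llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
opt -passes=vector-combine -S -mtriple=x86_64-- -mattr=+avx llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll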
Reverts #115209 - investigating a reported regression