Revert "[VectorCombine] Combine scalar fneg with insert/extract to vector fneg when length is different" #120422

Merged
RKSimon merged 1 commit into main from revert-115209-vector_combine3 on Dec 18, 2024

Conversation

@RKSimon (Collaborator) commented Dec 18, 2024

Reverts #115209 - investigating a reported regression

@RKSimon merged commit fbc18b8 into main on Dec 18, 2024 (5 of 7 checks passed).
@RKSimon deleted the revert-115209-vector_combine3 branch on December 18, 2024 at 13:32.
@llvmbot (Member) commented Dec 18, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Simon Pilgrim (RKSimon)

Changes

Reverts llvm/llvm-project#115209 - investigating a reported regression


Full diff: https://github.com/llvm/llvm-project/pull/120422.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+8-26)
  • (modified) llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll (-154)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5254fab1cdc9143..791006c48b5ddf5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -666,10 +666,9 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
                        m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
     return false;
 
+  // TODO: We could handle this with a length-changing shuffle.
   auto *VecTy = cast<FixedVectorType>(I.getType());
-  auto *ScalarTy = VecTy->getScalarType();
-  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
-  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
+  if (SrcVec->getType() != VecTy)
     return false;
 
   // Ignore bogus insert/extract index.
@@ -683,6 +682,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
   SmallVector<int> Mask(NumElts);
   std::iota(Mask.begin(), Mask.end(), 0);
   Mask[Index] = Index + NumElts;
+
+  Type *ScalarTy = VecTy->getScalarType();
   InstructionCost OldCost =
       TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy, CostKind) +
       TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
@@ -697,33 +698,14 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
       TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy, CostKind) +
       TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, CostKind);
 
-  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
-  // If the lengths of the two vectors are not equal,
-  // we need to add a length-change vector. Add this cost.
-  SmallVector<int> SrcMask;
-  if (NeedLenChg) {
-    SrcMask.assign(NumElts, PoisonMaskElem);
-    SrcMask[Index] = Index;
-    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
-                                  SrcVecTy, SrcMask, CostKind);
-  }
-
   if (NewCost > OldCost)
     return false;
 
-  Value *NewShuf;
-  // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
+  // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -->
+  // shuffle DestVec, (fneg SrcVec), Mask
   Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
-  if (NeedLenChg) {
-    // shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
-    Value *LenChgShuf = Builder.CreateShuffleVector(SrcVec, SrcMask);
-    NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask);
-  } else {
-    // shuffle DestVec, (fneg SrcVec), Mask
-    NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
-  }
-
-  replaceValue(I, *NewShuf);
+  Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
+  replaceValue(I, *Shuf);
   return true;
 }
 
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
index 83f94ba46a072f6..df5fcdb7beb6566 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
@@ -18,19 +18,6 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
-define <4 x float> @ext0_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext0_v2f32v4f32(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
-; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
-; CHECK-NEXT:    ret <4 x float> [[R]]
-;
-  %e = extractelement <2 x float> %x, i32 0
-  %n = fneg float %e
-  %r = insertelement <4 x float> %y, float %n, i32 0
-  ret <4 x float> %r
-}
-
 ; Eliminating extract/insert is profitable.
 
 define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -45,19 +32,6 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
-define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext2_v2f32v4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[R]]
-;
-  %e = extractelement <2 x float> %x, i32 2
-  %n = fneg float %e
-  %r = insertelement <4 x float> %y, float %n, i32 2
-  ret <4 x float> %r
-}
-
 ; Eliminating extract/insert is still profitable. Flags propagate.
 
 define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
@@ -72,25 +46,6 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %r
 }
 
-define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
-; SSE-LABEL: @ext1_v2f64v4f64(
-; SSE-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; SSE-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
-; SSE-NEXT:    [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
-; SSE-NEXT:    ret <4 x double> [[R]]
-;
-; AVX-LABEL: @ext1_v2f64v4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
-; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; AVX-NEXT:    [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; AVX-NEXT:    ret <4 x double> [[R]]
-;
-  %e = extractelement <2 x double> %x, i32 1
-  %n = fneg nsz double %e
-  %r = insertelement <4 x double> %y, double %n, i32 1
-  ret <4 x double> %r
-}
-
 ; The vector fneg would cost twice as much as the scalar op with SSE,
 ; so we don't transform there (the shuffle would also be more expensive).
 
@@ -112,19 +67,6 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %r
 }
 
-define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
-; CHECK-NEXT:    ret <8 x float> [[R]]
-;
-  %e = extractelement <4 x float> %x, i32 3
-  %n = fneg float %e
-  %r = insertelement <8 x float> %y, float %n, i32 7
-  ret <8 x float> %r
-}
-
 ; Same as above with an extra use of the extracted element.
 
 define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
@@ -149,21 +91,6 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %r
 }
 
-define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32_use1(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT:    call void @use(float [[E]])
-; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
-; CHECK-NEXT:    ret <8 x float> [[R]]
-;
-  %e = extractelement <4 x float> %x, i32 3
-  call void @use(float %e)
-  %n = fneg float %e
-  %r = insertelement <8 x float> %y, float %n, i32 3
-  ret <8 x float> %r
-}
-
 ; Negative test - the transform is likely not profitable if the fneg has another use.
 
 define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
@@ -181,21 +108,6 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %r
 }
 
-define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
-; CHECK-LABEL: @ext7_v4f32v8f32_use2(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
-; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT:    call void @use(float [[N]])
-; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
-; CHECK-NEXT:    ret <8 x float> [[R]]
-;
-  %e = extractelement <4 x float> %x, i32 3
-  %n = fneg float %e
-  call void @use(float %n)
-  %r = insertelement <8 x float> %y, float %n, i32 3
-  ret <8 x float> %r
-}
-
 ; Negative test - can't convert variable index to a shuffle.
 
 define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
@@ -211,19 +123,6 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
   ret <2 x double> %r
 }
 
-define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y, i32 %index) {
-; CHECK-LABEL: @ext_index_var_v2f64v4f64(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
-; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
-; CHECK-NEXT:    ret <4 x double> [[R]]
-;
-  %e = extractelement <2 x double> %x, i32 %index
-  %n = fneg nsz double %e
-  %r = insertelement <4 x double> %y, double %n, i32 %index
-  ret <4 x double> %r
-}
-
 ; Negative test - require same extract/insert index for simple shuffle.
 ; TODO: We could handle this by adjusting the cost calculation.
 
@@ -240,33 +139,6 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %r
 }
 
-; Negative test - extract from an index greater than the vector width of the destination
-define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
-; CHECK-LABEL: @ext3_v4f64v2f64(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3
-; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
-; CHECK-NEXT:    ret <2 x double> [[R]]
-;
-  %e = extractelement <4 x double> %x, i32 3
-  %n = fneg nsz double %e
-  %r = insertelement <2 x double> %y, double %n, i32 1
-  ret <2 x double> %r
-}
-
-define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) {
-; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
-; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
-; CHECK-NEXT:    ret <4 x double> [[R]]
-;
-  %e = extractelement <2 x double> %x, i32 1
-  %n = fneg nsz double %e
-  %r = insertelement <4 x double> %y, double %n, i32 0
-  ret <4 x double> %r
-}
-
 ; Negative test - avoid changing poison ops
 
 define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -282,19 +154,6 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
-define <4 x float> @ext12_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext12_v2f32v4f32(
-; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
-; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
-; CHECK-NEXT:    ret <4 x float> [[R]]
-;
-  %e = extractelement <2 x float> %x, i32 6
-  %n = fneg float %e
-  %r = insertelement <4 x float> %y, float %n, i32 12
-  ret <4 x float> %r
-}
-
 ; This used to crash because we assumed matching a true, unary fneg instruction.
 
 define <2 x float> @ext1_v2f32_fsub(<2 x float> %x) {
@@ -322,16 +181,3 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
   %r = insertelement <2 x float> %y, float %s, i32 1
   ret <2 x float> %r
 }
-
-define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[R]]
-;
-  %e = extractelement <2 x float> %x, i32 1
-  %s = fsub nsz nnan float 0.0, %e
-  %r = insertelement <4 x float> %y, float %s, i32 1
-  ret <4 x float> %r
-}
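
For context, the deleted ext1_v2f64v4f64 test above shows what the reverted fold did at the IR level. The before/after sketch below is reconstructed from that test's AVX CHECK lines; the actual output depends on the target cost model (the SSE CHECK lines in the same deleted test expected no transform), so treat it as illustrative only:

  ; Scalar fneg sandwiched between a narrower extract and a wider insert.
  define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
    %e = extractelement <2 x double> %x, i32 1
    %n = fneg nsz double %e
    %r = insertelement <4 x double> %y, double %n, i32 1
    ret <4 x double> %r
  }

  ; With #115209 applied (AVX cost model), VectorCombine widened the fold using
  ; an extra length-changing shuffle, per the deleted AVX CHECK lines:
  ;   %1 = fneg nsz <2 x double> %x
  ;   %2 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
  ;   %r = shufflevector <4 x double> %y, <4 x double> %2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ;
  ; After this revert, foldInsExtFNeg bails out whenever the source and
  ; destination vector types differ, so IR like the above is left unchanged.
  ; Equal-length cases still fold to: shuffle DestVec, (fneg SrcVec), Mask.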

@llvmbot (Member) commented Dec 18, 2024

@llvm/pr-subscribers-vectorizers

Author: Simon Pilgrim (RKSimon)

Changes

Reverts llvm/llvm-project#115209 - investigating a reported regression

