Skip to content

Commit 1c5baed

Browse files
committed
[VectorCombine] Combine scalar fneg with insert/extract to vector fneg when length is different
insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -> shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask. The original combine left the case where the source and destination vectors have different lengths as a TODO; this commit implements it. (see #[baab4aa])
1 parent 1bb7b0d commit 1c5baed

File tree

2 files changed

+57
-22
lines changed

2 files changed

+57
-22
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -665,9 +665,9 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
665665
m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
666666
return false;
667667

668-
// TODO: We could handle this with a length-changing shuffle.
669668
auto *VecTy = cast<FixedVectorType>(I.getType());
670-
if (SrcVec->getType() != VecTy)
669+
auto *SrcVecTy = cast<FixedVectorType>(SrcVec->getType());
670+
if (SrcVecTy->getScalarType() != VecTy->getScalarType())
671671
return false;
672672

673673
// Ignore bogus insert/extract index.
@@ -682,7 +682,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
682682
std::iota(Mask.begin(), Mask.end(), 0);
683683
Mask[Index] = Index + NumElts;
684684

685-
Type *ScalarTy = VecTy->getScalarType();
685+
Type *ScalarTy = SrcVecTy->getScalarType();
686686
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
687687
InstructionCost OldCost =
688688
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
@@ -698,14 +698,31 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
698698
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy) +
699699
TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
700700

701+
bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
702+
// If the lengths of the two vectors are not equal,
703+
// we need to add a length-change vector. Add this cost.
704+
if (NeedLenChg)
705+
NewCost +=
706+
TTI.getShuffleCost(TargetTransformInfo::SK_Select, SrcVecTy, Mask);
707+
701708
if (NewCost > OldCost)
702709
return false;
703710

704-
// insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -->
705-
// shuffle DestVec, (fneg SrcVec), Mask
711+
Value *NewShuf;
712+
// insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
706713
Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
707-
Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
708-
replaceValue(I, *Shuf);
714+
if (NeedLenChg) {
715+
// shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
716+
SmallVector<int> SrcMask(NumElts, PoisonMaskElem);
717+
SrcMask[Index] = Index;
718+
Value *LenChgShuf = Builder.CreateShuffleVector(
719+
SrcVec, PoisonValue::get(SrcVecTy), SrcMask);
720+
NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask);
721+
} else
722+
// shuffle DestVec, (fneg SrcVec), Mask
723+
NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
724+
725+
replaceValue(I, *NewShuf);
709726
return true;
710727
}
711728

llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,17 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
4646
}
4747

4848
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
49-
; CHECK-LABEL: @ext2_v2f32v4f32(
50-
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 2
51-
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
52-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
53-
; CHECK-NEXT: ret <4 x float> [[R]]
49+
; SSE-LABEL: @ext2_v2f32v4f32(
50+
; SSE-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 2
51+
; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
52+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
53+
; SSE-NEXT: ret <4 x float> [[R]]
54+
;
55+
; AVX-LABEL: @ext2_v2f32v4f32(
56+
; AVX-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
57+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
58+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
59+
; AVX-NEXT: ret <4 x float> [[R]]
5460
;
5561
%e = extractelement <2 x float> %x, i32 2
5662
%n = fneg float %e
@@ -73,11 +79,17 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
7379
}
7480

7581
define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
76-
; CHECK-LABEL: @ext1_v2f64v4f64(
77-
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
78-
; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
79-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
80-
; CHECK-NEXT: ret <4 x double> [[R]]
82+
; SSE-LABEL: @ext1_v2f64v4f64(
83+
; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
84+
; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
85+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
86+
; SSE-NEXT: ret <4 x double> [[R]]
87+
;
88+
; AVX-LABEL: @ext1_v2f64v4f64(
89+
; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
90+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
91+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
92+
; AVX-NEXT: ret <4 x double> [[R]]
8193
;
8294
%e = extractelement <2 x double> %x, i32 1
8395
%n = fneg nsz double %e
@@ -304,11 +316,17 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
304316
}
305317

306318
define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
307-
; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
308-
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
309-
; CHECK-NEXT: [[S:%.*]] = fsub nnan nsz float 0.000000e+00, [[E]]
310-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[S]], i32 1
311-
; CHECK-NEXT: ret <4 x float> [[R]]
319+
; SSE-LABEL: @ext1_v2f32v4f32_fsub_fmf(
320+
; SSE-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
321+
; SSE-NEXT: [[S:%.*]] = fsub nnan nsz float 0.000000e+00, [[E]]
322+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[S]], i32 1
323+
; SSE-NEXT: ret <4 x float> [[R]]
324+
;
325+
; AVX-LABEL: @ext1_v2f32v4f32_fsub_fmf(
326+
; AVX-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
327+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
328+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
329+
; AVX-NEXT: ret <4 x float> [[R]]
312330
;
313331
%e = extractelement <2 x float> %x, i32 1
314332
%s = fsub nsz nnan float 0.0, %e

0 commit comments

Comments
 (0)