Skip to content

Commit 20286a3

Browse files
committed
[VectorCombine] Combine scalar fneg with insert/extract to vector fneg when length is different
insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -> shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask. The original combine left the case where the source and destination vectors have different lengths as a TODO; this commit implements it. (see #[baab4aa])
1 parent e03e74d commit 20286a3

File tree

2 files changed

+57
-22
lines changed

2 files changed

+57
-22
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -650,9 +650,9 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
650650
m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
651651
return false;
652652

653-
// TODO: We could handle this with a length-changing shuffle.
654653
auto *VecTy = cast<FixedVectorType>(I.getType());
655-
if (SrcVec->getType() != VecTy)
654+
auto *SrcVecTy = cast<FixedVectorType>(SrcVec->getType());
655+
if (SrcVecTy->getScalarType() != VecTy->getScalarType())
656656
return false;
657657

658658
// Ignore bogus insert/extract index.
@@ -667,7 +667,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
667667
std::iota(Mask.begin(), Mask.end(), 0);
668668
Mask[Index] = Index + NumElts;
669669

670-
Type *ScalarTy = VecTy->getScalarType();
670+
Type *ScalarTy = SrcVecTy->getScalarType();
671671
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
672672
InstructionCost OldCost =
673673
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
@@ -683,14 +683,31 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
683683
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy) +
684684
TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
685685

686+
bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
687+
// If the lengths of the two vectors are not equal,
688+
// we need to add a length-changing shuffle. Add its cost.
689+
if (NeedLenChg)
690+
NewCost +=
691+
TTI.getShuffleCost(TargetTransformInfo::SK_Select, SrcVecTy, Mask);
692+
686693
if (NewCost > OldCost)
687694
return false;
688695

689-
// insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -->
690-
// shuffle DestVec, (fneg SrcVec), Mask
696+
Value *NewShuf;
697+
// insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
691698
Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
692-
Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
693-
replaceValue(I, *Shuf);
699+
if (NeedLenChg) {
700+
// shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
701+
SmallVector<int> SrcMask(NumElts, PoisonMaskElem);
702+
SrcMask[Index] = Index;
703+
Value *LenChgShuf = Builder.CreateShuffleVector(
704+
SrcVec, PoisonValue::get(SrcVecTy), SrcMask);
705+
NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask);
706+
} else
707+
// shuffle DestVec, (fneg SrcVec), Mask
708+
NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
709+
710+
replaceValue(I, *NewShuf);
694711
return true;
695712
}
696713

llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,17 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
4646
}
4747

4848
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
49-
; CHECK-LABEL: @ext2_v2f32v4f32(
50-
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 2
51-
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
52-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
53-
; CHECK-NEXT: ret <4 x float> [[R]]
49+
; SSE-LABEL: @ext2_v2f32v4f32(
50+
; SSE-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 2
51+
; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
52+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
53+
; SSE-NEXT: ret <4 x float> [[R]]
54+
;
55+
; AVX-LABEL: @ext2_v2f32v4f32(
56+
; AVX-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
57+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
58+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
59+
; AVX-NEXT: ret <4 x float> [[R]]
5460
;
5561
%e = extractelement <2 x float> %x, i32 2
5662
%n = fneg float %e
@@ -73,11 +79,17 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
7379
}
7480

7581
define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
76-
; CHECK-LABEL: @ext1_v2f64v4f64(
77-
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
78-
; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
79-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
80-
; CHECK-NEXT: ret <4 x double> [[R]]
82+
; SSE-LABEL: @ext1_v2f64v4f64(
83+
; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
84+
; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
85+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
86+
; SSE-NEXT: ret <4 x double> [[R]]
87+
;
88+
; AVX-LABEL: @ext1_v2f64v4f64(
89+
; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
90+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
91+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
92+
; AVX-NEXT: ret <4 x double> [[R]]
8193
;
8294
%e = extractelement <2 x double> %x, i32 1
8395
%n = fneg nsz double %e
@@ -304,11 +316,17 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
304316
}
305317

306318
define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
307-
; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
308-
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
309-
; CHECK-NEXT: [[S:%.*]] = fsub nnan nsz float 0.000000e+00, [[E]]
310-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[S]], i32 1
311-
; CHECK-NEXT: ret <4 x float> [[R]]
319+
; SSE-LABEL: @ext1_v2f32v4f32_fsub_fmf(
320+
; SSE-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
321+
; SSE-NEXT: [[S:%.*]] = fsub nnan nsz float 0.000000e+00, [[E]]
322+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[S]], i32 1
323+
; SSE-NEXT: ret <4 x float> [[R]]
324+
;
325+
; AVX-LABEL: @ext1_v2f32v4f32_fsub_fmf(
326+
; AVX-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
327+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
328+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
329+
; AVX-NEXT: ret <4 x float> [[R]]
312330
;
313331
%e = extractelement <2 x float> %x, i32 1
314332
%s = fsub nsz nnan float 0.0, %e

0 commit comments

Comments
 (0)