Skip to content

Commit e3f8c22

Browse files
committed
[VectorCombine] foldInsExtVectorToShuffle - inserting into a poison base vector can be modelled as a single src shuffle
We already canonicalized an undef base vector to the RHS to improve further folding; this change extends that handling to improve the shuffle cost estimate of the single src shuffle.
1 parent b85ddba commit e3f8c22

File tree

3 files changed

+34
-38
lines changed

3 files changed

+34
-38
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3025,21 +3025,32 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30253025
if (ExtIdx >= NumElts || InsIdx >= NumElts)
30263026
return false;
30273027

3028-
SmallVector<int> Mask(NumElts, 0);
3029-
std::iota(Mask.begin(), Mask.end(), 0);
3030-
Mask[InsIdx] = ExtIdx + NumElts;
3028+
// Insertion into poison is a cheaper single operand shuffle.
3029+
TargetTransformInfo::ShuffleKind SK;
3030+
SmallVector<int> Mask(NumElts, PoisonMaskElem);
3031+
if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3032+
SK = TargetTransformInfo::SK_PermuteSingleSrc;
3033+
Mask[InsIdx] = ExtIdx;
3034+
std::swap(DstVec, SrcVec);
3035+
} else {
3036+
SK = TargetTransformInfo::SK_PermuteTwoSrc;
3037+
std::iota(Mask.begin(), Mask.end(), 0);
3038+
Mask[InsIdx] = ExtIdx + NumElts;
3039+
}
3040+
30313041
// Cost
30323042
auto *Ins = cast<InsertElementInst>(&I);
30333043
auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
3034-
3035-
InstructionCost OldCost =
3036-
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx) +
3044+
InstructionCost InsCost =
30373045
TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
3046+
InstructionCost ExtCost =
3047+
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
3048+
InstructionCost OldCost = ExtCost + InsCost;
30383049

3039-
InstructionCost NewCost = TTI.getShuffleCost(
3040-
TargetTransformInfo::SK_PermuteTwoSrc, VecTy, Mask, CostKind);
3050+
InstructionCost NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0,
3051+
nullptr, {DstVec, SrcVec});
30413052
if (!Ext->hasOneUse())
3042-
NewCost += TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
3053+
NewCost += ExtCost;
30433054

30443055
LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair : " << I
30453056
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

llvm/test/Transforms/PhaseOrdering/X86/hadd.ll

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -80,43 +80,28 @@ define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
8080
; SSE4-LABEL: @add_v8i16_u1234567(
8181
; SSE4-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
8282
; SSE4-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
83-
; SSE4-NEXT: [[HADD1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
8483
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
8584
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
8685
; SSE4-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
87-
; SSE4-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
86+
; SSE4-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
8887
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
8988
; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
9089
; SSE4-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
9190
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
9291
; SSE4-NEXT: ret <8 x i16> [[RESULT]]
9392
;
94-
; AVX2-LABEL: @add_v8i16_u1234567(
95-
; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
96-
; AVX2-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
97-
; AVX2-NEXT: [[HADD1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
98-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
99-
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
100-
; AVX2-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
101-
; AVX2-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
102-
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
103-
; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
104-
; AVX2-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
105-
; AVX2-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
106-
; AVX2-NEXT: ret <8 x i16> [[RESULT]]
107-
;
108-
; AVX512-LABEL: @add_v8i16_u1234567(
109-
; AVX512-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
110-
; AVX512-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
111-
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
112-
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
113-
; AVX512-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
114-
; AVX512-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
115-
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
116-
; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
117-
; AVX512-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
118-
; AVX512-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
119-
; AVX512-NEXT: ret <8 x i16> [[RESULT]]
93+
; AVX-LABEL: @add_v8i16_u1234567(
94+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
95+
; AVX-NEXT: [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
96+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
97+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
98+
; AVX-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
99+
; AVX-NEXT: [[HADD32:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP4]], <8 x i32> <i32 poison, i32 2, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
100+
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
101+
; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
102+
; AVX-NEXT: [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
103+
; AVX-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD32]], <8 x i16> [[TMP7]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
104+
; AVX-NEXT: ret <8 x i16> [[RESULT]]
120105
;
121106
%a0 = extractelement <8 x i16> %a, i32 0
122107
%a1 = extractelement <8 x i16> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
488488
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
489489
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
490490
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
491-
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
491+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 poison, i32 2, i32 poison, i32 poison>
492492
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
493493
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
494494
; AVX-NEXT: ret <4 x float> [[V3]]

0 commit comments

Comments
 (0)