Skip to content

Commit 8c1dbac

Browse files
authored
[VectorCombine] Allow shuffling between vectors of the same element type but different element counts (llvm#121216)
The `foldInsExtVectorToShuffle` function folds an extractelement from one vector followed by an insertelement into another vector into a single shuffle. Previously, this fold was only supported when the source and destination vectors had the same number of elements. This commit also allows the fold for vectors that have the same element type but different lengths, by first emitting a length-changing shuffle that brings the source vector to the destination vector's length. Proof: https://alive2.llvm.org/ce/z/ELNLr7 Fixes llvm#120772
1 parent 4daf307 commit 8c1dbac

File tree

4 files changed

+438
-18
lines changed

4 files changed

+438
-18
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3147,42 +3147,73 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
31473147
m_ConstantInt(InsIdx))))
31483148
return false;
31493149

3150-
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
3151-
if (!VecTy || SrcVec->getType() != VecTy)
3150+
auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
3151+
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
3152+
// The vectors may differ in element count, but must share the element type.
3153+
if (!DstVecTy || !SrcVecTy ||
3154+
SrcVecTy->getElementType() != DstVecTy->getElementType())
31523155
return false;
31533156

3154-
unsigned NumElts = VecTy->getNumElements();
3155-
if (ExtIdx >= NumElts || InsIdx >= NumElts)
3157+
unsigned NumDstElts = DstVecTy->getNumElements();
3158+
unsigned NumSrcElts = SrcVecTy->getNumElements();
3159+
if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
31563160
return false;
31573161

31583162
// Insertion into poison is a cheaper single operand shuffle.
31593163
TargetTransformInfo::ShuffleKind SK;
3160-
SmallVector<int> Mask(NumElts, PoisonMaskElem);
3161-
if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3164+
SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
3165+
3166+
bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
3167+
bool IsExtIdxInBounds = ExtIdx < NumDstElts;
3168+
bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
3169+
if (NeedDstSrcSwap) {
31623170
SK = TargetTransformInfo::SK_PermuteSingleSrc;
3163-
Mask[InsIdx] = ExtIdx;
3171+
if (!IsExtIdxInBounds && NeedExpOrNarrow)
3172+
Mask[InsIdx] = 0;
3173+
else
3174+
Mask[InsIdx] = ExtIdx;
31643175
std::swap(DstVec, SrcVec);
31653176
} else {
31663177
SK = TargetTransformInfo::SK_PermuteTwoSrc;
31673178
std::iota(Mask.begin(), Mask.end(), 0);
3168-
Mask[InsIdx] = ExtIdx + NumElts;
3179+
if (!IsExtIdxInBounds && NeedExpOrNarrow)
3180+
Mask[InsIdx] = NumDstElts;
3181+
else
3182+
Mask[InsIdx] = ExtIdx + NumDstElts;
31693183
}
31703184

31713185
// Cost
31723186
auto *Ins = cast<InsertElementInst>(&I);
31733187
auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
31743188
InstructionCost InsCost =
3175-
TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
3189+
TTI.getVectorInstrCost(*Ins, DstVecTy, CostKind, InsIdx);
31763190
InstructionCost ExtCost =
3177-
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
3191+
TTI.getVectorInstrCost(*Ext, DstVecTy, CostKind, ExtIdx);
31783192
InstructionCost OldCost = ExtCost + InsCost;
31793193

3180-
// Ignore 'free' identity insertion shuffle.
3181-
// TODO: getShuffleCost should return TCC_Free for Identity shuffles.
31823194
InstructionCost NewCost = 0;
3183-
if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts))
3184-
NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
3185-
{DstVec, SrcVec});
3195+
SmallVector<int> ExtToVecMask;
3196+
if (!NeedExpOrNarrow) {
3197+
// Ignore 'free' identity insertion shuffle.
3198+
// TODO: getShuffleCost should return TCC_Free for Identity shuffles.
3199+
if (!ShuffleVectorInst::isIdentityMask(Mask, NumSrcElts))
3200+
NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
3201+
{DstVec, SrcVec});
3202+
} else {
3203+
// When creating the length-changing vector, keep the extracted element in
3204+
// lane ExtIdx if that lane exists in the destination type; otherwise move
3205+
// it to lane 0, which the final shuffle mask then reads from.
3206+
ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
3207+
if (IsExtIdxInBounds)
3208+
ExtToVecMask[ExtIdx] = ExtIdx;
3209+
else
3210+
ExtToVecMask[0] = ExtIdx;
3211+
// Add cost for expanding or narrowing
3212+
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
3213+
DstVecTy, ExtToVecMask, CostKind);
3214+
NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind);
3215+
}
3216+
31863217
if (!Ext->hasOneUse())
31873218
NewCost += ExtCost;
31883219

@@ -3193,9 +3224,16 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
31933224
if (OldCost < NewCost)
31943225
return false;
31953226

3227+
if (NeedExpOrNarrow) {
3228+
if (!NeedDstSrcSwap)
3229+
SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
3230+
else
3231+
DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
3232+
}
3233+
31963234
// Canonicalize undef param to RHS to help further folds.
31973235
if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3198-
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
3236+
ShuffleVectorInst::commuteShuffleMask(Mask, NumDstElts);
31993237
std::swap(DstVec, SrcVec);
32003238
}
32013239

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
4+
5+
6+
define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
7+
; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
8+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
9+
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
10+
; CHECK-NEXT: ret <4 x double> [[INS]]
11+
;
12+
%ext = extractelement <2 x double> %b, i32 0
13+
%ins = insertelement <4 x double> poison, double %ext, i32 0
14+
ret <4 x double> %ins
15+
}
16+
17+
define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
18+
; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
19+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
20+
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
21+
; CHECK-NEXT: ret <4 x double> [[INS]]
22+
;
23+
%ext = extractelement <2 x double> %b, i32 0
24+
%ins = insertelement <4 x double> poison, double %ext, i32 1
25+
ret <4 x double> %ins
26+
}
27+
28+
define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
29+
; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
30+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
31+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
32+
; SSE-NEXT: ret <4 x double> [[INS]]
33+
;
34+
; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
35+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
36+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
37+
; AVX-NEXT: ret <4 x double> [[INS]]
38+
;
39+
%ext = extractelement <2 x double> %b, i32 0
40+
%ins = insertelement <4 x double> poison, double %ext, i32 2
41+
ret <4 x double> %ins
42+
}
43+
44+
define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
45+
; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
46+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
47+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
48+
; SSE-NEXT: ret <4 x double> [[INS]]
49+
;
50+
; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
51+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
52+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
53+
; AVX-NEXT: ret <4 x double> [[INS]]
54+
;
55+
%ext = extractelement <2 x double> %b, i32 0
56+
%ins = insertelement <4 x double> poison, double %ext, i32 3
57+
ret <4 x double> %ins
58+
}
59+
60+
define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
61+
; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
62+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
63+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
64+
; SSE-NEXT: ret <4 x double> [[INS]]
65+
;
66+
; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
67+
; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
68+
; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
69+
; AVX-NEXT: ret <4 x double> [[INS]]
70+
;
71+
%ext = extractelement <2 x double> %b, i32 1
72+
%ins = insertelement <4 x double> poison, double %ext, i32 0
73+
ret <4 x double> %ins
74+
}
75+
76+
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
77+
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
78+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
79+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
80+
; CHECK-NEXT: ret <4 x double> [[INS]]
81+
;
82+
%ext = extractelement <2 x double> %b, i32 1
83+
%ins = insertelement <4 x double> poison, double %ext, i32 1
84+
ret <4 x double> %ins
85+
}
86+
87+
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
88+
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
89+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
90+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
91+
; CHECK-NEXT: ret <4 x double> [[INS]]
92+
;
93+
%ext = extractelement <2 x double> %b, i32 1
94+
%ins = insertelement <4 x double> poison, double %ext, i32 2
95+
ret <4 x double> %ins
96+
}
97+
98+
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
99+
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
100+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
101+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
102+
; CHECK-NEXT: ret <4 x double> [[INS]]
103+
;
104+
%ext = extractelement <2 x double> %b, i32 1
105+
%ins = insertelement <4 x double> poison, double %ext, i32 3
106+
ret <4 x double> %ins
107+
}
108+
109+
define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
110+
; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
111+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
112+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
113+
; CHECK-NEXT: ret <2 x double> [[INS]]
114+
;
115+
%ext = extractelement <4 x double> %b, i32 0
116+
%ins = insertelement <2 x double> poison, double %ext, i32 0
117+
ret <2 x double> %ins
118+
}
119+
120+
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
121+
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
122+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
123+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
124+
; CHECK-NEXT: ret <2 x double> [[INS]]
125+
;
126+
%ext = extractelement <4 x double> %b, i32 1
127+
%ins = insertelement <2 x double> poison, double %ext, i32 0
128+
ret <2 x double> %ins
129+
}
130+
131+
define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
132+
; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
133+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
134+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
135+
; CHECK-NEXT: ret <2 x double> [[INS]]
136+
;
137+
%ext = extractelement <4 x double> %b, i32 2
138+
%ins = insertelement <2 x double> poison, double %ext, i32 0
139+
ret <2 x double> %ins
140+
}
141+
142+
define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
143+
; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
144+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
145+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
146+
; CHECK-NEXT: ret <2 x double> [[INS]]
147+
;
148+
%ext = extractelement <4 x double> %b, i32 3
149+
%ins = insertelement <2 x double> poison, double %ext, i32 0
150+
ret <2 x double> %ins
151+
}
152+
153+
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
154+
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
155+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
156+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
157+
; CHECK-NEXT: ret <2 x double> [[INS]]
158+
;
159+
%ext = extractelement <4 x double> %b, i32 0
160+
%ins = insertelement <2 x double> poison, double %ext, i32 1
161+
ret <2 x double> %ins
162+
}
163+
164+
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
165+
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
166+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
167+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 1>
168+
; CHECK-NEXT: ret <2 x double> [[INS]]
169+
;
170+
%ext = extractelement <4 x double> %b, i32 1
171+
%ins = insertelement <2 x double> poison, double %ext, i32 1
172+
ret <2 x double> %ins
173+
}
174+
175+
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
176+
; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
177+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
178+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
179+
; CHECK-NEXT: ret <2 x double> [[INS]]
180+
;
181+
%ext = extractelement <4 x double> %b, i32 2
182+
%ins = insertelement <2 x double> poison, double %ext, i32 1
183+
ret <2 x double> %ins
184+
}
185+
186+
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
187+
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
188+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
189+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
190+
; CHECK-NEXT: ret <2 x double> [[INS]]
191+
;
192+
%ext = extractelement <4 x double> %b, i32 3
193+
%ins = insertelement <2 x double> poison, double %ext, i32 1
194+
ret <2 x double> %ins
195+
}
196+

0 commit comments

Comments
 (0)