Skip to content

Commit a1d4702

Browse files
committed
[VectorCombine] Allow shuffling between vectors of the same element type but different element counts
The `foldInsExtVectorToShuffle` function folds an extractelement from one vector followed by an insertelement into another vector into a shuffle. Previously, this fold was only supported when both vectors had the same number of elements. This commit also allows the fold when the vectors have the same element type but different numbers of elements, by first emitting a length-changing shuffle that resizes the source vector to the destination length. Proof: https://alive2.llvm.org/ce/z/ELNLr7 Fixes #120772
1 parent b9e1035 commit a1d4702

File tree

5 files changed

+127
-71
lines changed

5 files changed

+127
-71
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3068,24 +3068,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30683068
return false;
30693069

30703070
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
3071-
if (!VecTy || SrcVec->getType() != VecTy)
3071+
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
3072+
// We can try combining vectors with different element counts, as long as the element types match.
3073+
if (!VecTy || !SrcVecTy ||
3074+
SrcVecTy->getElementType() != VecTy->getElementType())
30723075
return false;
30733076

30743077
unsigned NumElts = VecTy->getNumElements();
3075-
if (ExtIdx >= NumElts || InsIdx >= NumElts)
3078+
unsigned NumSrcElts = SrcVecTy->getNumElements();
3079+
if (InsIdx >= NumElts || NumElts == 1)
30763080
return false;
30773081

30783082
// Insertion into poison is a cheaper single operand shuffle.
30793083
TargetTransformInfo::ShuffleKind SK;
30803084
SmallVector<int> Mask(NumElts, PoisonMaskElem);
3081-
if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3085+
3086+
bool NeedExpOrNarrow = NumSrcElts != NumElts;
3087+
bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
3088+
if (NeedDstSrcSwap) {
30823089
SK = TargetTransformInfo::SK_PermuteSingleSrc;
3083-
Mask[InsIdx] = ExtIdx;
3090+
if (!NeedExpOrNarrow)
3091+
Mask[InsIdx] = ExtIdx;
3092+
else
3093+
Mask[InsIdx] = 0;
30843094
std::swap(DstVec, SrcVec);
30853095
} else {
30863096
SK = TargetTransformInfo::SK_PermuteTwoSrc;
30873097
std::iota(Mask.begin(), Mask.end(), 0);
3088-
Mask[InsIdx] = ExtIdx + NumElts;
3098+
if (!NeedExpOrNarrow)
3099+
Mask[InsIdx] = ExtIdx + NumElts;
3100+
else
3101+
Mask[InsIdx] = NumElts;
30893102
}
30903103

30913104
// Cost
@@ -3097,12 +3110,26 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30973110
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
30983111
InstructionCost OldCost = ExtCost + InsCost;
30993112

3100-
// Ignore 'free' identity insertion shuffle.
3101-
// TODO: getShuffleCost should return TCC_Free for Identity shuffles.
31023113
InstructionCost NewCost = 0;
3103-
if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts))
3104-
NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
3105-
{DstVec, SrcVec});
3114+
SmallVector<int> ExtToVecMask;
3115+
if (!NeedExpOrNarrow) {
3116+
// Ignore 'free' identity insertion shuffle.
3117+
// TODO: getShuffleCost should return TCC_Free for Identity shuffles.
3118+
if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts))
3119+
NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
3120+
{DstVec, SrcVec});
3121+
} else {
3122+
// When creating length-changing-vector, always create with a Mask whose
3123+
// first element has an ExtIdx, so that the first element of the vector
3124+
// being created is always the target to be extracted.
3125+
ExtToVecMask.assign(NumElts, PoisonMaskElem);
3126+
ExtToVecMask[0] = ExtIdx;
3127+
// Add cost for expanding or narrowing
3128+
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
3129+
VecTy, ExtToVecMask, CostKind);
3130+
NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
3131+
}
3132+
31063133
if (!Ext->hasOneUse())
31073134
NewCost += ExtCost;
31083135

@@ -3113,6 +3140,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
31133140
if (OldCost < NewCost)
31143141
return false;
31153142

3143+
if (NeedExpOrNarrow) {
3144+
if (!NeedDstSrcSwap)
3145+
SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
3146+
else
3147+
DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
3148+
}
3149+
31163150
// Canonicalize undef param to RHS to help further folds.
31173151
if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
31183152
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);

llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,31 @@ define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
2626
}
2727

2828
define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
29-
; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
30-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
31-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
32-
; CHECK-NEXT: ret <4 x double> [[INS]]
29+
; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
30+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
31+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
32+
; SSE-NEXT: ret <4 x double> [[INS]]
33+
;
34+
; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
35+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
36+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
37+
; AVX-NEXT: ret <4 x double> [[INS]]
3338
;
3439
%ext = extractelement <2 x double> %b, i32 0
3540
%ins = insertelement <4 x double> poison, double %ext, i32 2
3641
ret <4 x double> %ins
3742
}
3843

3944
define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
40-
; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
41-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
42-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
43-
; CHECK-NEXT: ret <4 x double> [[INS]]
45+
; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
46+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
47+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
48+
; SSE-NEXT: ret <4 x double> [[INS]]
49+
;
50+
; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
51+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
52+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
53+
; AVX-NEXT: ret <4 x double> [[INS]]
4454
;
4555
%ext = extractelement <2 x double> %b, i32 0
4656
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -60,8 +70,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
6070

6171
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
6272
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
63-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
64-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
73+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
74+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
6575
; CHECK-NEXT: ret <4 x double> [[INS]]
6676
;
6777
%ext = extractelement <2 x double> %b, i32 1
@@ -70,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
7080
}
7181

7282
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
73-
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
74-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
75-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
76-
; CHECK-NEXT: ret <4 x double> [[INS]]
83+
; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
84+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
85+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
86+
; SSE-NEXT: ret <4 x double> [[INS]]
87+
;
88+
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
89+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
90+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
91+
; AVX-NEXT: ret <4 x double> [[INS]]
7792
;
7893
%ext = extractelement <2 x double> %b, i32 1
7994
%ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -82,8 +97,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8297

8398
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
8499
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
85-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
86-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
100+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
101+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
87102
; CHECK-NEXT: ret <4 x double> [[INS]]
88103
;
89104
%ext = extractelement <2 x double> %b, i32 1
@@ -148,8 +163,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
148163

149164
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
150165
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
151-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
152-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
166+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
167+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
153168
; CHECK-NEXT: ret <2 x double> [[INS]]
154169
;
155170
%ext = extractelement <4 x double> %b, i32 1
@@ -170,15 +185,12 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
170185

171186
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
172187
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
173-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
174-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
188+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
189+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
175190
; CHECK-NEXT: ret <2 x double> [[INS]]
176191
;
177192
%ext = extractelement <4 x double> %b, i32 3
178193
%ins = insertelement <2 x double> poison, double %ext, i32 1
179194
ret <2 x double> %ins
180195
}
181196

182-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
183-
; AVX: {{.*}}
184-
; SSE: {{.*}}

llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
5959
}
6060

6161
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
62-
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
63-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
64-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
65-
; CHECK-NEXT: ret <4 x double> [[INS]]
62+
; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
63+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
64+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
65+
; SSE-NEXT: ret <4 x double> [[INS]]
66+
;
67+
; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
68+
; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
69+
; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
70+
; AVX-NEXT: ret <4 x double> [[INS]]
6671
;
6772
%ext = extractelement <2 x double> %b, i32 1
6873
%ins = insertelement <4 x double> undef, double %ext, i32 1
@@ -82,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8287

8388
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
8489
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
85-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
86-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
90+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
91+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
8792
; CHECK-NEXT: ret <4 x double> [[INS]]
8893
;
8994
%ext = extractelement <2 x double> %b, i32 1
@@ -148,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
148153

149154
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
150155
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
151-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
152-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
156+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
157+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
153158
; CHECK-NEXT: ret <2 x double> [[INS]]
154159
;
155160
%ext = extractelement <4 x double> %b, i32 1
@@ -170,15 +175,12 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
170175

171176
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
172177
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
173-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
174-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
178+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
179+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
175180
; CHECK-NEXT: ret <2 x double> [[INS]]
176181
;
177182
%ext = extractelement <4 x double> %b, i32 3
178183
%ins = insertelement <2 x double> undef, double %ext, i32 1
179184
ret <2 x double> %ins
180185
}
181186

182-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
183-
; AVX: {{.*}}
184-
; SSE: {{.*}}

0 commit comments

Comments
 (0)