Skip to content

Commit 26390f2

Browse files
authored
[VectorCombine] foldShuffleOfShuffles - fold shuffle(shuffle(x,y),poison) length changing masks (#144690)
The shuffle merging code assumes that the shuffle sources are all the same type, which fails if the outer shuffle changes the vector length and we don't have 2 inner shuffles. We already handle length-changing shuffles if we do have 2 inner shuffles. This patch creates a fake "all poison" shuffle mask and reuses the other shuffle's sources, which can be safely used with the existing merge code. The alternative was a considerable refactor of the merge code to account for different vector widths. Fixes #144656.
1 parent d2c0451 commit 26390f2

File tree

4 files changed

+32
-28
lines changed

4 files changed

+32
-28
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2282,6 +2282,17 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
22822282
if (!Match0 && !Match1)
22832283
return false;
22842284

2285+
// If the outer shuffle is a permute, then create a fake inner all-poison
2286+
// shuffle. This is easier than accounting for length-changing shuffles below.
2287+
SmallVector<int, 16> PoisonMask1;
2288+
if (!Match1 && isa<PoisonValue>(OuterV1)) {
2289+
X1 = X0;
2290+
Y1 = Y0;
2291+
PoisonMask1.append(InnerMask0.size(), PoisonMaskElem);
2292+
InnerMask1 = PoisonMask1;
2293+
Match1 = true; // fake match
2294+
}
2295+
22852296
X0 = Match0 ? X0 : OuterV0;
22862297
Y0 = Match0 ? Y0 : OuterV0;
22872298
X1 = Match1 ? X1 : OuterV1;
@@ -2356,11 +2367,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
23562367
// Try to merge the shuffles if the new shuffle is not costly.
23572368
InstructionCost InnerCost0 = 0;
23582369
if (Match0)
2359-
InnerCost0 = TTI.getInstructionCost(cast<Instruction>(OuterV0), CostKind);
2370+
InnerCost0 = TTI.getInstructionCost(cast<User>(OuterV0), CostKind);
23602371

23612372
InstructionCost InnerCost1 = 0;
23622373
if (Match1)
2363-
InnerCost1 = TTI.getInstructionCost(cast<Instruction>(OuterV1), CostKind);
2374+
InnerCost1 = TTI.getInstructionCost(cast<User>(OuterV1), CostKind);
23642375

23652376
InstructionCost OuterCost = TTI.getInstructionCost(&I, CostKind);
23662377

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,8 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
262262

263263
define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
264264
; CHECK-LABEL: @splattwice(
265-
; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
266-
; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
267-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[AS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
268-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[BS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
265+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
266+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
269267
; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]]
270268
; CHECK-NEXT: ret <8 x half> [[R]]
271269
;

llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
3232
; SSE-NEXT: ret <4 x double> [[INS]]
3333
;
3434
; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
35-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
36-
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
35+
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
3736
; AVX-NEXT: ret <4 x double> [[INS]]
3837
;
3938
%ext = extractelement <2 x double> %b, i32 0
@@ -48,8 +47,7 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
4847
; SSE-NEXT: ret <4 x double> [[INS]]
4948
;
5049
; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
51-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
52-
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
50+
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
5351
; AVX-NEXT: ret <4 x double> [[INS]]
5452
;
5553
%ext = extractelement <2 x double> %b, i32 0
@@ -86,8 +84,7 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8684
; SSE-NEXT: ret <4 x double> [[INS]]
8785
;
8886
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
89-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
90-
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
87+
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
9188
; AVX-NEXT: ret <4 x double> [[INS]]
9289
;
9390
%ext = extractelement <2 x double> %b, i32 1
@@ -96,10 +93,14 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
9693
}
9794

9895
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
99-
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
100-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
101-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
102-
; CHECK-NEXT: ret <4 x double> [[INS]]
96+
; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
97+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
98+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
99+
; SSE-NEXT: ret <4 x double> [[INS]]
100+
;
101+
; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
102+
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
103+
; AVX-NEXT: ret <4 x double> [[INS]]
103104
;
104105
%ext = extractelement <2 x double> %b, i32 1
105106
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -119,8 +120,7 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
119120

120121
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
121122
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
122-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
123-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
123+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
124124
; CHECK-NEXT: ret <2 x double> [[INS]]
125125
;
126126
%ext = extractelement <4 x double> %b, i32 1
@@ -152,8 +152,7 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
152152

153153
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
154154
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
155-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
156-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
155+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 0>
157156
; CHECK-NEXT: ret <2 x double> [[INS]]
158157
;
159158
%ext = extractelement <4 x double> %b, i32 0
@@ -164,8 +163,7 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
164163
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
165164
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
166165
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
167-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 1>
168-
; CHECK-NEXT: ret <2 x double> [[INS]]
166+
; CHECK-NEXT: ret <2 x double> [[TMP1]]
169167
;
170168
%ext = extractelement <4 x double> %b, i32 1
171169
%ins = insertelement <2 x double> poison, double %ext, i32 1
@@ -174,8 +172,7 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
174172

175173
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
176174
; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
177-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
178-
; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
175+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 2>
179176
; SSE-NEXT: ret <2 x double> [[INS]]
180177
;
181178
; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
@@ -190,8 +187,7 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
190187

191188
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
192189
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
193-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
194-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
190+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
195191
; CHECK-NEXT: ret <2 x double> [[INS]]
196192
;
197193
%ext = extractelement <4 x double> %b, i32 3

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
578578
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
579579
; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
580580
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
581-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
582-
; CHECK-NEXT: ret <8 x i32> [[R]]
581+
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
583582
;
584583
%l = load <1 x i32>, ptr %p, align 4
585584
store <1 x i32> %l, ptr %store_ptr

0 commit comments

Comments
 (0)