Skip to content

Commit eb2b453

Browse files
committed
[VectorCombine] foldInsExtVectorToShuffle - ensure we call getShuffleCost with the input operand type, not the result type
Typo in #121216. Fixes #126085.
1 parent 6575154 commit eb2b453

File tree

5 files changed

+71
-36
lines changed

5 files changed

+71
-36
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3210,7 +3210,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
32103210
ExtToVecMask[0] = ExtIdx;
32113211
// Add cost for expanding or narrowing
32123212
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
3213-
DstVecTy, ExtToVecMask, CostKind);
3213+
SrcVecTy, ExtToVecMask, CostKind);
32143214
NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind);
32153215
}
32163216

llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,10 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
5858
}
5959

6060
define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
61-
; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
62-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
63-
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
64-
; SSE-NEXT: ret <4 x double> [[INS]]
65-
;
66-
; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
67-
; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
68-
; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
69-
; AVX-NEXT: ret <4 x double> [[INS]]
61+
; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
62+
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
63+
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
64+
; CHECK-NEXT: ret <4 x double> [[INS]]
7065
;
7166
%ext = extractelement <2 x double> %b, i32 1
7267
%ins = insertelement <4 x double> poison, double %ext, i32 0
@@ -85,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8580
}
8681

8782
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
88-
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
89-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
90-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
91-
; CHECK-NEXT: ret <4 x double> [[INS]]
83+
; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
84+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
85+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
86+
; SSE-NEXT: ret <4 x double> [[INS]]
87+
;
88+
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
89+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
90+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
91+
; AVX-NEXT: ret <4 x double> [[INS]]
9292
;
9393
%ext = extractelement <2 x double> %b, i32 1
9494
%ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -119,8 +119,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
119119

120120
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
121121
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
122-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
123-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
122+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
123+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
124124
; CHECK-NEXT: ret <2 x double> [[INS]]
125125
;
126126
%ext = extractelement <4 x double> %b, i32 1
@@ -152,8 +152,8 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
152152

153153
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
154154
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
155-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
156-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
155+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
156+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
157157
; CHECK-NEXT: ret <2 x double> [[INS]]
158158
;
159159
%ext = extractelement <4 x double> %b, i32 0
@@ -173,10 +173,15 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
173173
}
174174

175175
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
176-
; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
177-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
178-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
179-
; CHECK-NEXT: ret <2 x double> [[INS]]
176+
; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
177+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
178+
; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
179+
; SSE-NEXT: ret <2 x double> [[INS]]
180+
;
181+
; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
182+
; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
183+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
184+
; AVX-NEXT: ret <2 x double> [[INS]]
180185
;
181186
%ext = extractelement <4 x double> %b, i32 2
182187
%ins = insertelement <2 x double> poison, double %ext, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-insert.ll

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
9898

9999
define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
100100
; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
101-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
102-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
101+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
102+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
103103
; CHECK-NEXT: ret <2 x double> [[INS]]
104104
;
105105
%ext = extractelement <4 x double> %b, i32 0
@@ -119,10 +119,15 @@ define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
119119
}
120120

121121
define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
122-
; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
123-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
124-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
125-
; CHECK-NEXT: ret <2 x double> [[INS]]
122+
; SSE-LABEL: @src_ins0_v2f64_ext2_v4f64(
123+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
124+
; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
125+
; SSE-NEXT: ret <2 x double> [[INS]]
126+
;
127+
; AVX-LABEL: @src_ins0_v2f64_ext2_v4f64(
128+
; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
129+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
130+
; AVX-NEXT: ret <2 x double> [[INS]]
126131
;
127132
%ext = extractelement <4 x double> %b, i32 2
128133
%ins = insertelement <2 x double> %a, double %ext, i32 0
@@ -142,8 +147,8 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
142147

143148
define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
144149
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
145-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
146-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
150+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
151+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
147152
; CHECK-NEXT: ret <2 x double> [[INS]]
148153
;
149154
%ext = extractelement <4 x double> %b, i32 0
@@ -163,10 +168,15 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)
163168
}
164169

165170
define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
166-
; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
167-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
168-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
169-
; CHECK-NEXT: ret <2 x double> [[INS]]
171+
; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
172+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
173+
; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
174+
; SSE-NEXT: ret <2 x double> [[INS]]
175+
;
176+
; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
177+
; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
178+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
179+
; AVX-NEXT: ret <2 x double> [[INS]]
170180
;
171181
%ext = extractelement <4 x double> %b, i32 2
172182
%ins = insertelement <2 x double> %a, double %ext, i32 1

llvm/test/Transforms/VectorCombine/X86/load.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,8 +574,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
574574
; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
575575
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
576576
; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
577-
; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
578-
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
577+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
578+
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
579579
; CHECK-NEXT: ret <8 x i32> [[R]]
580580
;
581581
%l = load <1 x i32>, ptr %p, align 4
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- | FileCheck %s
3+
4+
define i32 @test(ptr %a0) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ptr [[A0:%.*]]) {
7+
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[A0]], align 1
8+
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <16 x i8> [[LOAD]], <16 x i8> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
9+
; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i8> [[LOAD]], i64 11
10+
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i8> [[SHUF]], i8 [[ELT]], i64 1
11+
; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i8> [[INS]] to i32
12+
; CHECK-NEXT: ret i32 [[RES]]
13+
;
14+
%load = load <16 x i8>, ptr %a0, align 1
15+
%shuf = shufflevector <16 x i8> %load, <16 x i8> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
16+
%elt = extractelement <16 x i8> %load, i64 11
17+
%ins = insertelement <4 x i8> %shuf, i8 %elt, i64 1
18+
%res = bitcast <4 x i8> %ins to i32
19+
ret i32 %res
20+
}

0 commit comments

Comments
 (0)