Skip to content

Commit 1bb7b0d

Browse files
committed
Add tests for combine extract/insert between vectors of different lengths
1 parent e8c07f7 commit 1bb7b0d

File tree

1 file changed

+134
-0
lines changed

1 file changed

+134
-0
lines changed

llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,19 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
1818
ret <4 x float> %r
1919
}
2020

21+
; Different-length variant: extracts lane 0 of the <2 x float> %x, negates it,
; and inserts the result into lane 0 of the wider <4 x float> %y.
; The CHECK lines mirror the input IR, i.e. the scalar extract/fneg/insert
; sequence is expected to be left untouched.
define <4 x float> @ext0_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
22+
; CHECK-LABEL: @ext0_v2f32v4f32(
23+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
24+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
25+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
26+
; CHECK-NEXT: ret <4 x float> [[R]]
27+
;
28+
%e = extractelement <2 x float> %x, i32 0
29+
%n = fneg float %e
30+
%r = insertelement <4 x float> %y, float %n, i32 0
31+
ret <4 x float> %r
32+
}
33+
2134
; Eliminating extract/insert is profitable.
2235

2336
define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -32,6 +45,19 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
3245
ret <4 x float> %r
3346
}
3447

48+
; Different-length variant: extracts index 2 of the <2 x float> %x, negates it,
; and inserts the result into lane 2 of the <4 x float> %y.
; The CHECK lines mirror the input IR (the sequence is expected to stay as is).
; NOTE(review): index 2 is past the end of a 2-element vector, so per the LLVM
; LangRef the extractelement result is poison. If an in-bounds source lane was
; intended here, the extract index needs revisiting - confirm.
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
49+
; CHECK-LABEL: @ext2_v2f32v4f32(
50+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 2
51+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
52+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
53+
; CHECK-NEXT: ret <4 x float> [[R]]
54+
;
55+
%e = extractelement <2 x float> %x, i32 2
56+
%n = fneg float %e
57+
%r = insertelement <4 x float> %y, float %n, i32 2
58+
ret <4 x float> %r
59+
}
60+
3561
; Eliminating extract/insert is still profitable. Flags propagate.
3662

3763
define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
@@ -46,6 +72,19 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
4672
ret <2 x double> %r
4773
}
4874

75+
; Different-length variant: extracts lane 1 of the <2 x double> %x, negates it
; with the 'nsz' fast-math flag, and inserts the result into lane 1 of the
; <4 x double> %y.
; The CHECK lines mirror the input IR, including the 'nsz' flag on the scalar
; fneg.
define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
76+
; CHECK-LABEL: @ext1_v2f64v4f64(
77+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
78+
; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
79+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
80+
; CHECK-NEXT: ret <4 x double> [[R]]
81+
;
82+
%e = extractelement <2 x double> %x, i32 1
83+
%n = fneg nsz double %e
84+
%r = insertelement <4 x double> %y, double %n, i32 1
85+
ret <4 x double> %r
86+
}
87+
4988
; The vector fneg would cost twice as much as the scalar op with SSE,
5089
; so we don't transform there (the shuffle would also be more expensive).
5190

@@ -67,6 +106,19 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
67106
ret <8 x float> %r
68107
}
69108

109+
; Different-length variant: extracts lane 3 of the <4 x float> %x, negates it,
; and inserts the result into lane 7 of the <8 x float> %y.
; Note that the extract index (3) and the insert index (7) differ in this test.
; The CHECK lines mirror the input IR (the sequence is expected to stay as is).
define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
110+
; CHECK-LABEL: @ext7_v4f32v8f32(
111+
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
112+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
113+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
114+
; CHECK-NEXT: ret <8 x float> [[R]]
115+
;
116+
%e = extractelement <4 x float> %x, i32 3
117+
%n = fneg float %e
118+
%r = insertelement <8 x float> %y, float %n, i32 7
119+
ret <8 x float> %r
120+
}
121+
70122
; Same as above with an extra use of the extracted element.
71123

72124
define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
@@ -91,6 +143,21 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
91143
ret <8 x float> %r
92144
}
93145

146+
; Different-length variant with an extra use of the extracted element: lane 3
; of the <4 x float> %x is extracted, passed to @use, negated, and inserted
; into lane 3 of the <8 x float> %y.
; The CHECK lines mirror the input IR, including the call to @use.
; NOTE(review): despite the "ext7" name, lane 3 is used for both the extract
; and the insert.
define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
147+
; CHECK-LABEL: @ext7_v4f32v8f32_use1(
148+
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
149+
; CHECK-NEXT: call void @use(float [[E]])
150+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
151+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
152+
; CHECK-NEXT: ret <8 x float> [[R]]
153+
;
154+
%e = extractelement <4 x float> %x, i32 3
155+
call void @use(float %e)
156+
%n = fneg float %e
157+
%r = insertelement <8 x float> %y, float %n, i32 3
158+
ret <8 x float> %r
159+
}
160+
94161
; Negative test - the transform is likely not profitable if the fneg has another use.
95162

96163
define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
@@ -108,6 +175,21 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
108175
ret <8 x float> %r
109176
}
110177

178+
; Different-length variant with an extra use of the negated value: lane 3 of
; the <4 x float> %x is extracted and negated, the fneg result is passed to
; @use, and then inserted into lane 3 of the <8 x float> %y.
; The CHECK lines mirror the input IR, including the call to @use.
; NOTE(review): despite the "ext7" name, lane 3 is used for both the extract
; and the insert.
define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
179+
; CHECK-LABEL: @ext7_v4f32v8f32_use2(
180+
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
181+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
182+
; CHECK-NEXT: call void @use(float [[N]])
183+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
184+
; CHECK-NEXT: ret <8 x float> [[R]]
185+
;
186+
%e = extractelement <4 x float> %x, i32 3
187+
%n = fneg float %e
188+
call void @use(float %n)
189+
%r = insertelement <8 x float> %y, float %n, i32 3
190+
ret <8 x float> %r
191+
}
192+
111193
; Negative test - can't convert variable index to a shuffle.
112194

113195
define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
@@ -123,6 +205,19 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
123205
ret <2 x double> %r
124206
}
125207

208+
; Different-length variant with a variable lane index: the same runtime value
; %index selects the lane extracted from the <2 x double> %x and the lane of
; the <4 x double> %y that receives the 'nsz' negated value.
; The CHECK lines mirror the input IR (the sequence is expected to stay as is).
define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y, i32 %index) {
209+
; CHECK-LABEL: @ext_index_var_v2f64v4f64(
210+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
211+
; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
212+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
213+
; CHECK-NEXT: ret <4 x double> [[R]]
214+
;
215+
%e = extractelement <2 x double> %x, i32 %index
216+
%n = fneg nsz double %e
217+
%r = insertelement <4 x double> %y, double %n, i32 %index
218+
ret <4 x double> %r
219+
}
220+
126221
; Negative test - require same extract/insert index for simple shuffle.
127222
; TODO: We could handle this by adjusting the cost calculation.
128223

@@ -139,6 +234,19 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
139234
ret <2 x double> %r
140235
}
141236

237+
; Different-length variant with mismatched lanes: extracts lane 1 of the
; <2 x double> %x, negates it with 'nsz', and inserts the result into lane 0
; of the <4 x double> %y.
; The CHECK lines mirror the input IR (the sequence is expected to stay as is).
define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) {
238+
; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
239+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
240+
; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
241+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
242+
; CHECK-NEXT: ret <4 x double> [[R]]
243+
;
244+
%e = extractelement <2 x double> %x, i32 1
245+
%n = fneg nsz double %e
246+
%r = insertelement <4 x double> %y, double %n, i32 0
247+
ret <4 x double> %r
248+
}
249+
142250
; Negative test - avoid changing poison ops
143251

144252
define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -154,6 +262,19 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
154262
ret <4 x float> %r
155263
}
156264

265+
; Different-length variant with out-of-range indices: the extract uses index 6
; on a <2 x float> and the insert uses index 12 on a <4 x float>. Both indices
; exceed the respective vector lengths, so per the LLVM LangRef both results
; are poison.
; The CHECK lines mirror the input IR (the sequence is expected to stay as is).
define <4 x float> @ext12_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
266+
; CHECK-LABEL: @ext12_v2f32v4f32(
267+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
268+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
269+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
270+
; CHECK-NEXT: ret <4 x float> [[R]]
271+
;
272+
%e = extractelement <2 x float> %x, i32 6
273+
%n = fneg float %e
274+
%r = insertelement <4 x float> %y, float %n, i32 12
275+
ret <4 x float> %r
276+
}
277+
157278
; This used to crash because we assumed matching a true, unary fneg instruction.
158279

159280
define <2 x float> @ext1_v2f32_fsub(<2 x float> %x) {
@@ -181,3 +302,16 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
181302
%r = insertelement <2 x float> %y, float %s, i32 1
182303
ret <2 x float> %r
183304
}
305+
306+
; Different-length variant where the negation is written as a binary
; 'fsub nsz nnan float 0.0, %e' instead of a unary fneg. Lane 1 of the
; <2 x float> %x is extracted, negated, and inserted into lane 1 of the
; <4 x float> %y.
; The CHECK lines expect the same three instructions, with the fsub printed
; with its flags as 'nnan nsz' and the constant as 0.000000e+00.
define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
307+
; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
308+
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
309+
; CHECK-NEXT: [[S:%.*]] = fsub nnan nsz float 0.000000e+00, [[E]]
310+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[S]], i32 1
311+
; CHECK-NEXT: ret <4 x float> [[R]]
312+
;
313+
%e = extractelement <2 x float> %x, i32 1
314+
%s = fsub nsz nnan float 0.0, %e
315+
%r = insertelement <4 x float> %y, float %s, i32 1
316+
ret <4 x float> %r
317+
}

0 commit comments

Comments
 (0)