@@ -18,19 +18,6 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
18
18
ret <4 x float > %r
19
19
}
20
20
21
; Extracts lane 0 of a <2 x float>, negates it, and inserts the result into
; lane 0 of a <4 x float>, so the source and destination vector types differ.
; The CHECK lines expect the scalar extractelement/fneg/insertelement sequence
; to come out unchanged.
- define <4 x float > @ext0_v2f32v4f32 (<2 x float > %x , <4 x float > %y ) {
22
- ; CHECK-LABEL: @ext0_v2f32v4f32(
23
- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
24
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
25
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
26
- ; CHECK-NEXT: ret <4 x float> [[R]]
27
- ;
28
- %e = extractelement <2 x float > %x , i32 0
29
- %n = fneg float %e
30
- %r = insertelement <4 x float > %y , float %n , i32 0
31
- ret <4 x float > %r
32
- }
33
-
34
21
; Eliminating extract/insert is profitable.
35
22
36
23
define <4 x float > @ext2_v4f32 (<4 x float > %x , <4 x float > %y ) {
@@ -45,19 +32,6 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
45
32
ret <4 x float > %r
46
33
}
47
34
48
; Extracts index 2 of a <2 x float>, negates it, and inserts the result into
; lane 2 of a <4 x float>. The CHECK lines expect a vector fneg followed by two
; shufflevectors instead of the scalar sequence.
; NOTE(review): extract index 2 is past the two lanes of %x, and the first
; shuffle's mask value 2 likewise indexes past [[X]] - presumably into the
; poison operand; verify against the LangRef.
; NOTE(review): [[TMP1]] (the vector fneg) is never referenced after it is
; defined; the first shufflevector reads [[X]] instead. If that reflects the
; real output, the inserted lane is not negated - confirm.
- define <4 x float > @ext2_v2f32v4f32 (<2 x float > %x , <4 x float > %y ) {
49
- ; CHECK-LABEL: @ext2_v2f32v4f32(
50
- ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
51
- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
52
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
53
- ; CHECK-NEXT: ret <4 x float> [[R]]
54
- ;
55
- %e = extractelement <2 x float > %x , i32 2
56
- %n = fneg float %e
57
- %r = insertelement <4 x float > %y , float %n , i32 2
58
- ret <4 x float > %r
59
- }
60
-
61
35
; Eliminating extract/insert is still profitable. Flags propagate.
62
36
63
37
define <2 x double > @ext1_v2f64 (<2 x double > %x , <2 x double > %y ) {
@@ -72,25 +46,6 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
72
46
ret <2 x double > %r
73
47
}
74
48
75
; Extracts lane 1 of a <2 x double>, applies `fneg nsz`, and inserts the result
; into lane 1 of a <4 x double>. Expectations are split by check prefix:
;   SSE - the scalar extractelement/fneg/insertelement sequence is unchanged.
;   AVX - a vector `fneg nsz` followed by two shufflevectors.
; NOTE(review): in the AVX checks [[TMP1]] (the vector fneg) is never
; referenced after it is defined; the first shufflevector reads [[X]] instead.
; If that reflects the real output, the inserted lane is not negated - confirm.
- define <4 x double > @ext1_v2f64v4f64 (<2 x double > %x , <4 x double > %y ) {
76
- ; SSE-LABEL: @ext1_v2f64v4f64(
77
- ; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
78
- ; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
79
- ; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
80
- ; SSE-NEXT: ret <4 x double> [[R]]
81
- ;
82
- ; AVX-LABEL: @ext1_v2f64v4f64(
83
- ; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
84
- ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
85
- ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
86
- ; AVX-NEXT: ret <4 x double> [[R]]
87
- ;
88
- %e = extractelement <2 x double > %x , i32 1
89
- %n = fneg nsz double %e
90
- %r = insertelement <4 x double > %y , double %n , i32 1
91
- ret <4 x double > %r
92
- }
93
-
94
49
; The vector fneg would cost twice as much as the scalar op with SSE,
95
50
; so we don't transform there (the shuffle would also be more expensive).
96
51
@@ -112,19 +67,6 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
112
67
ret <8 x float > %r
113
68
}
114
69
115
; Extracts lane 3 of a <4 x float>, negates it, and inserts the result into
; lane 7 of an <8 x float>; the extract and insert indices differ.
; The CHECK lines expect the scalar sequence to come out unchanged.
- define <8 x float > @ext7_v4f32v8f32 (<4 x float > %x , <8 x float > %y ) {
116
- ; CHECK-LABEL: @ext7_v4f32v8f32(
117
- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
118
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
119
- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
120
- ; CHECK-NEXT: ret <8 x float> [[R]]
121
- ;
122
- %e = extractelement <4 x float > %x , i32 3
123
- %n = fneg float %e
124
- %r = insertelement <8 x float > %y , float %n , i32 7
125
- ret <8 x float > %r
126
- }
127
-
128
70
; Same as above with an extra use of the extracted element.
129
71
130
72
define <8 x float > @ext7_v8f32_use1 (<8 x float > %x , <8 x float > %y ) {
@@ -149,21 +91,6 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
149
91
ret <8 x float > %r
150
92
}
151
93
152
; Extracts lane 3 of a <4 x float>, passes the extracted scalar to @use (an
; extra use of the extract), then negates it and inserts the result into lane 3
; of an <8 x float>. The CHECK lines expect the sequence to come out unchanged.
; NOTE(review): despite the "ext7" prefix in the name, both the extract and the
; insert index are 3.
- define <8 x float > @ext7_v4f32v8f32_use1 (<4 x float > %x , <8 x float > %y ) {
153
- ; CHECK-LABEL: @ext7_v4f32v8f32_use1(
154
- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
155
- ; CHECK-NEXT: call void @use(float [[E]])
156
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
157
- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
158
- ; CHECK-NEXT: ret <8 x float> [[R]]
159
- ;
160
- %e = extractelement <4 x float > %x , i32 3
161
- call void @use (float %e )
162
- %n = fneg float %e
163
- %r = insertelement <8 x float > %y , float %n , i32 3
164
- ret <8 x float > %r
165
- }
166
-
167
94
; Negative test - the transform is likely not profitable if the fneg has another use.
168
95
169
96
define <8 x float > @ext7_v8f32_use2 (<8 x float > %x , <8 x float > %y ) {
@@ -181,21 +108,6 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
181
108
ret <8 x float > %r
182
109
}
183
110
184
; Extracts lane 3 of a <4 x float>, negates it, passes the negated scalar to
; @use (an extra use of the fneg), and inserts it into lane 3 of an
; <8 x float>. The CHECK lines expect the sequence to come out unchanged.
- define <8 x float > @ext7_v4f32v8f32_use2 (<4 x float > %x , <8 x float > %y ) {
185
- ; CHECK-LABEL: @ext7_v4f32v8f32_use2(
186
- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
187
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
188
- ; CHECK-NEXT: call void @use(float [[N]])
189
- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
190
- ; CHECK-NEXT: ret <8 x float> [[R]]
191
- ;
192
- %e = extractelement <4 x float > %x , i32 3
193
- %n = fneg float %e
194
- call void @use (float %n )
195
- %r = insertelement <8 x float > %y , float %n , i32 3
196
- ret <8 x float > %r
197
- }
198
-
199
111
; Negative test - can't convert variable index to a shuffle.
200
112
201
113
define <2 x double > @ext_index_var_v2f64 (<2 x double > %x , <2 x double > %y , i32 %index ) {
@@ -211,19 +123,6 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
211
123
ret <2 x double > %r
212
124
}
213
125
214
; Uses the runtime value %index as both the extract index (into a
; <2 x double>) and the insert index (into a <4 x double>), with `fneg nsz`
; in between. The CHECK lines expect the scalar sequence to come out unchanged.
- define <4 x double > @ext_index_var_v2f64v4f64 (<2 x double > %x , <4 x double > %y , i32 %index ) {
215
- ; CHECK-LABEL: @ext_index_var_v2f64v4f64(
216
- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
217
- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
218
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
219
- ; CHECK-NEXT: ret <4 x double> [[R]]
220
- ;
221
- %e = extractelement <2 x double > %x , i32 %index
222
- %n = fneg nsz double %e
223
- %r = insertelement <4 x double > %y , double %n , i32 %index
224
- ret <4 x double > %r
225
- }
226
-
227
126
; Negative test - require same extract/insert index for simple shuffle.
228
127
; TODO: We could handle this by adjusting the cost calculation.
229
128
@@ -240,33 +139,6 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
240
139
ret <2 x double > %r
241
140
}
242
141
243
- ; Negative test - the extract index is out of range for the (narrower) destination vector.
244
; Extracts lane 3 of a <4 x double>, applies `fneg nsz`, and inserts the result
; into lane 1 of a <2 x double>; the source vector is wider than the
; destination and the extract index (3) does not exist in the destination.
; The CHECK lines expect the scalar sequence to come out unchanged.
- define <2 x double > @ext3_v4f64v2f64 (<4 x double > %x , <2 x double > %y ) {
245
- ; CHECK-LABEL: @ext3_v4f64v2f64(
246
- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3
247
- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
248
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
249
- ; CHECK-NEXT: ret <2 x double> [[R]]
250
- ;
251
- %e = extractelement <4 x double > %x , i32 3
252
- %n = fneg nsz double %e
253
- %r = insertelement <2 x double > %y , double %n , i32 1
254
- ret <2 x double > %r
255
- }
256
-
257
; Extracts lane 1 of a <2 x double>, applies `fneg nsz`, and inserts the result
; into lane 0 of a <4 x double>; the extract and insert indices differ.
; The CHECK lines expect the scalar sequence to come out unchanged.
- define <4 x double > @ext1_v2f64v4f64_ins0 (<2 x double > %x , <4 x double > %y ) {
258
- ; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
259
- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
260
- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
261
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
262
- ; CHECK-NEXT: ret <4 x double> [[R]]
263
- ;
264
- %e = extractelement <2 x double > %x , i32 1
265
- %n = fneg nsz double %e
266
- %r = insertelement <4 x double > %y , double %n , i32 0
267
- ret <4 x double > %r
268
- }
269
-
270
142
; Negative test - avoid changing poison ops
271
143
272
144
define <4 x float > @ext12_v4f32 (<4 x float > %x , <4 x float > %y ) {
@@ -282,19 +154,6 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
282
154
ret <4 x float > %r
283
155
}
284
156
285
; Uses constant indices that are out of range for both vectors: extract index 6
; on a <2 x float> and insert index 12 on a <4 x float>.
; The CHECK lines expect the scalar sequence to come out unchanged.
- define <4 x float > @ext12_v2f32v4f32 (<2 x float > %x , <4 x float > %y ) {
286
- ; CHECK-LABEL: @ext12_v2f32v4f32(
287
- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
288
- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
289
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
290
- ; CHECK-NEXT: ret <4 x float> [[R]]
291
- ;
292
- %e = extractelement <2 x float > %x , i32 6
293
- %n = fneg float %e
294
- %r = insertelement <4 x float > %y , float %n , i32 12
295
- ret <4 x float > %r
296
- }
297
-
298
157
; This used to crash because we assumed matching a true, unary fneg instruction.
299
158
300
159
define <2 x float > @ext1_v2f32_fsub (<2 x float > %x ) {
@@ -322,16 +181,3 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
322
181
%r = insertelement <2 x float > %y , float %s , i32 1
323
182
ret <2 x float > %r
324
183
}
325
-
326
; Extracts lane 1 of a <2 x float>, negates it via `fsub nsz nnan` from 0.0
; (not a unary fneg), and inserts the result into lane 1 of a <4 x float>.
; The CHECK lines expect a vector `fneg nnan nsz` followed by two
; shufflevectors, i.e. the fast-math flags carry over to the vector op.
; NOTE(review): [[TMP1]] (the vector fneg) is never referenced after it is
; defined; the first shufflevector reads [[X]] instead. If that reflects the
; real output, the inserted lane is not negated - confirm.
- define <4 x float > @ext1_v2f32v4f32_fsub_fmf (<2 x float > %x , <4 x float > %y ) {
327
- ; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
328
- ; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
329
- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
330
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
331
- ; CHECK-NEXT: ret <4 x float> [[R]]
332
- ;
333
- %e = extractelement <2 x float > %x , i32 1
334
- %s = fsub nsz nnan float 0 .0 , %e
335
- %r = insertelement <4 x float > %y , float %s , i32 1
336
- ret <4 x float > %r
337
- }
0 commit comments