@@ -138,13 +138,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
@@ -157,13 +157,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512-FCP-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512-FCP-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512-FCP-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512-FCP-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512-FCP-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512-FCP-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512-FCP-NEXT: vzeroupper
; AVX512-FCP-NEXT: retq
;
@@ -176,13 +176,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512DQ-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512DQ-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512DQ-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512DQ-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512DQ-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512DQ-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512DQ-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512DQ-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512DQ-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
@@ -195,13 +195,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512DQ-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512DQ-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512DQ-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512DQ-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512DQ-FCP-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512DQ-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512DQ-FCP-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512DQ-FCP-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512DQ-FCP-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512DQ-FCP-NEXT: vzeroupper
; AVX512DQ-FCP-NEXT: retq
;
@@ -214,13 +214,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512BW-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512BW-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512BW-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512BW-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512BW-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512BW-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512BW-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512BW-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -233,13 +233,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512BW-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512BW-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512BW-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512BW-FCP-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512BW-FCP-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512BW-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512BW-FCP-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512BW-FCP-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512BW-FCP-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512BW-FCP-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512BW-FCP-NEXT: vzeroupper
; AVX512BW-FCP-NEXT: retq
;
@@ -252,13 +252,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512DQ-BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512DQ-BW-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512DQ-BW-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512DQ-BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512DQ-BW-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512DQ-BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512DQ-BW-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512DQ-BW-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512DQ-BW-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512DQ-BW-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512DQ-BW-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512DQ-BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512DQ-BW-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512DQ-BW-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512DQ-BW-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512DQ-BW-NEXT: vzeroupper
; AVX512DQ-BW-NEXT: retq
;
@@ -271,13 +271,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
- ; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm1
- ; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
- ; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
- ; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
- ; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
- ; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm3, (%rax)
- ; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm2, 64(%rax)
+ ; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
+ ; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+ ; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+ ; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+ ; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+ ; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm3, 64(%rax)
+ ; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm1, (%rax)
; AVX512DQ-BW-FCP-NEXT: vzeroupper
; AVX512DQ-BW-FCP-NEXT: retq
%in.vec0 = load <2 x i64>, ptr %in.vecptr0, align 64