@@ -226,9 +226,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
226
226
; AVX512-FCP-NEXT: vmovaps (%rdi), %ymm4
227
227
; AVX512-FCP-NEXT: vunpcklps {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
228
228
; AVX512-FCP-NEXT: vextractf128 $1, %ymm5, %xmm5
229
- ; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} ymm6 = [0,0,0,0,5,13,5,5]
229
+ ; AVX512-FCP-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm6
230
230
; AVX512-FCP-NEXT: vpermps (%rdi), %zmm6, %zmm6
231
- ; AVX512-FCP-NEXT: vextractf128 $1, %ymm6, %xmm6
232
231
; AVX512-FCP-NEXT: vunpckhps {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
233
232
; AVX512-FCP-NEXT: vextractf128 $1, %ymm1, %xmm4
234
233
; AVX512-FCP-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -292,9 +291,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
292
291
; AVX512DQ-FCP-NEXT: vmovaps (%rdi), %ymm4
293
292
; AVX512DQ-FCP-NEXT: vunpcklps {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
294
293
; AVX512DQ-FCP-NEXT: vextractf128 $1, %ymm5, %xmm5
295
- ; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} ymm6 = [0,0,0,0,5,13,5,5]
294
+ ; AVX512DQ-FCP-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm6
296
295
; AVX512DQ-FCP-NEXT: vpermps (%rdi), %zmm6, %zmm6
297
- ; AVX512DQ-FCP-NEXT: vextractf128 $1, %ymm6, %xmm6
298
296
; AVX512DQ-FCP-NEXT: vunpckhps {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
299
297
; AVX512DQ-FCP-NEXT: vextractf128 $1, %ymm1, %xmm4
300
298
; AVX512DQ-FCP-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -358,9 +356,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
358
356
; AVX512BW-FCP-NEXT: vmovaps (%rdi), %ymm4
359
357
; AVX512BW-FCP-NEXT: vunpcklps {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
360
358
; AVX512BW-FCP-NEXT: vextractf128 $1, %ymm5, %xmm5
361
- ; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} ymm6 = [0,0,0,0,5,13,5,5]
359
+ ; AVX512BW-FCP-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm6
362
360
; AVX512BW-FCP-NEXT: vpermps (%rdi), %zmm6, %zmm6
363
- ; AVX512BW-FCP-NEXT: vextractf128 $1, %ymm6, %xmm6
364
361
; AVX512BW-FCP-NEXT: vunpckhps {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
365
362
; AVX512BW-FCP-NEXT: vextractf128 $1, %ymm1, %xmm4
366
363
; AVX512BW-FCP-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -424,9 +421,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
424
421
; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdi), %ymm4
425
422
; AVX512DQ-BW-FCP-NEXT: vunpcklps {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
426
423
; AVX512DQ-BW-FCP-NEXT: vextractf128 $1, %ymm5, %xmm5
427
- ; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} ymm6 = [0,0,0,0,5,13,5,5]
424
+ ; AVX512DQ-BW-FCP-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm6
428
425
; AVX512DQ-BW-FCP-NEXT: vpermps (%rdi), %zmm6, %zmm6
429
- ; AVX512DQ-BW-FCP-NEXT: vextractf128 $1, %ymm6, %xmm6
430
426
; AVX512DQ-BW-FCP-NEXT: vunpckhps {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
431
427
; AVX512DQ-BW-FCP-NEXT: vextractf128 $1, %ymm1, %xmm4
432
428
; AVX512DQ-BW-FCP-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
0 commit comments