@@ -389,57 +389,28 @@ define <32 x i8> @trunc_shuffle_v32i16_v32i8_ofs1(<32 x i16> %a0) {
389
389
; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
390
390
; AVX512VL-FAST-PERLANE-NEXT: retq
391
391
;
392
- ; AVX512BW-FAST-ALL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
393
- ; AVX512BW-FAST-ALL: # %bb.0:
394
- ; AVX512BW-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,9,11]
395
- ; AVX512BW-FAST-ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
396
- ; AVX512BW-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31]
397
- ; AVX512BW-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
398
- ; AVX512BW-FAST-ALL-NEXT: vpermt2q %zmm2, %zmm1, %zmm0
399
- ; AVX512BW-FAST-ALL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
400
- ; AVX512BW-FAST-ALL-NEXT: retq
401
- ;
402
- ; AVX512BW-FAST-PERLANE-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
403
- ; AVX512BW-FAST-PERLANE: # %bb.0:
404
- ; AVX512BW-FAST-PERLANE-NEXT: vextracti64x4 $1, %zmm0, %ymm1
405
- ; AVX512BW-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
406
- ; AVX512BW-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm1, %ymm1
407
- ; AVX512BW-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm0, %ymm0
408
- ; AVX512BW-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
409
- ; AVX512BW-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
410
- ; AVX512BW-FAST-PERLANE-NEXT: retq
411
- ;
412
- ; AVX512BWVL-FAST-ALL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
413
- ; AVX512BWVL-FAST-ALL: # %bb.0:
414
- ; AVX512BWVL-FAST-ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
415
- ; AVX512BWVL-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31]
416
- ; AVX512BWVL-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u]
417
- ; AVX512BWVL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [0,2,5,7]
418
- ; AVX512BWVL-FAST-ALL-NEXT: vpermi2q %ymm1, %ymm2, %ymm0
419
- ; AVX512BWVL-FAST-ALL-NEXT: retq
392
+ ; AVX512BW-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
393
+ ; AVX512BW: # %bb.0:
394
+ ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
395
+ ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
396
+ ; AVX512BW-NEXT: retq
420
397
;
421
- ; AVX512BWVL-FAST-PERLANE-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
422
- ; AVX512BWVL-FAST-PERLANE: # %bb.0:
423
- ; AVX512BWVL-FAST-PERLANE-NEXT: vextracti64x4 $1, %zmm0, %ymm1
424
- ; AVX512BWVL-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
425
- ; AVX512BWVL-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm1, %ymm1
426
- ; AVX512BWVL-FAST-PERLANE-NEXT: vpshufb %ymm2, %ymm0, %ymm0
427
- ; AVX512BWVL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
428
- ; AVX512BWVL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
429
- ; AVX512BWVL-FAST-PERLANE-NEXT: retq
398
+ ; AVX512BWVL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
399
+ ; AVX512BWVL: # %bb.0:
400
+ ; AVX512BWVL-NEXT: vpsrlw $8, %zmm0, %zmm0
401
+ ; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
402
+ ; AVX512BWVL-NEXT: retq
430
403
;
431
404
; AVX512VBMI-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
432
405
; AVX512VBMI: # %bb.0:
433
- ; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15,97,99,101,103,105,107,109,111,17,19,21,23,25,27,29,31,113,115,117,119,121,123,125,127]
434
- ; AVX512VBMI-NEXT: vpermi2b %zmm0, %zmm0, %zmm1
435
- ; AVX512VBMI-NEXT: vpermq {{.*#+}} ymm0 = ymm1[0,2,1,3]
406
+ ; AVX512VBMI-NEXT: vpsrlw $8, %zmm0, %zmm0
407
+ ; AVX512VBMI-NEXT: vpmovwb %zmm0, %ymm0
436
408
; AVX512VBMI-NEXT: retq
437
409
;
438
410
; AVX512VBMIVL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
439
411
; AVX512VBMIVL: # %bb.0:
440
- ; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63]
441
- ; AVX512VBMIVL-NEXT: vpermb %zmm0, %zmm1, %zmm0
442
- ; AVX512VBMIVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
412
+ ; AVX512VBMIVL-NEXT: vpsrlw $8, %zmm0, %zmm0
413
+ ; AVX512VBMIVL-NEXT: vpmovwb %zmm0, %ymm0
443
414
; AVX512VBMIVL-NEXT: retq
444
415
%bc = bitcast <32 x i16 > %a0 to <64 x i8 >
445
416
%res = shufflevector <64 x i8 > %bc , <64 x i8 > poison, <32 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 , i32 17 , i32 19 , i32 21 , i32 23 , i32 25 , i32 27 , i32 29 , i32 31 , i32 33 , i32 35 , i32 37 , i32 39 , i32 41 , i32 43 , i32 45 , i32 47 , i32 49 , i32 51 , i32 53 , i32 55 , i32 57 , i32 59 , i32 61 , i32 63 >
@@ -523,3 +494,8 @@ define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
523
494
ret <16 x i8 > %result
524
495
}
525
496
497
+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
498
+ ; AVX512BW-FAST-ALL: {{.*}}
499
+ ; AVX512BW-FAST-PERLANE: {{.*}}
500
+ ; AVX512BWVL-FAST-ALL: {{.*}}
501
+ ; AVX512BWVL-FAST-PERLANE: {{.*}}
0 commit comments