@@ -2,14 +2,14 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512VL-FAST-ALL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512VL-FAST-PERLANE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW,AVX512BW-FAST-ALL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW,AVX512BW-FAST-PERLANE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-FAST-ALL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMIVL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMIVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
 
 ; PR31551
 ; Pairs of shufflevector:trunc functions with functional equivalence.
@@ -74,13 +74,6 @@ define void @shuffle_v64i8_to_v32i8(ptr %L, ptr %S) nounwind {
 ; AVX512VBMI-NEXT: vpmovwb %zmm0, (%rsi)
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq
-;
-; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v32i8:
-; AVX512VBMIVL: # %bb.0:
-; AVX512VBMIVL-NEXT: vmovdqa64 (%rdi), %zmm0
-; AVX512VBMIVL-NEXT: vpmovwb %zmm0, (%rsi)
-; AVX512VBMIVL-NEXT: vzeroupper
-; AVX512VBMIVL-NEXT: retq
   %vec = load <64 x i8>, ptr %L
   %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
   store <32 x i8> %strided.vec, ptr %S
@@ -126,13 +119,6 @@ define void @trunc_v32i16_to_v32i8(ptr %L, ptr %S) nounwind {
 ; AVX512VBMI-NEXT: vpmovwb %zmm0, (%rsi)
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq
-;
-; AVX512VBMIVL-LABEL: trunc_v32i16_to_v32i8:
-; AVX512VBMIVL: # %bb.0:
-; AVX512VBMIVL-NEXT: vmovdqa64 (%rdi), %zmm0
-; AVX512VBMIVL-NEXT: vpmovwb %zmm0, (%rsi)
-; AVX512VBMIVL-NEXT: vzeroupper
-; AVX512VBMIVL-NEXT: retq
   %vec = load <64 x i8>, ptr %L
   %bc = bitcast <64 x i8> %vec to <32 x i16>
   %strided.vec = trunc <32 x i16> %bc to <32 x i8>
@@ -346,14 +332,6 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512VBMI-NEXT: vzeroupper
 ; AVX512VBMI-NEXT: retq
-;
-; AVX512VBMIVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
-; AVX512VBMIVL: # %bb.0:
-; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62]
-; AVX512VBMIVL-NEXT: vpermb %zmm0, %zmm1, %zmm0
-; AVX512VBMIVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512VBMIVL-NEXT: vzeroupper
-; AVX512VBMIVL-NEXT: retq
   %res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
   ret <16 x i8> %res
 }
@@ -406,12 +384,6 @@ define <32 x i8> @trunc_shuffle_v32i16_v32i8_ofs1(<32 x i16> %a0) {
 ; AVX512VBMI-NEXT: vpsrlw $8, %zmm0, %zmm0
 ; AVX512VBMI-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512VBMI-NEXT: retq
-;
-; AVX512VBMIVL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
-; AVX512VBMIVL: # %bb.0:
-; AVX512VBMIVL-NEXT: vpsrlw $8, %zmm0, %zmm0
-; AVX512VBMIVL-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512VBMIVL-NEXT: retq
   %bc = bitcast <32 x i16> %a0 to <64 x i8>
   %res = shufflevector <64 x i8> %bc, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
   ret <32 x i8> %res
@@ -442,11 +414,9 @@ define <4 x double> @PR34175(ptr %p) {
 ;
 ; AVX512BW-LABEL: PR34175:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
-; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1
-; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2
-; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = [0,8,16,24,0,0,0,0]
+; AVX512BW-NEXT: vpermw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX512BW-NEXT: vcvtdq2pd %xmm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
@@ -460,21 +430,11 @@ define <4 x double> @PR34175(ptr %p) {
 ;
 ; AVX512VBMI-LABEL: PR34175:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
-; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1
-; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2
-; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
-; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512VBMI-NEXT: vmovq {{.*#+}} xmm0 = [0,8,16,24,0,0,0,0]
+; AVX512VBMI-NEXT: vpermw (%rdi), %zmm0, %zmm0
+; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX512VBMI-NEXT: vcvtdq2pd %xmm0, %ymm0
 ; AVX512VBMI-NEXT: retq
-;
-; AVX512VBMIVL-LABEL: PR34175:
-; AVX512VBMIVL: # %bb.0:
-; AVX512VBMIVL-NEXT: vmovq {{.*#+}} xmm0 = [0,8,16,24,0,0,0,0]
-; AVX512VBMIVL-NEXT: vpermw (%rdi), %zmm0, %zmm0
-; AVX512VBMIVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VBMIVL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX512VBMIVL-NEXT: retq
   %v = load <32 x i16>, ptr %p, align 2
   %shuf = shufflevector <32 x i16> %v, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
   %tofp = uitofp <4 x i16> %shuf to <4 x double>
@@ -492,8 +452,3 @@ define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
   ret <16 x i8> %result
 }
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX512BW-FAST-ALL: {{.*}}
-; AVX512BW-FAST-PERLANE: {{.*}}
-; AVX512BWVL-FAST-ALL: {{.*}}
-; AVX512BWVL-FAST-PERLANE: {{.*}}