Commit 0662791

[X86] vector-interleaved tests - add AVX512-SLOW/AVX512-FAST common prefixes to reduce duplication
These aren't always used, but it's a lot more manageable to keep the vector-interleaved files using the same RUN lines wherever possible.
1 parent 85ec68d
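
For illustration only (a hypothetical sketch, not part of this commit), the idea behind the shared AVX512-SLOW/AVX512-FAST prefixes: when several RUN lines list a common FileCheck prefix and llc produces identical code for a function under all of them, utils/update_llc_test_checks.py can emit one check block under the shared prefix instead of a near-identical block per configuration.

; Two hypothetical AVX512 configurations that share the AVX512-SLOW prefix:
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-SLOW,AVX512F-SLOW
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512-SLOW,AVX512BW-SLOW
;
; If both runs produce the same assembly for a function, the update script can
; collapse the checks into a single shared block (function name hypothetical):
; AVX512-SLOW-LABEL: example_function:
; AVX512-SLOW:       # %bb.0:
; AVX512-SLOW-NEXT:    retq
; ...rather than separate AVX512F-SLOW and AVX512BW-SLOW copies.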

56 files changed: +1471 additions, -2196 deletions


llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll

Lines changed: 10 additions & 8 deletions
@@ -4,14 +4,14 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-SLOW,FALLBACK2
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-FAST,FALLBACK3
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-FAST-PERLANE,FALLBACK4
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-FAST,AVX512F-ONLY-FAST,FALLBACK6
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-SLOW,AVX512DQ-SLOW,FALLBACK7
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-FAST,AVX512DQ-FAST,FALLBACK8
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-SLOW,AVX512BW-ONLY-SLOW,FALLBACK9
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-FAST,AVX512BW-ONLY-FAST,FALLBACK10
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-SLOW,AVX512DQBW-SLOW,FALLBACK11
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-FAST,AVX512DQBW-FAST,FALLBACK12
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-FAST,AVX512F-FAST,AVX512F-ONLY-FAST,FALLBACK6
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512DQ-SLOW,FALLBACK7
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-FAST,AVX512F-FAST,AVX512DQ-FAST,FALLBACK8
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-SLOW,AVX512BW-SLOW,AVX512BW-ONLY-SLOW,FALLBACK9
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-FAST,AVX512BW-FAST,AVX512BW-ONLY-FAST,FALLBACK10
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-SLOW,AVX512BW-SLOW,AVX512DQBW-SLOW,FALLBACK11
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-FAST,AVX512BW-FAST,AVX512DQBW-FAST,FALLBACK12
 
 ; These patterns are produced by LoopVectorizer for interleaved loads.
 
@@ -965,6 +965,8 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; AVX2: {{.*}}
 ; AVX2-ONLY: {{.*}}
+; AVX512-FAST: {{.*}}
+; AVX512-SLOW: {{.*}}
 ; AVX512BW-FAST: {{.*}}
 ; AVX512BW-ONLY-FAST: {{.*}}
 ; AVX512BW-ONLY-SLOW: {{.*}}
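
The two placeholder lines added above ("; AVX512-FAST: {{.*}}" and "; AVX512-SLOW: {{.*}}") are not real checks: by default FileCheck reports an error for any prefix it is given that has no check directive anywhere in the file, so the update script appends a trivially matching line for every listed prefix that ends up unused after merging. A sketch of regenerating one file in this batch (assuming a built llc on PATH):

python llvm/utils/update_llc_test_checks.py \
    llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll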

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll

Lines changed: 30 additions & 54 deletions
@@ -4,14 +4,14 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-SLOW,FALLBACK2
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-FAST,FALLBACK3
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-FAST-PERLANE,FALLBACK4
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-FAST,AVX512F-ONLY-FAST,FALLBACK6
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-SLOW,AVX512DQ-SLOW,FALLBACK7
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-FAST,AVX512DQ-FAST,FALLBACK8
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-SLOW,AVX512BW-ONLY-SLOW,FALLBACK9
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-FAST,AVX512BW-ONLY-FAST,FALLBACK10
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-SLOW,AVX512DQBW-SLOW,FALLBACK11
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-FAST,AVX512DQBW-FAST,FALLBACK12
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-FAST,AVX512F-FAST,AVX512F-ONLY-FAST,FALLBACK6
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512DQ-SLOW,FALLBACK7
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-FAST,AVX512F-FAST,AVX512DQ-FAST,FALLBACK8
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-SLOW,AVX512BW-SLOW,AVX512BW-ONLY-SLOW,FALLBACK9
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-FAST,AVX512BW-FAST,AVX512BW-ONLY-FAST,FALLBACK10
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-SLOW,AVX512BW-SLOW,AVX512DQBW-SLOW,FALLBACK11
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-FAST,AVX512BW-FAST,AVX512DQBW-FAST,FALLBACK12
 
 ; These patterns are produced by LoopVectorizer for interleaved loads.
 
@@ -77,53 +77,29 @@ define void @load_i16_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX2-FAST-PERLANE-NEXT: vmovd %xmm0, (%rcx)
 ; AVX2-FAST-PERLANE-NEXT: retq
 ;
-; AVX512F-SLOW-LABEL: load_i16_stride3_vf2:
-; AVX512F-SLOW: # %bb.0:
-; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,2,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vmovd %xmm1, (%rsi)
-; AVX512F-SLOW-NEXT: vmovd %xmm2, (%rdx)
-; AVX512F-SLOW-NEXT: vmovd %xmm0, (%rcx)
-; AVX512F-SLOW-NEXT: retq
+; AVX512-SLOW-LABEL: load_i16_stride3_vf2:
+; AVX512-SLOW: # %bb.0:
+; AVX512-SLOW-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,3,2,3,4,5,6,7]
+; AVX512-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
+; AVX512-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,2,2,3,4,5,6,7]
+; AVX512-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX512-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; AVX512-SLOW-NEXT: vmovd %xmm1, (%rsi)
+; AVX512-SLOW-NEXT: vmovd %xmm2, (%rdx)
+; AVX512-SLOW-NEXT: vmovd %xmm0, (%rcx)
+; AVX512-SLOW-NEXT: retq
 ;
-; AVX512F-FAST-LABEL: load_i16_stride3_vf2:
-; AVX512F-FAST: # %bb.0:
-; AVX512F-FAST-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512F-FAST-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,3,2,3,4,5,6,7]
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512F-FAST-NEXT: vmovd %xmm1, (%rsi)
-; AVX512F-FAST-NEXT: vmovd %xmm2, (%rdx)
-; AVX512F-FAST-NEXT: vmovd %xmm0, (%rcx)
-; AVX512F-FAST-NEXT: retq
-;
-; AVX512BW-SLOW-LABEL: load_i16_stride3_vf2:
-; AVX512BW-SLOW: # %bb.0:
-; AVX512BW-SLOW-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512BW-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,3,2,3,4,5,6,7]
-; AVX512BW-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; AVX512BW-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,2,2,3,4,5,6,7]
-; AVX512BW-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; AVX512BW-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
-; AVX512BW-SLOW-NEXT: vmovd %xmm1, (%rsi)
-; AVX512BW-SLOW-NEXT: vmovd %xmm2, (%rdx)
-; AVX512BW-SLOW-NEXT: vmovd %xmm0, (%rcx)
-; AVX512BW-SLOW-NEXT: retq
-;
-; AVX512BW-FAST-LABEL: load_i16_stride3_vf2:
-; AVX512BW-FAST: # %bb.0:
-; AVX512BW-FAST-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512BW-FAST-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,3,2,3,4,5,6,7]
-; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512BW-FAST-NEXT: vmovd %xmm1, (%rsi)
-; AVX512BW-FAST-NEXT: vmovd %xmm2, (%rdx)
-; AVX512BW-FAST-NEXT: vmovd %xmm0, (%rcx)
-; AVX512BW-FAST-NEXT: retq
+; AVX512-FAST-LABEL: load_i16_stride3_vf2:
+; AVX512-FAST: # %bb.0:
+; AVX512-FAST-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-FAST-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,3,2,3,4,5,6,7]
+; AVX512-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512-FAST-NEXT: vmovd %xmm1, (%rsi)
+; AVX512-FAST-NEXT: vmovd %xmm2, (%rdx)
+; AVX512-FAST-NEXT: vmovd %xmm0, (%rcx)
+; AVX512-FAST-NEXT: retq
 %wide.vec = load <6 x i16>, ptr %in.vec, align 64
 %strided.vec0 = shufflevector <6 x i16> %wide.vec, <6 x i16> poison, <2 x i32> <i32 0, i32 3>
 %strided.vec1 = shufflevector <6 x i16> %wide.vec, <6 x i16> poison, <2 x i32> <i32 1, i32 4>
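
To see why one merged AVX512-SLOW body can stand in for four configurations: every slow-shuffle AVX512 RUN line above now lists AVX512-SLOW among its prefixes, so each invocation matches the same shared block. Expanding the first AVX512 RUN line by hand (lit substitutes %s with the test path; invocation assumed to run from an LLVM checkout):

llc < llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll \
    -mtriple=x86_64-- -mattr=+avx512vl | \
  FileCheck llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll \
    --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5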

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll

Lines changed: 35 additions & 60 deletions
@@ -4,14 +4,14 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-SLOW,FALLBACK2
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-FAST,FALLBACK3
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX1,AVX2,AVX2-ONLY,AVX2-FAST-PERLANE,FALLBACK4
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-FAST,AVX512F-ONLY-FAST,FALLBACK6
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-SLOW,AVX512DQ-SLOW,FALLBACK7
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512F-FAST,AVX512DQ-FAST,FALLBACK8
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-SLOW,AVX512BW-ONLY-SLOW,FALLBACK9
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-FAST,AVX512BW-ONLY-FAST,FALLBACK10
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-SLOW,AVX512DQBW-SLOW,FALLBACK11
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512BW-FAST,AVX512DQBW-FAST,FALLBACK12
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512F-ONLY-SLOW,FALLBACK5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-FAST,AVX512F-FAST,AVX512F-ONLY-FAST,FALLBACK6
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-SLOW,AVX512F-SLOW,AVX512DQ-SLOW,FALLBACK7
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512F,AVX512-FAST,AVX512F-FAST,AVX512DQ-FAST,FALLBACK8
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-SLOW,AVX512BW-SLOW,AVX512BW-ONLY-SLOW,FALLBACK9
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-FAST,AVX512BW-FAST,AVX512BW-ONLY-FAST,FALLBACK10
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-SLOW,AVX512BW-SLOW,AVX512DQBW-SLOW,FALLBACK11
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX512,AVX512BW,AVX512-FAST,AVX512BW-FAST,AVX512DQBW-FAST,FALLBACK12
 
 ; These patterns are produced by LoopVectorizer for interleaved loads.
 
@@ -46,59 +46,32 @@ define void @load_i16_stride4_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX1-NEXT: vmovd %xmm0, (%r8)
 ; AVX1-NEXT: retq
 ;
-; AVX512F-SLOW-LABEL: load_i16_stride4_vf2:
-; AVX512F-SLOW: # %bb.0:
-; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[2,0,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
-; AVX512F-SLOW-NEXT: vpmovqw %xmm0, (%rsi)
-; AVX512F-SLOW-NEXT: vmovd %xmm1, (%rdx)
-; AVX512F-SLOW-NEXT: vmovd %xmm3, (%rcx)
-; AVX512F-SLOW-NEXT: vmovd %xmm2, (%r8)
-; AVX512F-SLOW-NEXT: retq
-;
-; AVX512F-FAST-LABEL: load_i16_stride4_vf2:
-; AVX512F-FAST: # %bb.0:
-; AVX512F-FAST-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512F-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512F-FAST-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512F-FAST-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[2,0,2,3,4,5,6,7]
-; AVX512F-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
-; AVX512F-FAST-NEXT: vpmovqw %xmm0, (%rsi)
-; AVX512F-FAST-NEXT: vmovd %xmm1, (%rdx)
-; AVX512F-FAST-NEXT: vmovd %xmm3, (%rcx)
-; AVX512F-FAST-NEXT: vmovd %xmm2, (%r8)
-; AVX512F-FAST-NEXT: retq
-;
-; AVX512BW-SLOW-LABEL: load_i16_stride4_vf2:
-; AVX512BW-SLOW: # %bb.0:
-; AVX512BW-SLOW-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512BW-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; AVX512BW-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7]
-; AVX512BW-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512BW-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[2,0,2,3,4,5,6,7]
-; AVX512BW-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
-; AVX512BW-SLOW-NEXT: vpmovqw %xmm0, (%rsi)
-; AVX512BW-SLOW-NEXT: vmovd %xmm1, (%rdx)
-; AVX512BW-SLOW-NEXT: vmovd %xmm3, (%rcx)
-; AVX512BW-SLOW-NEXT: vmovd %xmm2, (%r8)
-; AVX512BW-SLOW-NEXT: retq
+; AVX512-SLOW-LABEL: load_i16_stride4_vf2:
+; AVX512-SLOW: # %bb.0:
+; AVX512-SLOW-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; AVX512-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7]
+; AVX512-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX512-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[2,0,2,3,4,5,6,7]
+; AVX512-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
+; AVX512-SLOW-NEXT: vpmovqw %xmm0, (%rsi)
+; AVX512-SLOW-NEXT: vmovd %xmm1, (%rdx)
+; AVX512-SLOW-NEXT: vmovd %xmm3, (%rcx)
+; AVX512-SLOW-NEXT: vmovd %xmm2, (%r8)
+; AVX512-SLOW-NEXT: retq
 ;
-; AVX512BW-FAST-LABEL: load_i16_stride4_vf2:
-; AVX512BW-FAST: # %bb.0:
-; AVX512BW-FAST-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512BW-FAST-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512BW-FAST-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[2,0,2,3,4,5,6,7]
-; AVX512BW-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
-; AVX512BW-FAST-NEXT: vpmovqw %xmm0, (%rsi)
-; AVX512BW-FAST-NEXT: vmovd %xmm1, (%rdx)
-; AVX512BW-FAST-NEXT: vmovd %xmm3, (%rcx)
-; AVX512BW-FAST-NEXT: vmovd %xmm2, (%r8)
-; AVX512BW-FAST-NEXT: retq
+; AVX512-FAST-LABEL: load_i16_stride4_vf2:
+; AVX512-FAST: # %bb.0:
+; AVX512-FAST-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX512-FAST-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[2,0,2,3,4,5,6,7]
+; AVX512-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
+; AVX512-FAST-NEXT: vpmovqw %xmm0, (%rsi)
+; AVX512-FAST-NEXT: vmovd %xmm1, (%rdx)
+; AVX512-FAST-NEXT: vmovd %xmm3, (%rcx)
+; AVX512-FAST-NEXT: vmovd %xmm2, (%r8)
+; AVX512-FAST-NEXT: retq
 %wide.vec = load <8 x i16>, ptr %in.vec, align 64
 %strided.vec0 = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <2 x i32> <i32 0, i32 4>
 %strided.vec1 = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <2 x i32> <i32 1, i32 5>
@@ -5451,8 +5424,10 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX: {{.*}}
 ; AVX2: {{.*}}
 ; AVX2-ONLY: {{.*}}
+; AVX512BW-FAST: {{.*}}
 ; AVX512BW-ONLY-FAST: {{.*}}
 ; AVX512BW-ONLY-SLOW: {{.*}}
+; AVX512BW-SLOW: {{.*}}
 ; AVX512DQ-FAST: {{.*}}
 ; AVX512DQ-SLOW: {{.*}}
 ; AVX512DQBW-FAST: {{.*}}
