Skip to content

Commit 40c6bab

Browse files
committed
[X86] Adjust tests to run on GFNI-capable CPU levels
AVX512BW+VL capable CPUs are required for 512-bit GFNI
1 parent bec48f8 commit 40c6bab

File tree

3 files changed

+117
-209
lines changed

3 files changed

+117
-209
lines changed

llvm/test/CodeGen/X86/gfni-funnel-shifts.ll

Lines changed: 40 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+gfni | FileCheck %s --check-prefixes=GFNISSE
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX2
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512F
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512BW
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX1
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX512
76

87
;
98
; 128 Bit Vector Funnel Shifts
@@ -19,31 +18,20 @@ define <16 x i8> @splatconstant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
1918
; GFNISSE-NEXT: por %xmm1, %xmm0
2019
; GFNISSE-NEXT: retq
2120
;
22-
; GFNIAVX1-LABEL: splatconstant_fshl_v16i8:
23-
; GFNIAVX1: # %bb.0:
24-
; GFNIAVX1-NEXT: vpsrlw $5, %xmm1, %xmm1
25-
; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
26-
; GFNIAVX1-NEXT: vpsllw $3, %xmm0, %xmm0
27-
; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28-
; GFNIAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
29-
; GFNIAVX1-NEXT: retq
30-
;
31-
; GFNIAVX2-LABEL: splatconstant_fshl_v16i8:
32-
; GFNIAVX2: # %bb.0:
33-
; GFNIAVX2-NEXT: vpsrlw $5, %xmm1, %xmm1
34-
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
35-
; GFNIAVX2-NEXT: vpsllw $3, %xmm0, %xmm0
36-
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37-
; GFNIAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
38-
; GFNIAVX2-NEXT: retq
21+
; GFNIAVX1OR2-LABEL: splatconstant_fshl_v16i8:
22+
; GFNIAVX1OR2: # %bb.0:
23+
; GFNIAVX1OR2-NEXT: vpsrlw $5, %xmm1, %xmm1
24+
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
25+
; GFNIAVX1OR2-NEXT: vpsllw $3, %xmm0, %xmm0
26+
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27+
; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
28+
; GFNIAVX1OR2-NEXT: retq
3929
;
4030
; GFNIAVX512-LABEL: splatconstant_fshl_v16i8:
4131
; GFNIAVX512: # %bb.0:
4232
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm2
4333
; GFNIAVX512-NEXT: vpsrlw $5, %xmm1, %xmm0
44-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
45-
; GFNIAVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
46-
; GFNIAVX512-NEXT: vzeroupper
34+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
4735
; GFNIAVX512-NEXT: retq
4836
%res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
4937
ret <16 x i8> %res
@@ -59,13 +47,20 @@ define <16 x i8> @splatconstant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
5947
; GFNISSE-NEXT: por %xmm1, %xmm0
6048
; GFNISSE-NEXT: retq
6149
;
62-
; GFNIAVX-LABEL: splatconstant_fshr_v16i8:
63-
; GFNIAVX: # %bb.0:
64-
; GFNIAVX-NEXT: vpsrlw $7, %xmm1, %xmm1
65-
; GFNIAVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
66-
; GFNIAVX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
67-
; GFNIAVX-NEXT: vpor %xmm1, %xmm0, %xmm0
68-
; GFNIAVX-NEXT: retq
50+
; GFNIAVX1OR2-LABEL: splatconstant_fshr_v16i8:
51+
; GFNIAVX1OR2: # %bb.0:
52+
; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm1, %xmm1
53+
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
54+
; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
55+
; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
56+
; GFNIAVX1OR2-NEXT: retq
57+
;
58+
; GFNIAVX512-LABEL: splatconstant_fshr_v16i8:
59+
; GFNIAVX512: # %bb.0:
60+
; GFNIAVX512-NEXT: vpsrlw $7, %xmm1, %xmm1
61+
; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm0
62+
; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
63+
; GFNIAVX512-NEXT: retq
6964
%res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>)
7065
ret <16 x i8> %res
7166
}
@@ -124,8 +119,7 @@ define <32 x i8> @splatconstant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
124119
; GFNIAVX512: # %bb.0:
125120
; GFNIAVX512-NEXT: vpsllw $4, %ymm0, %ymm2
126121
; GFNIAVX512-NEXT: vpsrlw $4, %ymm1, %ymm0
127-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
128-
; GFNIAVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
122+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
129123
; GFNIAVX512-NEXT: retq
130124
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
131125
ret <32 x i8> %res
@@ -181,8 +175,7 @@ define <32 x i8> @splatconstant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
181175
; GFNIAVX512: # %bb.0:
182176
; GFNIAVX512-NEXT: vpsllw $2, %ymm0, %ymm2
183177
; GFNIAVX512-NEXT: vpsrlw $6, %ymm1, %ymm0
184-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
185-
; GFNIAVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
178+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
186179
; GFNIAVX512-NEXT: retq
187180
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
188181
ret <32 x i8> %res
@@ -255,25 +248,12 @@ define <64 x i8> @splatconstant_fshl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
255248
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
256249
; GFNIAVX2-NEXT: retq
257250
;
258-
; GFNIAVX512F-LABEL: splatconstant_fshl_v64i8:
259-
; GFNIAVX512F: # %bb.0:
260-
; GFNIAVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
261-
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
262-
; GFNIAVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1
263-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
264-
; GFNIAVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm2
265-
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
266-
; GFNIAVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
267-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
268-
; GFNIAVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
269-
; GFNIAVX512F-NEXT: retq
270-
;
271-
; GFNIAVX512BW-LABEL: splatconstant_fshl_v64i8:
272-
; GFNIAVX512BW: # %bb.0:
273-
; GFNIAVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
274-
; GFNIAVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
275-
; GFNIAVX512BW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
276-
; GFNIAVX512BW-NEXT: retq
251+
; GFNIAVX512-LABEL: splatconstant_fshl_v64i8:
252+
; GFNIAVX512: # %bb.0:
253+
; GFNIAVX512-NEXT: vpsrlw $7, %zmm1, %zmm1
254+
; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0
255+
; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
256+
; GFNIAVX512-NEXT: retq
277257
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
278258
ret <64 x i8> %res
279259
}
@@ -355,25 +335,12 @@ define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
355335
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
356336
; GFNIAVX2-NEXT: retq
357337
;
358-
; GFNIAVX512F-LABEL: splatconstant_fshr_v64i8:
359-
; GFNIAVX512F: # %bb.0:
360-
; GFNIAVX512F-NEXT: vpsllw $6, %ymm0, %ymm2
361-
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
362-
; GFNIAVX512F-NEXT: vpsllw $6, %ymm0, %ymm0
363-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
364-
; GFNIAVX512F-NEXT: vpsrlw $2, %ymm1, %ymm0
365-
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
366-
; GFNIAVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
367-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
368-
; GFNIAVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
369-
; GFNIAVX512F-NEXT: retq
370-
;
371-
; GFNIAVX512BW-LABEL: splatconstant_fshr_v64i8:
372-
; GFNIAVX512BW: # %bb.0:
373-
; GFNIAVX512BW-NEXT: vpsllw $6, %zmm0, %zmm2
374-
; GFNIAVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm0
375-
; GFNIAVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
376-
; GFNIAVX512BW-NEXT: retq
338+
; GFNIAVX512-LABEL: splatconstant_fshr_v64i8:
339+
; GFNIAVX512: # %bb.0:
340+
; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm2
341+
; GFNIAVX512-NEXT: vpsrlw $2, %zmm1, %zmm0
342+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
343+
; GFNIAVX512-NEXT: retq
377344
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
378345
ret <64 x i8> %res
379346
}

llvm/test/CodeGen/X86/gfni-rotates.ll

Lines changed: 40 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+gfni | FileCheck %s --check-prefixes=GFNISSE
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX2
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512F
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512BW
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX1
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX512
76

87
;
98
; 128 Bit Vector Rotates
@@ -20,31 +19,20 @@ define <16 x i8> @splatconstant_rotl_v16i8(<16 x i8> %a) nounwind {
2019
; GFNISSE-NEXT: por %xmm1, %xmm0
2120
; GFNISSE-NEXT: retq
2221
;
23-
; GFNIAVX1-LABEL: splatconstant_rotl_v16i8:
24-
; GFNIAVX1: # %bb.0:
25-
; GFNIAVX1-NEXT: vpsrlw $5, %xmm0, %xmm1
26-
; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
27-
; GFNIAVX1-NEXT: vpsllw $3, %xmm0, %xmm0
28-
; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29-
; GFNIAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
30-
; GFNIAVX1-NEXT: retq
31-
;
32-
; GFNIAVX2-LABEL: splatconstant_rotl_v16i8:
33-
; GFNIAVX2: # %bb.0:
34-
; GFNIAVX2-NEXT: vpsrlw $5, %xmm0, %xmm1
35-
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
36-
; GFNIAVX2-NEXT: vpsllw $3, %xmm0, %xmm0
37-
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38-
; GFNIAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
39-
; GFNIAVX2-NEXT: retq
22+
; GFNIAVX1OR2-LABEL: splatconstant_rotl_v16i8:
23+
; GFNIAVX1OR2: # %bb.0:
24+
; GFNIAVX1OR2-NEXT: vpsrlw $5, %xmm0, %xmm1
25+
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
26+
; GFNIAVX1OR2-NEXT: vpsllw $3, %xmm0, %xmm0
27+
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28+
; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
29+
; GFNIAVX1OR2-NEXT: retq
4030
;
4131
; GFNIAVX512-LABEL: splatconstant_rotl_v16i8:
4232
; GFNIAVX512: # %bb.0:
4333
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm1
4434
; GFNIAVX512-NEXT: vpsrlw $5, %xmm0, %xmm0
45-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
46-
; GFNIAVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
47-
; GFNIAVX512-NEXT: vzeroupper
35+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
4836
; GFNIAVX512-NEXT: retq
4937
%res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5038
ret <16 x i8> %res
@@ -61,13 +49,20 @@ define <16 x i8> @splatconstant_rotr_v16i8(<16 x i8> %a) nounwind {
6149
; GFNISSE-NEXT: por %xmm1, %xmm0
6250
; GFNISSE-NEXT: retq
6351
;
64-
; GFNIAVX-LABEL: splatconstant_rotr_v16i8:
65-
; GFNIAVX: # %bb.0:
66-
; GFNIAVX-NEXT: vpsrlw $7, %xmm0, %xmm1
67-
; GFNIAVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
68-
; GFNIAVX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
69-
; GFNIAVX-NEXT: vpor %xmm1, %xmm0, %xmm0
70-
; GFNIAVX-NEXT: retq
52+
; GFNIAVX1OR2-LABEL: splatconstant_rotr_v16i8:
53+
; GFNIAVX1OR2: # %bb.0:
54+
; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm1
55+
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
56+
; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
57+
; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
58+
; GFNIAVX1OR2-NEXT: retq
59+
;
60+
; GFNIAVX512-LABEL: splatconstant_rotr_v16i8:
61+
; GFNIAVX512: # %bb.0:
62+
; GFNIAVX512-NEXT: vpsrlw $7, %xmm0, %xmm1
63+
; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm0
64+
; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
65+
; GFNIAVX512-NEXT: retq
7166
%res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>)
7267
ret <16 x i8> %res
7368
}
@@ -126,8 +121,7 @@ define <32 x i8> @splatconstant_rotl_v32i8(<32 x i8> %a) nounwind {
126121
; GFNIAVX512: # %bb.0:
127122
; GFNIAVX512-NEXT: vpsllw $4, %ymm0, %ymm1
128123
; GFNIAVX512-NEXT: vpsrlw $4, %ymm0, %ymm0
129-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
130-
; GFNIAVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
124+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
131125
; GFNIAVX512-NEXT: retq
132126
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
133127
ret <32 x i8> %res
@@ -183,8 +177,7 @@ define <32 x i8> @splatconstant_rotr_v32i8(<32 x i8> %a) nounwind {
183177
; GFNIAVX512: # %bb.0:
184178
; GFNIAVX512-NEXT: vpsllw $2, %ymm0, %ymm1
185179
; GFNIAVX512-NEXT: vpsrlw $6, %ymm0, %ymm0
186-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
187-
; GFNIAVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
180+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
188181
; GFNIAVX512-NEXT: retq
189182
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
190183
ret <32 x i8> %res
@@ -259,24 +252,12 @@ define <64 x i8> @splatconstant_rotl_v64i8(<64 x i8> %a) nounwind {
259252
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
260253
; GFNIAVX2-NEXT: retq
261254
;
262-
; GFNIAVX512F-LABEL: splatconstant_rotl_v64i8:
263-
; GFNIAVX512F: # %bb.0:
264-
; GFNIAVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1
265-
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
266-
; GFNIAVX512F-NEXT: vpsrlw $7, %ymm2, %ymm3
267-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
268-
; GFNIAVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
269-
; GFNIAVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
270-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
271-
; GFNIAVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
272-
; GFNIAVX512F-NEXT: retq
273-
;
274-
; GFNIAVX512BW-LABEL: splatconstant_rotl_v64i8:
275-
; GFNIAVX512BW: # %bb.0:
276-
; GFNIAVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1
277-
; GFNIAVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
278-
; GFNIAVX512BW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
279-
; GFNIAVX512BW-NEXT: retq
255+
; GFNIAVX512-LABEL: splatconstant_rotl_v64i8:
256+
; GFNIAVX512: # %bb.0:
257+
; GFNIAVX512-NEXT: vpsrlw $7, %zmm0, %zmm1
258+
; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0
259+
; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
260+
; GFNIAVX512-NEXT: retq
280261
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
281262
ret <64 x i8> %res
282263
}
@@ -359,24 +340,12 @@ define <64 x i8> @splatconstant_rotr_v64i8(<64 x i8> %a) nounwind {
359340
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
360341
; GFNIAVX2-NEXT: retq
361342
;
362-
; GFNIAVX512F-LABEL: splatconstant_rotr_v64i8:
363-
; GFNIAVX512F: # %bb.0:
364-
; GFNIAVX512F-NEXT: vpsllw $6, %ymm0, %ymm1
365-
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
366-
; GFNIAVX512F-NEXT: vpsllw $6, %ymm2, %ymm3
367-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
368-
; GFNIAVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
369-
; GFNIAVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
370-
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
371-
; GFNIAVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
372-
; GFNIAVX512F-NEXT: retq
373-
;
374-
; GFNIAVX512BW-LABEL: splatconstant_rotr_v64i8:
375-
; GFNIAVX512BW: # %bb.0:
376-
; GFNIAVX512BW-NEXT: vpsllw $6, %zmm0, %zmm1
377-
; GFNIAVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm0
378-
; GFNIAVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
379-
; GFNIAVX512BW-NEXT: retq
343+
; GFNIAVX512-LABEL: splatconstant_rotr_v64i8:
344+
; GFNIAVX512: # %bb.0:
345+
; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm1
346+
; GFNIAVX512-NEXT: vpsrlw $2, %zmm0, %zmm0
347+
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
348+
; GFNIAVX512-NEXT: retq
380349
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
381350
ret <64 x i8> %res
382351
}

0 commit comments

Comments
 (0)