1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+gfni | FileCheck %s --check-prefixes=GFNISSE
3
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1
4
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX2
5
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512F
6
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512BW
3
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX1
4
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX2
5
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX512
7
6
8
7
;
9
8
; 128 Bit Vector Funnel Shifts
@@ -19,31 +18,20 @@ define <16 x i8> @splatconstant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
19
18
; GFNISSE-NEXT: por %xmm1, %xmm0
20
19
; GFNISSE-NEXT: retq
21
20
;
22
- ; GFNIAVX1-LABEL: splatconstant_fshl_v16i8:
23
- ; GFNIAVX1: # %bb.0:
24
- ; GFNIAVX1-NEXT: vpsrlw $5, %xmm1, %xmm1
25
- ; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
26
- ; GFNIAVX1-NEXT: vpsllw $3, %xmm0, %xmm0
27
- ; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28
- ; GFNIAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
29
- ; GFNIAVX1-NEXT: retq
30
- ;
31
- ; GFNIAVX2-LABEL: splatconstant_fshl_v16i8:
32
- ; GFNIAVX2: # %bb.0:
33
- ; GFNIAVX2-NEXT: vpsrlw $5, %xmm1, %xmm1
34
- ; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
35
- ; GFNIAVX2-NEXT: vpsllw $3, %xmm0, %xmm0
36
- ; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37
- ; GFNIAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
38
- ; GFNIAVX2-NEXT: retq
21
+ ; GFNIAVX1OR2-LABEL: splatconstant_fshl_v16i8:
22
+ ; GFNIAVX1OR2: # %bb.0:
23
+ ; GFNIAVX1OR2-NEXT: vpsrlw $5, %xmm1, %xmm1
24
+ ; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
25
+ ; GFNIAVX1OR2-NEXT: vpsllw $3, %xmm0, %xmm0
26
+ ; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27
+ ; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
28
+ ; GFNIAVX1OR2-NEXT: retq
39
29
;
40
30
; GFNIAVX512-LABEL: splatconstant_fshl_v16i8:
41
31
; GFNIAVX512: # %bb.0:
42
32
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm2
43
33
; GFNIAVX512-NEXT: vpsrlw $5, %xmm1, %xmm0
44
- ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
45
- ; GFNIAVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
46
- ; GFNIAVX512-NEXT: vzeroupper
34
+ ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
47
35
; GFNIAVX512-NEXT: retq
48
36
%res = call <16 x i8 > @llvm.fshl.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > <i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 >)
49
37
ret <16 x i8 > %res
@@ -59,13 +47,20 @@ define <16 x i8> @splatconstant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
59
47
; GFNISSE-NEXT: por %xmm1, %xmm0
60
48
; GFNISSE-NEXT: retq
61
49
;
62
- ; GFNIAVX-LABEL: splatconstant_fshr_v16i8:
63
- ; GFNIAVX: # %bb.0:
64
- ; GFNIAVX-NEXT: vpsrlw $7, %xmm1, %xmm1
65
- ; GFNIAVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
66
- ; GFNIAVX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
67
- ; GFNIAVX-NEXT: vpor %xmm1, %xmm0, %xmm0
68
- ; GFNIAVX-NEXT: retq
50
+ ; GFNIAVX1OR2-LABEL: splatconstant_fshr_v16i8:
51
+ ; GFNIAVX1OR2: # %bb.0:
52
+ ; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm1, %xmm1
53
+ ; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
54
+ ; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
55
+ ; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
56
+ ; GFNIAVX1OR2-NEXT: retq
57
+ ;
58
+ ; GFNIAVX512-LABEL: splatconstant_fshr_v16i8:
59
+ ; GFNIAVX512: # %bb.0:
60
+ ; GFNIAVX512-NEXT: vpsrlw $7, %xmm1, %xmm1
61
+ ; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm0
62
+ ; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
63
+ ; GFNIAVX512-NEXT: retq
69
64
%res = call <16 x i8 > @llvm.fshr.v16i8 (<16 x i8 > %a , <16 x i8 > %b , <16 x i8 > <i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 , i8 7 >)
70
65
ret <16 x i8 > %res
71
66
}
@@ -124,8 +119,7 @@ define <32 x i8> @splatconstant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
124
119
; GFNIAVX512: # %bb.0:
125
120
; GFNIAVX512-NEXT: vpsllw $4, %ymm0, %ymm2
126
121
; GFNIAVX512-NEXT: vpsrlw $4, %ymm1, %ymm0
127
- ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
128
- ; GFNIAVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
122
+ ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
129
123
; GFNIAVX512-NEXT: retq
130
124
%res = call <32 x i8 > @llvm.fshl.v32i8 (<32 x i8 > %a , <32 x i8 > %b , <32 x i8 > <i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 >)
131
125
ret <32 x i8 > %res
@@ -181,8 +175,7 @@ define <32 x i8> @splatconstant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
181
175
; GFNIAVX512: # %bb.0:
182
176
; GFNIAVX512-NEXT: vpsllw $2, %ymm0, %ymm2
183
177
; GFNIAVX512-NEXT: vpsrlw $6, %ymm1, %ymm0
184
- ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
185
- ; GFNIAVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
178
+ ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
186
179
; GFNIAVX512-NEXT: retq
187
180
%res = call <32 x i8 > @llvm.fshr.v32i8 (<32 x i8 > %a , <32 x i8 > %b , <32 x i8 > <i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 , i8 6 >)
188
181
ret <32 x i8 > %res
@@ -255,25 +248,12 @@ define <64 x i8> @splatconstant_fshl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
255
248
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
256
249
; GFNIAVX2-NEXT: retq
257
250
;
258
- ; GFNIAVX512F-LABEL: splatconstant_fshl_v64i8:
259
- ; GFNIAVX512F: # %bb.0:
260
- ; GFNIAVX512F-NEXT: vpsrlw $7, %ymm1, %ymm2
261
- ; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
262
- ; GFNIAVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1
263
- ; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
264
- ; GFNIAVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm2
265
- ; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
266
- ; GFNIAVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
267
- ; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
268
- ; GFNIAVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
269
- ; GFNIAVX512F-NEXT: retq
270
- ;
271
- ; GFNIAVX512BW-LABEL: splatconstant_fshl_v64i8:
272
- ; GFNIAVX512BW: # %bb.0:
273
- ; GFNIAVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
274
- ; GFNIAVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
275
- ; GFNIAVX512BW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
276
- ; GFNIAVX512BW-NEXT: retq
251
+ ; GFNIAVX512-LABEL: splatconstant_fshl_v64i8:
252
+ ; GFNIAVX512: # %bb.0:
253
+ ; GFNIAVX512-NEXT: vpsrlw $7, %zmm1, %zmm1
254
+ ; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0
255
+ ; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
256
+ ; GFNIAVX512-NEXT: retq
277
257
%res = call <64 x i8 > @llvm.fshl.v64i8 (<64 x i8 > %a , <64 x i8 > %b , <64 x i8 > <i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 >)
278
258
ret <64 x i8 > %res
279
259
}
@@ -355,25 +335,12 @@ define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
355
335
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
356
336
; GFNIAVX2-NEXT: retq
357
337
;
358
- ; GFNIAVX512F-LABEL: splatconstant_fshr_v64i8:
359
- ; GFNIAVX512F: # %bb.0:
360
- ; GFNIAVX512F-NEXT: vpsllw $6, %ymm0, %ymm2
361
- ; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
362
- ; GFNIAVX512F-NEXT: vpsllw $6, %ymm0, %ymm0
363
- ; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
364
- ; GFNIAVX512F-NEXT: vpsrlw $2, %ymm1, %ymm0
365
- ; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
366
- ; GFNIAVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
367
- ; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
368
- ; GFNIAVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
369
- ; GFNIAVX512F-NEXT: retq
370
- ;
371
- ; GFNIAVX512BW-LABEL: splatconstant_fshr_v64i8:
372
- ; GFNIAVX512BW: # %bb.0:
373
- ; GFNIAVX512BW-NEXT: vpsllw $6, %zmm0, %zmm2
374
- ; GFNIAVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm0
375
- ; GFNIAVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
376
- ; GFNIAVX512BW-NEXT: retq
338
+ ; GFNIAVX512-LABEL: splatconstant_fshr_v64i8:
339
+ ; GFNIAVX512: # %bb.0:
340
+ ; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm2
341
+ ; GFNIAVX512-NEXT: vpsrlw $2, %zmm1, %zmm0
342
+ ; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
343
+ ; GFNIAVX512-NEXT: retq
377
344
%res = call <64 x i8 > @llvm.fshr.v64i8 (<64 x i8 > %a , <64 x i8 > %b , <64 x i8 > <i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 , i8 2 >)
378
345
ret <64 x i8 > %res
379
346
}
0 commit comments