Skip to content

Commit d556ed5

Browse files
committed
[X86] Add GFNI test coverage without AVX512BW
512-bit GFNI doesn't require AVX512BW
1 parent 4681079 commit d556ed5

File tree

3 files changed: +1388 additions, -538 deletions

llvm/test/CodeGen/X86/gfni-funnel-shifts.ll

Lines changed: 44 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+gfni | FileCheck %s --check-prefixes=GFNISSE
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX1
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX2
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX512
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1OR2,GFNIAVX1
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1OR2,GFNIAVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512VL
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512BW
67

78
;
89
; 128 Bit Vector Funnel Shifts
@@ -248,12 +249,25 @@ define <64 x i8> @splatconstant_fshl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
248249
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
249250
; GFNIAVX2-NEXT: retq
250251
;
251-
; GFNIAVX512-LABEL: splatconstant_fshl_v64i8:
252-
; GFNIAVX512: # %bb.0:
253-
; GFNIAVX512-NEXT: vpsrlw $7, %zmm1, %zmm1
254-
; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0
255-
; GFNIAVX512-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
256-
; GFNIAVX512-NEXT: retq
252+
; GFNIAVX512VL-LABEL: splatconstant_fshl_v64i8:
253+
; GFNIAVX512VL: # %bb.0:
254+
; GFNIAVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2
255+
; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
256+
; GFNIAVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm1
257+
; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
258+
; GFNIAVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
259+
; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
260+
; GFNIAVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
261+
; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
262+
; GFNIAVX512VL-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
263+
; GFNIAVX512VL-NEXT: retq
264+
;
265+
; GFNIAVX512BW-LABEL: splatconstant_fshl_v64i8:
266+
; GFNIAVX512BW: # %bb.0:
267+
; GFNIAVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
268+
; GFNIAVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
269+
; GFNIAVX512BW-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
270+
; GFNIAVX512BW-NEXT: retq
257271
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
258272
ret <64 x i8> %res
259273
}
@@ -335,13 +349,28 @@ define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
335349
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
336350
; GFNIAVX2-NEXT: retq
337351
;
338-
; GFNIAVX512-LABEL: splatconstant_fshr_v64i8:
339-
; GFNIAVX512: # %bb.0:
340-
; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm2
341-
; GFNIAVX512-NEXT: vpsrlw $2, %zmm1, %zmm0
342-
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
343-
; GFNIAVX512-NEXT: retq
352+
; GFNIAVX512VL-LABEL: splatconstant_fshr_v64i8:
353+
; GFNIAVX512VL: # %bb.0:
354+
; GFNIAVX512VL-NEXT: vpsllw $6, %ymm0, %ymm2
355+
; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
356+
; GFNIAVX512VL-NEXT: vpsllw $6, %ymm0, %ymm0
357+
; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
358+
; GFNIAVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm0
359+
; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
360+
; GFNIAVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm1
361+
; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
362+
; GFNIAVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
363+
; GFNIAVX512VL-NEXT: retq
364+
;
365+
; GFNIAVX512BW-LABEL: splatconstant_fshr_v64i8:
366+
; GFNIAVX512BW: # %bb.0:
367+
; GFNIAVX512BW-NEXT: vpsllw $6, %zmm0, %zmm2
368+
; GFNIAVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm0
369+
; GFNIAVX512BW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
370+
; GFNIAVX512BW-NEXT: retq
344371
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
345372
ret <64 x i8> %res
346373
}
347374
declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
375+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
376+
; GFNIAVX: {{.*}}

Commit comments (0)