|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 | 2 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+gfni | FileCheck %s --check-prefixes=GFNISSE
|
3 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX1 |
4 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX1OR2,GFNIAVX2 |
5 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX512 |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1OR2,GFNIAVX1 |
| 4 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1OR2,GFNIAVX2 |
| 5 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512VL |
| 6 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512,GFNIAVX512BW |
6 | 7 |
|
7 | 8 | ;
|
8 | 9 | ; 128 Bit Vector Funnel Shifts
|
@@ -248,12 +249,25 @@ define <64 x i8> @splatconstant_fshl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
|
248 | 249 | ; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
|
249 | 250 | ; GFNIAVX2-NEXT: retq
|
250 | 251 | ;
|
251 |
| -; GFNIAVX512-LABEL: splatconstant_fshl_v64i8: |
252 |
| -; GFNIAVX512: # %bb.0: |
253 |
| -; GFNIAVX512-NEXT: vpsrlw $7, %zmm1, %zmm1 |
254 |
| -; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0 |
255 |
| -; GFNIAVX512-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 |
256 |
| -; GFNIAVX512-NEXT: retq |
| 252 | +; GFNIAVX512VL-LABEL: splatconstant_fshl_v64i8: |
| 253 | +; GFNIAVX512VL: # %bb.0: |
| 254 | +; GFNIAVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm2 |
| 255 | +; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 |
| 256 | +; GFNIAVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm1 |
| 257 | +; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 |
| 258 | +; GFNIAVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 |
| 259 | +; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 |
| 260 | +; GFNIAVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0 |
| 261 | +; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 |
| 262 | +; GFNIAVX512VL-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 |
| 263 | +; GFNIAVX512VL-NEXT: retq |
| 264 | +; |
| 265 | +; GFNIAVX512BW-LABEL: splatconstant_fshl_v64i8: |
| 266 | +; GFNIAVX512BW: # %bb.0: |
| 267 | +; GFNIAVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1 |
| 268 | +; GFNIAVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 |
| 269 | +; GFNIAVX512BW-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 |
| 270 | +; GFNIAVX512BW-NEXT: retq |
257 | 271 | %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
|
258 | 272 | ret <64 x i8> %res
|
259 | 273 | }
|
@@ -335,13 +349,28 @@ define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
|
335 | 349 | ; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
|
336 | 350 | ; GFNIAVX2-NEXT: retq
|
337 | 351 | ;
|
338 |
| -; GFNIAVX512-LABEL: splatconstant_fshr_v64i8: |
339 |
| -; GFNIAVX512: # %bb.0: |
340 |
| -; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm2 |
341 |
| -; GFNIAVX512-NEXT: vpsrlw $2, %zmm1, %zmm0 |
342 |
| -; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0 |
343 |
| -; GFNIAVX512-NEXT: retq |
| 352 | +; GFNIAVX512VL-LABEL: splatconstant_fshr_v64i8: |
| 353 | +; GFNIAVX512VL: # %bb.0: |
| 354 | +; GFNIAVX512VL-NEXT: vpsllw $6, %ymm0, %ymm2 |
| 355 | +; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 |
| 356 | +; GFNIAVX512VL-NEXT: vpsllw $6, %ymm0, %ymm0 |
| 357 | +; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2 |
| 358 | +; GFNIAVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm0 |
| 359 | +; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 |
| 360 | +; GFNIAVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm1 |
| 361 | +; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 |
| 362 | +; GFNIAVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0 |
| 363 | +; GFNIAVX512VL-NEXT: retq |
| 364 | +; |
| 365 | +; GFNIAVX512BW-LABEL: splatconstant_fshr_v64i8: |
| 366 | +; GFNIAVX512BW: # %bb.0: |
| 367 | +; GFNIAVX512BW-NEXT: vpsllw $6, %zmm0, %zmm2 |
| 368 | +; GFNIAVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm0 |
| 369 | +; GFNIAVX512BW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0 |
| 370 | +; GFNIAVX512BW-NEXT: retq |
344 | 371 | %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
|
345 | 372 | ret <64 x i8> %res
|
346 | 373 | }
|
347 | 374 | declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
|
| 375 | +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| 376 | +; GFNIAVX: {{.*}} |
0 commit comments