Skip to content

Commit 45c3fe8

Browse files
authored
[X86] Add test coverage for the concatable sources vpermv3 -> vpermv fold for non-constant shuffle masks (#133415)
Test both forward/reverse concat cases
1 parent b009c5a commit 45c3fe8

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64
4+
5+
; 256-bit, double-element two-source variable permute intrinsic used by the
; tests below. As declared it takes (<4 x double>, <4 x i64>, <4 x double>) and
; returns <4 x double>; both tests pass the non-constant %m argument as the
; middle (<4 x i64>) operand and the two loaded vectors as the outer operands.
; NOTE(review): attribute group #1 is referenced here but is not defined in the
; visible part of this file - confirm that is intentional.
declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>) #1
6+
7+
; Forward-order case: the first permute source is loaded from %p0 and the
; second from %p0 + 32 bytes, so the two sources are adjacent in memory in
; operand order. %m is a function argument, i.e. a non-constant permute operand.
; The autogenerated assertions below expect, on both targets, a single zmm-wide
; vpermpd whose memory operand is the base pointer, with no separate vector load.
define <4 x double> @concat_vpermv3_ops_vpermv_v4f64(ptr %p0, <4 x i64> %m) {
8+
; X86-LABEL: concat_vpermv3_ops_vpermv_v4f64:
9+
; X86: # %bb.0:
10+
; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
12+
; X86-NEXT: vpermpd (%eax), %zmm0, %zmm0
13+
; X86-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
14+
; X86-NEXT: retl
15+
;
16+
; X64-LABEL: concat_vpermv3_ops_vpermv_v4f64:
17+
; X64: # %bb.0:
18+
; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19+
; X64-NEXT: vpermpd (%rdi), %zmm0, %zmm0
20+
; X64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
21+
; X64-NEXT: retq
22+
; %p1 addresses the 32 bytes immediately following the vector at %p0.
%p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32
23+
; %lo comes from the lower address, %hi from the higher one.
%lo = load <4 x double>, ptr %p0, align 32
24+
%hi = load <4 x double>, ptr %p1, align 32
25+
%res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi)
26+
ret <4 x double> %res
27+
}
28+
29+
; Reverse-order case: same two adjacent 32-byte loads as the forward test, but
; the first permute source is loaded from %p0 + 32 and the second from %p0, so
; the sources are adjacent in memory in the opposite order to the operands.
; The autogenerated assertions below currently expect a ymm-wide vpermi2pd with
; a separate vmovapd load of the vector at offset 32, not a single-load form.
define <4 x double> @concat_vpermv3_ops_vpermv_swap_v4f64(ptr %p0, <4 x i64> %m) {
30+
; X86-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
31+
; X86: # %bb.0:
32+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
33+
; X86-NEXT: vmovapd 32(%eax), %ymm1
34+
; X86-NEXT: vpermi2pd (%eax), %ymm1, %ymm0
35+
; X86-NEXT: retl
36+
;
37+
; X64-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
38+
; X64: # %bb.0:
39+
; X64-NEXT: vmovapd 32(%rdi), %ymm1
40+
; X64-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0
41+
; X64-NEXT: retq
42+
; %p1 addresses the 32 bytes immediately following the vector at %p0.
%p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32
43+
; Note the swap relative to the forward test: %lo is read from the higher
; address (%p1) and %hi from the lower one (%p0).
%lo = load <4 x double>, ptr %p1, align 32
44+
%hi = load <4 x double>, ptr %p0, align 32
45+
%res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi)
46+
ret <4 x double> %res
47+
}

0 commit comments

Comments
 (0)