|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86 |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64 |
| 4 | + |
| | +; Two-source variable permute intrinsic on <4 x double>; the calls in this |
| | +; file pass it as (first source, <4 x i64> index vector, second source). |
| | +; NOTE(review): attribute group #1 is referenced here but no matching |
| | +; "attributes #1" definition is visible in this file - confirm it is intentional. |
| 5 | +declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>) #1 |
| 6 | + |
| | +; Two-source permute whose sources are loaded from adjacent 32-byte slots: |
| | +; %lo from %p0 and %hi from %p0+32, in that order. The checks expect both |
| | +; loads and the vpermi2var call to collapse into one zmm-width vpermpd that |
| | +; reads its source directly from memory at %p0, on both i686 and x86_64. |
| 7 | +define <4 x double> @concat_vpermv3_ops_vpermv_v4f64(ptr %p0, <4 x i64> %m) { |
| 8 | +; X86-LABEL: concat_vpermv3_ops_vpermv_v4f64: |
| 9 | +; X86: # %bb.0: |
| 10 | +; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| 11 | +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 12 | +; X86-NEXT: vpermpd (%eax), %zmm0, %zmm0 |
| 13 | +; X86-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| 14 | +; X86-NEXT: retl |
| 15 | +; |
| 16 | +; X64-LABEL: concat_vpermv3_ops_vpermv_v4f64: |
| 17 | +; X64: # %bb.0: |
| 18 | +; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| 19 | +; X64-NEXT: vpermpd (%rdi), %zmm0, %zmm0 |
| 20 | +; X64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| 21 | +; X64-NEXT: retq |
| 22 | + %p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32 |
| 23 | + %lo = load <4 x double>, ptr %p0, align 32 |
| 24 | + %hi = load <4 x double>, ptr %p1, align 32 |
| 25 | + %res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi) |
| 26 | + ret <4 x double> %res |
| 27 | +} |
| 28 | + |
| | +; Swapped-operand variant: the first permute source (%lo) is loaded from |
| | +; %p0+32 and the second (%hi) from %p0, so the sources are not in ascending |
| | +; memory order. The checks expect the permute to stay a ymm-width vpermi2pd, |
| | +; with the %p0+32 vector loaded separately by vmovapd and the %p0 vector |
| | +; used as the instruction's memory operand. |
| 29 | +define <4 x double> @concat_vpermv3_ops_vpermv_swap_v4f64(ptr %p0, <4 x i64> %m) { |
| 30 | +; X86-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64: |
| 31 | +; X86: # %bb.0: |
| 32 | +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 33 | +; X86-NEXT: vmovapd 32(%eax), %ymm1 |
| 34 | +; X86-NEXT: vpermi2pd (%eax), %ymm1, %ymm0 |
| 35 | +; X86-NEXT: retl |
| 36 | +; |
| 37 | +; X64-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64: |
| 38 | +; X64: # %bb.0: |
| 39 | +; X64-NEXT: vmovapd 32(%rdi), %ymm1 |
| 40 | +; X64-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0 |
| 41 | +; X64-NEXT: retq |
| 42 | + %p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32 |
| 43 | + %lo = load <4 x double>, ptr %p1, align 32 |
| 44 | + %hi = load <4 x double>, ptr %p0, align 32 |
| 45 | + %res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi) |
| 46 | + ret <4 x double> %res |
| 47 | +} |
0 commit comments