-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][AVX10.2] Support AVX10.2 MOVZXC new Instructions. #108537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Mahesh-Attarde (mahesh-attarde) ChangesRef.: https://cdrdv2.intel.com/v1/dl/getContent/828965 Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS Full diff: https://github.com/llvm/llvm-project/pull/108537.diff 15 Files Affected:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a21e3901f63fea..fb55dca0fda405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,6 +155,7 @@ set(x86_files
avx10_2_512satcvtintrin.h
avx10_2bf16intrin.h
avx10_2convertintrin.h
+ avx10_2copyintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
#include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi32
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi16
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c5b952ff62e24..38999de669c013 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12319,7 +12319,7 @@ static SDValue lowerShuffleAsElementInsertion(
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
- EltVT == MVT::i16) {
+ (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+ (MaskEltSize == 16 &&
+ (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+ def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v4i32 (X86vzmovl
+ (v4i32 VR128X:$src))))]>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i32mem:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v8i16 (X86vzmovl
+ (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ []>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%ecx), %xmm5
+# INTEL: vmovd xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm5, (%ecx)
+# INTEL: vmovd dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT: vmovw %xmm5, (%ecx)
+# INTEL: vmovw dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%ecx), %xmm5
+# INTEL: vmovw xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%rcx), %xmm29
+# INTEL: vmovd xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm29, (%rcx)
+# INTEL: vmovd dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT: vmovw %xmm29, (%rcx)
+# INTEL: vmovw dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%rcx), %xmm29
+# INTEL: vmovw xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd %xmm2, %xmm1
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd xmm1, xmm2
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd %xmm22, %xmm21
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd xmm21, xmm22
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index be1b59eb50c91c..a993cce57696a8 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
{X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
{X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
{X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+ {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+ {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
{X86::VPABSBYrr, X86::VPABSBYrm, 0},
{X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
{X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index f0154b80a80dbe..53a276a9343f54 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
NOCOMP(VPSRAQZ128rm)
NOCOMP(VPSRAQZ128rr)
NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
NOCOMP(VDBPSADBWZ256rmi)
NOCOMP(VDBPSADBWZ256rri)
NOCOMP(VPMAXSQZ256rm)
|
@llvm/pr-subscribers-mc Author: Mahesh-Attarde (mahesh-attarde) ChangesRef.: https://cdrdv2.intel.com/v1/dl/getContent/828965 Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS Full diff: https://github.com/llvm/llvm-project/pull/108537.diff 15 Files Affected:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a21e3901f63fea..fb55dca0fda405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,6 +155,7 @@ set(x86_files
avx10_2_512satcvtintrin.h
avx10_2bf16intrin.h
avx10_2convertintrin.h
+ avx10_2copyintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
#include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi32
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi16
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c5b952ff62e24..38999de669c013 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12319,7 +12319,7 @@ static SDValue lowerShuffleAsElementInsertion(
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
- EltVT == MVT::i16) {
+ (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+ (MaskEltSize == 16 &&
+ (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+ def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v4i32 (X86vzmovl
+ (v4i32 VR128X:$src))))]>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i32mem:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v8i16 (X86vzmovl
+ (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ []>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%ecx), %xmm5
+# INTEL: vmovd xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm5, (%ecx)
+# INTEL: vmovd dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT: vmovw %xmm5, (%ecx)
+# INTEL: vmovw dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%ecx), %xmm5
+# INTEL: vmovw xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%rcx), %xmm29
+# INTEL: vmovd xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm29, (%rcx)
+# INTEL: vmovd dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT: vmovw %xmm29, (%rcx)
+# INTEL: vmovw dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%rcx), %xmm29
+# INTEL: vmovw xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd %xmm2, %xmm1
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd xmm1, xmm2
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd %xmm22, %xmm21
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd xmm21, xmm22
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index be1b59eb50c91c..a993cce57696a8 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
{X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
{X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
{X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+ {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+ {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
{X86::VPABSBYrr, X86::VPABSBYrm, 0},
{X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
{X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index f0154b80a80dbe..53a276a9343f54 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
NOCOMP(VPSRAQZ128rm)
NOCOMP(VPSRAQZ128rr)
NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
NOCOMP(VDBPSADBWZ256rmi)
NOCOMP(VDBPSADBWZ256rri)
NOCOMP(VPMAXSQZ256rm)
|
@phoebewang @FreddyLeaf @KanRobert Can you review please? |
|
||
#undef __DEFAULT_FN_ATTRS128 | ||
|
||
#endif // __AVX10_2COPYINTRIN_H |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No idea how you get this, but when you see this sign, you need to add a new line in the file.
llvm/lib/Target/X86/X86InstrAVX10.td
Outdated
Sched<[WriteVecMoveFromGpr]>; | ||
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", | ||
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto.
|
||
// CHECK: vmovd %xmm2, %xmm1 | ||
// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca] | ||
vmovd %xmm2, %xmm1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing memory tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are not preset in akshay's original patch.
Adding it here.
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, | |||
|
|||
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). | |||
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) || | |||
(MaskEltSize == 16 && Subtarget.hasFP16())) && | |||
(MaskEltSize == 16 && | |||
(Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not necessary. AVX10.2 implies FP16.
; X86-NEXT: vpbroadcastw %eax, %xmm1 | ||
; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0 | ||
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@phoebewang
Removing F16 Check results in this change. I could not decide whether this is positive or negative since uops table entries are missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean keep hasFP16
and remove hasAVX10_2
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
fc41a32
to
e4ab22f
Compare
@phoebewang can you merge please? |
@mahesh-attarde please solve the conflict. |
This reverts commit be0013472904aaa960ff1b5fe1add5b5be79973d.
e4ab22f
to
5c19184
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/146/builds/1186 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/136/builds/960 Here is the relevant piece of the build log for the reference
|
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS --------- Co-authored-by: mattarde <[email protected]>
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS