Skip to content

[X86][AVX10.2] Support AVX10.2 MOVZXC new Instructions. #108537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 18, 2024

Conversation

mahesh-attarde
Copy link
Contributor

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics mc Machine (object) code labels Sep 13, 2024
@llvmbot
Copy link
Member

llvmbot commented Sep 13, 2024

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Mahesh-Attarde (mahesh-attarde)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS


Full diff: https://github.com/llvm/llvm-project/pull/108537.diff

15 Files Affected:

  • (modified) clang/lib/Headers/CMakeLists.txt (+1)
  • (added) clang/lib/Headers/avx10_2copyintrin.h (+34)
  • (modified) clang/lib/Headers/immintrin.h (+1)
  • (added) clang/test/CodeGen/X86/avx512copy-builtins.c (+17)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3-2)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+64)
  • (added) llvm/test/CodeGen/X86/avx512copy-intrinsics.ll (+35)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt (+34)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt (+34)
  • (added) llvm/test/MC/X86/avx10.2-copy-32-att.s (+17)
  • (added) llvm/test/MC/X86/avx10.2-copy-32-intel.s (+17)
  • (added) llvm/test/MC/X86/avx10.2-copy-64-att.s (+17)
  • (added) llvm/test/MC/X86/avx10.2-copy-64-intel.s (+17)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+2)
  • (modified) llvm/utils/TableGen/X86ManualInstrMapping.def (+1)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a21e3901f63fea..fb55dca0fda405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,6 +155,7 @@ set(x86_files
   avx10_2_512satcvtintrin.h
   avx10_2bf16intrin.h
   avx10_2convertintrin.h
+  avx10_2copyintrin.h
   avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+  return (__m128i)__builtin_shufflevector(
+      (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+  return (__m128i)__builtin_shufflevector(
+      (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
 #include <avx10_2bf16intrin.h>
 #include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
 #include <avx10_2minmaxintrin.h>
 #include <avx10_2niintrin.h>
 #include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi32
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c5b952ff62e24..38999de669c013 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12319,7 +12319,7 @@ static SDValue lowerShuffleAsElementInsertion(
     }
     V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
   } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
-             EltVT == MVT::i16) {
+             (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
     // Either not inserting from the low element of the input or the input
     // element size is too small to use VZEXT_MOVL to clear the high bits.
     return SDValue();
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
 
   // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
   if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
-       (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+       (MaskEltSize == 16 &&
+        (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
       isUndefOrEqual(Mask[0], 0) &&
       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
     Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
 defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
                                               X86Fnmsub, SchedWriteFMA>;
 }
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                    (ins VR128X:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}",
+                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
+                                    (v4i32 VR128X:$src))))]>, EVEX,
+                                    Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                                    (ins i32mem:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                     EVEX_CD8<32, CD8VT1>,
+                                     Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+                                    (ins i32mem:$dst, VR128X:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<32, CD8VT1>,
+                                    Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+                                     (ins VR128X:$src),
+                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                     Sched<[WriteVecMoveFromGpr]>;
+  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+                                  (ins VR128X:$src),
+                                  "vmovw\t{$src, $dst|$dst, $src}",
+                                  [(set VR128X:$dst, (v8i16 (X86vzmovl
+                                  (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+                                  Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+                                    (ins i16mem:$src),
+                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
+                                    Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+                                    (ins i32mem:$dst, VR128X:$src),
+                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
+                                    Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                     (ins VR128X:$src),
+                                     "vmovw\t{$src, $dst|$dst, $src}",
+                                     []>, EVEX, T_MAP5,
+                                     Sched<[WriteVecMoveFromGpr]>;
+  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102:       # %bb.0:
+; AVX102-NEXT:    vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT:    retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC:       # %bb.0:
+; NOAVX512MOVZXC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT:    vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT:    retq # encoding: [0xc3]
+  %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102:       # %bb.0:
+; AVX102-NEXT:    vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT:    retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC:       # %bb.0:
+; NOAVX512MOVZXC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT:    vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT:    retq # encoding: [0xc3]
+  %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovd   (%ecx), %xmm5
+# INTEL: vmovd   xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovd   %xmm5, (%ecx)
+# INTEL: vmovd   dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT:   vmovd   %xmm2, %xmm1
+# INTEL: vmovd   xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT:   vmovd   %xmm2, %xmm1
+# INTEL: vmovd   xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT:   vmovw   %xmm5, (%ecx)
+# INTEL: vmovw   dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovw   (%ecx), %xmm5
+# INTEL: vmovw   xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT:   vmovw   %xmm2, %xmm1
+# INTEL: vmovw   xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT:   vmovw   %xmm2, %xmm1
+# INTEL: vmovw   xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovd   (%rcx), %xmm29
+# INTEL: vmovd   xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovd   %xmm29, (%rcx)
+# INTEL: vmovd   dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT:   vmovd   %xmm22, %xmm21
+# INTEL: vmovd   xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT:   vmovd   %xmm22, %xmm21
+# INTEL: vmovd   xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT:   vmovw   %xmm29, (%rcx)
+# INTEL: vmovw   dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovw   (%rcx), %xmm29
+# INTEL: vmovw   xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT:   vmovw   %xmm22, %xmm21
+# INTEL: vmovw   xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT:   vmovw   %xmm22, %xmm21
+# INTEL: vmovw   xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+          vmovd   %xmm2, %xmm1
+
+// CHECK: vmovd   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+          vmovd.s   %xmm2, %xmm1
+
+// CHECK: vmovw   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+          vmovw   %xmm2, %xmm1
+
+// CHECK: vmovw   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+          vmovw.s   %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+          vmovd   xmm1, xmm2
+
+// CHECK: vmovd   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+          vmovd.s   xmm1, xmm2
+
+// CHECK: vmovw   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+          vmovw   xmm1, xmm2
+
+// CHECK: vmovw   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+          vmovw.s   xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+          vmovd   %xmm22, %xmm21
+
+// CHECK: vmovd   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+          vmovd.s   %xmm22, %xmm21
+
+// CHECK: vmovw   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+          vmovw   %xmm22, %xmm21
+
+// CHECK: vmovw   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+          vmovw.s   %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+          vmovd   xmm21, xmm22
+
+// CHECK: vmovd   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+          vmovd.s   xmm21, xmm22
+
+// CHECK: vmovw   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+          vmovw   xmm21, xmm22
+
+// CHECK: vmovw   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+          vmovw.s   xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index be1b59eb50c91c..a993cce57696a8 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
   {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
   {X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
   {X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+  {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
   {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
   {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+  {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
   {X86::VPABSBYrr, X86::VPABSBYrm, 0},
   {X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
   {X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index f0154b80a80dbe..53a276a9343f54 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
 NOCOMP(VPSRAQZ128rm)
 NOCOMP(VPSRAQZ128rr)
 NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
 NOCOMP(VDBPSADBWZ256rmi)
 NOCOMP(VDBPSADBWZ256rri)
 NOCOMP(VPMAXSQZ256rm)

@llvmbot
Copy link
Member

llvmbot commented Sep 13, 2024

@llvm/pr-subscribers-mc

Author: Mahesh-Attarde (mahesh-attarde)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS


Full diff: https://github.com/llvm/llvm-project/pull/108537.diff

15 Files Affected:

  • (modified) clang/lib/Headers/CMakeLists.txt (+1)
  • (added) clang/lib/Headers/avx10_2copyintrin.h (+34)
  • (modified) clang/lib/Headers/immintrin.h (+1)
  • (added) clang/test/CodeGen/X86/avx512copy-builtins.c (+17)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3-2)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+64)
  • (added) llvm/test/CodeGen/X86/avx512copy-intrinsics.ll (+35)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt (+34)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt (+34)
  • (added) llvm/test/MC/X86/avx10.2-copy-32-att.s (+17)
  • (added) llvm/test/MC/X86/avx10.2-copy-32-intel.s (+17)
  • (added) llvm/test/MC/X86/avx10.2-copy-64-att.s (+17)
  • (added) llvm/test/MC/X86/avx10.2-copy-64-intel.s (+17)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+2)
  • (modified) llvm/utils/TableGen/X86ManualInstrMapping.def (+1)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a21e3901f63fea..fb55dca0fda405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,6 +155,7 @@ set(x86_files
   avx10_2_512satcvtintrin.h
   avx10_2bf16intrin.h
   avx10_2convertintrin.h
+  avx10_2copyintrin.h
   avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+  return (__m128i)__builtin_shufflevector(
+      (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+  return (__m128i)__builtin_shufflevector(
+      (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
 #include <avx10_2bf16intrin.h>
 #include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
 #include <avx10_2minmaxintrin.h>
 #include <avx10_2niintrin.h>
 #include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi32
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c5b952ff62e24..38999de669c013 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12319,7 +12319,7 @@ static SDValue lowerShuffleAsElementInsertion(
     }
     V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
   } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
-             EltVT == MVT::i16) {
+             (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
     // Either not inserting from the low element of the input or the input
     // element size is too small to use VZEXT_MOVL to clear the high bits.
     return SDValue();
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
 
   // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
   if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
-       (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+       (MaskEltSize == 16 &&
+        (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
       isUndefOrEqual(Mask[0], 0) &&
       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
     Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
 defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
                                               X86Fnmsub, SchedWriteFMA>;
 }
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                    (ins VR128X:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}",
+                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
+                                    (v4i32 VR128X:$src))))]>, EVEX,
+                                    Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                                    (ins i32mem:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                     EVEX_CD8<32, CD8VT1>,
+                                     Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+                                    (ins i32mem:$dst, VR128X:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<32, CD8VT1>,
+                                    Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+                                     (ins VR128X:$src),
+                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                     Sched<[WriteVecMoveFromGpr]>;
+  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+                                  (ins VR128X:$src),
+                                  "vmovw\t{$src, $dst|$dst, $src}",
+                                  [(set VR128X:$dst, (v8i16 (X86vzmovl
+                                  (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+                                  Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+                                    (ins i16mem:$src),
+                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
+                                    Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+                                    (ins i32mem:$dst, VR128X:$src),
+                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
+                                    Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                     (ins VR128X:$src),
+                                     "vmovw\t{$src, $dst|$dst, $src}",
+                                     []>, EVEX, T_MAP5,
+                                     Sched<[WriteVecMoveFromGpr]>;
+  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102:       # %bb.0:
+; AVX102-NEXT:    vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT:    retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC:       # %bb.0:
+; NOAVX512MOVZXC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT:    vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT:    retq # encoding: [0xc3]
+  %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102:       # %bb.0:
+; AVX102-NEXT:    vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT:    retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC:       # %bb.0:
+; NOAVX512MOVZXC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT:    vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT:    retq # encoding: [0xc3]
+  %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovd   (%ecx), %xmm5
+# INTEL: vmovd   xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovd   %xmm5, (%ecx)
+# INTEL: vmovd   dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT:   vmovd   %xmm2, %xmm1
+# INTEL: vmovd   xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT:   vmovd   %xmm2, %xmm1
+# INTEL: vmovd   xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT:   vmovw   %xmm5, (%ecx)
+# INTEL: vmovw   dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovw   (%ecx), %xmm5
+# INTEL: vmovw   xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT:   vmovw   %xmm2, %xmm1
+# INTEL: vmovw   xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT:   vmovw   %xmm2, %xmm1
+# INTEL: vmovw   xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovd   (%rcx), %xmm29
+# INTEL: vmovd   xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovd   %xmm29, (%rcx)
+# INTEL: vmovd   dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT:   vmovd   %xmm22, %xmm21
+# INTEL: vmovd   xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT:   vmovd   %xmm22, %xmm21
+# INTEL: vmovd   xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT:   vmovw   %xmm29, (%rcx)
+# INTEL: vmovw   dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovw   (%rcx), %xmm29
+# INTEL: vmovw   xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT:   vmovw   %xmm22, %xmm21
+# INTEL: vmovw   xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT:   vmovw   %xmm22, %xmm21
+# INTEL: vmovw   xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+          vmovd   %xmm2, %xmm1
+
+// CHECK: vmovd   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+          vmovd.s   %xmm2, %xmm1
+
+// CHECK: vmovw   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+          vmovw   %xmm2, %xmm1
+
+// CHECK: vmovw   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+          vmovw.s   %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+          vmovd   xmm1, xmm2
+
+// CHECK: vmovd   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+          vmovd.s   xmm1, xmm2
+
+// CHECK: vmovw   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+          vmovw   xmm1, xmm2
+
+// CHECK: vmovw   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+          vmovw.s   xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+          vmovd   %xmm22, %xmm21
+
+// CHECK: vmovd   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+          vmovd.s   %xmm22, %xmm21
+
+// CHECK: vmovw   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+          vmovw   %xmm22, %xmm21
+
+// CHECK: vmovw   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+          vmovw.s   %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+          vmovd   xmm21, xmm22
+
+// CHECK: vmovd   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+          vmovd.s   xmm21, xmm22
+
+// CHECK: vmovw   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+          vmovw   xmm21, xmm22
+
+// CHECK: vmovw   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+          vmovw.s   xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index be1b59eb50c91c..a993cce57696a8 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
   {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
   {X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
   {X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+  {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
   {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
   {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+  {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
   {X86::VPABSBYrr, X86::VPABSBYrm, 0},
   {X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
   {X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index f0154b80a80dbe..53a276a9343f54 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
 NOCOMP(VPSRAQZ128rm)
 NOCOMP(VPSRAQZ128rr)
 NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
 NOCOMP(VDBPSADBWZ256rmi)
 NOCOMP(VDBPSADBWZ256rri)
 NOCOMP(VPMAXSQZ256rm)

@mahesh-attarde
Copy link
Contributor Author

@phoebewang @FreddyLeaf @KanRobert Can you review please?


#undef __DEFAULT_FN_ATTRS128

#endif // __AVX10_2COPYINTRIN_H
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No idea how you get this, but when you see this sign, you need to add a new line in the file.

Sched<[WriteVecMoveFromGpr]>;
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto.


// CHECK: vmovd %xmm2, %xmm1
// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
vmovd %xmm2, %xmm1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing memory tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are not preset in akshay's original patch.
Adding it here.

@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,

// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
(MaskEltSize == 16 && Subtarget.hasFP16())) &&
(MaskEltSize == 16 &&
(Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not necessary. AVX10.2 implies FP16.

; X86-NEXT: vpbroadcastw %eax, %xmm1
; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
Copy link
Contributor Author

@mahesh-attarde mahesh-attarde Sep 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phoebewang
Removing F16 Check results in this change. I could not decide whether this is positive or negative since uops table entries are missing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean keep hasFP16 and remove hasAVX10_2.

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@mahesh-attarde mahesh-attarde force-pushed the copy branch 2 times, most recently from fc41a32 to e4ab22f Compare September 18, 2024 08:39
@mahesh-attarde
Copy link
Contributor Author

@phoebewang can you merge please?

@phoebewang
Copy link
Contributor

@mahesh-attarde please solve the conflict.

@phoebewang phoebewang merged commit 311e4e3 into llvm:main Sep 18, 2024
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 19, 2024

LLVM Buildbot has detected a new failure on builder lld-x86_64-win running on as-worker-93 while building clang,llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/146/builds/1186

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM-Unit :: Support/./SupportTests.exe/36/87' FAILED ********************
Script(shard):
--
GTEST_OUTPUT=json:C:\a\lld-x86_64-win\build\unittests\Support\.\SupportTests.exe-LLVM-Unit-17480-36-87.json GTEST_SHUFFLE=0 GTEST_TOTAL_SHARDS=87 GTEST_SHARD_INDEX=36 C:\a\lld-x86_64-win\build\unittests\Support\.\SupportTests.exe
--

Script:
--
C:\a\lld-x86_64-win\build\unittests\Support\.\SupportTests.exe --gtest_filter=ProgramEnvTest.CreateProcessLongPath
--
C:\a\lld-x86_64-win\llvm-project\llvm\unittests\Support\ProgramTest.cpp(160): error: Expected equality of these values:
  0
  RC
    Which is: -2

C:\a\lld-x86_64-win\llvm-project\llvm\unittests\Support\ProgramTest.cpp(163): error: fs::remove(Twine(LongPath)): did not return errc::success.
error number: 13
error message: permission denied



C:\a\lld-x86_64-win\llvm-project\llvm\unittests\Support\ProgramTest.cpp:160
Expected equality of these values:
  0
  RC
    Which is: -2

C:\a\lld-x86_64-win\llvm-project\llvm\unittests\Support\ProgramTest.cpp:163
fs::remove(Twine(LongPath)): did not return errc::success.
error number: 13
error message: permission denied




********************


@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 19, 2024

LLVM Buildbot has detected a new failure on builder clang-s390x-linux-lnt running on systemz-1 while building clang,llvm at step 7 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/136/builds/960

Here is the relevant piece of the build log for the reference
Step 7 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'libFuzzer-s390x-default-Linux :: fuzzer-timeout.test' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 1: /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/lib/fuzzer  /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/TimeoutTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest
+ /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/lib/fuzzer /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/TimeoutTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest
RUN: at line 2: /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/lib/fuzzer  /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/TimeoutEmptyTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutEmptyTest
+ /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/lib/fuzzer /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/TimeoutEmptyTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutEmptyTest
RUN: at line 3: not  /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1 2>&1 | FileCheck /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test --check-prefix=TimeoutTest
+ FileCheck /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test --check-prefix=TimeoutTest
+ not /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1
RUN: at line 12: not  /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1 /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/hi.txt 2>&1 | FileCheck /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test --check-prefix=SingleInputTimeoutTest
+ not /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1 /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/hi.txt
+ FileCheck /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test --check-prefix=SingleInputTimeoutTest
RUN: at line 16: /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1 -timeout_exitcode=0
+ /home/uweigand/sandbox/buildbot/clang-s390x-linux-lnt/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1 -timeout_exitcode=0
INFO: Running with entropic power schedule (0xFF, 100).
INFO: Seed: 815710211
INFO: Loaded 1 modules   (13 inline 8-bit counters): 13 [0x2aa0cd69e50, 0x2aa0cd69e5d), 
INFO: Loaded 1 PC tables (13 PCs): 13 [0x2aa0cd69e60,0x2aa0cd69f30), 
INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 4096 bytes
INFO: A corpus is not provided, starting from an empty corpus
#2	INITED cov: 2 ft: 2 corp: 1/1b exec/s: 0 rss: 32Mb
#2039	NEW    cov: 3 ft: 3 corp: 2/2b lim: 21 exec/s: 0 rss: 32Mb L: 1/1 MS: 2 ChangeBit-ChangeBit-
#2155	NEW    cov: 4 ft: 4 corp: 3/4b lim: 21 exec/s: 0 rss: 32Mb L: 2/2 MS: 1 CopyPart-
#19036	NEW    cov: 5 ft: 5 corp: 4/6b lim: 184 exec/s: 0 rss: 33Mb L: 2/2 MS: 1 InsertByte-
#19047	NEW    cov: 6 ft: 6 corp: 5/10b lim: 184 exec/s: 0 rss: 33Mb L: 4/4 MS: 1 CrossOver-
#19089	REDUCE cov: 6 ft: 6 corp: 5/9b lim: 184 exec/s: 0 rss: 33Mb L: 3/3 MS: 2 EraseBytes-CrossOver-
ALARM: working on the last Unit for 1 seconds
       and the timeout value is 1 (use -timeout=N to change)
MS: 2 CrossOver-InsertRepeatedBytes-; base unit: 0c1bc52c50016933679b0980ccff3680e5831162
0x48,0x69,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0xa,
Hi!!!!!!!!!!!!!!!!!!!!!!!!!!!!\012
artifact_prefix='./'; Test unit written to ./timeout-23bc5ddb0cea9da4053ea32b85f3eeb2d02bc192
Base64: SGkhISEhISEhISEhISEhISEhISEhISEhISEhISEhCg==
==2710321== ERROR: libFuzzer: timeout after 1 seconds
AddressSanitizer:DEADLYSIGNAL
=================================================================
AddressSanitizer:DEADLYSIGNAL
=================================================================
AddressSanitizer: CHECK failed: asan_report.cpp:199 "((current_error_.kind)) == ((kErrorKindInvalid))" (0x1, 0x0) (tid=2710321)
    <empty stack>

MS: 2 CrossOver-InsertRepeatedBytes-; base unit: 0c1bc52c50016933679b0980ccff3680e5831162
0x48,0x69,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0xa,
Hi!!!!!!!!!!!!!!!!!!!!!!!!!!!!\012
artifact_prefix='./'; Test unit written to ./crash-23bc5ddb0cea9da4053ea32b85f3eeb2d02bc192
Base64: SGkhISEhISEhISEhISEhISEhISEhISEhISEhISEhCg==

...

tmsri pushed a commit to tmsri/llvm-project that referenced this pull request Sep 19, 2024
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS

---------

Co-authored-by: mattarde <[email protected]>
@mahesh-attarde mahesh-attarde deleted the copy branch September 20, 2024 17:27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants