-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Support SM4 EVEX version intrinsics/instructions. #113402
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-x86 Author: Freddy Ye (FreddyLeaf) ChangesRef.: https://cdrdv2.intel.com/v1/dl/getContent/671368 Patch is 59.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113402.diff 16 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..4486eb73a11fa6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
// AVX10 MINMAX
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..6a594dad0b67d2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -241,6 +241,7 @@ set(x86_files
shaintrin.h
sm3intrin.h
sm4intrin.h
+ sm4evexintrin.h
smmintrin.h
tbmintrin.h
tmmintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..1b83dd2162707c 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) {
#include <avx10_2_512niintrin.h>
#include <avx10_2_512satcvtdsintrin.h>
#include <avx10_2_512satcvtintrin.h>
+#if (defined(__SM4__))
+#include <sm4evexintrin.h>
+#endif
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 00000000000000..f6ae0037baea03
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H
diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 00000000000000..0e54bd008d4fb0
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return _mm512_sm4rnds4_epi32(__A, __B);
+}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..16764210537689 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
* Support ISA of `AVX10.2-256` and `AVX10.2-512`.
+* Support ISA of `SM4(EVEX)`.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..7725bda1f4f598 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
+def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// RAO-INT intrinsics
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..640011f5ed28d7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
+
+// SM4(EVEX)
+multiclass avx10_sm4_base<string OpStr> {
+ // SM4_Base is in X86InstrSSE.td.
+ let Predicates = [HasSM4, HasAVX10_2] in {
+ defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
+ "128", avx512vl_i32_info.info128.LdFrag,
+ avx512vl_i32_info.info128.MemOp>, EVEX_V128;
+ defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
+ "256", avx512vl_i32_info.info256.LdFrag,
+ avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+ }
+ let Predicates = [HasSM4, HasAVX10_2_512] in
+ defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
+ "512", avx512vl_i32_info.info512.LdFrag,
+ avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 00000000000000..fc46d3cf23fd41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+ ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+ ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+ ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+ ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 00000000000000..f89f4b5a8c0fb8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT: vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT: vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 00000000000000..c1cb271a967b13
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT: vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT: vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT: vsm4key4 268435456(%esp...
[truncated]
|
@llvm/pr-subscribers-clang Author: Freddy Ye (FreddyLeaf) ChangesRef.: https://cdrdv2.intel.com/v1/dl/getContent/671368 Patch is 59.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113402.diff 16 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..4486eb73a11fa6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
// AVX10 MINMAX
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..6a594dad0b67d2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -241,6 +241,7 @@ set(x86_files
shaintrin.h
sm3intrin.h
sm4intrin.h
+ sm4evexintrin.h
smmintrin.h
tbmintrin.h
tmmintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..1b83dd2162707c 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) {
#include <avx10_2_512niintrin.h>
#include <avx10_2_512satcvtdsintrin.h>
#include <avx10_2_512satcvtintrin.h>
+#if (defined(__SM4__))
+#include <sm4evexintrin.h>
+#endif
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 00000000000000..f6ae0037baea03
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H
diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 00000000000000..0e54bd008d4fb0
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return _mm512_sm4rnds4_epi32(__A, __B);
+}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..16764210537689 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
* Support ISA of `AVX10.2-256` and `AVX10.2-512`.
+* Support ISA of `SM4(EVEX)`.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..7725bda1f4f598 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
+def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// RAO-INT intrinsics
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..640011f5ed28d7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
+
+// SM4(EVEX)
+multiclass avx10_sm4_base<string OpStr> {
+ // SM4_Base is in X86InstrSSE.td.
+ let Predicates = [HasSM4, HasAVX10_2] in {
+ defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
+ "128", avx512vl_i32_info.info128.LdFrag,
+ avx512vl_i32_info.info128.MemOp>, EVEX_V128;
+ defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
+ "256", avx512vl_i32_info.info256.LdFrag,
+ avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+ }
+ let Predicates = [HasSM4, HasAVX10_2_512] in
+ defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
+ "512", avx512vl_i32_info.info512.LdFrag,
+ avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 00000000000000..fc46d3cf23fd41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+ ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+ ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+ ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+ ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 00000000000000..f89f4b5a8c0fb8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT: vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT: vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 00000000000000..c1cb271a967b13
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT: vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT: vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT: vsm4key4 268435456(%esp...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/146/builds/1473 Here is the relevant piece of the build log for the reference
|
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368