Skip to content

Commit 5aa1275

Browse files
authored
[X86] Support SM4 EVEX version intrinsics/instructions. (#113402)
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
1 parent 39ac64c commit 5aa1275

File tree

17 files changed

+1232
-0
lines changed

17 files changed

+1232
-0
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,10 @@ X86 Support
628628
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
629629
``*_(mask(z)))_minmax_s[s|d|h]``.
630630

631+
- Supported intrinsics for ``SM4 and AVX10.2``.
632+
* Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and
633+
``_mm512_sm4rnds4_epi32``.
634+
631635
- All intrinsics in adcintrin.h can now be used in constant expressions.
632636

633637
- All intrinsics in adxintrin.h can now be used in constant expressions.

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
21792179
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
21802180
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
21812181

2182+
// SM4_EVEX
2183+
// 512-bit SM4 builtins. Each type string lists three V16Ui entries (the
// result plus the two operands passed by the wrappers in sm4evexintrin.h),
// and the feature string names both avx10.2-512 and sm4.
// NOTE(review): the comma in the feature string presumably means that both
// features are required -- confirm against the TARGET_BUILTIN conventions.
TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
2184+
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
2185+
21822186
// AVX10 MINMAX
21832187
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
21842188
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ set(x86_files
243243
shaintrin.h
244244
sm3intrin.h
245245
sm4intrin.h
246+
sm4evexintrin.h
246247
smmintrin.h
247248
tbmintrin.h
248249
tmmintrin.h

clang/lib/Headers/immintrin.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,11 @@ _storebe_i64(void * __P, long long __D) {
677677
#include <avx10_2_512satcvtintrin.h>
678678
#endif
679679

680+
/* Include the SM4 EVEX intrinsics when __SCE__ is not defined, or when
 * building with modules; otherwise include them only if both
 * __AVX10_2_512__ and __SM4__ are defined. */
#if !defined(__SCE__) || __has_feature(modules) || \
681+
(defined(__AVX10_2_512__) && defined(__SM4__))
682+
#include <sm4evexintrin.h>
683+
#endif
684+
680685
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
681686
#include <enqcmdintrin.h>
682687
#endif

clang/lib/Headers/sm4evexintrin.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===----------------------------------------------------------------------===
8+
*/
9+
#ifndef __IMMINTRIN_H
10+
#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
11+
#endif // __IMMINTRIN_H
12+
13+
#ifndef __SM4EVEXINTRIN_H
14+
#define __SM4EVEXINTRIN_H
15+
16+
/* Attribute set used by the 512-bit intrinsics in this header: always-inline,
 * nodebug, target features "sm4,avx10.2-512", and a minimum vector width of
 * 512. The macro is #undef'd again before the end of the header. */
#define __DEFAULT_FN_ATTRS512 \
17+
__attribute__((__always_inline__, __nodebug__, \
18+
__target__("sm4,avx10.2-512"), __min_vector_width__(512)))
19+
20+
/// 512-bit wrapper around __builtin_ia32_vsm4key4512.
///
/// \param __A
///    First 512-bit operand; reinterpreted as __v16su before the call.
/// \param __B
///    Second 512-bit operand; reinterpreted as __v16su before the call.
/// \returns The builtin's result cast back to __m512i.
///
/// NOTE(review): judging by the name this is presumably the 512-bit SM4
/// key-expansion operation (VSM4KEY4) -- confirm the exact semantics against
/// Intel's ISA extensions reference.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
21+
_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
22+
return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
23+
}
24+
25+
/// 512-bit wrapper around __builtin_ia32_vsm4rnds4512.
///
/// \param __A
///    First 512-bit operand; reinterpreted as __v16su before the call.
/// \param __B
///    Second 512-bit operand; reinterpreted as __v16su before the call.
/// \returns The builtin's result cast back to __m512i.
///
/// NOTE(review): judging by the name this is presumably the 512-bit SM4
/// round operation (VSM4RNDS4) -- confirm the exact semantics against
/// Intel's ISA extensions reference.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
26+
_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
27+
return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
28+
}
29+
30+
#undef __DEFAULT_FN_ATTRS512
31+
32+
#endif // __SM4EVEXINTRIN_H
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
2+
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
3+
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
4+
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
5+
6+
#include <immintrin.h>
7+
#include <stddef.h>
8+
9+
// Verifies that _mm512_sm4key4_epi32 is emitted as a call to the
// llvm.x86.vsm4key4512 intrinsic taking two <16 x i32> operands. The two
// FileCheck directive comments inside the body are test input, not prose.
__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
10+
// CHECK-LABEL: @test_mm512_sm4key4_epi32(
11+
// CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
12+
return _mm512_sm4key4_epi32(__A, __B);
13+
}
14+
15+
// Verifies that _mm512_sm4rnds4_epi32 is emitted as a call to the
// llvm.x86.vsm4rnds4512 intrinsic taking two <16 x i32> operands. The two
// FileCheck directive comments inside the body are test input, not prose.
__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
16+
// CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
17+
// CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
18+
return _mm512_sm4rnds4_epi32(__A, __B);
19+
}

llvm/docs/ReleaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ Changes to the X86 Backend
219219

220220
* Supported instructions of `MOVRS AND AVX10.2`
221221

222+
* Supported the `SM4(EVEX)` ISA: EVEX-encoded `VSM4KEY4` and `VSM4RNDS4` instructions.
223+
222224
Changes to the OCaml bindings
223225
-----------------------------
224226

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6099,6 +6099,11 @@ let TargetPrefix = "x86" in {
60996099
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
61006100
[llvm_v8i32_ty, llvm_v8i32_ty],
61016101
[IntrNoMem]>;
6102+
// 512-bit VSM4KEY4 intrinsic: bound to the Clang builtin
// __builtin_ia32_vsm4key4512, returns v16i32 from two v16i32 operands, and
// is marked IntrNoMem.
def int_x86_vsm4key4512
6103+
: ClangBuiltin<"__builtin_ia32_vsm4key4512">,
6104+
DefaultAttrsIntrinsic<[llvm_v16i32_ty],
6105+
[llvm_v16i32_ty, llvm_v16i32_ty],
6106+
[IntrNoMem]>;
61026107
def int_x86_vsm4rnds4128
61036108
: ClangBuiltin<"__builtin_ia32_vsm4rnds4128">,
61046109
DefaultAttrsIntrinsic<[llvm_v4i32_ty],
@@ -6109,6 +6114,11 @@ let TargetPrefix = "x86" in {
61096114
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
61106115
[llvm_v8i32_ty, llvm_v8i32_ty],
61116116
[IntrNoMem]>;
6117+
// 512-bit VSM4RNDS4 intrinsic: bound to the Clang builtin
// __builtin_ia32_vsm4rnds4512, returns v16i32 from two v16i32 operands, and
// is marked IntrNoMem.
def int_x86_vsm4rnds4512
6118+
: ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
6119+
DefaultAttrsIntrinsic<[llvm_v16i32_ty],
6120+
[llvm_v16i32_ty, llvm_v16i32_ty],
6121+
[IntrNoMem]>;
61126122
}
61136123
//===----------------------------------------------------------------------===//
61146124
// RAO-INT intrinsics

llvm/lib/Target/X86/X86InstrAVX10.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,3 +1675,17 @@ defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
16751675
T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
16761676
defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
16771677
T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
1678+
1679+
// SM4(EVEX)
1680+
// Instantiates SM4_Base for one mnemonic (OpStr) at three EVEX vector
// lengths:
//   Z128 / Z256 - VR128X / VR256X operands, gated on HasSM4 + HasAVX10_2.
//   Z           - VR512 operands, gated on HasSM4 + HasAVX10_2_512.
// NOTE(review): AddedComplexity = 1 on the 128/256-bit forms presumably makes
// them win over the existing VEX patterns when both sets of predicates hold
// -- confirm against the SM4 definitions in X86InstrSSE.td.
multiclass avx10_sm4_base<string OpStr> {
1681+
// SM4_Base is in X86InstrSSE.td.
1682+
let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
1683+
defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
1684+
defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
1685+
}
1686+
let Predicates = [HasSM4, HasAVX10_2_512] in
1687+
defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
1688+
}
1689+
1690+
// EVEX instantiations of the two SM4 instructions. Both use T8, EVEX and VVVV;
// they differ only in the prefix selector (XS for vsm4key4, XD for vsm4rnds4).
defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
1691+
defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
4+
5+
; 128-bit vsm4key4: with +sm4,+avx10.2-512 the expected output is the
; VEX-prefixed (0xc4) encoding, annotated as EVEX-to-VEX compression.
define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
6+
; CHECK-LABEL: test_int_x86_vsm4key4128:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
9+
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
10+
%ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
11+
ret <4 x i32> %ret
12+
}
13+
declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
14+
15+
; 256-bit vsm4key4: with +sm4,+avx10.2-512 the expected output is the
; VEX-prefixed (0xc4) encoding, annotated as EVEX-to-VEX compression.
define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
16+
; CHECK-LABEL: test_int_x86_vsm4key4256:
17+
; CHECK: # %bb.0:
18+
; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
19+
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
20+
%ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
21+
ret <8 x i32> %ret
22+
}
23+
declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
24+
25+
; 512-bit vsm4key4: the new intrinsic is expected to select the zmm form
; with an EVEX-prefixed (0x62) encoding; no VEX compression is expected here.
define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
26+
; CHECK-LABEL: test_int_x86_vsm4key4512:
27+
; CHECK: # %bb.0:
28+
; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
29+
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
30+
%ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
31+
ret <16 x i32> %ret
32+
}
33+
declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
34+
35+
; 128-bit vsm4rnds4: with +sm4,+avx10.2-512 the expected output is the
; VEX-prefixed (0xc4) encoding, annotated as EVEX-to-VEX compression.
define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
36+
; CHECK-LABEL: test_int_x86_vsm4rnds4128:
37+
; CHECK: # %bb.0:
38+
; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
39+
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
40+
%ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
41+
ret <4 x i32> %ret
42+
}
43+
declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
44+
45+
; 256-bit vsm4rnds4: with +sm4,+avx10.2-512 the expected output is the
; VEX-prefixed (0xc4) encoding, annotated as EVEX-to-VEX compression.
define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
46+
; CHECK-LABEL: test_int_x86_vsm4rnds4256:
47+
; CHECK: # %bb.0:
48+
; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
49+
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
50+
%ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
51+
ret <8 x i32> %ret
52+
}
53+
declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
54+
55+
; 512-bit vsm4rnds4: the new intrinsic is expected to select the zmm form
; with an EVEX-prefixed (0x62) encoding; no VEX compression is expected here.
define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
56+
; CHECK-LABEL: test_int_x86_vsm4rnds4512:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
59+
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
60+
%ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
61+
ret <16 x i32> %ret
62+
}
63+
declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
64+
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
2+
# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
3+
4+
# ATT: vsm4key4 %zmm4, %zmm3, %zmm2
5+
# INTEL: vsm4key4 zmm2, zmm3, zmm4
6+
0x62,0xf2,0x66,0x48,0xda,0xd4
7+
8+
# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
9+
# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
10+
0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
11+
12+
# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
13+
# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
14+
0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
15+
16+
# ATT: vsm4key4 (%eax), %zmm3, %zmm2
17+
# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
18+
0x62,0xf2,0x66,0x48,0xda,0x10
19+
20+
# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
21+
# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
22+
0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
23+
24+
# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2
25+
# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
26+
0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
27+
28+
# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2
29+
# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
30+
0x62,0xf2,0x66,0x48,0xda,0x52,0x80
31+
32+
# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2
33+
# INTEL: vsm4rnds4 zmm2, zmm3, zmm4
34+
0x62,0xf2,0x67,0x48,0xda,0xd4
35+
36+
# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
37+
# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
38+
0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
39+
40+
# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
41+
# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
42+
0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
43+
44+
# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2
45+
# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
46+
0x62,0xf2,0x67,0x48,0xda,0x10
47+
48+
# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
49+
# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
50+
0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
51+
52+
# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
53+
# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
54+
0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
55+
56+
# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
57+
# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
58+
0x62,0xf2,0x67,0x48,0xda,0x52,0x80
59+
60+
# ATT: vsm4key4 %ymm4, %ymm3, %ymm2
61+
# INTEL: vsm4key4 ymm2, ymm3, ymm4
62+
0x62,0xf2,0x66,0x28,0xda,0xd4
63+
64+
# ATT: vsm4key4 %xmm4, %xmm3, %xmm2
65+
# INTEL: vsm4key4 xmm2, xmm3, xmm4
66+
0x62,0xf2,0x66,0x08,0xda,0xd4
67+
68+
# ATT: vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
69+
# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
70+
0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
71+
72+
# ATT: vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
73+
# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
74+
0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
75+
76+
# ATT: vsm4key4 (%eax), %ymm3, %ymm2
77+
# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [eax]
78+
0x62,0xf2,0x66,0x28,0xda,0x10
79+
80+
# ATT: vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
81+
# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
82+
0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
83+
84+
# ATT: vsm4key4 4064(%ecx), %ymm3, %ymm2
85+
# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
86+
0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
87+
88+
# ATT: vsm4key4 -4096(%edx), %ymm3, %ymm2
89+
# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
90+
0x62,0xf2,0x66,0x28,0xda,0x52,0x80
91+
92+
# ATT: vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
93+
# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
94+
0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
95+
96+
# ATT: vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
97+
# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
98+
0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
99+
100+
# ATT: vsm4key4 (%eax), %xmm3, %xmm2
101+
# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [eax]
102+
0x62,0xf2,0x66,0x08,0xda,0x10
103+
104+
# ATT: vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
105+
# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
106+
0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
107+
108+
# ATT: vsm4key4 2032(%ecx), %xmm3, %xmm2
109+
# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
110+
0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
111+
112+
# ATT: vsm4key4 -2048(%edx), %xmm3, %xmm2
113+
# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
114+
0x62,0xf2,0x66,0x08,0xda,0x52,0x80
115+
116+
# ATT: vsm4rnds4 %ymm4, %ymm3, %ymm2
117+
# INTEL: vsm4rnds4 ymm2, ymm3, ymm4
118+
0x62,0xf2,0x67,0x28,0xda,0xd4
119+
120+
# ATT: vsm4rnds4 %xmm4, %xmm3, %xmm2
121+
# INTEL: vsm4rnds4 xmm2, xmm3, xmm4
122+
0x62,0xf2,0x67,0x08,0xda,0xd4
123+
124+
# ATT: vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
125+
# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
126+
0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
127+
128+
# ATT: vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
129+
# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
130+
0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
131+
132+
# ATT: vsm4rnds4 (%eax), %ymm3, %ymm2
133+
# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
134+
0x62,0xf2,0x67,0x28,0xda,0x10
135+
136+
# ATT: vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
137+
# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
138+
0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
139+
140+
# ATT: vsm4rnds4 4064(%ecx), %ymm3, %ymm2
141+
# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
142+
0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
143+
144+
# ATT: vsm4rnds4 -4096(%edx), %ymm3, %ymm2
145+
# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
146+
0x62,0xf2,0x67,0x28,0xda,0x52,0x80
147+
148+
# ATT: vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
149+
# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
150+
0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
151+
152+
# ATT: vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
153+
# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
154+
0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
155+
156+
# ATT: vsm4rnds4 (%eax), %xmm3, %xmm2
157+
# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
158+
0x62,0xf2,0x67,0x08,0xda,0x10
159+
160+
# ATT: vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
161+
# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
162+
0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
163+
164+
# ATT: vsm4rnds4 2032(%ecx), %xmm3, %xmm2
165+
# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
166+
0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
167+
168+
# ATT: vsm4rnds4 -2048(%edx), %xmm3, %xmm2
169+
# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
170+
0x62,0xf2,0x67,0x08,0xda,0x52,0x80

0 commit comments

Comments
 (0)