Skip to content

Commit fc3b787

Browse files
committed
[X86] Add SHA512 instructions.
For more details about this instruction, please refer to the latest ISE document: https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html Reviewed By: RKSimon, skan Differential Revision: https://reviews.llvm.org/D155146
1 parent c9953d9 commit fc3b787

File tree

27 files changed

+428
-3
lines changed

27 files changed

+428
-3
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,10 @@ X86 Support
813813

814814
- Add ISA of ``AMX-COMPLEX`` which supports ``tcmmimfp16ps`` and
815815
``tcmmrlfp16ps``.
816+
- Support ISA of ``SHA512``.
817+
* Support intrinsic of ``_mm256_sha512msg1_epi64``.
818+
* Support intrinsic of ``_mm256_sha512msg2_epi64``.
819+
* Support intrinsic of ``_mm256_sha512rnds2_epi64``.
816820

817821
Arm and AArch64 Support
818822
^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2132,6 +2132,11 @@ TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps256, "V8fV16xC*", "nV:256:", "avxnecon
21322132
TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8yV4f", "nV:128:", "avx512bf16,avx512vl|avxneconvert")
21332133
TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, "V8yV8f", "nV:256:", "avx512bf16,avx512vl|avxneconvert")
21342134

2135+
// SHA512
2136+
TARGET_BUILTIN(__builtin_ia32_vsha512msg1, "V4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
2137+
TARGET_BUILTIN(__builtin_ia32_vsha512msg2, "V4ULLiV4ULLiV4ULLi", "nV:256:", "sha512")
2138+
TARGET_BUILTIN(__builtin_ia32_vsha512rnds2, "V4ULLiV4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
2139+
21352140
TARGET_HEADER_BUILTIN(_InterlockedAnd64, "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
21362141
TARGET_HEADER_BUILTIN(_InterlockedDecrement64, "WiWiD*", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
21372142
TARGET_HEADER_BUILTIN(_InterlockedExchange64, "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5056,6 +5056,8 @@ def msgx : Flag<["-"], "msgx">, Group<m_x86_Features_Group>;
50565056
def mno_sgx : Flag<["-"], "mno-sgx">, Group<m_x86_Features_Group>;
50575057
def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
50585058
def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
5059+
def msha512 : Flag<["-"], "msha512">, Group<m_x86_Features_Group>;
5060+
def mno_sha512 : Flag<["-"], "mno-sha512">, Group<m_x86_Features_Group>;
50595061
def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
50605062
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
50615063
def mtsxldtrk : Flag<["-"], "mtsxldtrk">, Group<m_x86_Features_Group>;

clang/lib/Basic/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
261261
HasAVX512VP2INTERSECT = true;
262262
} else if (Feature == "+sha") {
263263
HasSHA = true;
264+
} else if (Feature == "+sha512") {
265+
HasSHA512 = true;
264266
} else if (Feature == "+shstk") {
265267
HasSHSTK = true;
266268
} else if (Feature == "+movbe") {
@@ -749,6 +751,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
749751
Builder.defineMacro("__AVX512VP2INTERSECT__");
750752
if (HasSHA)
751753
Builder.defineMacro("__SHA__");
754+
if (HasSHA512)
755+
Builder.defineMacro("__SHA512__");
752756

753757
if (HasFXSR)
754758
Builder.defineMacro("__FXSR__");
@@ -999,6 +1003,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
9991003
.Case("serialize", true)
10001004
.Case("sgx", true)
10011005
.Case("sha", true)
1006+
.Case("sha512", true)
10021007
.Case("shstk", true)
10031008
.Case("sse", true)
10041009
.Case("sse2", true)
@@ -1104,6 +1109,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
11041109
.Case("serialize", HasSERIALIZE)
11051110
.Case("sgx", HasSGX)
11061111
.Case("sha", HasSHA)
1112+
.Case("sha512", HasSHA512)
11071113
.Case("shstk", HasSHSTK)
11081114
.Case("sse", SSELevel >= SSE1)
11091115
.Case("sse2", SSELevel >= SSE2)

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
112112
bool HasAVX512IFMA = false;
113113
bool HasAVX512VP2INTERSECT = false;
114114
bool HasSHA = false;
115+
bool HasSHA512 = false;
115116
bool HasSHSTK = false;
116117
bool HasSGX = false;
117118
bool HasCX8 = false;

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ set(x86_files
203203
rtmintrin.h
204204
serializeintrin.h
205205
sgxintrin.h
206+
sha512intrin.h
206207
shaintrin.h
207208
smmintrin.h
208209
tbmintrin.h

clang/lib/Headers/immintrin.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,11 @@
269269
#include <avxneconvertintrin.h>
270270
#endif
271271

272+
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
273+
defined(__SHA512__)
274+
#include <sha512intrin.h>
275+
#endif
276+
272277
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
273278
defined(__RDPID__)
274279
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).

clang/lib/Headers/sha512intrin.h

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===-----------------------------------------------------------------------===
8+
*/
9+
10+
#ifndef __IMMINTRIN_H
11+
#error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
12+
#endif // __IMMINTRIN_H
13+
14+
#ifndef __SHA512INTRIN_H
15+
#define __SHA512INTRIN_H
16+
17+
#define __DEFAULT_FN_ATTRS256 \
18+
__attribute__((__always_inline__, __nodebug__, __target__("sha512"), \
19+
__min_vector_width__(256)))
20+
21+
/// This intrinisc is one of the two SHA512 message scheduling instructions.
22+
/// The intrinsic performs an intermediate calculation for the next four
23+
/// SHA512 message qwords. The calculated results are stored in \a dst.
24+
///
25+
/// \headerfile <immintrin.h>
26+
///
27+
/// \code
28+
/// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B)
29+
/// \endcode
30+
///
31+
/// This intrinsic corresponds to the \c VSHA512MSG1 instruction.
32+
///
33+
/// \param __A
34+
/// A 256-bit vector of [4 x long long].
35+
/// \param __B
36+
/// A 128-bit vector of [2 x long long].
37+
/// \returns
38+
/// A 256-bit vector of [4 x long long].
39+
///
40+
/// \code{.operation}
41+
/// DEFINE ROR64(qword, n) {
42+
/// count := n % 64
43+
/// dest := (qword >> count) | (qword << (64 - count))
44+
/// RETURN dest
45+
/// }
46+
/// DEFINE SHR64(qword, n) {
47+
/// RETURN qword >> n
48+
/// }
49+
/// DEFINE s0(qword):
50+
/// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
51+
/// }
52+
/// W[4] := __B.qword[0]
53+
/// W[3] := __A.qword[3]
54+
/// W[2] := __A.qword[2]
55+
/// W[1] := __A.qword[1]
56+
/// W[0] := __A.qword[0]
57+
/// dst.qword[3] := W[3] + s0(W[4])
58+
/// dst.qword[2] := W[2] + s0(W[3])
59+
/// dst.qword[1] := W[1] + s0(W[2])
60+
/// dst.qword[0] := W[0] + s0(W[1])
61+
/// dst[MAX:256] := 0
62+
/// \endcode
63+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
64+
_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
65+
return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B);
66+
}
67+
68+
/// This intrinisc is one of the two SHA512 message scheduling instructions.
69+
/// The intrinsic performs the final calculation for the next four SHA512
70+
/// message qwords. The calculated results are stored in \a dst.
71+
///
72+
/// \headerfile <immintrin.h>
73+
///
74+
/// \code
75+
/// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B)
76+
/// \endcode
77+
///
78+
/// This intrinsic corresponds to the \c VSHA512MSG2 instruction.
79+
///
80+
/// \param __A
81+
/// A 256-bit vector of [4 x long long].
82+
/// \param __B
83+
/// A 256-bit vector of [4 x long long].
84+
/// \returns
85+
/// A 256-bit vector of [4 x long long].
86+
///
87+
/// \code{.operation}
88+
/// DEFINE ROR64(qword, n) {
89+
/// count := n % 64
90+
/// dest := (qword >> count) | (qword << (64 - count))
91+
/// RETURN dest
92+
/// }
93+
/// DEFINE SHR64(qword, n) {
94+
/// RETURN qword >> n
95+
/// }
96+
/// DEFINE s1(qword) {
97+
/// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
98+
/// }
99+
/// W[14] := __B.qword[2]
100+
/// W[15] := __B.qword[3]
101+
/// W[16] := __A.qword[0] + s1(W[14])
102+
/// W[17] := __A.qword[1] + s1(W[15])
103+
/// W[18] := __A.qword[2] + s1(W[16])
104+
/// W[19] := __A.qword[3] + s1(W[17])
105+
/// dst.qword[3] := W[19]
106+
/// dst.qword[2] := W[18]
107+
/// dst.qword[1] := W[17]
108+
/// dst.qword[0] := W[16]
109+
/// dst[MAX:256] := 0
110+
/// \endcode
111+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
112+
_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
113+
return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B);
114+
}
115+
116+
/// This intrinisc performs two rounds of SHA512 operation using initial SHA512
117+
/// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from
118+
/// \a __A, and a pre-computed sum of the next two round message qwords and
119+
/// the corresponding round constants from \a __C (only the two lower qwords
120+
/// of the third operand). The updated SHA512 state (A,B,E,F) is written to
121+
/// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later
122+
/// rounds.
123+
///
124+
/// \headerfile <immintrin.h>
125+
///
126+
/// \code
127+
/// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C)
128+
/// \endcode
129+
///
130+
/// This intrinsic corresponds to the \c VSHA512RNDS2 instruction.
131+
///
132+
/// \param __A
133+
/// A 256-bit vector of [4 x long long].
134+
/// \param __B
135+
/// A 256-bit vector of [4 x long long].
136+
/// \param __C
137+
/// A 128-bit vector of [2 x long long].
138+
/// \returns
139+
/// A 256-bit vector of [4 x long long].
140+
///
141+
/// \code{.operation}
142+
/// DEFINE ROR64(qword, n) {
143+
/// count := n % 64
144+
/// dest := (qword >> count) | (qword << (64 - count))
145+
/// RETURN dest
146+
/// }
147+
/// DEFINE SHR64(qword, n) {
148+
/// RETURN qword >> n
149+
/// }
150+
/// DEFINE cap_sigma0(qword) {
151+
/// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
152+
/// }
153+
/// DEFINE cap_sigma1(qword) {
154+
/// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
155+
/// }
156+
/// DEFINE MAJ(a,b,c) {
157+
/// RETURN (a & b) ^ (a & c) ^ (b & c)
158+
/// }
159+
/// DEFINE CH(e,f,g) {
160+
/// RETURN (e & f) ^ (g & ~e)
161+
/// }
162+
/// A[0] := __B.qword[3]
163+
/// B[0] := __B.qword[2]
164+
/// C[0] := __C.qword[3]
165+
/// D[0] := __C.qword[2]
166+
/// E[0] := __B.qword[1]
167+
/// F[0] := __B.qword[0]
168+
/// G[0] := __C.qword[1]
169+
/// H[0] := __C.qword[0]
170+
/// WK[0]:= __A.qword[0]
171+
/// WK[1]:= __A.qword[1]
172+
/// FOR i := 0 to 1:
173+
/// A[i+1] := CH(E[i], F[i], G[i]) +
174+
/// cap_sigma1(E[i]) + WK[i] + H[i] +
175+
/// MAJ(A[i], B[i], C[i]) +
176+
/// cap_sigma0(A[i])
177+
/// B[i+1] := A[i]
178+
/// C[i+1] := B[i]
179+
/// D[i+1] := C[i]
180+
/// E[i+1] := CH(E[i], F[i], G[i]) +
181+
/// cap_sigma1(E[i]) + WK[i] + H[i] + D[i]
182+
/// F[i+1] := E[i]
183+
/// G[i+1] := F[i]
184+
/// H[i+1] := G[i]
185+
/// ENDFOR
186+
/// dst.qword[3] := A[2]
187+
/// dst.qword[2] := B[2]
188+
/// dst.qword[1] := E[2]
189+
/// dst.qword[0] := F[2]
190+
/// dst[MAX:256] := 0
191+
/// \endcode
192+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
193+
_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
194+
return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B,
195+
(__v2du)__C);
196+
}
197+
198+
#undef __DEFAULT_FN_ATTRS256
199+
200+
#endif // __SHA512INTRIN_H
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +sha512 -emit-llvm -o - -Wall -Werror | FileCheck %s
2+
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +sha512 -emit-llvm -o - -Wall -Werror | FileCheck %s
3+
4+
#include <immintrin.h>
5+
6+
__m256i test_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
7+
// CHECK-LABEL: @test_mm256_sha512msg1_epi64(
8+
// CHECK: call <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
9+
return _mm256_sha512msg1_epi64(__A, __B);
10+
}
11+
12+
__m256i test_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
13+
// CHECK-LABEL: @test_mm256_sha512msg2_epi64(
14+
// CHECK: call <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
15+
return _mm256_sha512msg2_epi64(__A, __B);
16+
}
17+
18+
__m256i test_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
19+
// CHECK-LABEL: @test_mm256_sha512rnds2_epi64(
20+
// CHECK: call <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i64> %{{.*}})
21+
return _mm256_sha512rnds2_epi64(__A, __B, __C);
22+
}

clang/test/CodeGen/attr-target-x86.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
5454
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
5555
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
5656
// CHECK-NOT: tune-cpu
57-
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
57+
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
5858
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
59-
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
59+
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
6060
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
6161
// CHECK-NOT: tune-cpu
6262
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"

clang/test/Driver/x86-target-features.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,11 @@
349349
// AVXNECONVERT: "-target-feature" "+avxneconvert"
350350
// NO-AVXNECONVERT: "-target-feature" "-avxneconvert"
351351

352+
// RUN: %clang --target=i386 -msha512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SHA512 %s
353+
// RUN: %clang --target=i386 -mno-sha512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SHA512 %s
354+
// SHA512: "-target-feature" "+sha512"
355+
// NO-SHA512: "-target-feature" "-sha512"
356+
352357
// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
353358
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
354359
// CRC32: "-target-feature" "+crc32"

clang/test/Preprocessor/x86_target_features.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,19 @@
660660
// AVXNECONVERTNOAVX2-NOT: #define __AVX2__ 1
661661
// AVXNECONVERTNOAVX2-NOT: #define __AVXNECONVERT__ 1
662662

663+
// RUN: %clang -target i386-unknown-linux-gnu -march=atom -msha512 -x c -E -dM -o - %s | FileCheck -check-prefix=SHA512 %s
664+
665+
// SHA512: #define __AVX__ 1
666+
// SHA512: #define __SHA512__ 1
667+
668+
// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mno-sha512 -x c -E -dM -o - %s | FileCheck -check-prefix=NOSHA512 %s
669+
// NOSHA512-NOT: #define __SHA512__ 1
670+
671+
// RUN: %clang -target i386-unknown-linux-gnu -march=atom -msha512 -mno-avx -x c -E -dM -o - %s | FileCheck -check-prefix=SHA512NOAVX %s
672+
673+
// SHA512NOAVX-NOT: #define __AVX__ 1
674+
// SHA512NOAVX-NOT: #define __SHA512__ 1
675+
663676
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
664677

665678
// CRC32: #define __CRC32__ 1

llvm/docs/ReleaseNotes.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ Changes to the X86 Backend
279279
* ``__builtin_unpredictable`` (unpredictable metadata in LLVM IR), is handled by X86 Backend.
280280
``X86CmovConversion`` pass now respects this builtin and does not convert CMOVs to branches.
281281
* Add support for the ``PBNDKB`` instruction.
282-
282+
* Support ISA of ``SHA512``.
283283

284284
Changes to the OCaml bindings
285285
-----------------------------

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5105,6 +5105,20 @@ let TargetPrefix = "x86" in {
51055105
[IntrNoMem]>;
51065106
}
51075107

5108+
//===----------------------------------------------------------------------===//
5109+
// SHA512 intrinsics
5110+
let TargetPrefix = "x86" in {
5111+
def int_x86_vsha512msg1 : ClangBuiltin<"__builtin_ia32_vsha512msg1">,
5112+
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty],
5113+
[IntrNoMem]>;
5114+
def int_x86_vsha512msg2 : ClangBuiltin<"__builtin_ia32_vsha512msg2">,
5115+
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
5116+
[IntrNoMem]>;
5117+
def int_x86_vsha512rnds2 : ClangBuiltin<"__builtin_ia32_vsha512rnds2">,
5118+
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v2i64_ty],
5119+
[IntrNoMem]>;
5120+
}
5121+
51085122
//===----------------------------------------------------------------------===//
51095123
// Thread synchronization ops with timer.
51105124
let TargetPrefix = "x86" in {

llvm/include/llvm/TargetParser/X86TargetParser.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ X86_FEATURE (AVXNECONVERT, "avxneconvert")
227227
X86_FEATURE (AVXVNNI, "avxvnni")
228228
X86_FEATURE (AVXIFMA, "avxifma")
229229
X86_FEATURE (AVXVNNIINT8, "avxvnniint8")
230+
X86_FEATURE (SHA512, "sha512")
230231
// These features aren't really CPU features, but the frontend can set them.
231232
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
232233
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")

0 commit comments

Comments
 (0)