-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Bulk port 64-bit x86 builtins to TableGen #121043
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Chandler Carruth (chandlerc) ChangesThis PR follows #120831 (the PR contains both, only review the last Similar to that PR, this does a very mechanical port of X86 builtins to The current structure produces a file that exactly matches the original
Otherwise, only the order of builtins change, and no tests regress. Patch is 532.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121043.diff 9 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsBase.td b/clang/include/clang/Basic/BuiltinsBase.td
index cff182f3f282cb..afed3c815d3290 100644
--- a/clang/include/clang/Basic/BuiltinsBase.td
+++ b/clang/include/clang/Basic/BuiltinsBase.td
@@ -95,9 +95,6 @@ class CustomEntry {
}
class AtomicBuiltin : Builtin;
-class TargetBuiltin : Builtin {
- string Features = "";
-}
class LibBuiltin<string header, string languages = "ALL_LANGUAGES"> : Builtin {
string Header = header;
@@ -122,6 +119,14 @@ class OCL_DSELangBuiltin : LangBuiltin<"OCL_DSE">;
class OCL_GASLangBuiltin : LangBuiltin<"OCL_GAS">;
class OCLLangBuiltin : LangBuiltin<"ALL_OCL_LANGUAGES">;
+class TargetBuiltin : Builtin {
+ string Features = "";
+}
+class TargetLibBuiltin : TargetBuiltin {
+ string Header;
+ string Languages = "ALL_LANGUAGES";
+}
+
class Template<list<string> substitutions,
list<string> affixes,
bit as_prefix = 0> {
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
deleted file mode 100644
index 352b3a9ec594a7..00000000000000
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ /dev/null
@@ -1,2225 +0,0 @@
-//===--- BuiltinsX86.def - X86 Builtin function database --------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the X86-specific builtin function database. Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-// The format of this database matches clang/Basic/Builtins.def.
-
-// FIXME: Ideally we would be able to pull this information from what
-// LLVM already knows about X86 builtins. We need to match the LLVM
-// definition anyway, since code generation will lower to the
-// intrinsic if one exists.
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-#if defined(BUILTIN) && !defined(TARGET_HEADER_BUILTIN)
-# define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-// MMX
-//
-// All MMX instructions will be generated via builtins. Any MMX vector
-// types (<1 x i64>, <2 x i32>, etc.) that aren't used by these builtins will be
-// expanded by the back-end.
-// FIXME: _mm_prefetch must be a built-in because it takes a compile-time constant
-// argument and our prior approach of using a #define to the current built-in
-// doesn't work in the presence of re-declaration of _mm_prefetch for windows.
-TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx")
-
-// SSE intrinsics.
-
-TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
-TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh",XMMINTRIN_H, ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse")
-TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", XMMINTRIN_H, ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse")
-TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", XMMINTRIN_H, ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "ncV:128:", "sse")
-
-TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "nV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2OiV16cV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2OiV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "n", "sse2")
-TARGET_HEADER_BUILTIN(_mm_clflush, "vvC*", "nh", EMMINTRIN_H, ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_lfence, "v", "n", "sse2")
-TARGET_HEADER_BUILTIN(_mm_lfence, "v", "nh", EMMINTRIN_H, ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_mfence, "v", "n", "sse2")
-TARGET_HEADER_BUILTIN(_mm_mfence, "v", "nh", EMMINTRIN_H, ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_pause, "v", "n", "")
-TARGET_HEADER_BUILTIN(_mm_pause, "v", "nh", EMMINTRIN_H, ALL_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2OiV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2OiV2OiV2Oi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllq128, "V2OiV2OiV2Oi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2OiV2Oii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2OiV2Oii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2OiV2OiIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2OiV2OiIi", "ncV:128:", "sse2")
-
-TARGET_BUILTIN(__builtin_ia32_monitor, "vvC*UiUi", "n", "sse3")
-TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3")
-TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "nV:128:", "sse3")
-
-TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "ncV:128:", "ssse3")
-
-TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1")
-
-TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "ncV:128:", "sse4.1")
-
-// SSE 4.2
-TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-
-TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32")
-TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32")
-TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32")
-
-// SSE4a
-TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_extrq, "V2OiV2OiV16c", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertqi, "V2OiV2OiV2OiIcIc", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertq, "V2OiV2OiV2Oi", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "nV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "nV:128:", "sse4a")
-
-// AES
-TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2OiV2OiIc", "ncV:128:", "aes")
-
-// VAES
-TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-
-// GFNI
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,evex512,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,evex512,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512f,evex512,gfni")
-
-// CLMUL
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2OiV2OiV2OiIc", "ncV:128:", "pclmul")
-
-// VPCLMULQDQ
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4OiV4OiV4OiIc", "ncV:256:", "vpclmulqdq")
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8OiV8OiV8OiIc", "ncV:512:", "avx512f,evex512,vpclmulqdq")
-
-// AVX
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4OiV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4OiV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4OiV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2Oi", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4Oi", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2OiV2d", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4OiV4d", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
-
-// AVX2
-TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16s...
[truncated]
|
I unfortunately dont' know enough about the x86 builtins(I think @phoebewang is the owner here who needs to look at them). But my limited-knowledge look through has no concerns. |
I don't see any removal in BuiltinsX86_64.def. Is it intended? |
let Features = "amx-tile", Attributes = [NoThrow] in { | ||
def tileloaddt164_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, void const *, size_t)">; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Merge to amx-tile
group.
let Features = "amx-tile", Attributes = [NoThrow] in { | ||
def tilestored64_internal : X86Builtin<"void(unsigned short, unsigned short, void *, size_t, _Vector<256, int>)">; | ||
def tilezero_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short)">; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto.
let Features = "amx-transpose", Attributes = [NoThrow] in { | ||
def t2rpntlvwz0t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a lot of separate definition like this. Please merge them with the same Feature and Attributes as a group. This is the only benefit to use a TD file AFAIC.
def movrsdi : X86Builtin<"signed long long int(void const *)">; | ||
} | ||
|
||
let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The RequiredVectorWidth
was introduced to solve the problem when we don't want to generate 512-bit instruction in the age of AVX512. With AVX10, we have solved this problem through avx10.x--512
feature. I think we don't need it anymore. Without this attribute, we can further merge builtins under the same Features. What do you think @topperc ?
For many of the comments -- this, like the previous PR, is script generated based on the physical grouping of the current
Doh, I just forgot to do the git operation. Sorry about that. I'll get an updated version of this and the other PR after re-basing and such tomorrow. |
37ecefb
to
2635b6d
Compare
So looking through this, again, these are all done initially by automation. And the simple current form of that automation preserves the order of builtins in the FWIW, I can add logic to my script to ignore order and instead group things specifically by feature sets and attribute sets into the largest sets I can. However, this will very greatly permute the relative order of builtins in the file and it wasn't clear this was always the right call. For example, in several places there are logical groupings that seem more important that the specific feature sets, but maximally grouping by features will destroy this and merge those. So, my preference is to keep this as is, and allow folks familiar with the specific builtins to merge groups together that make logical sense in follow-up PRs. Would that work for folks to at least get us out from using the X-macros? As to benefits, there are huge benefits to using TableGen even if we're not taking fully advantage of the grouping to remove duplication. You can see some of those in #120534 which is the PR that is motivating all of my work here here. I did update this PR, it's now rebased on top-of-tree and the 64-bit And as a reminder, this is dependent on #120831 -- only the last commit should really be reviewed here. |
Agreed! Can you share the script please? We have many internal builtins need to update, it would be great if it can be done by automation. What's the X-macros mean? |
I've put the script in a gist here: https://gist.github.com/chandlerc/de807ea073beac351f87c660e1d4b7a0 X-macros: the Working on merging the base, and then will rebase this with the improvements from that. |
This PR follows llvm#120831 (the PR contains both, only review the last commit here as the other commit will be reviewed on the other PR). Similar to that PR, this does a very mechanical port of X86 builtins to TableGen. There is a *lot* of improvement available here to use TableGen more effectively and collapse repeated structures. But those can now be follow-up PRs that restructure *within* the `.td` file. The current structure produces a file that exactly matches the original X-macros except for the differences outlined in llvm#120831: - Horizontal whitespace - `long long` types now use `long long` outside of OpenCL, but switch to `long` in OpenCL for the core `__builtin_ia32_...` builtins. Otherwise, only the order of builtins change, and no tests regress.
2635b6d
to
3314a7d
Compare
Updated to rebase on top-of-tree with #120831 merged. Re-ran all the scripts to verify things. Using a variation on the command from my comment on the other PR: diff -u (rg '^TARGET' BuiltinsX86_64.def | sd ' +' ' ' | sd ',([X"])' ', $1' | sort | psub) (rg '^TARGET' dev/tools/clang/include/clang/Basic/BuiltinsX86_64.inc | sort | psub) I got the following diffs due to I guess let me know if I need to special case |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/64/builds/1883 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/98/builds/941 Here is the relevant piece of the build log for the reference
|
This PR follows #120831 for x86-64.
Similar to that PR, this does a very mechanical port of X86 builtins to
TableGen. There is a lot of improvement available here to use TableGen
more effectively and collapse repeated structures. But those can now be
follow-up PRs that restructure within the
.td
file.The current structure produces a file that exactly matches the original
X-macros except for the differences outlined in #120831:
long long
types now uselong long
outside of OpenCL, but switch tolong
in OpenCL where relevant.Otherwise, only the order of builtins change, and no tests regress.