Skip to content

[clang][x86] Add constexpr support for MULX intrinsics #110654

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 1, 2024

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Oct 1, 2024

With this patch all BMI2 intrinsics can now be used in constant expressions

With this patch all BMI2 intrinsics can now be used in constant expressions
Copy link

github-actions bot commented Oct 1, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 91ef1f7caa4c8029952fd6d36e0858811d286bab 8e19d65e442a12f3114f9e028f7f2af4b9896405 --extensions c,h -- clang/lib/Headers/bmi2intrin.h clang/test/CodeGen/X86/bmi2-builtins.c
View the diff from clang-format here.
diff --git a/clang/lib/Headers/bmi2intrin.h b/clang/lib/Headers/bmi2intrin.h
index bdb61b13fb..449a85e83c 100644
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@@ -43,8 +43,8 @@
 /// \param __Y
 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
 /// \returns The partially zeroed 32-bit value.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_bzhi_u32(unsigned int __X, unsigned int __Y) {
+static __inline__ unsigned int __DEFAULT_FN_ATTRS _bzhi_u32(unsigned int __X,
+                                                            unsigned int __Y) {
   return __builtin_ia32_bzhi_si(__X, __Y);
 }
 
@@ -72,8 +72,8 @@ _bzhi_u32(unsigned int __X, unsigned int __Y) {
 /// \param __Y
 ///    The 32-bit mask specifying where to deposit source bits.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_pdep_u32(unsigned int __X, unsigned int __Y) {
+static __inline__ unsigned int __DEFAULT_FN_ATTRS _pdep_u32(unsigned int __X,
+                                                            unsigned int __Y) {
   return __builtin_ia32_pdep_si(__X, __Y);
 }
 
@@ -101,8 +101,8 @@ _pdep_u32(unsigned int __X, unsigned int __Y) {
 /// \param __Y
 ///    The 32-bit mask specifying which source bits to extract.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_pext_u32(unsigned int __X, unsigned int __Y) {
+static __inline__ unsigned int __DEFAULT_FN_ATTRS _pext_u32(unsigned int __X,
+                                                            unsigned int __Y) {
   return __builtin_ia32_pext_si(__X, __Y);
 }
 
@@ -126,8 +126,9 @@ _pext_u32(unsigned int __X, unsigned int __Y) {
 /// \param __P
 ///    A pointer to memory for storing the upper half of the product.
 /// \returns The lower half of the product.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
+static __inline__ unsigned int __DEFAULT_FN_ATTRS _mulx_u32(unsigned int __X,
+                                                            unsigned int __Y,
+                                                            unsigned int *__P) {
   unsigned long long __res = (unsigned long long) __X * __Y;
   *__P = (unsigned int)(__res >> 32);
   return (unsigned int)__res;
@@ -238,9 +239,8 @@ _pext_u64(unsigned long long __X, unsigned long long __Y) {
 /// \param __P
 ///    A pointer to memory for storing the upper half of the product.
 /// \returns The lower half of the product.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-_mulx_u64 (unsigned long long __X, unsigned long long __Y,
-           unsigned long long *__P) {
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mulx_u64(
+    unsigned long long __X, unsigned long long __Y, unsigned long long *__P) {
   unsigned __int128 __res = (unsigned __int128) __X * __Y;
   *__P = (unsigned long long) (__res >> 64);
   return (unsigned long long) __res;

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Oct 1, 2024
@llvmbot
Copy link
Member

llvmbot commented Oct 1, 2024

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Simon Pilgrim (RKSimon)

Changes

With this patch all BMI2 intrinsics can now be used in constant expressions


Full diff: https://github.com/llvm/llvm-project/pull/110654.diff

3 Files Affected:

  • (modified) clang/docs/ReleaseNotes.rst (+1-1)
  • (modified) clang/lib/Headers/bmi2intrin.h (+18-27)
  • (modified) clang/test/CodeGen/X86/bmi2-builtins.c (+24)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a55a3b8687e46e..eb020895d856ad 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -522,7 +522,7 @@ X86 Support
 
 - All intrinsics in bmiintrin.h can now be used in constant expressions.
 
-- All bzhi/pdep/pext intrinsics in bmi2intrin.h can now be used in constant expressions.
+- All intrinsics in bmi2intrin.h can now be used in constant expressions.
 
 - All intrinsics in tbmintrin.h can now be used in constant expressions.
 
diff --git a/clang/lib/Headers/bmi2intrin.h b/clang/lib/Headers/bmi2intrin.h
index 7b2c2f145b14a0..bdb61b13fb83b6 100644
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@@ -15,12 +15,12 @@
 #define __BMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
-
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
 #endif
 
 /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
@@ -43,9 +43,8 @@
 /// \param __Y
 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
 /// \returns The partially zeroed 32-bit value.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_bzhi_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_bzhi_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_bzhi_si(__X, __Y);
 }
 
@@ -73,9 +72,8 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
 /// \param __Y
 ///    The 32-bit mask specifying where to deposit source bits.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_pdep_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_pdep_si(__X, __Y);
 }
 
@@ -103,9 +101,8 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
 /// \param __Y
 ///    The 32-bit mask specifying which source bits to extract.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_pext_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_pext_si(__X, __Y);
 }
 
@@ -130,8 +127,7 @@ _pext_u32(unsigned int __X, unsigned int __Y)
 ///    A pointer to memory for storing the upper half of the product.
 /// \returns The lower half of the product.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
-{
+_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
   unsigned long long __res = (unsigned long long) __X * __Y;
   *__P = (unsigned int)(__res >> 32);
   return (unsigned int)__res;
@@ -159,9 +155,8 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
 /// \param __Y
 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
 /// \returns The partially zeroed 64-bit value.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_bzhi_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_bzhi_di(__X, __Y);
 }
 
@@ -189,9 +184,8 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
 /// \param __Y
 ///    The 64-bit mask specifying where to deposit source bits.
 /// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_pdep_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_pdep_di(__X, __Y);
 }
 
@@ -219,9 +213,8 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
 /// \param __Y
 ///    The 64-bit mask specifying which source bits to extract.
 /// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_pext_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_pext_di(__X, __Y);
 }
 
@@ -247,8 +240,7 @@ _pext_u64(unsigned long long __X, unsigned long long __Y)
 /// \returns The lower half of the product.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
-	   unsigned long long *__P)
-{
+           unsigned long long *__P) {
   unsigned __int128 __res = (unsigned __int128) __X * __Y;
   *__P = (unsigned long long) (__res >> 64);
   return (unsigned long long) __res;
@@ -257,6 +249,5 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 #endif /* __x86_64__  */
 
 #undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif /* __BMI2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c
index e00bac4b51e903..48424f553768be 100644
--- a/clang/test/CodeGen/X86/bmi2-builtins.c
+++ b/clang/test/CodeGen/X86/bmi2-builtins.c
@@ -71,6 +71,18 @@ char pext32_1[_pext_u32(0x89ABCDEF,  0x000000F0) == 0x0000000E ? 1 : -1];
 char pext32_2[_pext_u32(0x89ABCDEF,  0xF00000F0) == 0x0000008E ? 1 : -1];
 char pext32_3[_pext_u32(0x89ABCDEF,  0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];
 
+constexpr unsigned long long
+test_mulx_u32(unsigned int X, unsigned int Y)
+{
+  unsigned int H{};
+  return _mulx_u32(X, Y, &H) | ((unsigned long long) H << 32);
+}
+
+void mulxu32() {
+  constexpr unsigned X = 0x89ABCDEF, Y = 0x01234567;
+  static_assert(test_mulx_u32(X,Y) == ((unsigned long long)X * Y));
+}
+
 #ifdef __x86_64__
 char bzhi64_0[_bzhi_u64(0x0123456789ABCDEFULL,   0) == 0x0000000000000000ULL ? 1 : -1];
 char bzhi64_1[_bzhi_u64(0x0123456789ABCDEFULL,  32) == 0x0000000089ABCDEFULL ? 1 : -1];
@@ -86,5 +98,17 @@ char pext64_0[_pext_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x00000
 char pext64_1[_pext_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x000000000000000EULL ? 1 : -1];
 char pext64_2[_pext_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0x000000000000068EULL ? 1 : -1];
 char pext64_3[_pext_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
+
+constexpr unsigned __int128
+test_mulx_u64(unsigned long long X, unsigned long long Y)
+{
+  unsigned long long H{};
+  return _mulx_u64(X, Y, &H) | ((unsigned __int128) H << 64);
+}
+
+void mulxu64() {
+  constexpr unsigned long long X = 0x0123456789ABCDEFULL, Y = 0xFEDCBA9876543210ULL;
+  static_assert(test_mulx_u64(X,Y) == ((unsigned __int128)X * Y));
+}
 #endif
 #endif
\ No newline at end of file

@RKSimon RKSimon merged commit cb52e8e into llvm:main Oct 1, 2024
9 of 12 checks passed
@RKSimon RKSimon deleted the x86-constexpr-mulx branch October 1, 2024 14:12
Sterling-Augustine pushed a commit to Sterling-Augustine/llvm-project that referenced this pull request Oct 3, 2024
With this patch all BMI2 intrinsics can now be used in constant expressions
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants