Skip to content

[clang][x86] Add constexpr support for PDEP/PEXT intrinsics #110535

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 1, 2024

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Sep 30, 2024

No description provided.

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics labels Sep 30, 2024
@llvmbot
Copy link
Member

llvmbot commented Sep 30, 2024

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/110535.diff

6 Files Affected:

  • (modified) clang/docs/ReleaseNotes.rst (+2)
  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+2-2)
  • (modified) clang/include/clang/Basic/BuiltinsX86_64.def (+2-2)
  • (modified) clang/lib/AST/ExprConstant.cpp (+30)
  • (modified) clang/lib/Headers/bmi2intrin.h (+5-4)
  • (modified) clang/test/CodeGen/X86/bmi2-builtins.c (+20)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 02dfbfaaea2071..2a4f8542ebe908 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -516,6 +516,8 @@ X86 Support
   all bextr intrinsics in bmiintrin.h
   all tzcnt intrinsics in bmiintrin.h
   all bzhi intrinsics in bmi2intrin.h
+  all pdep intrinsics in bmi2intrin.h
+  all pext intrinsics in bmi2intrin.h
   all intrinsics in tbmintrin.h
 
 Arm and AArch64 Support
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 8cf9dbdef9bb3c..2a987abcf9a350 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -561,8 +561,8 @@ TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")
 
 // BMI2
 TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "ncE", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pdep_si, "UiUiUi", "nc", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "nc", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_pdep_si, "UiUiUi", "ncE", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "ncE", "bmi2")
 
 // TBM
 TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "ncE", "tbm")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index dd4b15bac81989..d5fdb272d92d10 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -74,8 +74,8 @@ TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
 TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
 TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
 TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "ncE", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "ncE", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "ncE", "bmi2")
 TARGET_BUILTIN(__builtin_ia32_bextri_u64, "UOiUOiIUOi", "ncE", "tbm")
 TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcUOiUiIUi", "n", "lwp")
 TARGET_BUILTIN(__builtin_ia32_lwpval64, "vUOiUiIUi", "n", "lwp")
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 402fd5dbae9a0f..cd8e9ee1e156fa 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13518,6 +13518,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
       return false;
     return Success(Val.countTrailingZeros(), E);
   }
+
+  case clang::X86::BI__builtin_ia32_pdep_si:
+  case clang::X86::BI__builtin_ia32_pdep_di: {
+    APSInt Val, Msk;
+    if (!EvaluateInteger(E->getArg(0), Val, Info) ||
+        !EvaluateInteger(E->getArg(1), Msk, Info))
+      return false;
+
+    unsigned BitWidth = Val.getBitWidth();
+    APInt Result = APInt::getZero(BitWidth);
+    for (unsigned I = 0, P = 0; I != BitWidth; ++I)
+      if (Msk[I])
+        Result.setBitVal(I, Val[P++]);
+    return Success(Result, E);
+  }
+
+  case clang::X86::BI__builtin_ia32_pext_si:
+  case clang::X86::BI__builtin_ia32_pext_di: {
+    APSInt Val, Msk;
+    if (!EvaluateInteger(E->getArg(0), Val, Info) ||
+        !EvaluateInteger(E->getArg(1), Msk, Info))
+      return false;
+
+    unsigned BitWidth = Val.getBitWidth();
+    APInt Result = APInt::getZero(BitWidth);
+    for (unsigned I = 0, P = 0; I != BitWidth; ++I)
+      if (Msk[I])
+        Result.setBitVal(P++, Val[I]);
+    return Success(Result, E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/bmi2intrin.h b/clang/lib/Headers/bmi2intrin.h
index 31d97e62719109..7b2c2f145b14a0 100644
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@@ -73,7 +73,7 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
 /// \param __Y
 ///    The 32-bit mask specifying where to deposit source bits.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
 _pdep_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_pdep_si(__X, __Y);
@@ -103,7 +103,7 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
 /// \param __Y
 ///    The 32-bit mask specifying which source bits to extract.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
 _pext_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_pext_si(__X, __Y);
@@ -189,7 +189,7 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
 /// \param __Y
 ///    The 64-bit mask specifying where to deposit source bits.
 /// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
 _pdep_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_pdep_di(__X, __Y);
@@ -219,7 +219,7 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
 /// \param __Y
 ///    The 64-bit mask specifying which source bits to extract.
 /// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
 _pext_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_pext_di(__X, __Y);
@@ -257,5 +257,6 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 #endif /* __x86_64__  */
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif /* __BMI2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c
index 9312192f85a932..e00bac4b51e903 100644
--- a/clang/test/CodeGen/X86/bmi2-builtins.c
+++ b/clang/test/CodeGen/X86/bmi2-builtins.c
@@ -61,10 +61,30 @@ char bzhi32_3[_bzhi_u32(0x89ABCDEF,  32) == 0x89ABCDEF ? 1 : -1];
 char bzhi32_4[_bzhi_u32(0x89ABCDEF,  99) == 0x89ABCDEF ? 1 : -1];
 char bzhi32_5[_bzhi_u32(0x89ABCDEF, 260) == 0x0000000F ? 1 : -1];
 
+char pdep32_0[_pdep_u32(0x89ABCDEF,  0x00000000) == 0x00000000 ? 1 : -1];
+char pdep32_1[_pdep_u32(0x89ABCDEF,  0x000000F0) == 0x000000F0 ? 1 : -1];
+char pdep32_2[_pdep_u32(0x89ABCDEF,  0xF00000F0) == 0xE00000F0 ? 1 : -1];
+char pdep32_3[_pdep_u32(0x89ABCDEF,  0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];
+
+char pext32_0[_pext_u32(0x89ABCDEF,  0x00000000) == 0x00000000 ? 1 : -1];
+char pext32_1[_pext_u32(0x89ABCDEF,  0x000000F0) == 0x0000000E ? 1 : -1];
+char pext32_2[_pext_u32(0x89ABCDEF,  0xF00000F0) == 0x0000008E ? 1 : -1];
+char pext32_3[_pext_u32(0x89ABCDEF,  0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];
+
 #ifdef __x86_64__
 char bzhi64_0[_bzhi_u64(0x0123456789ABCDEFULL,   0) == 0x0000000000000000ULL ? 1 : -1];
 char bzhi64_1[_bzhi_u64(0x0123456789ABCDEFULL,  32) == 0x0000000089ABCDEFULL ? 1 : -1];
 char bzhi64_2[_bzhi_u64(0x0123456789ABCDEFULL,  99) == 0x0123456789ABCDEFULL ? 1 : -1];
 char bzhi64_3[_bzhi_u64(0x0123456789ABCDEFULL, 520) == 0x00000000000000EFULL ? 1 : -1];
+
+char pdep64_0[_pdep_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
+char pdep64_1[_pdep_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x00000000000000F0ULL ? 1 : -1];
+char pdep64_2[_pdep_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0xC00000D0E00000F0ULL ? 1 : -1];
+char pdep64_3[_pdep_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
+
+char pext64_0[_pext_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
+char pext64_1[_pext_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x000000000000000EULL ? 1 : -1];
+char pext64_2[_pext_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0x000000000000068EULL ? 1 : -1];
+char pext64_3[_pext_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
 #endif
 #endif
\ No newline at end of file

Copy link

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 8815328b6bf0f8c5088f6df73ad853a5aba159d9 dd0a2928258eb083799dab03dff3a2f0b8a5f664 --extensions c,h,cpp -- clang/lib/AST/ExprConstant.cpp clang/lib/Headers/bmi2intrin.h clang/test/CodeGen/X86/bmi2-builtins.c
View the diff from clang-format here.
diff --git a/clang/lib/Headers/bmi2intrin.h b/clang/lib/Headers/bmi2intrin.h
index 7b2c2f145b..df2447f3c8 100644
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@@ -74,8 +74,7 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
 ///    The 32-bit mask specifying where to deposit source bits.
 /// \returns The 32-bit result.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u32(unsigned int __X, unsigned int __Y)
-{
+_pdep_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_pdep_si(__X, __Y);
 }
 
@@ -104,8 +103,7 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
 ///    The 32-bit mask specifying which source bits to extract.
 /// \returns The 32-bit result.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u32(unsigned int __X, unsigned int __Y)
-{
+_pext_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_pext_si(__X, __Y);
 }
 
@@ -190,8 +188,7 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
 ///    The 64-bit mask specifying where to deposit source bits.
 /// \returns The 64-bit result.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u64(unsigned long long __X, unsigned long long __Y)
-{
+_pdep_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_pdep_di(__X, __Y);
 }
 
@@ -220,8 +217,7 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
 ///    The 64-bit mask specifying which source bits to extract.
 /// \returns The 64-bit result.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u64(unsigned long long __X, unsigned long long __Y)
-{
+_pext_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_pext_di(__X, __Y);
 }
 

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@RKSimon RKSimon merged commit 271dc4a into llvm:main Oct 1, 2024
13 of 14 checks passed
@RKSimon RKSimon deleted the x86-constexpr-pdep-pext branch October 1, 2024 09:35
Sterling-Augustine pushed a commit to Sterling-Augustine/llvm-project that referenced this pull request Oct 3, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants