Skip to content

[clang][x86] Add constexpr support for PDEP/PEXT intrinsics #110535

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,8 @@ X86 Support
all bextr intrinsics in bmiintrin.h
all tzcnt intrinsics in bmiintrin.h
all bzhi intrinsics in bmi2intrin.h
all pdep intrinsics in bmi2intrin.h
all pext intrinsics in bmi2intrin.h
all intrinsics in tbmintrin.h

Arm and AArch64 Support
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,8 @@ TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")

// BMI2
TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "ncE", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_si, "UiUiUi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_si, "UiUiUi", "ncE", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "ncE", "bmi2")

// TBM
TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "ncE", "tbm")
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86_64.def
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "ncE", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "ncE", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "ncE", "bmi2")
TARGET_BUILTIN(__builtin_ia32_bextri_u64, "UOiUOiIUOi", "ncE", "tbm")
TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcUOiUiIUi", "n", "lwp")
TARGET_BUILTIN(__builtin_ia32_lwpval64, "vUOiUiIUi", "n", "lwp")
Expand Down
30 changes: 30 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13518,6 +13518,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return false;
return Success(Val.countTrailingZeros(), E);
}

// Constant-evaluate PDEP (parallel bit deposit) for the 32- and 64-bit
// builtins: successive low-order bits of the first argument are written, in
// order, to the positions where the mask (second argument) has a set bit.
// Result bits at positions where the mask is clear remain zero.
case clang::X86::BI__builtin_ia32_pdep_si:
case clang::X86::BI__builtin_ia32_pdep_di: {
APSInt Val, Msk;
// Give up (return false) if either argument fails integer evaluation.
if (!EvaluateInteger(E->getArg(0), Val, Info) ||
!EvaluateInteger(E->getArg(1), Msk, Info))
return false;

unsigned BitWidth = Val.getBitWidth();
APInt Result = APInt::getZero(BitWidth);
// I visits every bit position of the mask; P is the index of the next
// source bit to consume and only advances when the mask bit at I is set.
for (unsigned I = 0, P = 0; I != BitWidth; ++I)
if (Msk[I])
Result.setBitVal(I, Val[P++]);
return Success(Result, E);
}

// Constant-evaluate PEXT (parallel bit extract) for the 32- and 64-bit
// builtins: the bits of the first argument found at positions where the mask
// (second argument) has a set bit are packed contiguously into the low-order
// bits of the result. The remaining high-order result bits stay zero.
case clang::X86::BI__builtin_ia32_pext_si:
case clang::X86::BI__builtin_ia32_pext_di: {
APSInt Val, Msk;
// Give up (return false) if either argument fails integer evaluation.
if (!EvaluateInteger(E->getArg(0), Val, Info) ||
!EvaluateInteger(E->getArg(1), Msk, Info))
return false;

unsigned BitWidth = Val.getBitWidth();
APInt Result = APInt::getZero(BitWidth);
// I visits every bit position of the source/mask; P is the next free
// destination bit and only advances when the mask bit at I is set.
for (unsigned I = 0, P = 0; I != BitWidth; ++I)
if (Msk[I])
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
}
}

Expand Down
9 changes: 5 additions & 4 deletions clang/lib/Headers/bmi2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
/// \param __Y
/// The 32-bit mask specifying where to deposit source bits.
/// \returns The 32-bit result.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
_pdep_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pdep_si(__X, __Y);
Expand Down Expand Up @@ -103,7 +103,7 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
/// \param __Y
/// The 32-bit mask specifying which source bits to extract.
/// \returns The 32-bit result.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
_pext_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pext_si(__X, __Y);
Expand Down Expand Up @@ -189,7 +189,7 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
/// \param __Y
/// The 64-bit mask specifying where to deposit source bits.
/// \returns The 64-bit result.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
_pdep_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pdep_di(__X, __Y);
Expand Down Expand Up @@ -219,7 +219,7 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
/// \param __Y
/// The 64-bit mask specifying which source bits to extract.
/// \returns The 64-bit result.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
_pext_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pext_di(__X, __Y);
Expand Down Expand Up @@ -257,5 +257,6 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
#endif /* __x86_64__ */

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CONSTEXPR

#endif /* __BMI2INTRIN_H */
20 changes: 20 additions & 0 deletions clang/test/CodeGen/X86/bmi2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,30 @@ char bzhi32_3[_bzhi_u32(0x89ABCDEF, 32) == 0x89ABCDEF ? 1 : -1];
char bzhi32_4[_bzhi_u32(0x89ABCDEF, 99) == 0x89ABCDEF ? 1 : -1];
char bzhi32_5[_bzhi_u32(0x89ABCDEF, 260) == 0x0000000F ? 1 : -1];

// Constant-evaluation checks for _pdep_u32: each array gets size 1 when the
// comparison holds and size -1 (an invalid array size) when it does not.
// Cases cover an empty mask, a single nibble, two separated nibbles, and a
// full mask.
char pdep32_0[_pdep_u32(0x89ABCDEF, 0x00000000) == 0x00000000 ? 1 : -1];
char pdep32_1[_pdep_u32(0x89ABCDEF, 0x000000F0) == 0x000000F0 ? 1 : -1];
char pdep32_2[_pdep_u32(0x89ABCDEF, 0xF00000F0) == 0xE00000F0 ? 1 : -1];
char pdep32_3[_pdep_u32(0x89ABCDEF, 0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];

// Constant-evaluation checks for _pext_u32, using the same source value and
// the same four masks as the _pdep_u32 checks.
char pext32_0[_pext_u32(0x89ABCDEF, 0x00000000) == 0x00000000 ? 1 : -1];
char pext32_1[_pext_u32(0x89ABCDEF, 0x000000F0) == 0x0000000E ? 1 : -1];
char pext32_2[_pext_u32(0x89ABCDEF, 0xF00000F0) == 0x0000008E ? 1 : -1];
char pext32_3[_pext_u32(0x89ABCDEF, 0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];

#ifdef __x86_64__
char bzhi64_0[_bzhi_u64(0x0123456789ABCDEFULL, 0) == 0x0000000000000000ULL ? 1 : -1];
char bzhi64_1[_bzhi_u64(0x0123456789ABCDEFULL, 32) == 0x0000000089ABCDEFULL ? 1 : -1];
char bzhi64_2[_bzhi_u64(0x0123456789ABCDEFULL, 99) == 0x0123456789ABCDEFULL ? 1 : -1];
char bzhi64_3[_bzhi_u64(0x0123456789ABCDEFULL, 520) == 0x00000000000000EFULL ? 1 : -1];

// Constant-evaluation checks for _pdep_u64: each array gets size 1 when the
// comparison holds and size -1 (an invalid array size) when it does not.
// Cases cover an empty mask, a single nibble, four separated nibbles
// spanning both 32-bit halves, and a full mask.
char pdep64_0[_pdep_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char pdep64_1[_pdep_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x00000000000000F0ULL ? 1 : -1];
char pdep64_2[_pdep_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0xC00000D0E00000F0ULL ? 1 : -1];
char pdep64_3[_pdep_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];

// Constant-evaluation checks for _pext_u64, using the same source value and
// the same four masks as the _pdep_u64 checks.
char pext64_0[_pext_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char pext64_1[_pext_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x000000000000000EULL ? 1 : -1];
char pext64_2[_pext_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0x000000000000068EULL ? 1 : -1];
char pext64_3[_pext_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
#endif
#endif
Loading