Skip to content

Commit cb52e8e

Browse files
authored
[clang][x86] Add constexpr support for MULX intrinsics (#110654)
With this patch, all BMI2 intrinsics can now be used in constant expressions.
1 parent d0f6777 commit cb52e8e

File tree

3 files changed

+43
-28
lines changed

3 files changed

+43
-28
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -524,7 +524,7 @@ X86 Support
524524

525525
- All intrinsics in bmiintrin.h can now be used in constant expressions.
526526

527-
- All bzhi/pdep/pext intrinsics in bmi2intrin.h can now be used in constant expressions.
527+
- All intrinsics in bmi2intrin.h can now be used in constant expressions.
528528

529529
- All intrinsics in tbmintrin.h can now be used in constant expressions.
530530

clang/lib/Headers/bmi2intrin.h

Lines changed: 18 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,12 @@
1515
#define __BMI2INTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18-
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
19-
2018
#if defined(__cplusplus) && (__cplusplus >= 201103L)
21-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
19+
#define __DEFAULT_FN_ATTRS \
20+
__attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr
2221
#else
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
22+
#define __DEFAULT_FN_ATTRS \
23+
__attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
2424
#endif
2525

2626
/// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
@@ -43,9 +43,8 @@
4343
/// \param __Y
4444
/// The lower 8 bits specify the bit number of the lowest bit to zero.
4545
/// \returns The partially zeroed 32-bit value.
46-
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
47-
_bzhi_u32(unsigned int __X, unsigned int __Y)
48-
{
46+
static __inline__ unsigned int __DEFAULT_FN_ATTRS
47+
_bzhi_u32(unsigned int __X, unsigned int __Y) {
4948
return __builtin_ia32_bzhi_si(__X, __Y);
5049
}
5150

@@ -73,9 +72,8 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
7372
/// \param __Y
7473
/// The 32-bit mask specifying where to deposit source bits.
7574
/// \returns The 32-bit result.
76-
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
77-
_pdep_u32(unsigned int __X, unsigned int __Y)
78-
{
75+
static __inline__ unsigned int __DEFAULT_FN_ATTRS
76+
_pdep_u32(unsigned int __X, unsigned int __Y) {
7977
return __builtin_ia32_pdep_si(__X, __Y);
8078
}
8179

@@ -103,9 +101,8 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
103101
/// \param __Y
104102
/// The 32-bit mask specifying which source bits to extract.
105103
/// \returns The 32-bit result.
106-
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
107-
_pext_u32(unsigned int __X, unsigned int __Y)
108-
{
104+
static __inline__ unsigned int __DEFAULT_FN_ATTRS
105+
_pext_u32(unsigned int __X, unsigned int __Y) {
109106
return __builtin_ia32_pext_si(__X, __Y);
110107
}
111108

@@ -130,8 +127,7 @@ _pext_u32(unsigned int __X, unsigned int __Y)
130127
/// A pointer to memory for storing the upper half of the product.
131128
/// \returns The lower half of the product.
132129
static __inline__ unsigned int __DEFAULT_FN_ATTRS
133-
_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
134-
{
130+
_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
135131
unsigned long long __res = (unsigned long long) __X * __Y;
136132
*__P = (unsigned int)(__res >> 32);
137133
return (unsigned int)__res;
@@ -159,9 +155,8 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
159155
/// \param __Y
160156
/// The lower 8 bits specify the bit number of the lowest bit to zero.
161157
/// \returns The partially zeroed 64-bit value.
162-
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
163-
_bzhi_u64(unsigned long long __X, unsigned long long __Y)
164-
{
158+
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
159+
_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
165160
return __builtin_ia32_bzhi_di(__X, __Y);
166161
}
167162

@@ -189,9 +184,8 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
189184
/// \param __Y
190185
/// The 64-bit mask specifying where to deposit source bits.
191186
/// \returns The 64-bit result.
192-
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
193-
_pdep_u64(unsigned long long __X, unsigned long long __Y)
194-
{
187+
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
188+
_pdep_u64(unsigned long long __X, unsigned long long __Y) {
195189
return __builtin_ia32_pdep_di(__X, __Y);
196190
}
197191

@@ -219,9 +213,8 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
219213
/// \param __Y
220214
/// The 64-bit mask specifying which source bits to extract.
221215
/// \returns The 64-bit result.
222-
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
223-
_pext_u64(unsigned long long __X, unsigned long long __Y)
224-
{
216+
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
217+
_pext_u64(unsigned long long __X, unsigned long long __Y) {
225218
return __builtin_ia32_pext_di(__X, __Y);
226219
}
227220

@@ -247,8 +240,7 @@ _pext_u64(unsigned long long __X, unsigned long long __Y)
247240
/// \returns The lower half of the product.
248241
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
249242
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
250-
unsigned long long *__P)
251-
{
243+
unsigned long long *__P) {
252244
unsigned __int128 __res = (unsigned __int128) __X * __Y;
253245
*__P = (unsigned long long) (__res >> 64);
254246
return (unsigned long long) __res;
@@ -257,6 +249,5 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
257249
#endif /* __x86_64__ */
258250

259251
#undef __DEFAULT_FN_ATTRS
260-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
261252

262253
#endif /* __BMI2INTRIN_H */

clang/test/CodeGen/X86/bmi2-builtins.c

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,18 @@ char pext32_1[_pext_u32(0x89ABCDEF, 0x000000F0) == 0x0000000E ? 1 : -1];
7171
char pext32_2[_pext_u32(0x89ABCDEF, 0xF00000F0) == 0x0000008E ? 1 : -1];
7272
char pext32_3[_pext_u32(0x89ABCDEF, 0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];
7373

74+
constexpr unsigned long long
75+
test_mulx_u32(unsigned int X, unsigned int Y)
76+
{
77+
unsigned int H{};
78+
return _mulx_u32(X, Y, &H) | ((unsigned long long) H << 32);
79+
}
80+
81+
void mulxu32() {
82+
constexpr unsigned X = 0x89ABCDEF, Y = 0x01234567;
83+
static_assert(test_mulx_u32(X,Y) == ((unsigned long long)X * Y));
84+
}
85+
7486
#ifdef __x86_64__
7587
char bzhi64_0[_bzhi_u64(0x0123456789ABCDEFULL, 0) == 0x0000000000000000ULL ? 1 : -1];
7688
char bzhi64_1[_bzhi_u64(0x0123456789ABCDEFULL, 32) == 0x0000000089ABCDEFULL ? 1 : -1];
@@ -86,5 +98,17 @@ char pext64_0[_pext_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x00000
8698
char pext64_1[_pext_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x000000000000000EULL ? 1 : -1];
8799
char pext64_2[_pext_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0x000000000000068EULL ? 1 : -1];
88100
char pext64_3[_pext_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
101+
102+
constexpr unsigned __int128
103+
test_mulx_u64(unsigned long long X, unsigned long long Y)
104+
{
105+
unsigned long long H{};
106+
return _mulx_u64(X, Y, &H) | ((unsigned __int128) H << 64);
107+
}
108+
109+
void mulxu64() {
110+
constexpr unsigned long long X = 0x0123456789ABCDEFULL, Y = 0xFEDCBA9876543210ULL;
111+
static_assert(test_mulx_u64(X,Y) == ((unsigned __int128)X * Y));
112+
}
89113
#endif
90114
#endif

0 commit comments

Comments
 (0)