Skip to content

Commit 93af9d6

Browse files
authored
[clang][x86] Add constexpr support for LZCNT/TZCNT intrinsics (#110499)
1 parent 9177e81 commit 93af9d6

File tree

8 files changed

+100
-8
lines changed

8 files changed

+100
-8
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,12 @@ X86 Support
511511
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
512512
``*_(mask(z)))_minmax_s[s|d|h]``.
513513

514+
- The following bit manipulation intrinsics can now be used in constant expressions:
515+
all lzcnt intrinsics in lzcntintrin.h
516+
all bextr intrinsics in bmiintrin.h
517+
all tzcnt intrinsics in bmiintrin.h
518+
all bextr intrinsics in tbmintrin.h
519+
514520
Arm and AArch64 Support
515521
^^^^^^^^^^^^^^^^^^^^^^^
516522

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -551,13 +551,13 @@ TARGET_BUILTIN(__builtin_ia32_rdseed16_step, "UiUs*", "n", "rdseed")
551551
TARGET_BUILTIN(__builtin_ia32_rdseed32_step, "UiUi*", "n", "rdseed")
552552

553553
// LZCNT
554-
TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "nc", "lzcnt")
555-
TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "nc", "lzcnt")
554+
TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "ncE", "lzcnt")
555+
TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "ncE", "lzcnt")
556556

557557
// BMI
558558
TARGET_BUILTIN(__builtin_ia32_bextr_u32, "UiUiUi", "ncE", "bmi")
559-
TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "nc", "")
560-
TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "nc", "")
559+
TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "ncE", "")
560+
TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")
561561

562562
// BMI2
563563
TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "nc", "bmi2")

clang/include/clang/Basic/BuiltinsX86_64.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
7070
TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
7171
TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
7272
TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
73-
TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
73+
TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
7474
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
75-
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
75+
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
7676
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
7777
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
7878
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")

clang/lib/AST/ExprConstant.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13486,6 +13486,24 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1348613486
Result &= llvm::maskTrailingOnes<uint64_t>(Length);
1348713487
return Success(Result, E);
1348813488
}
13489+
13490+
case clang::X86::BI__builtin_ia32_lzcnt_u16:
13491+
case clang::X86::BI__builtin_ia32_lzcnt_u32:
13492+
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
13493+
APSInt Val;
13494+
if (!EvaluateInteger(E->getArg(0), Val, Info))
13495+
return false;
13496+
return Success(Val.countLeadingZeros(), E);
13497+
}
13498+
13499+
case clang::X86::BI__builtin_ia32_tzcnt_u16:
13500+
case clang::X86::BI__builtin_ia32_tzcnt_u32:
13501+
case clang::X86::BI__builtin_ia32_tzcnt_u64: {
13502+
APSInt Val;
13503+
if (!EvaluateInteger(E->getArg(0), Val, Info))
13504+
return false;
13505+
return Success(Val.countTrailingZeros(), E);
13506+
}
1348913507
}
1349013508
}
1349113509

clang/lib/Headers/bmiintrin.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@
1717
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
1818
instruction behaves as BSF on non-BMI targets, there is code that expects
1919
to use it as a potentially faster version of BSF. */
20+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
21+
#define __RELAXED_FN_ATTRS \
22+
__attribute__((__always_inline__, __nodebug__)) constexpr
23+
#else
2024
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
25+
#endif
2126

2227
/// Counts the number of trailing zero bits in the operand.
2328
///

clang/lib/Headers/lzcntintrin.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,13 @@
1515
#define __LZCNTINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18-
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS \
20+
__attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
21+
#else
22+
#define __DEFAULT_FN_ATTRS \
23+
__attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
24+
#endif
1925

2026
#ifndef _MSC_VER
2127
/// Counts the number of leading zero bits in the operand.

clang/test/CodeGen/X86/bmi-builtins.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,26 @@ char bextr32_6[_bextr_u32(0x00000000, 0x00000000, 0x00000000) == 0x00000000 ? 1
249249
char bextr32_7[_bextr_u32(0x000003F0, 0xFFFFFF04, 0xFFFFFF10) == 0x0000003F ? 1 : -1];
250250
char bextr32_8[_bextr_u32(0x000003F0, 0xFFFFFF08, 0xFFFFFF30) == 0x00000003 ? 1 : -1];
251251

252+
char tzcntu16_0[__tzcnt_u16(0x0000) == 16 ? 1 : -1];
253+
char tzcntu16_1[__tzcnt_u16(0x0001) == 0 ? 1 : -1];
254+
char tzcntu16_2[__tzcnt_u16(0x0010) == 4 ? 1 : -1];
255+
256+
char tzcnt2u16_0[_tzcnt_u16(0x0000) == 16 ? 1 : -1];
257+
char tzcnt2u16_1[_tzcnt_u16(0x0001) == 0 ? 1 : -1];
258+
char tzcnt2u16_2[_tzcnt_u16(0x0010) == 4 ? 1 : -1];
259+
260+
char tzcntu32_0[__tzcnt_u32(0x00000000) == 32 ? 1 : -1];
261+
char tzcntu32_1[__tzcnt_u32(0x00000001) == 0 ? 1 : -1];
262+
char tzcntu32_2[__tzcnt_u32(0x00000080) == 7 ? 1 : -1];
263+
264+
char tzcnt2u32_0[_tzcnt_u32(0x00000000) == 32 ? 1 : -1];
265+
char tzcnt2u32_1[_tzcnt_u32(0x00000001) == 0 ? 1 : -1];
266+
char tzcnt2u32_2[_tzcnt_u32(0x00000080) == 7 ? 1 : -1];
267+
268+
char tzcnt3u32_0[_mm_tzcnt_32(0x00000000) == 32 ? 1 : -1];
269+
char tzcnt3u32_1[_mm_tzcnt_32(0x00000001) == 0 ? 1 : -1];
270+
char tzcnt3u32_2[_mm_tzcnt_32(0x00000080) == 7 ? 1 : -1];
271+
252272
#ifdef __x86_64__
253273
char bextr64_0[__bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
254274
char bextr64_1[__bextr_u64(0xF000000000000001ULL, 0x0000000000004001ULL) == 0x7800000000000000ULL ? 1 : -1];
@@ -261,5 +281,17 @@ char bextr64_5[_bextr2_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFF1001ULL) == 0x00
261281
char bextr64_6[_bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
262282
char bextr64_7[_bextr_u64(0xF000000000000001ULL, 0x0000000000000001ULL, 0x0000000000000040ULL) == 0x7800000000000000ULL ? 1 : -1];
263283
char bextr64_8[_bextr_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF10ULL) == 0x0000000000000000ULL ? 1 : -1];
284+
285+
char tzcntu64_0[__tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
286+
char tzcntu64_1[__tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
287+
char tzcntu64_2[__tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
288+
289+
char tzcnt2u64_0[_tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
290+
char tzcnt2u64_1[_tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
291+
char tzcnt2u64_2[_tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
292+
293+
char tzcnt3u64_0[_mm_tzcnt_64(0x0000000000000000ULL) == 64 ? 1 : -1];
294+
char tzcnt3u64_1[_mm_tzcnt_64(0x0000000000000001ULL) == 0 ? 1 : -1];
295+
char tzcnt3u64_2[_mm_tzcnt_64(0x0000000800000000ULL) == 35 ? 1 : -1];
264296
#endif
265297
#endif

clang/test/CodeGen/X86/lzcnt-builtins.c

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
1+
// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
2+
// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
23

34

45
#include <immintrin.h>
@@ -32,3 +33,27 @@ unsigned long long test__lzcnt_u64(unsigned long long __X)
3233
// CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
3334
return _lzcnt_u64(__X);
3435
}
36+
37+
38+
// Test constexpr handling.
39+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
40+
char lzcnt16_0[__lzcnt16(0x0000) == 16 ? 1 : -1];
41+
char lzcnt16_1[__lzcnt16(0x8000) == 0 ? 1 : -1];
42+
char lzcnt16_2[__lzcnt16(0x0010) == 11 ? 1 : -1];
43+
44+
char lzcnt32_0[__lzcnt32(0x00000000) == 32 ? 1 : -1];
45+
char lzcnt32_1[__lzcnt32(0x80000000) == 0 ? 1 : -1];
46+
char lzcnt32_2[__lzcnt32(0x00000010) == 27 ? 1 : -1];
47+
48+
char lzcnt64_0[__lzcnt64(0x0000000000000000ULL) == 64 ? 1 : -1];
49+
char lzcnt64_1[__lzcnt64(0x8000000000000000ULL) == 0 ? 1 : -1];
50+
char lzcnt64_2[__lzcnt64(0x0000000100000000ULL) == 31 ? 1 : -1];
51+
52+
char lzcntu32_0[_lzcnt_u32(0x00000000) == 32 ? 1 : -1];
53+
char lzcntu32_1[_lzcnt_u32(0x80000000) == 0 ? 1 : -1];
54+
char lzcntu32_2[_lzcnt_u32(0x00000010) == 27 ? 1 : -1];
55+
56+
char lzcntu64_0[_lzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
57+
char lzcntu64_1[_lzcnt_u64(0x8000000000000000ULL) == 0 ? 1 : -1];
58+
char lzcntu64_2[_lzcnt_u64(0x0000000100000000ULL) == 31 ? 1 : -1];
59+
#endif

0 commit comments

Comments
 (0)