Skip to content

[clang][x86] Add constexpr support for LZCNT/TZCNT intrinsics #110499

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,12 @@ X86 Support
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
``*_(mask(z)))_minmax_s[s|d|h]``.

- The following bit manipulation intrinsics can now be used in constant expressions:
all lzcnt intrinsics in lzcntintrin.h
all bextr intrinsics in bmiintrin.h
all tzcnt intrinsics in bmiintrin.h
all bextr intrinsics in tbmintrin.h

Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -551,13 +551,13 @@ TARGET_BUILTIN(__builtin_ia32_rdseed16_step, "UiUs*", "n", "rdseed")
TARGET_BUILTIN(__builtin_ia32_rdseed32_step, "UiUi*", "n", "rdseed")

// LZCNT
TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "nc", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "nc", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "ncE", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "ncE", "lzcnt")

// BMI
TARGET_BUILTIN(__builtin_ia32_bextr_u32, "UiUiUi", "ncE", "bmi")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "nc", "")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "nc", "")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "ncE", "")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")

// BMI2
TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "nc", "bmi2")
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86_64.def
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
Expand Down
18 changes: 18 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13486,6 +13486,24 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result &= llvm::maskTrailingOnes<uint64_t>(Length);
return Success(Result, E);
}

case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
return Success(Val.countLeadingZeros(), E);
}

case clang::X86::BI__builtin_ia32_tzcnt_u16:
case clang::X86::BI__builtin_ia32_tzcnt_u32:
case clang::X86::BI__builtin_ia32_tzcnt_u64: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
return Success(Val.countTrailingZeros(), E);
}
}
}

Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Headers/bmiintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
instruction behaves as BSF on non-BMI targets, there is code that expects
to use it as a potentially faster version of BSF. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __RELAXED_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__)) constexpr
#else
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#endif

/// Counts the number of trailing zero bits in the operand.
///
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/Headers/lzcntintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@
#define __LZCNTINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
#else
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
#endif

#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
Expand Down
32 changes: 32 additions & 0 deletions clang/test/CodeGen/X86/bmi-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,26 @@ char bextr32_6[_bextr_u32(0x00000000, 0x00000000, 0x00000000) == 0x00000000 ? 1
char bextr32_7[_bextr_u32(0x000003F0, 0xFFFFFF04, 0xFFFFFF10) == 0x0000003F ? 1 : -1];
char bextr32_8[_bextr_u32(0x000003F0, 0xFFFFFF08, 0xFFFFFF30) == 0x00000003 ? 1 : -1];

char tzcntu16_0[__tzcnt_u16(0x0000) == 16 ? 1 : -1];
char tzcntu16_1[__tzcnt_u16(0x0001) == 0 ? 1 : -1];
char tzcntu16_2[__tzcnt_u16(0x0010) == 4 ? 1 : -1];

char tzcnt2u16_0[_tzcnt_u16(0x0000) == 16 ? 1 : -1];
char tzcnt2u16_1[_tzcnt_u16(0x0001) == 0 ? 1 : -1];
char tzcnt2u16_2[_tzcnt_u16(0x0010) == 4 ? 1 : -1];

char tzcntu32_0[__tzcnt_u32(0x00000000) == 32 ? 1 : -1];
char tzcntu32_1[__tzcnt_u32(0x00000001) == 0 ? 1 : -1];
char tzcntu32_2[__tzcnt_u32(0x00000080) == 7 ? 1 : -1];

char tzcnt2u32_0[_tzcnt_u32(0x00000000) == 32 ? 1 : -1];
char tzcnt2u32_1[_tzcnt_u32(0x00000001) == 0 ? 1 : -1];
char tzcnt2u32_2[_tzcnt_u32(0x00000080) == 7 ? 1 : -1];

char tzcnt3u32_0[_mm_tzcnt_32(0x00000000) == 32 ? 1 : -1];
char tzcnt3u32_1[_mm_tzcnt_32(0x00000001) == 0 ? 1 : -1];
char tzcnt3u32_2[_mm_tzcnt_32(0x00000080) == 7 ? 1 : -1];

#ifdef __x86_64__
char bextr64_0[__bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char bextr64_1[__bextr_u64(0xF000000000000001ULL, 0x0000000000004001ULL) == 0x7800000000000000ULL ? 1 : -1];
Expand All @@ -261,5 +281,17 @@ char bextr64_5[_bextr2_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFF1001ULL) == 0x00
char bextr64_6[_bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char bextr64_7[_bextr_u64(0xF000000000000001ULL, 0x0000000000000001ULL, 0x0000000000000040ULL) == 0x7800000000000000ULL ? 1 : -1];
char bextr64_8[_bextr_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF10ULL) == 0x0000000000000000ULL ? 1 : -1];

char tzcntu64_0[__tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
char tzcntu64_1[__tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
char tzcntu64_2[__tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];

char tzcnt2u64_0[_tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
char tzcnt2u64_1[_tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
char tzcnt2u64_2[_tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];

char tzcnt3u64_0[_mm_tzcnt_64(0x0000000000000000ULL) == 64 ? 1 : -1];
char tzcnt3u64_1[_mm_tzcnt_64(0x0000000000000001ULL) == 0 ? 1 : -1];
char tzcnt3u64_2[_mm_tzcnt_64(0x0000000800000000ULL) == 35 ? 1 : -1];
#endif
#endif
27 changes: 26 additions & 1 deletion clang/test/CodeGen/X86/lzcnt-builtins.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s


#include <immintrin.h>
Expand Down Expand Up @@ -32,3 +33,27 @@ unsigned long long test__lzcnt_u64(unsigned long long __X)
// CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
return _lzcnt_u64(__X);
}


// Test constexpr handling.
#if defined(__cplusplus) && (__cplusplus >= 201103L)
char lzcnt16_0[__lzcnt16(0x0000) == 16 ? 1 : -1];
char lzcnt16_1[__lzcnt16(0x8000) == 0 ? 1 : -1];
char lzcnt16_2[__lzcnt16(0x0010) == 11 ? 1 : -1];

char lzcnt32_0[__lzcnt32(0x00000000) == 32 ? 1 : -1];
char lzcnt32_1[__lzcnt32(0x80000000) == 0 ? 1 : -1];
char lzcnt32_2[__lzcnt32(0x00000010) == 27 ? 1 : -1];

char lzcnt64_0[__lzcnt64(0x0000000000000000ULL) == 64 ? 1 : -1];
char lzcnt64_1[__lzcnt64(0x8000000000000000ULL) == 0 ? 1 : -1];
char lzcnt64_2[__lzcnt64(0x0000000100000000ULL) == 31 ? 1 : -1];

char lzcntu32_0[_lzcnt_u32(0x00000000) == 32 ? 1 : -1];
char lzcntu32_1[_lzcnt_u32(0x80000000) == 0 ? 1 : -1];
char lzcntu32_2[_lzcnt_u32(0x00000010) == 27 ? 1 : -1];

char lzcntu64_0[_lzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
char lzcntu64_1[_lzcnt_u64(0x8000000000000000ULL) == 0 ? 1 : -1];
char lzcntu64_2[_lzcnt_u64(0x0000000100000000ULL) == 31 ? 1 : -1];
#endif
Loading