-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][x86] Add constexpr support for LZCNT/TZCNT intrinsics #110499
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Simon Pilgrim (RKSimon) ChangesFull diff: https://github.com/llvm/llvm-project/pull/110499.diff 8 Files Affected:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 35c31452cef411..d48601db023553 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -511,6 +511,12 @@ X86 Support
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
``*_(mask(z)))_minmax_s[s|d|h]``.
+- The following bit manipulation intrinsics can now be used in constant expressions:
+ all lzcnt intrinsics in lzcntintrin.h
+ all bextr intrinsics in bmiintrin.h
+ all tzcnt intrinsics in bmiintrin.h
+ all bextr intrinsics in tbmintrin.h
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4eb9bfbdd1735..e68dcd922acbff 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -551,13 +551,13 @@ TARGET_BUILTIN(__builtin_ia32_rdseed16_step, "UiUs*", "n", "rdseed")
TARGET_BUILTIN(__builtin_ia32_rdseed32_step, "UiUi*", "n", "rdseed")
// LZCNT
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "nc", "lzcnt")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "ncE", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "ncE", "lzcnt")
// BMI
TARGET_BUILTIN(__builtin_ia32_bextr_u32, "UiUiUi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "nc", "")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "ncE", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")
// BMI2
TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "nc", "bmi2")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 81fd46ee6d1663..5f4252c91b8847 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -70,9 +70,9 @@ TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 51856693944761..834a7a1e2eb239 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13486,6 +13486,24 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result &= llvm::maskTrailingOnes<uint64_t>(Length);
return Success(Result, E);
}
+
+ case clang::X86::BI__builtin_ia32_lzcnt_u16:
+ case clang::X86::BI__builtin_ia32_lzcnt_u32:
+ case clang::X86::BI__builtin_ia32_lzcnt_u64: {
+ APSInt Val;
+ if (!EvaluateInteger(E->getArg(0), Val, Info))
+ return false;
+ return Success(Val.countLeadingZeros(), E);
+ }
+
+ case clang::X86::BI__builtin_ia32_tzcnt_u16:
+ case clang::X86::BI__builtin_ia32_tzcnt_u32:
+ case clang::X86::BI__builtin_ia32_tzcnt_u64: {
+ APSInt Val;
+ if (!EvaluateInteger(E->getArg(0), Val, Info))
+ return false;
+ return Success(Val.countTrailingZeros(), E);
+ }
}
}
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h
index 72c84d12c0e520..b0f44367633ce9 100644
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@@ -17,7 +17,11 @@
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
instruction behaves as BSF on non-BMI targets, there is code that expects
to use it as a potentially faster version of BSF. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) constexpr
+#else
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#endif
/// Counts the number of trailing zero bits in the operand.
///
diff --git a/clang/lib/Headers/lzcntintrin.h b/clang/lib/Headers/lzcntintrin.h
index f4ddce9d0e6834..d746d91d9fe5f9 100644
--- a/clang/lib/Headers/lzcntintrin.h
+++ b/clang/lib/Headers/lzcntintrin.h
@@ -15,7 +15,11 @@
#define __LZCNTINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
+#else
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
+#endif
#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c
index 530d38dcf342c0..6c0b2c440ea081 100644
--- a/clang/test/CodeGen/X86/bmi-builtins.c
+++ b/clang/test/CodeGen/X86/bmi-builtins.c
@@ -249,6 +249,26 @@ char bextr32_6[_bextr_u32(0x00000000, 0x00000000, 0x00000000) == 0x00000000 ? 1
char bextr32_7[_bextr_u32(0x000003F0, 0xFFFFFF04, 0xFFFFFF10) == 0x0000003F ? 1 : -1];
char bextr32_8[_bextr_u32(0x000003F0, 0xFFFFFF08, 0xFFFFFF30) == 0x00000003 ? 1 : -1];
+char tzcntu16_0[__tzcnt_u16(0x0000) == 16 ? 1 : -1];
+char tzcntu16_1[__tzcnt_u16(0x0001) == 0 ? 1 : -1];
+char tzcntu16_2[__tzcnt_u16(0x0010) == 4 ? 1 : -1];
+
+char tzcnt2u16_0[_tzcnt_u16(0x0000) == 16 ? 1 : -1];
+char tzcnt2u16_1[_tzcnt_u16(0x0001) == 0 ? 1 : -1];
+char tzcnt2u16_2[_tzcnt_u16(0x0010) == 4 ? 1 : -1];
+
+char tzcntu32_0[__tzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char tzcntu32_1[__tzcnt_u32(0x00000001) == 0 ? 1 : -1];
+char tzcntu32_2[__tzcnt_u32(0x00000080) == 7 ? 1 : -1];
+
+char tzcnt2u32_0[_tzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char tzcnt2u32_1[_tzcnt_u32(0x00000001) == 0 ? 1 : -1];
+char tzcnt2u32_2[_tzcnt_u32(0x00000080) == 7 ? 1 : -1];
+
+char tzcnt3u32_0[_mm_tzcnt_32(0x00000000) == 32 ? 1 : -1];
+char tzcnt3u32_1[_mm_tzcnt_32(0x00000001) == 0 ? 1 : -1];
+char tzcnt3u32_2[_mm_tzcnt_32(0x00000080) == 7 ? 1 : -1];
+
#ifdef __x86_64__
char bextr64_0[__bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char bextr64_1[__bextr_u64(0xF000000000000001ULL, 0x0000000000004001ULL) == 0x7800000000000000ULL ? 1 : -1];
@@ -261,5 +281,17 @@ char bextr64_5[_bextr2_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFF1001ULL) == 0x00
char bextr64_6[_bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char bextr64_7[_bextr_u64(0xF000000000000001ULL, 0x0000000000000001ULL, 0x0000000000000040ULL) == 0x7800000000000000ULL ? 1 : -1];
char bextr64_8[_bextr_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF10ULL) == 0x0000000000000000ULL ? 1 : -1];
+
+char tzcntu64_0[__tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcntu64_1[__tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
+char tzcntu64_2[__tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
+
+char tzcnt2u64_0[_tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcnt2u64_1[_tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
+char tzcnt2u64_2[_tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
+
+char tzcnt3u64_0[_mm_tzcnt_64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcnt3u64_1[_mm_tzcnt_64(0x0000000000000001ULL) == 0 ? 1 : -1];
+char tzcnt3u64_2[_mm_tzcnt_64(0x0000000800000000ULL) == 35 ? 1 : -1];
#endif
#endif
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/lzcnt-builtins.c b/clang/test/CodeGen/X86/lzcnt-builtins.c
index 9255207ffaef4f..18ced89fc79b1c 100644
--- a/clang/test/CodeGen/X86/lzcnt-builtins.c
+++ b/clang/test/CodeGen/X86/lzcnt-builtins.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
#include <immintrin.h>
@@ -32,3 +33,27 @@ unsigned long long test__lzcnt_u64(unsigned long long __X)
// CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
return _lzcnt_u64(__X);
}
+
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+char lzcnt16_0[__lzcnt16(0x0000) == 16 ? 1 : -1];
+char lzcnt16_1[__lzcnt16(0x8000) == 0 ? 1 : -1];
+char lzcnt16_2[__lzcnt16(0x0010) == 11 ? 1 : -1];
+
+char lzcnt32_0[__lzcnt32(0x00000000) == 32 ? 1 : -1];
+char lzcnt32_1[__lzcnt32(0x80000000) == 0 ? 1 : -1];
+char lzcnt32_2[__lzcnt32(0x00000010) == 27 ? 1 : -1];
+
+char lzcnt64_0[__lzcnt64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char lzcnt64_1[__lzcnt64(0x8000000000000000ULL) == 0 ? 1 : -1];
+char lzcnt64_2[__lzcnt64(0x0000000100000000ULL) == 31 ? 1 : -1];
+
+char lzcntu32_0[_lzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char lzcntu32_1[_lzcnt_u32(0x80000000) == 0 ? 1 : -1];
+char lzcntu32_2[_lzcnt_u32(0x00000010) == 27 ? 1 : -1];
+
+char lzcntu64_0[_lzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char lzcntu64_1[_lzcnt_u64(0x8000000000000000ULL) == 0 ? 1 : -1];
+char lzcntu64_2[_lzcnt_u64(0x0000000100000000ULL) == 31 ? 1 : -1];
+#endif
\ No newline at end of file
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
phoebewang
approved these changes
Sep 30, 2024
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
backend:X86
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang:headers
Headers provided by Clang, e.g. for intrinsics
clang
Clang issues not falling into any other category
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.