Skip to content

[clang] Warn when builtin names are used outside of invocations #96097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ Clang Frontend Potentially Breaking Changes
$ clang --target=<your target triple> -print-target-triple
<the normalized target triple>

- Clang now issues a deprecation warning when the name of a builtin is used
for anything other than invoking the builtin, e.g. ``struct __is_pointer``.
This affects libstdc++ prior to 14.2, and libc++ prior to 3.5.

- The ``hasTypeLoc`` AST matcher will no longer match a ``classTemplateSpecializationDecl``;
existing uses should switch to ``templateArgumentLoc`` or ``hasAnyTemplateArgumentLoc`` instead.

Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticLexKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ def warn_c99_keyword : Warning<"'%0' is a keyword in C99">,
def warn_c23_keyword : Warning<"'%0' is a keyword in C23">,
InGroup<C23Compat>, DefaultIgnore;

// Emitted by Preprocessor::HandleIdentifier when an identifier flagged as a
// reusable builtin name is not followed by '(' and __has_builtin is not being
// evaluated. %0 is the spelling of the identifier.
def warn_deprecated_builtin_replacement : Warning<
"using the name of the builtin '%0' outside of "
"a builtin invocation is deprecated">, InGroup<KeywordCompat>;

def ext_unterminated_char_or_string : ExtWarn<
"missing terminating %select{'|'\"'}0 character">, InGroup<InvalidPPToken>;
def ext_empty_character : ExtWarn<"empty character constant">,
Expand Down
19 changes: 17 additions & 2 deletions clang/include/clang/Basic/IdentifierTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsFinal : 1;

LLVM_PREFERRED_TYPE(bool)
unsigned IsReusableBuiltinName : 1;

// 21 bits left in a 64-bit word.

// Managed by the language front-end.
Expand All @@ -213,7 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
IsRestrictExpansion(false), IsFinal(false) {}
IsRestrictExpansion(false), IsFinal(false),
IsReusableBuiltinName(false) {}

public:
IdentifierInfo(const IdentifierInfo &) = delete;
Expand Down Expand Up @@ -332,6 +336,16 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
RevertedTokenID = false;
}

/// Return true if this identifier has been flagged as the name of a builtin
/// that may also appear as an ordinary identifier (see
/// setIsReusableBuiltinName()).
bool isReusableBuiltinName() const { return IsReusableBuiltinName; }

/// Set or clear the reusable-builtin-name flag on this identifier.
///
/// Setting the flag forces NeedsHandleIdentifier on; clearing it
/// re-derives NeedsHandleIdentifier via RecomputeNeedsHandleIdentifier().
void setIsReusableBuiltinName(bool Val) {
  IsReusableBuiltinName = Val;
  if (Val)
    NeedsHandleIdentifier = true;
  else
    RecomputeNeedsHandleIdentifier();
}

/// Return the preprocessor keyword ID for this identifier.
///
/// For example, "define" will return tok::pp_define.
Expand Down Expand Up @@ -569,7 +583,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
void RecomputeNeedsHandleIdentifier() {
  // The identifier needs special handling as soon as any one of these
  // per-identifier properties holds; the checks run in this order and stop
  // at the first one that is true.
  const bool RequiresHandling =
      isPoisoned() || hasMacroDefinition() || isExtensionToken() ||
      isFutureCompatKeyword() || isReusableBuiltinName() || isOutOfDate() ||
      isModulesImport();
  NeedsHandleIdentifier = RequiresHandling;
}
};

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,9 @@ class Preprocessor {
/// Whether tokens are being skipped until the through header is seen.
bool SkippingUntilPCHThroughHeader = false;

/// Whether we're evaluating __has_builtin().
bool EvaluatingHasBuiltinMacro = false;

/// \{
/// Cache of macro expanders to reduce malloc traffic.
enum { TokenLexerCacheSize = 8 };
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,6 @@ class Parser : public CodeCompletionHandler {
mutable IdentifierInfo *Ident_import;
mutable IdentifierInfo *Ident_module;

// C++ type trait keywords that can be reverted to identifiers and still be
// used as type traits.
llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits;

std::unique_ptr<PragmaHandler> AlignHandler;
std::unique_ptr<PragmaHandler> GCCVisibilityHandler;
std::unique_ptr<PragmaHandler> OptionsHandler;
Expand Down
75 changes: 75 additions & 0 deletions clang/lib/Basic/IdentifierTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,79 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
return CurStatus;
}

/// Return true if \p TokenCode is the keyword token of one of the type-trait
/// builtins enumerated below, i.e. a builtin whose name is treated as
/// reusable as an ordinary identifier.
///
/// The helper macros are local to this function and are undefined again
/// before it ends so they do not leak into the rest of the translation unit.
static bool isReusableBuiltinName(tok::TokenKind TokenCode) {
#define RTT_JOIN(X, Y) X##Y
// No trailing ';' in the expansion: the ';' at each use site terminates the
// return statement, so no empty statements are produced.
#define REVERTIBLE_TYPE_TRAIT(Name)                                            \
  case RTT_JOIN(tok::kw_, Name):                                               \
    return true

  switch (TokenCode) {
    REVERTIBLE_TYPE_TRAIT(__is_abstract);
    REVERTIBLE_TYPE_TRAIT(__is_aggregate);
    REVERTIBLE_TYPE_TRAIT(__is_arithmetic);
    REVERTIBLE_TYPE_TRAIT(__is_array);
    REVERTIBLE_TYPE_TRAIT(__is_assignable);
    REVERTIBLE_TYPE_TRAIT(__is_base_of);
    REVERTIBLE_TYPE_TRAIT(__is_bounded_array);
    REVERTIBLE_TYPE_TRAIT(__is_class);
    REVERTIBLE_TYPE_TRAIT(__is_complete_type);
    REVERTIBLE_TYPE_TRAIT(__is_compound);
    REVERTIBLE_TYPE_TRAIT(__is_const);
    REVERTIBLE_TYPE_TRAIT(__is_constructible);
    REVERTIBLE_TYPE_TRAIT(__is_convertible);
    REVERTIBLE_TYPE_TRAIT(__is_convertible_to);
    REVERTIBLE_TYPE_TRAIT(__is_destructible);
    REVERTIBLE_TYPE_TRAIT(__is_empty);
    REVERTIBLE_TYPE_TRAIT(__is_enum);
    REVERTIBLE_TYPE_TRAIT(__is_floating_point);
    REVERTIBLE_TYPE_TRAIT(__is_final);
    REVERTIBLE_TYPE_TRAIT(__is_function);
    REVERTIBLE_TYPE_TRAIT(__is_fundamental);
    REVERTIBLE_TYPE_TRAIT(__is_integral);
    REVERTIBLE_TYPE_TRAIT(__is_interface_class);
    REVERTIBLE_TYPE_TRAIT(__is_layout_compatible);
    REVERTIBLE_TYPE_TRAIT(__is_literal);
    REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr);
    REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference);
    REVERTIBLE_TYPE_TRAIT(__is_member_function_pointer);
    REVERTIBLE_TYPE_TRAIT(__is_member_object_pointer);
    REVERTIBLE_TYPE_TRAIT(__is_member_pointer);
    REVERTIBLE_TYPE_TRAIT(__is_nothrow_assignable);
    REVERTIBLE_TYPE_TRAIT(__is_nothrow_constructible);
    REVERTIBLE_TYPE_TRAIT(__is_nothrow_destructible);
    REVERTIBLE_TYPE_TRAIT(__is_nothrow_convertible);
    REVERTIBLE_TYPE_TRAIT(__is_nullptr);
    REVERTIBLE_TYPE_TRAIT(__is_object);
    REVERTIBLE_TYPE_TRAIT(__is_pod);
    REVERTIBLE_TYPE_TRAIT(__is_pointer);
    REVERTIBLE_TYPE_TRAIT(__is_polymorphic);
    REVERTIBLE_TYPE_TRAIT(__is_reference);
    REVERTIBLE_TYPE_TRAIT(__is_referenceable);
    REVERTIBLE_TYPE_TRAIT(__is_rvalue_expr);
    REVERTIBLE_TYPE_TRAIT(__is_rvalue_reference);
    REVERTIBLE_TYPE_TRAIT(__is_same);
    REVERTIBLE_TYPE_TRAIT(__is_scalar);
    REVERTIBLE_TYPE_TRAIT(__is_scoped_enum);
    REVERTIBLE_TYPE_TRAIT(__is_sealed);
    REVERTIBLE_TYPE_TRAIT(__is_signed);
    REVERTIBLE_TYPE_TRAIT(__is_standard_layout);
    REVERTIBLE_TYPE_TRAIT(__is_trivial);
    REVERTIBLE_TYPE_TRAIT(__is_trivially_assignable);
    REVERTIBLE_TYPE_TRAIT(__is_trivially_constructible);
    REVERTIBLE_TYPE_TRAIT(__is_trivially_copyable);
    REVERTIBLE_TYPE_TRAIT(__is_trivially_equality_comparable);
    REVERTIBLE_TYPE_TRAIT(__is_unbounded_array);
    REVERTIBLE_TYPE_TRAIT(__is_union);
    REVERTIBLE_TYPE_TRAIT(__is_unsigned);
    REVERTIBLE_TYPE_TRAIT(__is_void);
    REVERTIBLE_TYPE_TRAIT(__is_volatile);
    REVERTIBLE_TYPE_TRAIT(__reference_binds_to_temporary);
  default:
    // Every other token kind is not a reusable builtin name.
    return false;
  }

#undef REVERTIBLE_TYPE_TRAIT
#undef RTT_JOIN
}

/// AddKeyword - This method is used to associate a token ID with specific
/// identifiers because they are language keywords. This causes the lexer to
/// automatically map matching identifiers to specialized token codes.
Expand All @@ -261,6 +334,8 @@ static void AddKeyword(StringRef Keyword,
Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
Info.setIsExtensionToken(AddResult == KS_Extension);
Info.setIsFutureCompatKeyword(AddResult == KS_Future);
Info.setIsReusableBuiltinName(LangOpts.CPlusPlus &&
isReusableBuiltinName(TokenCode));
}

/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Lex/PPMacroExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1673,6 +1673,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
return II && HasExtension(*this, II->getName());
});
} else if (II == Ident__has_builtin) {
EvaluatingHasBuiltinMacro = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not particularly proud of this workaround, and if reviewers think that we should do this in some other way (e.g. enable backtracking for every __has_builtin use, and look behind when we see a revertible type trait identifier), I'm open to suggestions.

EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
Expand Down Expand Up @@ -1734,6 +1735,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
.Default(false);
}
});
EvaluatingHasBuiltinMacro = false;
} else if (II == Ident__has_constexpr_builtin) {
EvaluateFeatureLikeBuiltinMacro(
OS, Tok, II, *this, false,
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Lex/Preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,16 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
II.setIsFutureCompatKeyword(false);
}

// Standard libraries used to declare structs with the same (reserved) names
// as our builtins for type traits, e.g. __is_pod. This is deprecated, and
// we warn when our builtin identifiers are used for anything other than
// invocation. A notable exception is the __has_builtin macro.
if (II.isReusableBuiltinName() && !isNextPPTokenLParen() &&
!EvaluatingHasBuiltinMacro) {
Identifier.setKind(tok::identifier);
Diag(Identifier, diag::warn_deprecated_builtin_replacement) << II.getName();
}

// If this is an extension token, diagnose its use.
// We avoid diagnosing tokens that originate from macro definitions.
// FIXME: This warning is disabled in cases where it shouldn't be,
Expand Down
19 changes: 1 addition & 18 deletions clang/lib/Parse/ParseDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3906,24 +3906,7 @@ void Parser::ParseDeclarationSpecifiers(

continue;
}

case tok::kw___is_signed:
// GNU libstdc++ 4.4 uses __is_signed as an identifier, but Clang
// typically treats it as a trait. If we see __is_signed as it appears
// in libstdc++, e.g.,
//
// static const bool __is_signed;
//
// then treat __is_signed as an identifier rather than as a keyword.
if (DS.getTypeSpecType() == TST_bool &&
DS.getTypeQualifiers() == DeclSpec::TQ_const &&
DS.getStorageClassSpec() == DeclSpec::SCS_static)
TryKeywordIdentFallback(true);

// We're done with the declaration-specifiers.
goto DoneWithDeclSpec;

// typedef-name
// typedef-name
case tok::kw___super:
case tok::kw_decltype:
case tok::identifier:
Expand Down
71 changes: 0 additions & 71 deletions clang/lib/Parse/ParseDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1718,77 +1718,6 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
// C++11 attributes
SourceLocation AttrFixitLoc = Tok.getLocation();

if (TagType == DeclSpec::TST_struct && Tok.isNot(tok::identifier) &&
!Tok.isAnnotation() && Tok.getIdentifierInfo() &&
Tok.isOneOf(
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
#include "clang/Basic/TransformTypeTraits.def"
tok::kw___is_abstract,
tok::kw___is_aggregate,
tok::kw___is_arithmetic,
tok::kw___is_array,
tok::kw___is_assignable,
tok::kw___is_base_of,
tok::kw___is_bounded_array,
tok::kw___is_class,
tok::kw___is_complete_type,
tok::kw___is_compound,
tok::kw___is_const,
tok::kw___is_constructible,
tok::kw___is_convertible,
tok::kw___is_convertible_to,
tok::kw___is_destructible,
tok::kw___is_empty,
tok::kw___is_enum,
tok::kw___is_floating_point,
tok::kw___is_final,
tok::kw___is_function,
tok::kw___is_fundamental,
tok::kw___is_integral,
tok::kw___is_interface_class,
tok::kw___is_literal,
tok::kw___is_lvalue_expr,
tok::kw___is_lvalue_reference,
tok::kw___is_member_function_pointer,
tok::kw___is_member_object_pointer,
tok::kw___is_member_pointer,
tok::kw___is_nothrow_assignable,
tok::kw___is_nothrow_constructible,
tok::kw___is_nothrow_convertible,
tok::kw___is_nothrow_destructible,
tok::kw___is_nullptr,
tok::kw___is_object,
tok::kw___is_pod,
tok::kw___is_pointer,
tok::kw___is_polymorphic,
tok::kw___is_reference,
tok::kw___is_referenceable,
tok::kw___is_rvalue_expr,
tok::kw___is_rvalue_reference,
tok::kw___is_same,
tok::kw___is_scalar,
tok::kw___is_scoped_enum,
tok::kw___is_sealed,
tok::kw___is_signed,
tok::kw___is_standard_layout,
tok::kw___is_trivial,
tok::kw___is_trivially_equality_comparable,
tok::kw___is_trivially_assignable,
tok::kw___is_trivially_constructible,
tok::kw___is_trivially_copyable,
tok::kw___is_unbounded_array,
tok::kw___is_union,
tok::kw___is_unsigned,
tok::kw___is_void,
tok::kw___is_volatile
))
// GNU libstdc++ 4.2 and libc++ use certain intrinsic names as the
// name of struct templates, but some are keywords in GCC >= 4.3
// and Clang. Therefore, when we see the token sequence "struct
// X", make X into a normal identifier rather than a keyword, to
// allow libstdc++ 4.2 and libc++ to work properly.
TryKeywordIdentFallback(true);

struct PreserveAtomicIdentifierInfoRAII {
PreserveAtomicIdentifierInfoRAII(Token &Tok, bool Enabled)
: AtomicII(nullptr) {
Expand Down
Loading
Loading