Skip to content

Commit ae5efce

Browse files
LegalizeAdulthood authored and t-rasmud committed
[clang-tidy] Reject invalid enum initializers in C files
C requires that enum values fit into an int. Scan the macro tokens present in an initializing expression and reject macros that contain tokens whose suffixes make them larger than an int. C forbids the comma operator in enum initializing expressions, so optionally reject the comma operator. Differential Revision: https://reviews.llvm.org/D125622. Fixes llvm#55467. (Cherry picked from commit b418ef5.)
1 parent f319dcc commit ae5efce

File tree

5 files changed

+303
-146
lines changed

5 files changed

+303
-146
lines changed

clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "IntegralLiteralExpressionMatcher.h"
1010

11+
#include <algorithm>
1112
#include <cctype>
1213
#include <stdexcept>
1314

@@ -81,6 +82,50 @@ bool IntegralLiteralExpressionMatcher::unaryOperator() {
8182
return true;
8283
}
8384

85+
// Classifies an integer literal token by the suffix characters in its
// spelling.
//
// Walks backwards from the last character of the spelling until it reaches a
// decimal digit, recording every 'U'/'u' and 'L'/'l' seen on the way; a second
// 'L' marks the literal as long long. The first character of the spelling is
// never inspected. Tokens of at most one character are classified as Int.
static LiteralSize literalTokenSize(const Token &Tok) {
  const unsigned int Length = Tok.getLength();
  if (Length <= 1)
    return LiteralSize::Int;

  bool SeenUnsigned = false;
  bool SeenLong = false;
  bool SeenLongLong = false;
  const char *Text = Tok.getLiteralData();
  for (unsigned int End = Length - 1; End > 0; --End) {
    // The <cctype> functions have undefined behavior for negative arguments
    // other than EOF, so convert through unsigned char before calling them.
    const auto Ch = static_cast<unsigned char>(Text[End]);
    if (std::isdigit(Ch))
      break;

    const int Upper = std::toupper(Ch);
    if (Upper == 'U') {
      SeenUnsigned = true;
    } else if (Upper == 'L') {
      if (SeenLong)
        SeenLongLong = true;
      SeenLong = true;
    }
  }

  if (SeenLongLong)
    return SeenUnsigned ? LiteralSize::UnsignedLongLong
                        : LiteralSize::LongLong;
  if (SeenLong)
    return SeenUnsigned ? LiteralSize::UnsignedLong : LiteralSize::Long;
  return SeenUnsigned ? LiteralSize::UnsignedInt : LiteralSize::Int;
}
124+
125+
// Orders two LiteralSize values by the integer value of their enumerators.
static bool operator<(LiteralSize LHS, LiteralSize RHS) {
  const int LeftRank = static_cast<int>(LHS);
  const int RightRank = static_cast<int>(RHS);
  return LeftRank < RightRank;
}
128+
84129
bool IntegralLiteralExpressionMatcher::unaryExpr() {
85130
if (!unaryOperator())
86131
return false;
@@ -102,7 +147,10 @@ bool IntegralLiteralExpressionMatcher::unaryExpr() {
102147
!isIntegralConstant(*Current)) {
103148
return false;
104149
}
150+
151+
LargestSize = std::max(LargestSize, literalTokenSize(*Current));
105152
++Current;
153+
106154
return true;
107155
}
108156

@@ -217,14 +265,24 @@ bool IntegralLiteralExpressionMatcher::conditionalExpr() {
217265
}
218266

219267
// Matches a comma expression built from conditionalExpr() operands. The
// separator test is a predicate: it accepts the comma token only when
// CommaAllowed is set, and otherwise rejects every token.
// NOTE(review): how nonTerminalChainedExpr consumes the predicate is not
// visible in this hunk - confirm against its definition.
bool IntegralLiteralExpressionMatcher::commaExpr() {
220-
return nonTerminalChainedExpr<tok::TokenKind::comma>(
221-
&IntegralLiteralExpressionMatcher::conditionalExpr);
268+
// Each lambda has its own closure type, so both arms are wrapped in
// std::function to give the conditional expression a single type.
auto Pred = CommaAllowed
269+
? std::function<bool(Token)>(
270+
[](Token Tok) { return Tok.is(tok::TokenKind::comma); })
271+
: std::function<bool(Token)>([](Token) { return false; });
272+
return nonTerminalChainedExpr(
273+
&IntegralLiteralExpressionMatcher::conditionalExpr, Pred);
222274
}
223275

224276
// A full expression is exactly a comma expression; this forwards to
// commaExpr() and returns its result.
bool IntegralLiteralExpressionMatcher::expr() {
  const bool Parsed = commaExpr();
  return Parsed;
}
225277

226278
// Entry point of the matcher. Succeeds only when the expression parses and
// the Current cursor has reached End, i.e. no tokens are left over.
bool IntegralLiteralExpressionMatcher::match() {
227-
return expr() && Current == End;
279+
// Top-level allowed expression is conditionalExpr(), not expr(), because
280+
// comma operators are only valid initializers when used inside parentheses.
281+
return conditionalExpr() && Current == End;
282+
}
283+
284+
// Reports the LargestSize member, which unaryExpr() raises to the largest
// literal size it has recorded so far.
LiteralSize IntegralLiteralExpressionMatcher::largestLiteralSize() const {
  return this->LargestSize;
}
229287

230288
} // namespace modernize

clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,27 @@ namespace clang {
1616
namespace tidy {
1717
namespace modernize {
1818

19+
// Categories of integer literal, as selected by the literal's suffix.
// Enumerators are declared in increasing numeric order because values are
// compared through their underlying integers. Unknown (0) is the value the
// matcher starts from before any literal has been recorded.
enum class LiteralSize {
  Unknown = 0,
  Int = 1,
  UnsignedInt = 2,
  Long = 3,
  UnsignedLong = 4,
  LongLong = 5,
  UnsignedLongLong = 6
};
28+
1929
// Parses an array of tokens and returns true if they conform to the rules of
2030
// C++ for whole expressions involving integral literals. Follows the operator
21-
// precedence rules of C++.
31+
// precedence rules of C++. Optionally excludes comma operator expressions.
2232
class IntegralLiteralExpressionMatcher {
2333
public:
24-
IntegralLiteralExpressionMatcher(ArrayRef<Token> Tokens)
25-
: Current(Tokens.begin()), End(Tokens.end()) {}
34+
// Tokens is the token sequence to match; the cursor starts at its first
// element. CommaAllowed selects whether commaExpr() accepts the comma token.
IntegralLiteralExpressionMatcher(ArrayRef<Token> Tokens, bool CommaAllowed)
35+
: Current(Tokens.begin()), End(Tokens.end()), CommaAllowed(CommaAllowed) {
36+
}
2637

2738
bool match();
39+
LiteralSize largestLiteralSize() const;
2840

2941
private:
3042
bool advance();
@@ -64,6 +76,8 @@ class IntegralLiteralExpressionMatcher {
6476

6577
ArrayRef<Token>::iterator Current;
6678
ArrayRef<Token>::iterator End;
79+
LiteralSize LargestSize{LiteralSize::Unknown};
80+
bool CommaAllowed;
6781
};
6882

6983
} // namespace modernize

clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,14 @@ void MacroToEnumCallbacks::FileChanged(SourceLocation Loc,
312312

313313
// Decides whether MacroTokens form an integral literal expression that can
// serve as an enum initializer. When not compiling as C++, the expression is
// additionally rejected unless the largest literal size reported by the
// matcher is Int or UnsignedInt.
bool MacroToEnumCallbacks::isInitializer(ArrayRef<Token> MacroTokens)
313314
{
315-
IntegralLiteralExpressionMatcher Matcher(MacroTokens);
316-
return Matcher.match();
315+
// The second argument is CommaAllowed.
// NOTE(review): it is true whenever LangOpts.C99 is 0; presumably that also
// covers C89 input, where the comma operator may not be wanted - confirm.
IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0);
316+
bool Matched = Matcher.match();
317+
bool isC = !LangOpts.CPlusPlus;
318+
// The matcher's size starts as Unknown, so an expression in which no literal
// was recorded is rejected by this check as well.
if (isC && (Matcher.largestLiteralSize() != LiteralSize::Int &&
319+
Matcher.largestLiteralSize() != LiteralSize::UnsignedInt))
320+
return false;
321+
322+
return Matched;
317323
}
318324

319325

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %check_clang_tidy %s modernize-macro-to-enum %t
2+
3+
// C requires enum values to fit into an int.
4+
#define TOO_BIG1 1L
5+
#define TOO_BIG2 1UL
6+
#define TOO_BIG3 1LL
7+
#define TOO_BIG4 1ULL
8+
9+
// C forbids comma operator in initializing expressions.
10+
#define BAD_OP 1, 2
11+
12+
// Literals with no suffix or only a U suffix are still expected to be
// diagnosed as enum candidates by the directives that follow.
#define SIZE_OK1 1
13+
#define SIZE_OK2 1U
14+
// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: replace macro with enum [modernize-macro-to-enum]
15+
// CHECK-MESSAGES: :[[@LINE-3]]:9: warning: macro 'SIZE_OK1' defines an integral constant; prefer an enum instead
16+
// CHECK-MESSAGES: :[[@LINE-3]]:9: warning: macro 'SIZE_OK2' defines an integral constant; prefer an enum instead

0 commit comments

Comments
 (0)