Skip to content

Commit 04410c5

Browse files
committed
[clang-tidy] extend bugprone-signed-char-misuse check.
Summary: Cover a new use case where using a 'signed char' as an integer might lead to issues with non-ASCII characters. Comparing a 'signed char' with an 'unsigned char' using an equality / inequality operator produces an unexpected result for non-ASCII characters. Reviewers: aaron.ballman, alexfh, hokein, njames93 Reviewed By: njames93 Subscribers: xazax.hun, cfe-commits Tags: #clang, #clang-tools-extra Differential Revision: https://reviews.llvm.org/D75749
1 parent ee862ad commit 04410c5

File tree

4 files changed

+226
-56
lines changed

4 files changed

+226
-56
lines changed

clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp

Lines changed: 79 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ namespace clang {
1818
namespace tidy {
1919
namespace bugprone {
2020

21+
static constexpr int UnsignedASCIIUpperBound = 127;
22+
2123
static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) {
2224
const std::vector<std::string> NameList =
2325
utils::options::parseStringList(Names);
@@ -33,70 +35,114 @@ void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
3335
Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList);
3436
}
3537

36-
void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
38+
// Create a matcher for char -> integer cast.
39+
BindableMatcher<clang::Stmt> SignedCharMisuseCheck::charCastExpression(
40+
bool IsSigned, const Matcher<clang::QualType> &IntegerType,
41+
const std::string &CastBindName) const {
3742
// We can ignore typedefs which are some kind of integer types
3843
// (e.g. typedef char sal_Int8). In this case, we don't need to
3944
// worry about the misinterpretation of char values.
4045
const auto IntTypedef = qualType(
4146
hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList))));
4247

43-
const auto SignedCharType = expr(hasType(qualType(
44-
allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef)))));
45-
46-
const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()),
47-
unless(booleanType())))
48-
.bind("integerType");
48+
auto CharTypeExpr = expr();
49+
if (IsSigned) {
50+
CharTypeExpr = expr(hasType(
51+
qualType(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))));
52+
} else {
53+
CharTypeExpr = expr(hasType(qualType(
54+
isAnyCharacter(), unless(isSignedInteger()), unless(IntTypedef))));
55+
}
4956

50-
// We are interested in signed char -> integer conversion.
5157
const auto ImplicitCastExpr =
52-
implicitCastExpr(hasSourceExpression(SignedCharType),
58+
implicitCastExpr(hasSourceExpression(CharTypeExpr),
5359
hasImplicitDestinationType(IntegerType))
54-
.bind("castExpression");
60+
.bind(CastBindName);
5561

5662
const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr));
5763
const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr));
5864
const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr));
5965

6066
// We catch any type of cast to an integer. We need to have these cast
6167
// expressions explicitly to catch only those casts which are direct children
62-
// of an assignment/declaration.
63-
const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr,
64-
StaticCastExpr, FunctionalCastExpr));
68+
// of the checked expressions. (e.g. assignment, declaration).
69+
return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr,
70+
FunctionalCastExpr));
71+
}
6572

66-
// Catch assignments with the suspicious type conversion.
67-
const auto AssignmentOperatorExpr = expr(binaryOperator(
68-
hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr)));
73+
void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
74+
const auto IntegerType =
75+
qualType(isInteger(), unless(isAnyCharacter()), unless(booleanType()))
76+
.bind("integerType");
77+
const auto SignedCharCastExpr =
78+
charCastExpression(true, IntegerType, "signedCastExpression");
79+
const auto UnSignedCharCastExpr =
80+
charCastExpression(false, IntegerType, "unsignedCastExpression");
81+
82+
// Catch assignments with signed char -> integer conversion.
83+
const auto AssignmentOperatorExpr =
84+
expr(binaryOperator(hasOperatorName("="), hasLHS(hasType(IntegerType)),
85+
hasRHS(SignedCharCastExpr)));
6986

7087
Finder->addMatcher(AssignmentOperatorExpr, this);
7188

72-
// Catch declarations with the suspicious type conversion.
73-
const auto Declaration =
74-
varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr));
89+
// Catch declarations with signed char -> integer conversion.
90+
const auto Declaration = varDecl(isDefinition(), hasType(IntegerType),
91+
hasInitializer(SignedCharCastExpr));
7592

7693
Finder->addMatcher(Declaration, this);
94+
95+
// Catch signed char/unsigned char comparison.
96+
const auto CompareOperator =
97+
expr(binaryOperator(hasAnyOperatorName("==", "!="),
98+
anyOf(allOf(hasLHS(SignedCharCastExpr),
99+
hasRHS(UnSignedCharCastExpr)),
100+
allOf(hasLHS(UnSignedCharCastExpr),
101+
hasRHS(SignedCharCastExpr)))))
102+
.bind("comparison");
103+
104+
Finder->addMatcher(CompareOperator, this);
77105
}
78106

79107
void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
80-
const auto *CastExpression =
81-
Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression");
82-
const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType");
83-
assert(CastExpression);
84-
assert(IntegerType);
108+
const auto *SignedCastExpression =
109+
Result.Nodes.getNodeAs<ImplicitCastExpr>("signedCastExpression");
85110

86-
// Ignore the match if we know that the value is not negative.
111+
// Ignore the match if we know that the signed char's value is not negative.
87112
// The potential misinterpretation happens for negative values only.
88113
Expr::EvalResult EVResult;
89-
if (!CastExpression->isValueDependent() &&
90-
CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) {
91-
llvm::APSInt Value1 = EVResult.Val.getInt();
92-
if (Value1.isNonNegative())
114+
if (!SignedCastExpression->isValueDependent() &&
115+
SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
116+
*Result.Context)) {
117+
llvm::APSInt Value = EVResult.Val.getInt();
118+
if (Value.isNonNegative())
93119
return;
94120
}
95121

96-
diag(CastExpression->getBeginLoc(),
97-
"'signed char' to %0 conversion; "
98-
"consider casting to 'unsigned char' first.")
99-
<< *IntegerType;
122+
if (const auto *Comparison = Result.Nodes.getNodeAs<Expr>("comparison")) {
123+
const auto *UnSignedCastExpression =
124+
Result.Nodes.getNodeAs<ImplicitCastExpr>("unsignedCastExpression");
125+
126+
// We can ignore the ASCII value range also for unsigned char.
127+
Expr::EvalResult EVResult;
128+
if (!UnSignedCastExpression->isValueDependent() &&
129+
UnSignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
130+
*Result.Context)) {
131+
llvm::APSInt Value = EVResult.Val.getInt();
132+
if (Value <= UnsignedASCIIUpperBound)
133+
return;
134+
}
135+
136+
diag(Comparison->getBeginLoc(),
137+
"comparison between 'signed char' and 'unsigned char'");
138+
} else if (const auto *IntegerType =
139+
Result.Nodes.getNodeAs<QualType>("integerType")) {
140+
diag(SignedCastExpression->getBeginLoc(),
141+
"'signed char' to %0 conversion; "
142+
"consider casting to 'unsigned char' first.")
143+
<< *IntegerType;
144+
} else
145+
llvm_unreachable("Unexpected match");
100146
}
101147

102148
} // namespace bugprone

clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,11 @@ namespace clang {
1515
namespace tidy {
1616
namespace bugprone {
1717

18-
/// Finds ``signed char`` -> integer conversions which might indicate a programming
19-
/// error. The basic problem with the ``signed char``, that it might store the
20-
/// non-ASCII characters as negative values. The human programmer probably
21-
/// expects that after an integer conversion the converted value matches with the
22-
/// character code (a value from [0..255]), however, the actual value is in
23-
/// [-128..127] interval. This also applies to the plain ``char`` type on
24-
/// those implementations which represent ``char`` similar to ``signed char``.
18+
/// Finds those ``signed char`` -> integer conversions which might indicate a
19+
/// programming error. The basic problem with ``signed char`` is that it might
20+
/// store the non-ASCII characters as negative values. This behavior can cause a
21+
/// misunderstanding of the written code both when an explicit and when an
22+
/// implicit conversion happens.
2523
///
2624
/// For the user-facing documentation see:
2725
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html
@@ -34,6 +32,11 @@ class SignedCharMisuseCheck : public ClangTidyCheck {
3432
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
3533

3634
private:
35+
ast_matchers::internal::BindableMatcher<clang::Stmt> charCastExpression(
36+
bool IsSigned,
37+
const ast_matchers::internal::Matcher<clang::QualType> &IntegerType,
38+
const std::string &CastBindName) const;
39+
3740
const std::string CharTypdefsToIgnoreList;
3841
};
3942

clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,39 @@
33
bugprone-signed-char-misuse
44
===========================
55

6-
Finds ``signed char`` -> integer conversions which might indicate a programming
7-
error. The basic problem with the ``signed char``, that it might store the
8-
non-ASCII characters as negative values. The human programmer probably
9-
expects that after an integer conversion the converted value matches with the
6+
Finds those ``signed char`` -> integer conversions which might indicate a
7+
programming error. The basic problem with ``signed char`` is that it might
8+
store the non-ASCII characters as negative values. This behavior can cause a
9+
misunderstanding of the written code both when an explicit and when an
10+
implicit conversion happens.
11+
12+
When the code contains an explicit ``signed char`` -> integer conversion, the
13+
human programmer probably expects that the converted value matches with the
1014
character code (a value from [0..255]), however, the actual value is in
11-
[-128..127] interval. This also applies to the plain ``char`` type on
12-
those implementations which represent ``char`` similar to ``signed char``.
13-
14-
To avoid this kind of misinterpretation, the desired way of converting from a
15-
``signed char`` to an integer value is converting to ``unsigned char`` first,
16-
which stores all the characters in the positive [0..255] interval which matches
17-
with the known character codes.
18-
19-
It depends on the actual platform whether ``char`` is handled as ``signed char``
15+
[-128..127] interval. To avoid this kind of misinterpretation, the desired way
16+
of converting from a ``signed char`` to an integer value is converting to
17+
``unsigned char`` first, which stores all the characters in the positive [0..255]
18+
interval which matches the known character codes.
19+
20+
In case of implicit conversion, the programmer might not actually be aware
21+
that a conversion happened and the char value is used as an integer. There are
22+
some use cases where this unawareness might lead to functionally imperfect code.
23+
For example, checking the equality of a ``signed char`` and an ``unsigned char``
24+
variable is something we should avoid in C++ code. During this comparison,
25+
the two variables are converted to integers which have different value ranges.
26+
For ``signed char``, the non-ASCII characters are stored as a value in [-128..-1]
27+
interval, while the same characters are stored in the [128..255] interval for
28+
an ``unsigned char``.
29+
30+
It depends on the actual platform whether plain ``char`` is handled as ``signed char``
2031
by default and so it is caught by this check or not. To change the default behavior
2132
you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options.
2233

2334
Currently, this check is limited to assignments and variable declarations,
24-
where a ``signed char`` is assigned to an integer variable. There are other
25-
use cases where the same misinterpretation might lead to similar bogus
26-
behavior.
35+
where a ``signed char`` is assigned to an integer variable and to
36+
equality/inequality comparisons between ``signed char`` and ``unsigned char``.
37+
There are other use cases where the unexpected value ranges might lead to
38+
similar bogus behavior.
2739

2840
See also:
2941
`STR34-C. Cast characters to unsigned char before converting to larger integer sizes
@@ -67,6 +79,29 @@ an ``unsigned char`` value first.
6779
return IChar;
6880
}
6981

82+
Another use case is checking the equality of two ``char`` variables with
83+
different signedness. Inside the non-ASCII value range this comparison between
84+
a ``signed char`` and an ``unsigned char`` always returns ``false``.
85+
86+
.. code-block:: c++
87+
88+
bool compare(signed char SChar, unsigned char USChar) {
89+
if (SChar == USChar)
90+
return true;
91+
return false;
92+
}
93+
94+
The easiest way to fix this kind of comparison is casting one of the arguments,
95+
so both arguments will have the same type.
96+
97+
.. code-block:: c++
98+
99+
bool compare(signed char SChar, unsigned char USChar) {
100+
if (static_cast<unsigned char>(SChar) == USChar)
101+
return true;
102+
return false;
103+
}
104+
70105
.. option:: CharTypdefsToIgnore
71106

72107
A semicolon-separated list of typedef names. In this list, we can list

clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,34 @@ int CharPointer(signed char *CCharacter) {
6262
return NCharacter;
6363
}
6464

65+
int SignedUnsignedCharEquality(signed char SCharacter) {
66+
unsigned char USCharacter = 'a';
67+
if (SCharacter == USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
68+
return 1;
69+
return 0;
70+
}
71+
72+
int SignedUnsignedCharIneqiality(signed char SCharacter) {
73+
unsigned char USCharacter = 'a';
74+
if (SCharacter != USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
75+
return 1;
76+
return 0;
77+
}
78+
79+
int CompareWithNonAsciiConstant(unsigned char USCharacter) {
80+
const signed char SCharacter = -5;
81+
if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
82+
return 1;
83+
return 0;
84+
}
85+
86+
int CompareWithUnsignedNonAsciiConstant(signed char SCharacter) {
87+
const unsigned char USCharacter = 128;
88+
if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
89+
return 1;
90+
return 0;
91+
}
92+
6593
///////////////////////////////////////////////////////////////////
6694
/// Test cases correctly ignored by the check.
6795

@@ -121,3 +149,61 @@ unsigned char CharToCharCast() {
121149

122150
return USCharacter;
123151
}
152+
153+
int FixComparisonWithSignedCharCast(signed char SCharacter) {
154+
unsigned char USCharacter = 'a';
155+
if (SCharacter == static_cast<signed char>(USCharacter))
156+
return 1;
157+
return 0;
158+
}
159+
160+
int FixComparisonWithUnSignedCharCast(signed char SCharacter) {
161+
unsigned char USCharacter = 'a';
162+
if (static_cast<unsigned char>(SCharacter) == USCharacter)
163+
return 1;
164+
return 0;
165+
}
166+
167+
// Make sure we don't catch other types of char comparisons.
168+
int SameCharTypeComparison(signed char SCharacter) {
169+
signed char SCharacter2 = 'a';
170+
if (SCharacter == SCharacter2)
171+
return 1;
172+
return 0;
173+
}
174+
175+
// Make sure we don't catch other types of char comparisons.
176+
int SameCharTypeComparison2(unsigned char USCharacter) {
177+
unsigned char USCharacter2 = 'a';
178+
if (USCharacter == USCharacter2)
179+
return 1;
180+
return 0;
181+
}
182+
183+
// Make sure we don't catch integer - char comparison.
184+
int CharIntComparison(signed char SCharacter) {
185+
int ICharacter = 10;
186+
if (SCharacter == ICharacter)
187+
return 1;
188+
return 0;
189+
}
190+
191+
int CompareWithAsciiLiteral(unsigned char USCharacter) {
192+
if (USCharacter == 'x') // no warning
193+
return 1;
194+
return 0;
195+
}
196+
197+
int CompareWithAsciiConstant(unsigned char USCharacter) {
198+
const signed char SCharacter = 'a';
199+
if (USCharacter == SCharacter)
200+
return 1;
201+
return 0;
202+
}
203+
204+
int CompareWithUnsignedAsciiConstant(signed char SCharacter) {
205+
const unsigned char USCharacter = 'a';
206+
if (USCharacter == SCharacter)
207+
return 1;
208+
return 0;
209+
}

0 commit comments

Comments
 (0)