Skip to content

Commit e3bd67e

Browse files
committed
[clang][Sema] check default argument promotions for printf
The main focus of this patch is to make ArgType::matchesType check for possible default parameter promotions when the argType is not a pointer. If so, no warning will be given for `int`, `unsigned int` types as corresponding arguments to %hhd and %hd. However, the usage of %hhd corresponding to short is relatively rare, and it is more likely to be a misuse. This patch keeps the original behavior of clang like this as much as possible, while making it more convenient to consider the default arguments promotion. Fixes #57102 Reviewed By: aaron.ballman, nickdesaulniers, #clang-language-wg Differential Revision: https://reviews.llvm.org/D132568
1 parent 62454e8 commit e3bd67e

File tree

8 files changed

+237
-43
lines changed

8 files changed

+237
-43
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ AIX Support
175175
C Language Changes in Clang
176176
---------------------------
177177

178+
- Adjusted ``-Wformat`` warnings according to `WG14 N2562 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf>`_.
179+
Clang will now consider default argument promotions in printf, and remove unnecessary warnings.
180+
Especially ``int`` argument with specifier ``%hhd`` and ``%hd``.
181+
178182
C2x Feature Support
179183
-------------------
180184

clang/include/clang/AST/FormatString.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,14 @@ class ArgType {
261261
/// instance, "%d" and float.
262262
NoMatch = 0,
263263
/// The conversion specifier and the argument type are compatible. For
264-
/// instance, "%d" and _Bool.
264+
/// instance, "%d" and int.
265265
Match = 1,
266+
/// The conversion specifier and the argument type are compatible because of
267+
/// default argument promotions. For instance, "%hhd" and int.
268+
MatchPromotion,
269+
/// The conversion specifier and the argument type are compatible but still
270+
/// seems likely to be an error. For instanace, "%hhd" and short.
271+
NoMatchPromotionTypeConfusion,
266272
/// The conversion specifier and the argument type are disallowed by the C
267273
/// standard, but are in practice harmless. For instance, "%p" and int*.
268274
NoMatchPedantic,

clang/lib/AST/FormatString.cpp

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -348,25 +348,42 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
348348
return Match;
349349

350350
case AnyCharTy: {
351-
if (const EnumType *ETy = argTy->getAs<EnumType>()) {
351+
if (const auto *ETy = argTy->getAs<EnumType>()) {
352352
// If the enum is incomplete we know nothing about the underlying type.
353353
// Assume that it's 'int'.
354354
if (!ETy->getDecl()->isComplete())
355355
return NoMatch;
356356
argTy = ETy->getDecl()->getIntegerType();
357357
}
358358

359-
if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
359+
if (const auto *BT = argTy->getAs<BuiltinType>()) {
360+
// The types are perfectly matched?
360361
switch (BT->getKind()) {
362+
default:
363+
break;
364+
case BuiltinType::Char_S:
365+
case BuiltinType::SChar:
366+
case BuiltinType::UChar:
367+
case BuiltinType::Char_U:
368+
case BuiltinType::Bool:
369+
return Match;
370+
}
371+
// "Partially matched" because of promotions?
372+
if (!Ptr) {
373+
switch (BT->getKind()) {
361374
default:
362375
break;
363-
case BuiltinType::Char_S:
364-
case BuiltinType::SChar:
365-
case BuiltinType::UChar:
366-
case BuiltinType::Char_U:
367-
case BuiltinType::Bool:
368-
return Match;
376+
case BuiltinType::Int:
377+
case BuiltinType::UInt:
378+
return MatchPromotion;
379+
case BuiltinType::Short:
380+
case BuiltinType::UShort:
381+
case BuiltinType::WChar_S:
382+
case BuiltinType::WChar_U:
383+
return NoMatchPromotionTypeConfusion;
384+
}
369385
}
386+
}
370387
return NoMatch;
371388
}
372389

@@ -383,8 +400,9 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
383400

384401
if (T == argTy)
385402
return Match;
386-
// Check for "compatible types".
387-
if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
403+
if (const auto *BT = argTy->getAs<BuiltinType>()) {
404+
// Check if the only difference between them is signed vs unsigned
405+
// if true, we consider they are compatible.
388406
switch (BT->getKind()) {
389407
default:
390408
break;
@@ -395,25 +413,66 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
395413
case BuiltinType::Bool:
396414
if (T == C.UnsignedShortTy || T == C.ShortTy)
397415
return NoMatchTypeConfusion;
398-
return T == C.UnsignedCharTy || T == C.SignedCharTy ? Match
399-
: NoMatch;
416+
if (T == C.UnsignedCharTy || T == C.SignedCharTy)
417+
return Match;
418+
break;
400419
case BuiltinType::Short:
401-
return T == C.UnsignedShortTy ? Match : NoMatch;
420+
if (T == C.UnsignedShortTy)
421+
return Match;
422+
break;
402423
case BuiltinType::UShort:
403-
return T == C.ShortTy ? Match : NoMatch;
424+
if (T == C.ShortTy)
425+
return Match;
426+
break;
404427
case BuiltinType::Int:
405-
return T == C.UnsignedIntTy ? Match : NoMatch;
428+
if (T == C.UnsignedIntTy)
429+
return Match;
430+
break;
406431
case BuiltinType::UInt:
407-
return T == C.IntTy ? Match : NoMatch;
432+
if (T == C.IntTy)
433+
return Match;
434+
break;
408435
case BuiltinType::Long:
409-
return T == C.UnsignedLongTy ? Match : NoMatch;
436+
if (T == C.UnsignedLongTy)
437+
return Match;
438+
break;
410439
case BuiltinType::ULong:
411-
return T == C.LongTy ? Match : NoMatch;
440+
if (T == C.LongTy)
441+
return Match;
442+
break;
412443
case BuiltinType::LongLong:
413-
return T == C.UnsignedLongLongTy ? Match : NoMatch;
444+
if (T == C.UnsignedLongLongTy)
445+
return Match;
446+
break;
414447
case BuiltinType::ULongLong:
415-
return T == C.LongLongTy ? Match : NoMatch;
416-
}
448+
if (T == C.LongLongTy)
449+
return Match;
450+
break;
451+
}
452+
// "Partially matched" because of promotions?
453+
if (!Ptr) {
454+
switch (BT->getKind()) {
455+
default:
456+
break;
457+
case BuiltinType::Int:
458+
case BuiltinType::UInt:
459+
if (T == C.SignedCharTy || T == C.UnsignedCharTy ||
460+
T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy ||
461+
T == C.WideCharTy)
462+
return MatchPromotion;
463+
break;
464+
case BuiltinType::Short:
465+
case BuiltinType::UShort:
466+
if (T == C.SignedCharTy || T == C.UnsignedCharTy)
467+
return NoMatchPromotionTypeConfusion;
468+
break;
469+
case BuiltinType::WChar_U:
470+
case BuiltinType::WChar_S:
471+
if (T != C.WCharTy && T != C.WideCharTy)
472+
return NoMatchPromotionTypeConfusion;
473+
}
474+
}
475+
}
417476
return NoMatch;
418477
}
419478

clang/lib/Sema/SemaChecking.cpp

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10081,10 +10081,14 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
1008110081
return true;
1008210082
}
1008310083

10084-
analyze_printf::ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy);
10085-
if (Match == analyze_printf::ArgType::Match)
10084+
ArgType::MatchKind ImplicitMatch = ArgType::NoMatch;
10085+
ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy);
10086+
if (Match == ArgType::Match)
1008610087
return true;
1008710088

10089+
// NoMatchPromotionTypeConfusion should be only returned in ImplictCastExpr
10090+
assert(Match != ArgType::NoMatchPromotionTypeConfusion);
10091+
1008810092
// Look through argument promotions for our error message's reported type.
1008910093
// This includes the integral and floating promotions, but excludes array
1009010094
// and function pointer decay (seeing that an argument intended to be a
@@ -10101,13 +10105,9 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
1010110105
if (ICE->getType() == S.Context.IntTy ||
1010210106
ICE->getType() == S.Context.UnsignedIntTy) {
1010310107
// All further checking is done on the subexpression
10104-
const analyze_printf::ArgType::MatchKind ImplicitMatch =
10105-
AT.matchesType(S.Context, ExprTy);
10106-
if (ImplicitMatch == analyze_printf::ArgType::Match)
10108+
ImplicitMatch = AT.matchesType(S.Context, ExprTy);
10109+
if (ImplicitMatch == ArgType::Match)
1010710110
return true;
10108-
if (ImplicitMatch == ArgType::NoMatchPedantic ||
10109-
ImplicitMatch == ArgType::NoMatchTypeConfusion)
10110-
Match = ImplicitMatch;
1011110111
}
1011210112
}
1011310113
} else if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E)) {
@@ -10118,10 +10118,29 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
1011810118
// modifier is provided.
1011910119
if (ExprTy == S.Context.IntTy &&
1012010120
FS.getLengthModifier().getKind() != LengthModifier::AsChar)
10121-
if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue()))
10121+
if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue())) {
1012210122
ExprTy = S.Context.CharTy;
10123+
// To improve check results, we consider a character literal in C
10124+
// to be a 'char' rather than an 'int'. 'printf("%hd", 'a');' is
10125+
// more likely a type confusion situation, so we will suggest to
10126+
// use '%hhd' instead by discarding the MatchPromotion.
10127+
if (Match == ArgType::MatchPromotion)
10128+
Match = ArgType::NoMatch;
10129+
}
1012310130
}
10124-
10131+
if (Match == ArgType::MatchPromotion) {
10132+
// WG14 N2562 only clarified promotions in *printf
10133+
// For NSLog in ObjC, just preserve -Wformat behavior
10134+
if (!S.getLangOpts().ObjC &&
10135+
ImplicitMatch != ArgType::NoMatchPromotionTypeConfusion &&
10136+
ImplicitMatch != ArgType::NoMatchTypeConfusion)
10137+
return true;
10138+
Match = ArgType::NoMatch;
10139+
}
10140+
if (ImplicitMatch == ArgType::NoMatchPedantic ||
10141+
ImplicitMatch == ArgType::NoMatchTypeConfusion)
10142+
Match = ImplicitMatch;
10143+
assert(Match != ArgType::MatchPromotion);
1012510144
// Look through enums to their underlying type.
1012610145
bool IsEnum = false;
1012710146
if (auto EnumTy = ExprTy->getAs<EnumType>()) {
@@ -10194,7 +10213,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
1019410213
if (IntendedTy == ExprTy && !ShouldNotPrintDirectly) {
1019510214
unsigned Diag;
1019610215
switch (Match) {
10197-
case ArgType::Match: llvm_unreachable("expected non-matching");
10216+
case ArgType::Match:
10217+
case ArgType::MatchPromotion:
10218+
case ArgType::NoMatchPromotionTypeConfusion:
10219+
llvm_unreachable("expected non-matching");
1019810220
case ArgType::NoMatchPedantic:
1019910221
Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic;
1020010222
break;
@@ -10291,7 +10313,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
1029110313
case Sema::VAK_ValidInCXX11: {
1029210314
unsigned Diag;
1029310315
switch (Match) {
10294-
case ArgType::Match: llvm_unreachable("expected non-matching");
10316+
case ArgType::Match:
10317+
case ArgType::MatchPromotion:
10318+
case ArgType::NoMatchPromotionTypeConfusion:
10319+
llvm_unreachable("expected non-matching");
1029510320
case ArgType::NoMatchPedantic:
1029610321
Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic;
1029710322
break;

clang/test/Sema/format-strings-freebsd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ void check_freebsd_kernel_extensions(int i, long l, char *s, short h)
3535
freebsd_kernel_printf("%lr", l); // no-warning
3636

3737
// h modifier expects a short
38-
freebsd_kernel_printf("%hr", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
38+
freebsd_kernel_printf("%hr", i); // no-warning
3939
freebsd_kernel_printf("%hr", h); // no-warning
40-
freebsd_kernel_printf("%hy", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
40+
freebsd_kernel_printf("%hy", i); // no-warning
4141
freebsd_kernel_printf("%hy", h); // no-warning
4242

4343
// %y expects an int

clang/test/Sema/format-strings-scanf.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,3 +246,55 @@ void check_conditional_literal(char *s, int *i) {
246246
scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}}
247247
scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}}
248248
}
249+
250+
void test_promotion(void) {
251+
// No promotions for *scanf pointers clarified in N2562
252+
// https://github.com/llvm/llvm-project/issues/57102
253+
// N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf
254+
int i;
255+
signed char sc;
256+
unsigned char uc;
257+
short ss;
258+
unsigned short us;
259+
260+
// pointers could not be "promoted"
261+
scanf("%hhd", &i); // expected-warning{{format specifies type 'char *' but the argument has type 'int *'}}
262+
scanf("%hd", &i); // expected-warning{{format specifies type 'short *' but the argument has type 'int *'}}
263+
scanf("%d", &i); // no-warning
264+
// char & uchar
265+
scanf("%hhd", &sc); // no-warning
266+
scanf("%hhd", &uc); // no-warning
267+
scanf("%hd", &sc); // expected-warning{{format specifies type 'short *' but the argument has type 'signed char *'}}
268+
scanf("%hd", &uc); // expected-warning{{format specifies type 'short *' but the argument has type 'unsigned char *'}}
269+
scanf("%d", &sc); // expected-warning{{format specifies type 'int *' but the argument has type 'signed char *'}}
270+
scanf("%d", &uc); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned char *'}}
271+
// short & ushort
272+
scanf("%hhd", &ss); // expected-warning{{format specifies type 'char *' but the argument has type 'short *'}}
273+
scanf("%hhd", &us); // expected-warning{{format specifies type 'char *' but the argument has type 'unsigned short *'}}
274+
scanf("%hd", &ss); // no-warning
275+
scanf("%hd", &us); // no-warning
276+
scanf("%d", &ss); // expected-warning{{format specifies type 'int *' but the argument has type 'short *'}}
277+
scanf("%d", &us); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned short *'}}
278+
279+
// long types
280+
scanf("%ld", &i); // expected-warning{{format specifies type 'long *' but the argument has type 'int *'}}
281+
scanf("%lld", &i); // expected-warning{{format specifies type 'long long *' but the argument has type 'int *'}}
282+
scanf("%ld", &sc); // expected-warning{{format specifies type 'long *' but the argument has type 'signed char *'}}
283+
scanf("%lld", &sc); // expected-warning{{format specifies type 'long long *' but the argument has type 'signed char *'}}
284+
scanf("%ld", &uc); // expected-warning{{format specifies type 'long *' but the argument has type 'unsigned char *'}}
285+
scanf("%lld", &uc); // expected-warning{{format specifies type 'long long *' but the argument has type 'unsigned char *'}}
286+
scanf("%llx", &i); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'int *'}}
287+
288+
// ill-formed floats
289+
scanf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}}
290+
&sc);
291+
292+
// pointers in scanf
293+
scanf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
294+
295+
// FIXME: does this match what the C committee allows or should it be pedantically warned on?
296+
char c;
297+
void *vp;
298+
scanf("%hhd", &c); // Pedantic warning?
299+
scanf("%hhd", vp); // expected-warning{{format specifies type 'char *' but the argument has type 'void *'}}
300+
}

clang/test/Sema/format-strings.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,3 +830,57 @@ void test_block(void) {
830830
printf_arg2("foo", "%s string %i\n", "aaa", 123);
831831
printf_arg2("%s string\n", "foo", "bar"); // expected-warning{{data argument not used by format string}}
832832
}
833+
834+
void test_promotion(void) {
835+
// Default argument promotions for *printf in N2562
836+
// https://github.com/llvm/llvm-project/issues/57102
837+
// N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf
838+
int i;
839+
signed char sc;
840+
unsigned char uc;
841+
char c;
842+
short ss;
843+
unsigned short us;
844+
845+
printf("%hhd %hd %d %hhd %hd %d", i, i, i, sc, sc, sc); // no-warning
846+
printf("%hhd %hd %d %hhd %hd %d", uc, uc, uc, c, c, c); // no-warning
847+
848+
// %ld %lld %llx
849+
printf("%ld", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
850+
printf("%lld", i); // expected-warning{{format specifies type 'long long' but the argument has type 'int'}}
851+
printf("%ld", sc); // expected-warning{{format specifies type 'long' but the argument has type 'signed char'}}
852+
printf("%lld", sc); // expected-warning{{format specifies type 'long long' but the argument has type 'signed char'}}
853+
printf("%ld", uc); // expected-warning{{format specifies type 'long' but the argument has type 'unsigned char'}}
854+
printf("%lld", uc); // expected-warning{{format specifies type 'long long' but the argument has type 'unsigned char'}}
855+
printf("%llx", i); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'int'}}
856+
857+
// ill formed spec for floats
858+
printf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}}
859+
sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}}
860+
861+
// for %hhd and `short` they are compatible by promotions but more likely misuse
862+
printf("%hd", ss); // no-warning
863+
printf("%hhd", ss); // expected-warning{{format specifies type 'char' but the argument has type 'short'}}
864+
printf("%hu", us); // no-warning
865+
printf("%hhu", ss); // expected-warning{{format specifies type 'unsigned char' but the argument has type 'short'}}
866+
867+
// floats & integers are not compatible
868+
printf("%f", i); // expected-warning{{format specifies type 'double' but the argument has type 'int'}}
869+
printf("%f", sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}}
870+
printf("%f", uc); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned char'}}
871+
printf("%f", c); // expected-warning{{format specifies type 'double' but the argument has type 'char'}}
872+
printf("%f", ss); // expected-warning{{format specifies type 'double' but the argument has type 'short'}}
873+
printf("%f", us); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned short'}}
874+
875+
// character literals
876+
// In C language engineering practice, printing a character literal with %hhd or %d is common, but %hd may be misuse.
877+
printf("%hhu", 'a'); // no-warning
878+
printf("%hhd", 'a'); // no-warning
879+
printf("%hd", 'a'); // expected-warning{{format specifies type 'short' but the argument has type 'char'}}
880+
printf("%hu", 'a'); // expected-warning{{format specifies type 'unsigned short' but the argument has type 'char'}}
881+
printf("%d", 'a'); // no-warning
882+
printf("%u", 'a'); // no-warning
883+
884+
// pointers
885+
printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
886+
}

clang/www/c_status.html

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -819,13 +819,7 @@ <h2 id="c2x">C2x implementation status</h2>
819819
<tr>
820820
<td>Unclear type relationship between a format specifier and its argument</td>
821821
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf">N2562</a></td>
822-
<td class="partial" align="center">
823-
<details><summary>Partial</summary>
824-
Clang supports diagnostics checking format specifier validity, but
825-
does not yet account for all of the changes in this paper, especially
826-
regarding length modifiers like <code>h</code> and <code>hh</code>.
827-
</details>
828-
</td>
822+
<td class="unreleased" align="center">Clang 16</td>
829823
</tr>
830824
<!-- Apr 2021 Papers -->
831825
<tr>

0 commit comments

Comments
 (0)