Skip to content

Commit a9780a6

Browse files
[clang] Implement __attribute__((format_matches))
This implements ``__attribute__((format_matches))``, as described in the RFC: https://discourse.llvm.org/t/rfc-format-attribute-attribute-format-like/83076 The ``format`` attribute only allows the compiler to check that a format string matches its arguments. If the format string is passed independently of its arguments, there is no way to have the compiler check it. ``format_matches(flavor, fmtidx, example)`` allows the compiler to check format strings against the ``example`` format string instead of against format arguments. See the changes to AttrDocs.td in this diff for more information. Implementation-wise, this change subclasses CheckPrintfHandler and CheckScanfHandler to allow them to collect specifiers into arrays, and implements comparing that two specifiers are equivalent. Radar-Id: 84936554
1 parent 4922350 commit a9780a6

File tree

12 files changed

+1128
-186
lines changed

12 files changed

+1128
-186
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,50 @@ Non-comprehensive list of changes in this release
430430
- Matrix types (a Clang extension) can now be used in pseudo-destructor expressions,
431431
which allows them to be stored in STL containers.
432432

433+
- There is a new ``format_matches`` attribute to complement the existing
434+
``format`` attribute. ``format_matches`` allows the compiler to verify that
435+
a format string argument is equivalent to a reference format string: it is
436+
useful when a function accepts a format string without its accompanying
437+
arguments to format. For instance:
438+
439+
.. code-block:: c
440+
441+
static int status_code;
442+
static const char *status_string;
443+
444+
void print_status(const char *fmt) {
445+
fprintf(stderr, fmt, status_code, status_string);
446+
// ^ warning: format string is not a string literal [-Wformat-nonliteral]
447+
}
448+
449+
void stuff(void) {
450+
print_status("%s (%#08x)\n");
451+
// order of %s and %x is swapped but there is no diagnostic
452+
}
453+
454+
Before the introducion of ``format_matches``, this code cannot be verified
455+
at compile-time. ``format_matches`` plugs that hole:
456+
457+
.. code-block:: c
458+
459+
__attribute__((format_matches(printf, 1, "%x %s")))
460+
void print_status(const char *fmt) {
461+
fprintf(stderr, fmt, status_code, status_string);
462+
// ^ `fmt` verified as if it was "%x %s" here; no longer triggers
463+
// -Wformat-nonliteral, would warn if arguments did not match "%x %s"
464+
}
465+
466+
void stuff(void) {
467+
print_status("%s (%#08x)\n");
468+
// warning: format specifier 's' is incompatible with 'x'
469+
// warning: format specifier 'x' is incompatible with 's'
470+
}
471+
472+
Like with ``format``, the first argument is the format string flavor and the
473+
second argument is the index of the format string parameter.
474+
``format_matches`` accepts an example valid format string as its third
475+
argument. For more information, see the Clang attributes documentation.
476+
433477
New Compiler Flags
434478
------------------
435479

clang/include/clang/AST/FormatString.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ class ArgType {
292292
};
293293

294294
private:
295-
const Kind K;
295+
Kind K;
296296
QualType T;
297297
const char *Name = nullptr;
298298
bool Ptr = false;
@@ -338,6 +338,7 @@ class ArgType {
338338
}
339339

340340
MatchKind matchesType(ASTContext &C, QualType argTy) const;
341+
MatchKind matchesArgType(ASTContext &C, const ArgType &other) const;
341342

342343
QualType getRepresentativeType(ASTContext &C) const;
343344

clang/include/clang/Basic/Attr.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,6 +1813,16 @@ def Format : InheritableAttr {
18131813
let Documentation = [FormatDocs];
18141814
}
18151815

1816+
def FormatMatches : InheritableAttr {
1817+
let Spellings = [GCC<"format_matches">];
1818+
let Args = [IdentifierArgument<"Type">, IntArgument<"FormatIdx">, ExprArgument<"ExpectedFormat">];
1819+
let AdditionalMembers = [{
1820+
StringLiteral *getFormatString() const;
1821+
}];
1822+
let Subjects = SubjectList<[ObjCMethod, Block, HasFunctionProto]>;
1823+
let Documentation = [FormatMatchesDocs];
1824+
}
1825+
18161826
def FormatArg : InheritableAttr {
18171827
let Spellings = [GCC<"format_arg">];
18181828
let Args = [ParamIdxArgument<"FormatIdx">];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3743,6 +3743,103 @@ behavior of the program is undefined.
37433743
}];
37443744
}
37453745

3746+
def FormatMatchesDocs : Documentation {
3747+
let Category = DocCatFunction;
3748+
let Content = [{
3749+
3750+
The ``format`` attribute is the basis for the enforcement of diagnostics in the
3751+
``-Wformat`` family, but it only handles the case where the format string is
3752+
passed along with the arguments it is going to format. It cannot handle the case
3753+
where the format string and the format arguments are passed separately from each
3754+
other. For instance:
3755+
3756+
.. code-block:: c
3757+
3758+
static const char *first_name;
3759+
static double todays_temperature;
3760+
static int wind_speed;
3761+
3762+
void say_hi(const char *fmt) {
3763+
printf(fmt, first_name, todays_temperature); // warning: format string is not a string literal
3764+
printf(fmt, first_name, wind_speed); // warning: format string is not a string literal
3765+
}
3766+
3767+
int main() {
3768+
say_hi("hello %s, it is %g degrees outside");
3769+
say_hi("hello %s, it is %d degrees outside!"); // no diagnostic
3770+
}
3771+
3772+
In this example, ``fmt`` is expected to format a ``const char *`` and a
3773+
``double``, but these values are not passed to ``say_hi``. Without the
3774+
``format`` attribute (which cannot apply in this case), the -Wformat-nonliteral
3775+
diagnostic triggers in the body of ``say_hi``, and incorrect ``say_hi`` call
3776+
sites do not trigger a diagnostic.
3777+
3778+
To complement the ``format`` attribute, Clang also defines the
3779+
``format_matches`` attribute. Its syntax is similar to the ``format``
3780+
attribute's, but instead of taking the index of the first formatted value
3781+
argument, it takes a C string literal with the expected specifiers:
3782+
3783+
.. code-block:: c
3784+
3785+
static const char *first_name;
3786+
static double todays_temperature;
3787+
static int wind_speed;
3788+
3789+
__attribute__((__format_matches__(printf, 1, "%s %g")))
3790+
void say_hi(const char *fmt) {
3791+
printf(fmt, first_name, todays_temperature); // no dignostic
3792+
printf(fmt, first_name, wind_speed); // warning: format specifies type 'int' but the argument has type 'double'
3793+
}
3794+
3795+
int main() {
3796+
say_hi("hello %s, it is %g degrees outside");
3797+
say_hi("it is %g degrees outside, have a good day %s!");
3798+
// warning: format specifies 'double' where 'const char *' is required
3799+
// warning: format specifies 'const char *' where 'double' is required
3800+
}
3801+
3802+
The third argument to ``format_matches`` is expected to evaluate to a **C string
3803+
literal** even when the format string would normally be a different type for the
3804+
given flavor, like a ``CFStringRef`` or a ``NSString *``.
3805+
3806+
In the implementation of a function with the ``format_matches`` attribute,
3807+
format verification works as if the format string was identical to the one
3808+
specified in the attribute.
3809+
3810+
At the call sites of functions with the ``format_matches`` attribute, format
3811+
verification instead compares the two format strings to evaluate their
3812+
equivalence. Each format flavor defines equivalence between format specifiers.
3813+
Generally speaking, two specifiers are equivalent if they format the same type.
3814+
For instance, in the ``printf`` flavor, ``%2i`` and ``%-0.5d`` are compatible.
3815+
When ``-Wformat-signedness`` is disabled, ``%d`` and ``%u`` are compatible. For
3816+
a negative example, ``%ld`` is incompatible with ``%d``.
3817+
3818+
Do note the following un-obvious cases:
3819+
3820+
* Passing ``NULL`` as the format string is accepted without diagnostics.
3821+
* When the format string is not NULL, it cannot _miss_ specifiers, even in
3822+
trailing positions. For instance, ``%d`` is not accepted when the required
3823+
format is ``%d %d %d``.
3824+
* Specifiers for integers as small or smaller than ``int`` (such as ``%hhd``)
3825+
are all mutually compatible because standard argument promotion ensures that
3826+
integers are at least the size of ``int`` when passed as variadic arguments.
3827+
With ``-Wformat-signedness``, mixing specifier for types with a different
3828+
signedness still results in a diagnostic.
3829+
* Format strings expecting a variable modifier (such as ``%*s``) are
3830+
incompatible with format strings that would itemize the variable modifiers
3831+
(such as ``%i %s``), even if the two specify ABI-compatible argument lists.
3832+
* All pointer specifiers, modifiers aside, are mutually incompatible. For
3833+
instance, ``%s`` is not compatible with ``%p``, and ``%p`` is not compatible
3834+
with ``%n``, and ``%hhn`` is incompatible with ``%s``, even if the pointers
3835+
are ABI-compatible or identical on the selected platform. However, ``%0.5s``
3836+
is compatible with ``%s``, since the difference only exists in modifier flags.
3837+
This is not overridable with ``-Wformat-pedantic`` or its inverse, which
3838+
control similar behavior in ``-Wformat``.
3839+
3840+
}];
3841+
}
3842+
37463843
def FlagEnumDocs : Documentation {
37473844
let Category = DocCatDecl;
37483845
let Content = [{

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7709,6 +7709,7 @@ def warn_format_nonliteral_noargs : Warning<
77097709
def warn_format_nonliteral : Warning<
77107710
"format string is not a string literal">,
77117711
InGroup<FormatNonLiteral>, DefaultIgnore;
7712+
def err_format_nonliteral : Error<"format string is not a string literal">;
77127713

77137714
def err_unexpected_interface : Error<
77147715
"unexpected interface name %0: expected expression">;
@@ -10017,6 +10018,8 @@ def note_previous_declaration_as : Note<
1001710018

1001810019
def warn_printf_insufficient_data_args : Warning<
1001910020
"more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>;
10021+
def warn_format_cmp_insufficient_specifiers : Warning<
10022+
"fewer specifiers in format string than expected">, InGroup<FormatInsufficientArgs>;
1002010023
def warn_printf_data_arg_not_used : Warning<
1002110024
"data argument not used by format string">, InGroup<FormatExtraArgs>;
1002210025
def warn_format_invalid_conversion : Warning<
@@ -10134,6 +10137,24 @@ def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
1013410137
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
1013510138
def note_format_security_fixit: Note<
1013610139
"treat the string as an argument to avoid this">;
10140+
def warn_format_cmp_role_mismatch : Warning<
10141+
"format argument is %select{a value|an indirect field width|an indirect "
10142+
"precision|an auxiliary value}0, but it should be %select{a value|an indirect "
10143+
"field width|an indirect precision|an auxiliary value}1">, InGroup<Format>;
10144+
def warn_format_cmp_modifierfor_mismatch : Warning<
10145+
"format argument modifies specifier at position %0, but it should modify "
10146+
"specifier at position %1">, InGroup<Format>;
10147+
def warn_format_cmp_sensitivity_mismatch : Warning<
10148+
"argument sensitivity is %select{unspecified|private|public|sensitive}0, but "
10149+
"it should be %select{unspecified|private|public|sensitive}1">, InGroup<Format>;
10150+
def warn_format_cmp_specifier_mismatch : Warning<
10151+
"format specifier '%0' is incompatible with '%1'">, InGroup<Format>;
10152+
def warn_format_cmp_specifier_sign_mismatch : Warning<
10153+
"signedness of format specifier '%0' is incompatible with '%1'">, InGroup<Format>;
10154+
def warn_format_cmp_specifier_mismatch_pedantic : Extension<
10155+
warn_format_cmp_specifier_sign_mismatch.Summary>, InGroup<FormatPedantic>;
10156+
def note_format_cmp_with : Note<
10157+
"comparing with this %select{specifier|format string}0">;
1013710158

1013810159
def warn_null_arg : Warning<
1013910160
"null passed to a callee that requires a non-null argument">,

clang/include/clang/Sema/Sema.h

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,6 +2153,7 @@ class Sema final : public SemaBase {
21532153
FAPK_Fixed, // values to format are fixed (no C-style variadic arguments)
21542154
FAPK_Variadic, // values to format are passed as variadic arguments
21552155
FAPK_VAList, // values to format are passed in a va_list
2156+
FAPK_Elsewhere, // values to format are not passed to this function
21562157
};
21572158

21582159
// Used to grab the relevant information from a FormatAttr and a
@@ -2163,12 +2164,15 @@ class Sema final : public SemaBase {
21632164
FormatArgumentPassingKind ArgPassingKind;
21642165
};
21652166

2166-
/// Given a FunctionDecl's FormatAttr, attempts to populate the
2167-
/// FomatStringInfo parameter with the FormatAttr's correct format_idx and
2168-
/// firstDataArg. Returns true when the format fits the function and the
2169-
/// FormatStringInfo has been populated.
2170-
static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
2171-
bool IsVariadic, FormatStringInfo *FSI);
2167+
/// Given a function and its FormatAttr or FormatMatchesAttr info, attempts to
2168+
/// populate the FomatStringInfo parameter with the attribute's correct
2169+
/// format_idx and firstDataArg. Returns true when the format fits the
2170+
/// function and the FormatStringInfo has been populated.
2171+
static bool getFormatStringInfo(const Decl *Function, unsigned FormatIdx,
2172+
unsigned FirstArg, FormatStringInfo *FSI);
2173+
static bool getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
2174+
bool IsCXXMember, bool IsVariadic,
2175+
FormatStringInfo *FSI);
21722176

21732177
// Used by C++ template instantiation.
21742178
ExprResult BuiltinShuffleVector(CallExpr *TheCall);
@@ -2191,7 +2195,9 @@ class Sema final : public SemaBase {
21912195
FST_Syslog,
21922196
FST_Unknown
21932197
};
2198+
static FormatStringType GetFormatStringType(StringRef FormatFlavor);
21942199
static FormatStringType GetFormatStringType(const FormatAttr *Format);
2200+
static FormatStringType GetFormatStringType(const FormatMatchesAttr *Format);
21952201

21962202
bool FormatStringHasSArg(const StringLiteral *FExpr);
21972203

@@ -2552,11 +2558,17 @@ class Sema final : public SemaBase {
25522558
VariadicCallType CallType, SourceLocation Loc,
25532559
SourceRange Range,
25542560
llvm::SmallBitVector &CheckedVarArgs);
2561+
bool CheckFormatString(const FormatMatchesAttr *Format,
2562+
ArrayRef<const Expr *> Args, bool IsCXXMember,
2563+
VariadicCallType CallType, SourceLocation Loc,
2564+
SourceRange Range,
2565+
llvm::SmallBitVector &CheckedVarArgs);
25552566
bool CheckFormatArguments(ArrayRef<const Expr *> Args,
2556-
FormatArgumentPassingKind FAPK, unsigned format_idx,
2557-
unsigned firstDataArg, FormatStringType Type,
2558-
VariadicCallType CallType, SourceLocation Loc,
2559-
SourceRange range,
2567+
FormatArgumentPassingKind FAPK,
2568+
const StringLiteral *ReferenceFormatString,
2569+
unsigned format_idx, unsigned firstDataArg,
2570+
FormatStringType Type, VariadicCallType CallType,
2571+
SourceLocation Loc, SourceRange range,
25602572
llvm::SmallBitVector &CheckedVarArgs);
25612573

25622574
void CheckInfNaNFunction(const CallExpr *Call, const FunctionDecl *FDecl);
@@ -4544,6 +4556,11 @@ class Sema final : public SemaBase {
45444556
FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
45454557
IdentifierInfo *Format, int FormatIdx,
45464558
int FirstArg);
4559+
FormatMatchesAttr *mergeFormatMatchesAttr(Decl *D,
4560+
const AttributeCommonInfo &CI,
4561+
IdentifierInfo *Format,
4562+
int FormatIdx,
4563+
StringLiteral *FormatStr);
45474564

45484565
/// AddAlignedAttr - Adds an aligned attribute to a particular declaration.
45494566
void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,

clang/lib/AST/AttrImpl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,4 +270,9 @@ unsigned AlignedAttr::getAlignment(ASTContext &Ctx) const {
270270
return Ctx.getTargetDefaultAlignForAttributeAligned();
271271
}
272272

273+
StringLiteral *FormatMatchesAttr::getFormatString() const {
274+
// This cannot go in headers because StringLiteral and Expr may be incomplete.
275+
return cast<StringLiteral>(getExpectedFormat());
276+
}
277+
273278
#include "clang/AST/AttrImpl.inc"

0 commit comments

Comments
 (0)