Skip to content

Commit 979619e

Browse files
committed
[-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions
Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names. (rdar://117182250)
1 parent c89e9e7 commit 979619e

8 files changed

+501
-4
lines changed

clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
1616

1717
#include "clang/AST/Decl.h"
18+
#include "clang/AST/Expr.h"
1819
#include "clang/AST/Stmt.h"
1920
#include "clang/Basic/SourceLocation.h"
2021
#include "llvm/Support/Debug.h"
@@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler {
106107
virtual void handleUnsafeOperation(const Stmt *Operation,
107108
bool IsRelatedToDecl, ASTContext &Ctx) = 0;
108109

110+
/// Invoked when a call to an unsafe libc function is found.
111+
/// \param PrintfInfo is 0 if the callee function is not a member of the
112+
/// printf family; is 1 if the callee is `sprintf`; is 2 if
113+
/// the callee is other printfs
114+
virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
115+
ASTContext &Ctx) = 0;
116+
109117
/// Invoked when an unsafe operation with a std container is found.
110118
virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
111119
bool IsRelatedToDecl,

clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
3838
WARNING_GADGET(UnsafeBufferUsageAttr)
3939
WARNING_GADGET(UnsafeBufferUsageCtorAttr)
4040
WARNING_GADGET(DataInvocation)
41+
WARNING_GADGET(UnsafeLibcFunctionCall)
4142
WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
4243
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
4344
FIXABLE_GADGET(DerefSimplePtrArithFixable)

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12376,6 +12376,11 @@ def warn_unsafe_buffer_operation : Warning<
1237612376
"%select{unsafe pointer operation|unsafe pointer arithmetic|"
1237712377
"unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">,
1237812378
InGroup<UnsafeBufferUsage>, DefaultIgnore;
12379+
def warn_unsafe_buffer_libc_call : Warning<
12380+
"function %0 introduces unsafe buffer access">,
12381+
InGroup<UnsafeBufferUsage>, DefaultIgnore;
12382+
def note_unsafe_buffer_printf_call : Note<
12383+
"%select{| change to 'snprintf' for explicit bounds checking | use 'std::string::c_str' as pointer to guarantee null-termination}0">;
1237912384
def note_unsafe_buffer_operation : Note<
1238012385
"used%select{| in pointer arithmetic| in buffer access}0 here">;
1238112386
def note_unsafe_buffer_variable_fixit_group : Note<

clang/lib/Analysis/UnsafeBufferUsage.cpp

Lines changed: 324 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,22 @@
99
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
1010
#include "clang/AST/ASTContext.h"
1111
#include "clang/AST/Decl.h"
12+
#include "clang/AST/DeclCXX.h"
1213
#include "clang/AST/Expr.h"
14+
#include "clang/AST/ExprCXX.h"
1315
#include "clang/AST/RecursiveASTVisitor.h"
1416
#include "clang/AST/Stmt.h"
1517
#include "clang/AST/StmtVisitor.h"
1618
#include "clang/ASTMatchers/ASTMatchFinder.h"
1719
#include "clang/ASTMatchers/ASTMatchers.h"
18-
#include "clang/Basic/CharInfo.h"
1920
#include "clang/Basic/SourceLocation.h"
2021
#include "clang/Lex/Lexer.h"
2122
#include "clang/Lex/Preprocessor.h"
2223
#include "llvm/ADT/APSInt.h"
24+
#include "llvm/ADT/DenseMap.h"
2325
#include "llvm/ADT/SmallVector.h"
2426
#include "llvm/ADT/StringRef.h"
27+
#include "llvm/ADT/iterator_range.h"
2528
#include "llvm/Support/Casting.h"
2629
#include <memory>
2730
#include <optional>
@@ -443,6 +446,289 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
443446
return false;
444447
}
445448

449+
AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
450+
static const std::set<StringRef> PredefinedNames{
451+
// numeric conversion:
452+
"atof",
453+
"atoi",
454+
"atol",
455+
"atoll",
456+
"strtol",
457+
"strtoll",
458+
"strtoul",
459+
"strtoull",
460+
"strtof",
461+
"strtod",
462+
"strtold",
463+
"strtoimax",
464+
"strtoumax",
465+
// "strfromf", "strfromd", "strfroml", // C23?
466+
// string manipulation:
467+
"strcpy",
468+
"strncpy",
469+
"strlcpy",
470+
"strcat",
471+
"strncat",
472+
"strlcat",
473+
"strxfrm",
474+
"strdup",
475+
"strndup",
476+
// string examination:
477+
"strlen",
478+
"strnlen",
479+
"strcmp",
480+
"strncmp",
481+
"stricmp",
482+
"strcasecmp",
483+
"strcoll",
484+
"strchr",
485+
"strrchr",
486+
"strspn",
487+
"strcspn",
488+
"strpbrk",
489+
"strstr",
490+
"strtok",
491+
// "mem-" functions
492+
"memchr",
493+
"wmemchr",
494+
"memcmp",
495+
"wmemcmp",
496+
"memcpy",
497+
"memccpy",
498+
"mempcpy",
499+
"wmemcpy",
500+
"memmove",
501+
"wmemmove",
502+
"memset",
503+
"wmemset",
504+
// IO:
505+
"fread",
506+
"fwrite",
507+
"fgets",
508+
"fgetws",
509+
"gets",
510+
"fputs",
511+
"fputws",
512+
"puts",
513+
// others
514+
"strerror_s",
515+
"strerror_r",
516+
"bcopy",
517+
"bzero",
518+
"bsearch",
519+
"qsort",
520+
};
521+
522+
// A tiny name parser for unsafe libc function names with additional
523+
// checks for `printf`s:
524+
struct FuncNameMatch {
525+
const CallExpr *const Call;
526+
enum ResultKind {
527+
NO, // no match
528+
YES, // matched a name that is not a member of the printf family
529+
SPRINTF, // matched `sprintf`
530+
OTHER_PRINTF, // matched a printf function that is not `sprintf`
531+
};
532+
FuncNameMatch(const CallExpr *Call) : Call(Call) {}
533+
534+
// For a name `S` in `PredefinedNames` or a member of the printf/scanf
535+
// family, define matching function names with `S` by the grammar below:
536+
//
537+
// CoreName := S | S["wcs"/"str"]
538+
// LibcName := CoreName | CoreName + "_s"
539+
// MatchingName := "__builtin_" + LibcName |
540+
// "__builtin___" + LibcName + "_chk" |
541+
// "__asan_" + LibcName
542+
//
543+
// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
544+
ResultKind matchName(StringRef FunName, bool isBuiltin) {
545+
// Try to match __builtin_:
546+
if (isBuiltin && FunName.starts_with("__builtin_"))
547+
// Then either it is __builtin_LibcName or __builtin___LibcName_chk or
548+
// no match:
549+
return matchLibcNameOrBuiltinChk(
550+
FunName.drop_front(10 /* truncate "__builtin_" */));
551+
// Try to match __asan_:
552+
if (FunName.starts_with("__asan_"))
553+
return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
554+
return matchLibcName(FunName);
555+
}
556+
557+
// Parameter `Name` is the substring after stripping off the prefix
558+
// "__builtin_".
559+
ResultKind matchLibcNameOrBuiltinChk(StringRef Name) {
560+
if (Name.starts_with("__") && Name.ends_with("_chk"))
561+
return matchLibcName(
562+
Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
563+
return matchLibcName(Name);
564+
}
565+
566+
ResultKind matchLibcName(StringRef Name) {
567+
if (Name.ends_with("_s"))
568+
return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
569+
return matchCoreName(Name);
570+
}
571+
572+
ResultKind matchCoreName(StringRef Name) {
573+
if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
574+
return !isSafeStrlen(Name) ? YES
575+
: NO; // match unless it's a safe strlen call
576+
577+
std::string NameWCS = Name.str();
578+
size_t WcsPos = NameWCS.find("wcs");
579+
580+
while (WcsPos != std::string::npos) {
581+
NameWCS[WcsPos++] = 's';
582+
NameWCS[WcsPos++] = 't';
583+
NameWCS[WcsPos++] = 'r';
584+
WcsPos = NameWCS.find("wcs", WcsPos);
585+
}
586+
if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
587+
return !isSafeStrlen(NameWCS) ? YES : NO;
588+
return matchPrintfOrScanfFamily(Name);
589+
}
590+
591+
ResultKind matchPrintfOrScanfFamily(StringRef Name) {
592+
if (Name.ends_with("scanf"))
593+
return YES; // simply warn about scanf functions
594+
if (!Name.ends_with("printf"))
595+
return NO; // neither printf nor scanf
596+
if (Name.starts_with("v"))
597+
// cannot check args for va_list, so `vprintf`s are treated as regular
598+
// unsafe libc calls:
599+
return YES;
600+
601+
// Truncate "printf", focus on prefixes. There are different possible
602+
// name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the
603+
// 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix:
604+
StringRef Prefix = Name.drop_back(6);
605+
606+
if (Prefix.ends_with("w"))
607+
Prefix = Prefix.drop_back(1);
608+
return isUnsafePrintf(Prefix);
609+
}
610+
611+
// A pointer type expression is known to be null-terminated, if it has the
612+
// form: E.c_str(), for any expression E of `std::string` type.
613+
static bool isNullTermPointer(const Expr *Ptr) {
614+
if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
615+
return true;
616+
if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
617+
const CXXMethodDecl *MD = MCE->getMethodDecl();
618+
const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
619+
620+
if (MD && RD && RD->isInStdNamespace())
621+
if (MD->getName() == "c_str" && RD->getName() == "basic_string")
622+
return true;
623+
}
624+
return false;
625+
}
626+
627+
// Check safe patterns for printfs w.r.t their prefixes:
628+
ResultKind
629+
isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
630+
auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
631+
return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
632+
};
633+
634+
if (Prefix.empty() ||
635+
Prefix == "k") // printf: all pointer args should be null-terminated
636+
return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr) ? OTHER_PRINTF
637+
: NO;
638+
639+
if (Prefix == "f" && Call->getNumArgs() > 1) {
640+
// fprintf, same as printf but skip the first arg
641+
auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end());
642+
return llvm::any_of(Range, AnyUnsafeStrPtr) ? OTHER_PRINTF : NO;
643+
}
644+
if (Prefix == "sn" && Call->getNumArgs() > 2) {
645+
// The first two arguments need to be in safe patterns, which is checked
646+
// by `isSafeSizedby`:
647+
auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end());
648+
return (!isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) ||
649+
llvm::any_of(Range, AnyUnsafeStrPtr))
650+
? OTHER_PRINTF
651+
: NO;
652+
}
653+
if (Prefix == "s")
654+
return SPRINTF;
655+
return NO;
656+
}
657+
658+
// Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern:
659+
// SizedByPtr := DRE.data()
660+
// Size := DRE.size_bytes(), for a same DRE of sized-container/view
661+
// type.
662+
bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) {
663+
static StringRef SizedObjs[] = {"span", "array", "vector",
664+
"basic_string_view", "basic_string"};
665+
if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr))
666+
if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
667+
if (auto *DREPtr =
668+
dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument()))
669+
if (auto *DRESize =
670+
dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument()))
671+
if (DREPtr->getDecl() ==
672+
DRESize->getDecl()) // coming out of the same variable
673+
if (MCEPtr->getMethodDecl()->getName() == "data")
674+
if (MCESize->getMethodDecl()->getName() == "size_bytes")
675+
for (StringRef SizedObj : SizedObjs)
676+
if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
677+
MCEPtr->getRecordDecl()
678+
->getCanonicalDecl()
679+
->getName() == SizedObj)
680+
return true;
681+
}
682+
return false;
683+
}
684+
685+
// This is safe: strlen("hello"). We don't want to be noisy on this case.
686+
bool isSafeStrlen(StringRef Name) {
687+
return Name == "strlen" && Call->getNumArgs() == 1 &&
688+
isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
689+
}
690+
} FuncNameMatch{&Node};
691+
692+
const FunctionDecl *FD = Node.getDirectCallee();
693+
const IdentifierInfo *II;
694+
695+
if (!FD)
696+
return false;
697+
II = FD->getIdentifier();
698+
// If this is a special C++ name without IdentifierInfo, it can't be a
699+
// C library function.
700+
if (!II)
701+
return false;
702+
703+
// Look through 'extern "C"' and anything similar invented in the future.
704+
// In general, C library functions will be in the TU directly.
705+
if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
706+
// If that's not the case, we also consider "C functions" re-declared in
707+
// `std` namespace.
708+
if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace())
709+
return false;
710+
}
711+
712+
// If this function is not externally visible, it is not a C library function.
713+
// Note that we make an exception for inline functions, which may be
714+
// declared in header files without external linkage.
715+
if (!FD->isInlined() && !FD->isExternallyVisible())
716+
return false;
717+
718+
FuncNameMatch::ResultKind RK =
719+
FuncNameMatch.matchName(II->getName(), FD->getBuiltinID());
720+
721+
// Bind extra strings for additional information passing to Gadgets:
722+
if (RK == FuncNameMatch::ResultKind::OTHER_PRINTF)
723+
stmt()
724+
.bind("UnsafeLibcFunctionCall_printf_family")
725+
.matches(Node, Finder, Builder);
726+
if (RK == FuncNameMatch::ResultKind::SPRINTF)
727+
stmt()
728+
.bind("UnsafeLibcFunctionCall_sprintf")
729+
.matches(Node, Finder, Builder);
730+
return RK != FuncNameMatch::ResultKind::NO;
731+
}
446732
} // namespace clang::ast_matchers
447733

448734
namespace {
@@ -1025,6 +1311,43 @@ class DataInvocationGadget : public WarningGadget {
10251311
DeclUseList getClaimedVarUseSites() const override { return {}; }
10261312
};
10271313

1314+
class UnsafeLibcFunctionCallGadget : public WarningGadget {
1315+
const CallExpr *const Call;
1316+
constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
1317+
enum WarnedFunKind {
1318+
OTHERS = 0,
1319+
SPRINTF = 1,
1320+
OTHER_PRINTFS = 2
1321+
} WarnedFunKind = OTHERS;
1322+
1323+
public:
1324+
UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
1325+
: WarningGadget(Kind::UnsafeLibcFunctionCall),
1326+
Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {
1327+
if (Result.Nodes.getNodeAs<Stmt>("UnsafeLibcFunctionCall_sprintf"))
1328+
WarnedFunKind = SPRINTF;
1329+
else if (Result.Nodes.getNodeAs<Stmt>(
1330+
"UnsafeLibcFunctionCall_printf_family"))
1331+
WarnedFunKind = OTHER_PRINTFS;
1332+
}
1333+
1334+
static Matcher matcher() {
1335+
return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag));
1336+
}
1337+
1338+
const Stmt *getBaseStmt() const { return Call; }
1339+
1340+
SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
1341+
1342+
void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1343+
bool IsRelatedToDecl,
1344+
ASTContext &Ctx) const override {
1345+
Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx);
1346+
}
1347+
1348+
DeclUseList getClaimedVarUseSites() const override { return {}; }
1349+
};
1350+
10281351
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
10291352
// Context (see `isInUnspecifiedLvalueContext`).
10301353
// Note here `[]` is the built-in subscript operator.

0 commit comments

Comments
 (0)