Skip to content

Commit 0ccb5a8

Browse files
committed
[-Wunsafe-buffer-usage] Add warn on unsafe calls to libc functions
Warning about calls to libc functions involving buffer access. Warned functions are hardcoded by names.
1 parent c89e9e7 commit 0ccb5a8

File tree

4 files changed

+360
-4
lines changed

4 files changed

+360
-4
lines changed

clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
3838
WARNING_GADGET(UnsafeBufferUsageAttr)
3939
WARNING_GADGET(UnsafeBufferUsageCtorAttr)
4040
WARNING_GADGET(DataInvocation)
41+
WARNING_GADGET(UnsafeLibcFunctionCall)
4142
WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
4243
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
4344
FIXABLE_GADGET(DerefSimplePtrArithFixable)

clang/lib/Analysis/UnsafeBufferUsage.cpp

Lines changed: 278 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,25 @@
99
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
1010
#include "clang/AST/ASTContext.h"
1111
#include "clang/AST/Decl.h"
12+
#include "clang/AST/DeclCXX.h"
1213
#include "clang/AST/Expr.h"
14+
#include "clang/AST/ExprCXX.h"
1315
#include "clang/AST/RecursiveASTVisitor.h"
1416
#include "clang/AST/Stmt.h"
1517
#include "clang/AST/StmtVisitor.h"
1618
#include "clang/ASTMatchers/ASTMatchFinder.h"
1719
#include "clang/ASTMatchers/ASTMatchers.h"
18-
#include "clang/Basic/CharInfo.h"
1920
#include "clang/Basic/SourceLocation.h"
2021
#include "clang/Lex/Lexer.h"
2122
#include "clang/Lex/Preprocessor.h"
2223
#include "llvm/ADT/APSInt.h"
24+
#include "llvm/ADT/DenseMap.h"
25+
#include "llvm/ADT/STLExtras.h"
2326
#include "llvm/ADT/SmallVector.h"
2427
#include "llvm/ADT/StringRef.h"
28+
#include "llvm/ADT/iterator_range.h"
2529
#include "llvm/Support/Casting.h"
30+
#include <cstddef>
2631
#include <memory>
2732
#include <optional>
2833
#include <queue>
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
443448
return false;
444449
}
445450

451+
AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
452+
static const std::set<StringRef> PredefinedNames{
453+
// numeric conversion:
454+
"atof",
455+
"atoi",
456+
"atol",
457+
"atoll",
458+
"strtol",
459+
"strtoll",
460+
"strtoul",
461+
"strtoull",
462+
"strtof",
463+
"strtod",
464+
"strtold",
465+
"strtoimax",
466+
"strtoumax",
467+
// "strfromf", "strfromd", "strfroml", // C23?
468+
// string manipulation:
469+
"strcpy",
470+
"strncpy",
471+
"strlcpy",
472+
"strcat",
473+
"strncat",
474+
"strlcat",
475+
"strxfrm",
476+
"strdup",
477+
"strndup",
478+
// string examination:
479+
"strlen",
480+
"strnlen",
481+
"strcmp",
482+
"strncmp",
483+
"stricmp",
484+
"strcasecmp",
485+
"strcoll",
486+
"strchr",
487+
"strrchr",
488+
"strspn",
489+
"strcspn",
490+
"strpbrk",
491+
"strstr",
492+
"strtok",
493+
// "mem-" functions
494+
"memchr",
495+
"wmemchr",
496+
"memcmp",
497+
"wmemcmp",
498+
"memcpy",
499+
"memccpy",
500+
"mempcpy",
501+
"wmemcpy",
502+
"memmove",
503+
"wmemmove",
504+
"memset",
505+
"wmemset",
506+
// IO:
507+
"fread",
508+
"fwrite",
509+
"fgets",
510+
"fgetws",
511+
"gets",
512+
"fputs",
513+
"fputws",
514+
"puts",
515+
// others
516+
"strerror_s",
517+
"strerror_r",
518+
"bcopy",
519+
"bzero",
520+
"bsearch",
521+
"qsort",
522+
};
523+
524+
// A tiny name parser for unsafe libc function names with additional
525+
// checks for `printf`s:
526+
struct FuncNameMatch {
527+
const CallExpr *const Call;
528+
FuncNameMatch(const CallExpr *Call) : Call(Call) {}
529+
530+
// For a name `S` in `PredefinedNames` or a member of the printf/scanf
531+
// family, define matching function names with `S` by the grammar below:
532+
//
533+
// CoreName := S | S["wcs"/"str"]
534+
// LibcName := CoreName | CoreName + "_s"
535+
// MatchingName := "__builtin_" + LibcName |
536+
// "__builtin___" + LibcName + "_chk" |
537+
// "__asan_" + LibcName
538+
//
539+
// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
540+
bool matchName(StringRef FunName) {
541+
// Try to match __builtin_:
542+
if (FunName.starts_with("__builtin_"))
543+
// Then either it is __builtin_LibcName or __builtin___LibcName_chk or
544+
// no match:
545+
return matchLibcNameOrBuiltinChk(
546+
FunName.drop_front(10 /* truncate "__builtin_" */));
547+
// Try to match __asan_:
548+
if (FunName.starts_with("__asan_"))
549+
return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
550+
return matchLibcName(FunName);
551+
}
552+
553+
// Parameter `Name` is the substring after stripping off the prefix
554+
// "__builtin_".
555+
bool matchLibcNameOrBuiltinChk(StringRef Name) {
556+
if (Name.starts_with("__") && Name.ends_with("_chk"))
557+
return matchLibcName(
558+
Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
559+
return matchLibcName(Name);
560+
}
561+
562+
bool matchLibcName(StringRef Name) {
563+
if (Name.ends_with("_s"))
564+
return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
565+
return matchCoreName(Name);
566+
}
567+
568+
bool matchCoreName(StringRef Name) {
569+
if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
570+
return !isSafeStrlen(Name); // match unless it's a safe strlen call
571+
572+
std::string NameWCS = Name.str();
573+
size_t WcsPos = NameWCS.find("wcs");
574+
575+
while (WcsPos != std::string::npos) {
576+
NameWCS[WcsPos++] = 's';
577+
NameWCS[WcsPos++] = 't';
578+
NameWCS[WcsPos++] = 'r';
579+
WcsPos = NameWCS.find("wcs", WcsPos);
580+
}
581+
if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
582+
return !isSafeStrlen(NameWCS);
583+
return matchPrintfOrScanfFamily(Name);
584+
}
585+
586+
bool matchPrintfOrScanfFamily(StringRef Name) {
587+
if (Name.ends_with("scanf"))
588+
return true; // simply warn about scanf functions
589+
if (!Name.ends_with("printf"))
590+
return false; // neither printf nor scanf
591+
if (Name.starts_with("v"))
592+
// cannot check args for va_list, so `vprintf`s are treated as regular
593+
// unsafe libc calls:
594+
return true;
595+
596+
// Truncate "printf", focus on prefixes. There are different possible
597+
// name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the
598+
// 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix:
599+
StringRef Prefix = Name.drop_back(6);
600+
601+
if (Prefix.ends_with("w"))
602+
Prefix = Prefix.drop_back(1);
603+
return isUnsafePrintf(Prefix);
604+
}
605+
606+
// A pointer type expression is known to be null-terminated, if it has the
607+
// form: E.c_str(), for any expression E of `std::string` type.
608+
static bool isNullTermPointer(const Expr *Ptr) {
609+
if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts()))
610+
return true;
611+
if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) {
612+
const CXXMethodDecl *MD = MCE->getMethodDecl();
613+
const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
614+
615+
if (MD && RD && RD->isInStdNamespace())
616+
if (MD->getName() == "c_str" && RD->getName() == "basic_string")
617+
return true;
618+
}
619+
return false;
620+
}
621+
622+
// Check safe patterns for printfs w.r.t their prefixes:
623+
bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) {
624+
auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
625+
return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
626+
};
627+
628+
if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated
629+
return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr);
630+
631+
if (Prefix == "f" && Call->getNumArgs() > 1) {
632+
// fprintf, same as printf but skip the first arg
633+
auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end());
634+
return llvm::any_of(Range, AnyUnsafeStrPtr);
635+
}
636+
if (Prefix == "sn" && Call->getNumArgs() > 2) {
637+
// The first two arguments need to be in safe patterns, which is checked
638+
// by `isSafeSizedby`:
639+
auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end());
640+
return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) ||
641+
llvm::any_of(Range, AnyUnsafeStrPtr);
642+
}
643+
// Note `sprintf`s should be changed to `snprintf`s, so they are treated
644+
// as regular unsafe libc calls:
645+
return true;
646+
}
647+
648+
// Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern:
649+
// SizedByPtr := DRE.data()
650+
// Size := DRE.size_bytes(), for a same DRE of sized-container/view
651+
// type.
652+
bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) {
653+
static StringRef SizedObjs[] = {"span", "array", "vector",
654+
"basic_string_view", "basic_string"};
655+
if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr))
656+
if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) {
657+
if (auto *DREPtr =
658+
dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument()))
659+
if (auto *DRESize =
660+
dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument()))
661+
if (DREPtr->getDecl() ==
662+
DRESize->getDecl()) // coming out of the same variable
663+
if (MCEPtr->getMethodDecl()->getName() == "data")
664+
if (MCESize->getMethodDecl()->getName() == "size_bytes")
665+
for (StringRef SizedObj : SizedObjs)
666+
if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
667+
MCEPtr->getRecordDecl()
668+
->getCanonicalDecl()
669+
->getName() == SizedObj)
670+
return true;
671+
}
672+
return false;
673+
}
674+
675+
// This is safe: strlen("hello"). We don't want to be noisy on this case.
676+
bool isSafeStrlen(StringRef Name) {
677+
return Name == "strlen" && Call->getNumArgs() == 1 &&
678+
isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts());
679+
}
680+
} FuncNameMatch{&Node};
681+
682+
const FunctionDecl *FD = Node.getDirectCallee();
683+
const IdentifierInfo *II;
684+
685+
if (!FD)
686+
return false;
687+
II = FD->getIdentifier();
688+
// If this is a special C++ name without IdentifierInfo, it can't be a
689+
// C library function.
690+
if (!II)
691+
return false;
692+
693+
// Look through 'extern "C"' and anything similar invented in the future.
694+
// If this function is not in TU directly, it is not a C library function.
695+
if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit())
696+
return false;
697+
698+
// If this function is not externally visible, it is not a C library function.
699+
// Note that we make an exception for inline functions, which may be
700+
// declared in header files without external linkage.
701+
if (!FD->isInlined() && !FD->isExternallyVisible())
702+
return false;
703+
return FuncNameMatch.matchName(II->getName());
704+
}
446705
} // namespace clang::ast_matchers
447706

448707
namespace {
@@ -1025,6 +1284,24 @@ class DataInvocationGadget : public WarningGadget {
10251284
DeclUseList getClaimedVarUseSites() const override { return {}; }
10261285
};
10271286

1287+
class UnsafeLibcFunctionCallGadget : public WarningGadget {
1288+
const CallExpr *const Call;
1289+
constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
1290+
1291+
public:
1292+
UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
1293+
: WarningGadget(Kind::UnsafeLibcFunctionCall),
1294+
Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {}
1295+
1296+
static Matcher matcher() {
1297+
return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag));
1298+
}
1299+
1300+
const Stmt *getBaseStmt() const override { return Call; }
1301+
1302+
DeclUseList getClaimedVarUseSites() const override { return {}; }
1303+
};
1304+
10281305
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
10291306
// Context (see `isInUnspecifiedLvalueContext`).
10301307
// Note here `[]` is the built-in subscript operator.

0 commit comments

Comments
 (0)