|
9 | 9 | #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
|
10 | 10 | #include "clang/AST/ASTContext.h"
|
11 | 11 | #include "clang/AST/Decl.h"
|
| 12 | +#include "clang/AST/DeclCXX.h" |
12 | 13 | #include "clang/AST/Expr.h"
|
| 14 | +#include "clang/AST/ExprCXX.h" |
13 | 15 | #include "clang/AST/RecursiveASTVisitor.h"
|
14 | 16 | #include "clang/AST/Stmt.h"
|
15 | 17 | #include "clang/AST/StmtVisitor.h"
|
16 | 18 | #include "clang/ASTMatchers/ASTMatchFinder.h"
|
17 | 19 | #include "clang/ASTMatchers/ASTMatchers.h"
|
18 |
| -#include "clang/Basic/CharInfo.h" |
19 | 20 | #include "clang/Basic/SourceLocation.h"
|
20 | 21 | #include "clang/Lex/Lexer.h"
|
21 | 22 | #include "clang/Lex/Preprocessor.h"
|
22 | 23 | #include "llvm/ADT/APSInt.h"
|
| 24 | +#include "llvm/ADT/DenseMap.h" |
23 | 25 | #include "llvm/ADT/SmallVector.h"
|
24 | 26 | #include "llvm/ADT/StringRef.h"
|
| 27 | +#include "llvm/ADT/iterator_range.h" |
25 | 28 | #include "llvm/Support/Casting.h"
|
26 | 29 | #include <memory>
|
27 | 30 | #include <optional>
|
@@ -443,6 +446,289 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
|
443 | 446 | return false;
|
444 | 447 | }
|
445 | 448 |
|
| 449 | +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { |
| 450 | + static const std::set<StringRef> PredefinedNames{ |
| 451 | + // numeric conversion: |
| 452 | + "atof", |
| 453 | + "atoi", |
| 454 | + "atol", |
| 455 | + "atoll", |
| 456 | + "strtol", |
| 457 | + "strtoll", |
| 458 | + "strtoul", |
| 459 | + "strtoull", |
| 460 | + "strtof", |
| 461 | + "strtod", |
| 462 | + "strtold", |
| 463 | + "strtoimax", |
| 464 | + "strtoumax", |
| 465 | + // "strfromf", "strfromd", "strfroml", // C23? |
| 466 | + // string manipulation: |
| 467 | + "strcpy", |
| 468 | + "strncpy", |
| 469 | + "strlcpy", |
| 470 | + "strcat", |
| 471 | + "strncat", |
| 472 | + "strlcat", |
| 473 | + "strxfrm", |
| 474 | + "strdup", |
| 475 | + "strndup", |
| 476 | + // string examination: |
| 477 | + "strlen", |
| 478 | + "strnlen", |
| 479 | + "strcmp", |
| 480 | + "strncmp", |
| 481 | + "stricmp", |
| 482 | + "strcasecmp", |
| 483 | + "strcoll", |
| 484 | + "strchr", |
| 485 | + "strrchr", |
| 486 | + "strspn", |
| 487 | + "strcspn", |
| 488 | + "strpbrk", |
| 489 | + "strstr", |
| 490 | + "strtok", |
| 491 | + // "mem-" functions |
| 492 | + "memchr", |
| 493 | + "wmemchr", |
| 494 | + "memcmp", |
| 495 | + "wmemcmp", |
| 496 | + "memcpy", |
| 497 | + "memccpy", |
| 498 | + "mempcpy", |
| 499 | + "wmemcpy", |
| 500 | + "memmove", |
| 501 | + "wmemmove", |
| 502 | + "memset", |
| 503 | + "wmemset", |
| 504 | + // IO: |
| 505 | + "fread", |
| 506 | + "fwrite", |
| 507 | + "fgets", |
| 508 | + "fgetws", |
| 509 | + "gets", |
| 510 | + "fputs", |
| 511 | + "fputws", |
| 512 | + "puts", |
| 513 | + // others |
| 514 | + "strerror_s", |
| 515 | + "strerror_r", |
| 516 | + "bcopy", |
| 517 | + "bzero", |
| 518 | + "bsearch", |
| 519 | + "qsort", |
| 520 | + }; |
| 521 | + |
| 522 | + // A tiny name parser for unsafe libc function names with additional |
| 523 | + // checks for `printf`s: |
| 524 | + struct FuncNameMatch { |
| 525 | + const CallExpr *const Call; |
| 526 | + enum ResultKind { |
| 527 | + NO, // no match |
| 528 | + YES, // matched a name that is not a member of the printf family |
| 529 | + SPRINTF, // matched `sprintf` |
| 530 | + OTHER_PRINTF, // matched a printf function that is not `sprintf` |
| 531 | + }; |
| 532 | + FuncNameMatch(const CallExpr *Call) : Call(Call) {} |
| 533 | + |
| 534 | + // For a name `S` in `PredefinedNames` or a member of the printf/scanf |
| 535 | + // family, define matching function names with `S` by the grammar below: |
| 536 | + // |
| 537 | + // CoreName := S | S["wcs"/"str"] |
| 538 | + // LibcName := CoreName | CoreName + "_s" |
| 539 | + // MatchingName := "__builtin_" + LibcName | |
| 540 | + // "__builtin___" + LibcName + "_chk" | |
| 541 | + // "__asan_" + LibcName |
| 542 | + // |
| 543 | + // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) |
| 544 | + ResultKind matchName(StringRef FunName, bool isBuiltin) { |
| 545 | + // Try to match __builtin_: |
| 546 | + if (isBuiltin && FunName.starts_with("__builtin_")) |
| 547 | + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or |
| 548 | + // no match: |
| 549 | + return matchLibcNameOrBuiltinChk( |
| 550 | + FunName.drop_front(10 /* truncate "__builtin_" */)); |
| 551 | + // Try to match __asan_: |
| 552 | + if (FunName.starts_with("__asan_")) |
| 553 | + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); |
| 554 | + return matchLibcName(FunName); |
| 555 | + } |
| 556 | + |
| 557 | + // Parameter `Name` is the substring after stripping off the prefix |
| 558 | + // "__builtin_". |
| 559 | + ResultKind matchLibcNameOrBuiltinChk(StringRef Name) { |
| 560 | + if (Name.starts_with("__") && Name.ends_with("_chk")) |
| 561 | + return matchLibcName( |
| 562 | + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); |
| 563 | + return matchLibcName(Name); |
| 564 | + } |
| 565 | + |
| 566 | + ResultKind matchLibcName(StringRef Name) { |
| 567 | + if (Name.ends_with("_s")) |
| 568 | + return matchCoreName(Name.drop_back(2 /* truncate "_s" */)); |
| 569 | + return matchCoreName(Name); |
| 570 | + } |
| 571 | + |
| 572 | + ResultKind matchCoreName(StringRef Name) { |
| 573 | + if (PredefinedNames.find(Name.str()) != PredefinedNames.end()) |
| 574 | + return !isSafeStrlen(Name) ? YES |
| 575 | + : NO; // match unless it's a safe strlen call |
| 576 | + |
| 577 | + std::string NameWCS = Name.str(); |
| 578 | + size_t WcsPos = NameWCS.find("wcs"); |
| 579 | + |
| 580 | + while (WcsPos != std::string::npos) { |
| 581 | + NameWCS[WcsPos++] = 's'; |
| 582 | + NameWCS[WcsPos++] = 't'; |
| 583 | + NameWCS[WcsPos++] = 'r'; |
| 584 | + WcsPos = NameWCS.find("wcs", WcsPos); |
| 585 | + } |
| 586 | + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) |
| 587 | + return !isSafeStrlen(NameWCS) ? YES : NO; |
| 588 | + return matchPrintfOrScanfFamily(Name); |
| 589 | + } |
| 590 | + |
| 591 | + ResultKind matchPrintfOrScanfFamily(StringRef Name) { |
| 592 | + if (Name.ends_with("scanf")) |
| 593 | + return YES; // simply warn about scanf functions |
| 594 | + if (!Name.ends_with("printf")) |
| 595 | + return NO; // neither printf nor scanf |
| 596 | + if (Name.starts_with("v")) |
| 597 | + // cannot check args for va_list, so `vprintf`s are treated as regular |
| 598 | + // unsafe libc calls: |
| 599 | + return YES; |
| 600 | + |
| 601 | + // Truncate "printf", focus on prefixes. There are different possible |
| 602 | + // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the |
| 603 | + // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix: |
| 604 | + StringRef Prefix = Name.drop_back(6); |
| 605 | + |
| 606 | + if (Prefix.ends_with("w")) |
| 607 | + Prefix = Prefix.drop_back(1); |
| 608 | + return isUnsafePrintf(Prefix); |
| 609 | + } |
| 610 | + |
| 611 | + // A pointer type expression is known to be null-terminated, if it has the |
| 612 | + // form: E.c_str(), for any expression E of `std::string` type. |
| 613 | + static bool isNullTermPointer(const Expr *Ptr) { |
| 614 | + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) |
| 615 | + return true; |
| 616 | + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { |
| 617 | + const CXXMethodDecl *MD = MCE->getMethodDecl(); |
| 618 | + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); |
| 619 | + |
| 620 | + if (MD && RD && RD->isInStdNamespace()) |
| 621 | + if (MD->getName() == "c_str" && RD->getName() == "basic_string") |
| 622 | + return true; |
| 623 | + } |
| 624 | + return false; |
| 625 | + } |
| 626 | + |
| 627 | + // Check safe patterns for printfs w.r.t their prefixes: |
| 628 | + ResultKind |
| 629 | + isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) { |
| 630 | + auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { |
| 631 | + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); |
| 632 | + }; |
| 633 | + |
| 634 | + if (Prefix.empty() || |
| 635 | + Prefix == "k") // printf: all pointer args should be null-terminated |
| 636 | + return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr) ? OTHER_PRINTF |
| 637 | + : NO; |
| 638 | + |
| 639 | + if (Prefix == "f" && Call->getNumArgs() > 1) { |
| 640 | + // fprintf, same as printf but skip the first arg |
| 641 | + auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end()); |
| 642 | + return llvm::any_of(Range, AnyUnsafeStrPtr) ? OTHER_PRINTF : NO; |
| 643 | + } |
| 644 | + if (Prefix == "sn" && Call->getNumArgs() > 2) { |
| 645 | + // The first two arguments need to be in safe patterns, which is checked |
| 646 | + // by `isSafeSizedby`: |
| 647 | + auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end()); |
| 648 | + return (!isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) || |
| 649 | + llvm::any_of(Range, AnyUnsafeStrPtr)) |
| 650 | + ? OTHER_PRINTF |
| 651 | + : NO; |
| 652 | + } |
| 653 | + if (Prefix == "s") |
| 654 | + return SPRINTF; |
| 655 | + return NO; |
| 656 | + } |
| 657 | + |
| 658 | + // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern: |
| 659 | + // SizedByPtr := DRE.data() |
| 660 | + // Size := DRE.size_bytes(), for a same DRE of sized-container/view |
| 661 | + // type. |
| 662 | + bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) { |
| 663 | + static StringRef SizedObjs[] = {"span", "array", "vector", |
| 664 | + "basic_string_view", "basic_string"}; |
| 665 | + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr)) |
| 666 | + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { |
| 667 | + if (auto *DREPtr = |
| 668 | + dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument())) |
| 669 | + if (auto *DRESize = |
| 670 | + dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument())) |
| 671 | + if (DREPtr->getDecl() == |
| 672 | + DRESize->getDecl()) // coming out of the same variable |
| 673 | + if (MCEPtr->getMethodDecl()->getName() == "data") |
| 674 | + if (MCESize->getMethodDecl()->getName() == "size_bytes") |
| 675 | + for (StringRef SizedObj : SizedObjs) |
| 676 | + if (MCEPtr->getRecordDecl()->isInStdNamespace() && |
| 677 | + MCEPtr->getRecordDecl() |
| 678 | + ->getCanonicalDecl() |
| 679 | + ->getName() == SizedObj) |
| 680 | + return true; |
| 681 | + } |
| 682 | + return false; |
| 683 | + } |
| 684 | + |
| 685 | + // This is safe: strlen("hello"). We don't want to be noisy on this case. |
| 686 | + bool isSafeStrlen(StringRef Name) { |
| 687 | + return Name == "strlen" && Call->getNumArgs() == 1 && |
| 688 | + isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts()); |
| 689 | + } |
| 690 | + } FuncNameMatch{&Node}; |
| 691 | + |
| 692 | + const FunctionDecl *FD = Node.getDirectCallee(); |
| 693 | + const IdentifierInfo *II; |
| 694 | + |
| 695 | + if (!FD) |
| 696 | + return false; |
| 697 | + II = FD->getIdentifier(); |
| 698 | + // If this is a special C++ name without IdentifierInfo, it can't be a |
| 699 | + // C library function. |
| 700 | + if (!II) |
| 701 | + return false; |
| 702 | + |
| 703 | + // Look through 'extern "C"' and anything similar invented in the future. |
| 704 | + // In general, C library functions will be in the TU directly. |
| 705 | + if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) { |
| 706 | + // If that's not the case, we also consider "C functions" re-declared in |
| 707 | + // `std` namespace. |
| 708 | + if (!FD->getDeclContext()->getRedeclContext()->isStdNamespace()) |
| 709 | + return false; |
| 710 | + } |
| 711 | + |
| 712 | + // If this function is not externally visible, it is not a C library function. |
| 713 | + // Note that we make an exception for inline functions, which may be |
| 714 | + // declared in header files without external linkage. |
| 715 | + if (!FD->isInlined() && !FD->isExternallyVisible()) |
| 716 | + return false; |
| 717 | + |
| 718 | + FuncNameMatch::ResultKind RK = |
| 719 | + FuncNameMatch.matchName(II->getName(), FD->getBuiltinID()); |
| 720 | + |
| 721 | + // Bind extra strings for additional information passing to Gadgets: |
| 722 | + if (RK == FuncNameMatch::ResultKind::OTHER_PRINTF) |
| 723 | + stmt() |
| 724 | + .bind("UnsafeLibcFunctionCall_printf_family") |
| 725 | + .matches(Node, Finder, Builder); |
| 726 | + if (RK == FuncNameMatch::ResultKind::SPRINTF) |
| 727 | + stmt() |
| 728 | + .bind("UnsafeLibcFunctionCall_sprintf") |
| 729 | + .matches(Node, Finder, Builder); |
| 730 | + return RK != FuncNameMatch::ResultKind::NO; |
| 731 | +} |
446 | 732 | } // namespace clang::ast_matchers
|
447 | 733 |
|
448 | 734 | namespace {
|
@@ -1025,6 +1311,43 @@ class DataInvocationGadget : public WarningGadget {
|
1025 | 1311 | DeclUseList getClaimedVarUseSites() const override { return {}; }
|
1026 | 1312 | };
|
1027 | 1313 |
|
| 1314 | +class UnsafeLibcFunctionCallGadget : public WarningGadget { |
| 1315 | + const CallExpr *const Call; |
| 1316 | + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; |
| 1317 | + enum WarnedFunKind { |
| 1318 | + OTHERS = 0, |
| 1319 | + SPRINTF = 1, |
| 1320 | + OTHER_PRINTFS = 2 |
| 1321 | + } WarnedFunKind = OTHERS; |
| 1322 | + |
| 1323 | +public: |
| 1324 | + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) |
| 1325 | + : WarningGadget(Kind::UnsafeLibcFunctionCall), |
| 1326 | + Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { |
| 1327 | + if (Result.Nodes.getNodeAs<Stmt>("UnsafeLibcFunctionCall_sprintf")) |
| 1328 | + WarnedFunKind = SPRINTF; |
| 1329 | + else if (Result.Nodes.getNodeAs<Stmt>( |
| 1330 | + "UnsafeLibcFunctionCall_printf_family")) |
| 1331 | + WarnedFunKind = OTHER_PRINTFS; |
| 1332 | + } |
| 1333 | + |
| 1334 | + static Matcher matcher() { |
| 1335 | + return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag)); |
| 1336 | + } |
| 1337 | + |
| 1338 | + const Stmt *getBaseStmt() const { return Call; } |
| 1339 | + |
| 1340 | + SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); } |
| 1341 | + |
| 1342 | + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, |
| 1343 | + bool IsRelatedToDecl, |
| 1344 | + ASTContext &Ctx) const override { |
| 1345 | + Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx); |
| 1346 | + } |
| 1347 | + |
| 1348 | + DeclUseList getClaimedVarUseSites() const override { return {}; } |
| 1349 | +}; |
| 1350 | + |
1028 | 1351 | // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
|
1029 | 1352 | // Context (see `isInUnspecifiedLvalueContext`).
|
1030 | 1353 | // Note here `[]` is the built-in subscript operator.
|
|
0 commit comments