|
9 | 9 | #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
|
10 | 10 | #include "clang/AST/ASTContext.h"
|
11 | 11 | #include "clang/AST/Decl.h"
|
| 12 | +#include "clang/AST/DeclCXX.h" |
12 | 13 | #include "clang/AST/Expr.h"
|
| 14 | +#include "clang/AST/ExprCXX.h" |
13 | 15 | #include "clang/AST/RecursiveASTVisitor.h"
|
14 | 16 | #include "clang/AST/Stmt.h"
|
15 | 17 | #include "clang/AST/StmtVisitor.h"
|
16 | 18 | #include "clang/ASTMatchers/ASTMatchFinder.h"
|
17 | 19 | #include "clang/ASTMatchers/ASTMatchers.h"
|
18 |
| -#include "clang/Basic/CharInfo.h" |
19 | 20 | #include "clang/Basic/SourceLocation.h"
|
20 | 21 | #include "clang/Lex/Lexer.h"
|
21 | 22 | #include "clang/Lex/Preprocessor.h"
|
22 | 23 | #include "llvm/ADT/APSInt.h"
|
| 24 | +#include "llvm/ADT/DenseMap.h" |
| 25 | +#include "llvm/ADT/STLExtras.h" |
23 | 26 | #include "llvm/ADT/SmallVector.h"
|
24 | 27 | #include "llvm/ADT/StringRef.h"
|
| 28 | +#include "llvm/ADT/iterator_range.h" |
25 | 29 | #include "llvm/Support/Casting.h"
|
| 30 | +#include <cstddef> |
26 | 31 | #include <memory>
|
27 | 32 | #include <optional>
|
28 | 33 | #include <queue>
|
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
|
443 | 448 | return false;
|
444 | 449 | }
|
445 | 450 |
|
| 451 | +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { |
| 452 | + static const std::set<StringRef> PredefinedNames{ |
| 453 | + // numeric conversion: |
| 454 | + "atof", |
| 455 | + "atoi", |
| 456 | + "atol", |
| 457 | + "atoll", |
| 458 | + "strtol", |
| 459 | + "strtoll", |
| 460 | + "strtoul", |
| 461 | + "strtoull", |
| 462 | + "strtof", |
| 463 | + "strtod", |
| 464 | + "strtold", |
| 465 | + "strtoimax", |
| 466 | + "strtoumax", |
| 467 | + // "strfromf", "strfromd", "strfroml", // C23? |
| 468 | + // string manipulation: |
| 469 | + "strcpy", |
| 470 | + "strncpy", |
| 471 | + "strlcpy", |
| 472 | + "strcat", |
| 473 | + "strncat", |
| 474 | + "strlcat", |
| 475 | + "strxfrm", |
| 476 | + "strdup", |
| 477 | + "strndup", |
| 478 | + // string examination: |
| 479 | + "strlen", |
| 480 | + "strnlen", |
| 481 | + "strcmp", |
| 482 | + "strncmp", |
| 483 | + "stricmp", |
| 484 | + "strcasecmp", |
| 485 | + "strcoll", |
| 486 | + "strchr", |
| 487 | + "strrchr", |
| 488 | + "strspn", |
| 489 | + "strcspn", |
| 490 | + "strpbrk", |
| 491 | + "strstr", |
| 492 | + "strtok", |
| 493 | + // "mem-" functions |
| 494 | + "memchr", |
| 495 | + "wmemchr", |
| 496 | + "memcmp", |
| 497 | + "wmemcmp", |
| 498 | + "memcpy", |
| 499 | + "memccpy", |
| 500 | + "mempcpy", |
| 501 | + "wmemcpy", |
| 502 | + "memmove", |
| 503 | + "wmemmove", |
| 504 | + "memset", |
| 505 | + "wmemset", |
| 506 | + // IO: |
| 507 | + "fread", |
| 508 | + "fwrite", |
| 509 | + "fgets", |
| 510 | + "fgetws", |
| 511 | + "gets", |
| 512 | + "fputs", |
| 513 | + "fputws", |
| 514 | + "puts", |
| 515 | + // others |
| 516 | + "strerror_s", |
| 517 | + "strerror_r", |
| 518 | + "bcopy", |
| 519 | + "bzero", |
| 520 | + "bsearch", |
| 521 | + "qsort", |
| 522 | + }; |
| 523 | + |
| 524 | + // A tiny name parser for unsafe libc function names with additional |
| 525 | + // checks for `printf`s: |
| 526 | + struct FuncNameMatch { |
| 527 | + const CallExpr *const Call; |
| 528 | + FuncNameMatch(const CallExpr *Call) : Call(Call) {} |
| 529 | + |
| 530 | + // For a name `S` in `PredefinedNames` or a member of the printf/scanf |
| 531 | + // family, define matching function names with `S` by the grammar below: |
| 532 | + // |
| 533 | + // CoreName := S | S["wcs"/"str"] |
| 534 | + // LibcName := CoreName | CoreName + "_s" |
| 535 | + // MatchingName := "__builtin_" + LibcName | |
| 536 | + // "__builtin___" + LibcName + "_chk" | |
| 537 | + // "__asan_" + LibcName |
| 538 | + // |
| 539 | + // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) |
| 540 | + bool matchName(StringRef FunName) { |
| 541 | + // Try to match __builtin_: |
| 542 | + if (FunName.starts_with("__builtin_")) |
| 543 | + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or |
| 544 | + // no match: |
| 545 | + return matchLibcNameOrBuiltinChk( |
| 546 | + FunName.drop_front(10 /* truncate "__builtin_" */)); |
| 547 | + // Try to match __asan_: |
| 548 | + if (FunName.starts_with("__asan_")) |
| 549 | + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); |
| 550 | + return matchLibcName(FunName); |
| 551 | + } |
| 552 | + |
| 553 | + // Parameter `Name` is the substring after stripping off the prefix |
| 554 | + // "__builtin_". |
| 555 | + bool matchLibcNameOrBuiltinChk(StringRef Name) { |
| 556 | + if (Name.starts_with("__") && Name.ends_with("_chk")) |
| 557 | + return matchLibcName( |
| 558 | + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); |
| 559 | + return matchLibcName(Name); |
| 560 | + } |
| 561 | + |
| 562 | + bool matchLibcName(StringRef Name) { |
| 563 | + if (Name.ends_with("_s")) |
| 564 | + return matchCoreName(Name.drop_back(2 /* truncate "_s" */)); |
| 565 | + return matchCoreName(Name); |
| 566 | + } |
| 567 | + |
| 568 | + bool matchCoreName(StringRef Name) { |
| 569 | + if (PredefinedNames.find(Name.str()) != PredefinedNames.end()) |
| 570 | + return !isSafeStrlen(Name); // match unless it's a safe strlen call |
| 571 | + |
| 572 | + std::string NameWCS = Name.str(); |
| 573 | + size_t WcsPos = NameWCS.find("wcs"); |
| 574 | + |
| 575 | + while (WcsPos != std::string::npos) { |
| 576 | + NameWCS[WcsPos++] = 's'; |
| 577 | + NameWCS[WcsPos++] = 't'; |
| 578 | + NameWCS[WcsPos++] = 'r'; |
| 579 | + WcsPos = NameWCS.find("wcs", WcsPos); |
| 580 | + } |
| 581 | + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) |
| 582 | + return !isSafeStrlen(NameWCS); |
| 583 | + return matchPrintfOrScanfFamily(Name); |
| 584 | + } |
| 585 | + |
| 586 | + bool matchPrintfOrScanfFamily(StringRef Name) { |
| 587 | + if (Name.ends_with("scanf")) |
| 588 | + return true; // simply warn about scanf functions |
| 589 | + if (!Name.ends_with("printf")) |
| 590 | + return false; // neither printf nor scanf |
| 591 | + if (Name.starts_with("v")) |
| 592 | + // cannot check args for va_list, so `vprintf`s are treated as regular |
| 593 | + // unsafe libc calls: |
| 594 | + return true; |
| 595 | + |
| 596 | + // Truncate "printf", focus on prefixes. There are different possible |
| 597 | + // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the |
| 598 | + // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix: |
| 599 | + StringRef Prefix = Name.drop_back(6); |
| 600 | + |
| 601 | + if (Prefix.ends_with("w")) |
| 602 | + Prefix = Prefix.drop_back(1); |
| 603 | + return isUnsafePrintf(Prefix); |
| 604 | + } |
| 605 | + |
| 606 | + // A pointer type expression is known to be null-terminated, if it has the |
| 607 | + // form: E.c_str(), for any expression E of `std::string` type. |
| 608 | + static bool isNullTermPointer(const Expr *Ptr) { |
| 609 | + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) |
| 610 | + return true; |
| 611 | + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { |
| 612 | + const CXXMethodDecl *MD = MCE->getMethodDecl(); |
| 613 | + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); |
| 614 | + |
| 615 | + if (MD && RD && RD->isInStdNamespace()) |
| 616 | + if (MD->getName() == "c_str" && RD->getName() == "basic_string") |
| 617 | + return true; |
| 618 | + } |
| 619 | + return false; |
| 620 | + } |
| 621 | + |
| 622 | + // Check safe patterns for printfs w.r.t their prefixes: |
| 623 | + bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) { |
| 624 | + auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { |
| 625 | + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); |
| 626 | + }; |
| 627 | + |
| 628 | + if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated |
| 629 | + return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr); |
| 630 | + |
| 631 | + if (Prefix == "f" && Call->getNumArgs() > 1) { |
| 632 | + // fprintf, same as printf but skip the first arg |
| 633 | + auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end()); |
| 634 | + return llvm::any_of(Range, AnyUnsafeStrPtr); |
| 635 | + } |
| 636 | + if (Prefix == "sn" && Call->getNumArgs() > 2) { |
| 637 | + // The first two arguments need to be in safe patterns, which is checked |
| 638 | + // by `isSafeSizedby`: |
| 639 | + auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end()); |
| 640 | + return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) || |
| 641 | + llvm::any_of(Range, AnyUnsafeStrPtr); |
| 642 | + } |
| 643 | + // Note `sprintf`s should be changed to `snprintf`s, so they are treated |
| 644 | + // as regular unsafe libc calls: |
| 645 | + return true; |
| 646 | + } |
| 647 | + |
| 648 | + // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern: |
| 649 | + // SizedByPtr := DRE.data() |
| 650 | + // Size := DRE.size_bytes(), for a same DRE of sized-container/view |
| 651 | + // type. |
| 652 | + bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) { |
| 653 | + static StringRef SizedObjs[] = {"span", "array", "vector", |
| 654 | + "basic_string_view", "basic_string"}; |
| 655 | + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr)) |
| 656 | + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { |
| 657 | + if (auto *DREPtr = |
| 658 | + dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument())) |
| 659 | + if (auto *DRESize = |
| 660 | + dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument())) |
| 661 | + if (DREPtr->getDecl() == |
| 662 | + DRESize->getDecl()) // coming out of the same variable |
| 663 | + if (MCEPtr->getMethodDecl()->getName() == "data") |
| 664 | + if (MCESize->getMethodDecl()->getName() == "size_bytes") |
| 665 | + for (StringRef SizedObj : SizedObjs) |
| 666 | + if (MCEPtr->getRecordDecl()->isInStdNamespace() && |
| 667 | + MCEPtr->getRecordDecl() |
| 668 | + ->getCanonicalDecl() |
| 669 | + ->getName() == SizedObj) |
| 670 | + return true; |
| 671 | + } |
| 672 | + return false; |
| 673 | + } |
| 674 | + |
| 675 | + // This is safe: strlen("hello"). We don't want to be noisy on this case. |
| 676 | + bool isSafeStrlen(StringRef Name) { |
| 677 | + return Name == "strlen" && Call->getNumArgs() == 1 && |
| 678 | + isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts()); |
| 679 | + } |
| 680 | + } FuncNameMatch{&Node}; |
| 681 | + |
| 682 | + const FunctionDecl *FD = Node.getDirectCallee(); |
| 683 | + const IdentifierInfo *II; |
| 684 | + |
| 685 | + if (!FD) |
| 686 | + return false; |
| 687 | + II = FD->getIdentifier(); |
| 688 | + // If this is a special C++ name without IdentifierInfo, it can't be a |
| 689 | + // C library function. |
| 690 | + if (!II) |
| 691 | + return false; |
| 692 | + |
| 693 | + // Look through 'extern "C"' and anything similar invented in the future. |
| 694 | + // If this function is not in TU directly, it is not a C library function. |
| 695 | + if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) |
| 696 | + return false; |
| 697 | + |
| 698 | + // If this function is not externally visible, it is not a C library function. |
| 699 | + // Note that we make an exception for inline functions, which may be |
| 700 | + // declared in header files without external linkage. |
| 701 | + if (!FD->isInlined() && !FD->isExternallyVisible()) |
| 702 | + return false; |
| 703 | + return FuncNameMatch.matchName(II->getName()); |
| 704 | +} |
446 | 705 | } // namespace clang::ast_matchers
|
447 | 706 |
|
448 | 707 | namespace {
|
@@ -1025,6 +1284,24 @@ class DataInvocationGadget : public WarningGadget {
|
1025 | 1284 | DeclUseList getClaimedVarUseSites() const override { return {}; }
|
1026 | 1285 | };
|
1027 | 1286 |
|
| 1287 | +class UnsafeLibcFunctionCallGadget : public WarningGadget { |
| 1288 | + const CallExpr *const Call; |
| 1289 | + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; |
| 1290 | + |
| 1291 | +public: |
| 1292 | + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) |
| 1293 | + : WarningGadget(Kind::UnsafeLibcFunctionCall), |
| 1294 | + Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) {} |
| 1295 | + |
| 1296 | + static Matcher matcher() { |
| 1297 | + return stmt(callExpr(isUnsafeLibcFunctionCall()).bind(Tag)); |
| 1298 | + } |
| 1299 | + |
| 1300 | + const Stmt *getBaseStmt() const override { return Call; } |
| 1301 | + |
| 1302 | + DeclUseList getClaimedVarUseSites() const override { return {}; } |
| 1303 | +}; |
| 1304 | + |
1028 | 1305 | // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
|
1029 | 1306 | // Context (see `isInUnspecifiedLvalueContext`).
|
1030 | 1307 | // Note here `[]` is the built-in subscript operator.
|
|
0 commit comments