Skip to content

WIP Add suppress weak loop assumptions visitor #111258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 164 additions & 1 deletion clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,28 @@
//
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/Stmt.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/ProgramPoint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/ADT/SmallString.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
Expand All @@ -32,6 +42,158 @@ using namespace ento;
using namespace taint;
using llvm::formatv;

namespace {

class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor {
public:
SuppressReportsHavingWeakLoopAssumption(const PathSensitiveBugReport &R)
: OrigCursor(R.getErrorNode()) {}

private:
friend class BugReporterVisitor;

// The exploded node we are currently visiting, but in the original exploded
// graph. This graph is not trimmed, unlike the one we usually visit.
const ExplodedNode *OrigCursor;

llvm::DenseSet<const Stmt *> LoopsSeen;
llvm::DenseMap<const Expr *, unsigned> NumTimesTakenTrueDecision;
llvm::DenseMap<const Expr *, unsigned> NumTimesTakenFalseDecision;

void Profile(llvm::FoldingSetNodeID &ID) const final {
static const bool Tag = 0;
ID.AddPointer(&Tag);
}

static const ExplodedNode *selectMatchingPred(const ExplodedNode *OrigSucc,
unsigned SoughtPredId) {
auto MatchesSoughtId = [SoughtPredId](const ExplodedNode *N) {
return N->getID() == SoughtPredId;
};
auto It = llvm::find_if(OrigSucc->preds(), MatchesSoughtId);
assert(It != OrigSucc->pred_end());
return *It;
}

static bool isLoopAndNonNull(const Stmt *S) {
return isa_and_nonnull<ForStmt, WhileStmt, CXXForRangeStmt, DoStmt>(S);
}

void registerLoopStatements(const ExplodedNode *N) {
if (auto Entrance = N->getLocation().getAs<BlockEntrance>()) {
const Stmt *TermStmt = Entrance->getBlock()->getTerminatorStmt();
if (isLoopAndNonNull(TermStmt))
LoopsSeen.insert(TermStmt);
}
}

void registerOccurrence(llvm::DenseMap<const Expr *, unsigned> &Map,
const Expr *Key) {
if (auto [Place, Inserted] = Map.try_emplace(Key, 1); !Inserted)
++Place->second;
}

PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ,
BugReporterContext &BRC,
PathSensitiveBugReport &BR) final {
OrigCursor = selectMatchingPred(OrigCursor, Succ->getID());
assert(OrigCursor->getID() == Succ->getID());

registerLoopStatements(Succ);

auto AsPostStmt = Succ->getLocationAs<PostStmt>();
const Expr *CurrExpr =
AsPostStmt ? dyn_cast<Expr>(AsPostStmt->getStmt()) : nullptr;

bool IsSplittingTwoWays = OrigCursor->succ_size() == 2;

if (!CurrExpr || !IsSplittingTwoWays)
return nullptr;

const ExplodedNode *Next = Succ->getFirstSucc();
SValBuilder &SVB = Succ->getState()->getStateManager().getSValBuilder();
auto Query = SVB.evalCast(Succ->getSVal(CurrExpr), SVB.getConditionType(),
CurrExpr->getType())
.getAs<DefinedOrUnknownSVal>();
if (!Query)
return nullptr;

ProgramStateRef TakenTrueBranch = Next->getState()->assume(*Query, true);
registerOccurrence(TakenTrueBranch ? NumTimesTakenTrueDecision
: NumTimesTakenFalseDecision,
CurrExpr);

return nullptr;
}

static const Expr *getCond(const Stmt *S) {
assert(isLoopAndNonNull(S));
switch (S->getStmtClass()) {
case Stmt::StmtClass::ForStmtClass:
return cast<ForStmt>(S)->getCond();
case Stmt::StmtClass::WhileStmtClass:
return cast<WhileStmt>(S)->getCond();
case Stmt::StmtClass::CXXForRangeStmtClass:
return cast<CXXForRangeStmt>(S)->getCond();
case Stmt::StmtClass::DoStmtClass:
return cast<DoStmt>(S)->getCond();
default:
return nullptr;
}
}

void trySuppressReport(PathSensitiveBugReport &R, const Stmt *Loop,
const Expr *CondSubExpr) const {
unsigned NumTimesTakenLoopBody =
NumTimesTakenTrueDecision.lookup(CondSubExpr);
unsigned NumTimesNotTakenLoopBody =
NumTimesTakenFalseDecision.lookup(CondSubExpr);

// Adjust for do-while loops, where the loop body is always taken.
if (isa<DoStmt>(Loop))
NumTimesTakenLoopBody += 1;

// Suppress the report if it avoided a loop with an eager assumption.
if (NumTimesTakenLoopBody == 0 && NumTimesNotTakenLoopBody == 1) {
// llvm::errs() << "eagerly decided never taking the loop body\n";
R.markInvalid("eagerly decided never taking the loop body", CondSubExpr);
return;
}

// Suppress the report if it iterates with eager assumptions 3 or more
// times.
if (NumTimesTakenLoopBody > 2 && NumTimesNotTakenLoopBody == 0) {
// llvm::errs() << "eagerly taken the loop body at least 2 times\n";
R.markInvalid("eagerly taken the loop body at least 2 times",
CondSubExpr);
}
}

void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *,
PathSensitiveBugReport &R) final {
ASTContext &Ctx = BRC.getASTContext();

// Go over all the loops we have seen (either avoided or entered), and check
// if the condition of such loops were eagerly decided.
for (const Stmt *Loop : LoopsSeen) {
if (const Expr *Cond = getCond(Loop)) {
// Let's try all sub-exprs to cover cases that use short-circuiting.
// We need to check if any sub-expression of the condition was eagerly
// decided, because the '&&' and '||' logical operators could
// short-circuit, thus the expression on which we recorded the
// "eager decision" was a sub-expression of the Loop condition.
using namespace ast_matchers;
for (auto Binding : match(findAll(expr().bind("e")), *Cond, Ctx)) {
trySuppressReport(R, Loop, Binding.getNodeAs<Expr>("e"));
if (!R.isValid())
return;
}
}
}
}
};
} // namespace

namespace {
/// If `E` is a "clean" array subscript expression, return the type of the
/// accessed element. If the base of the subscript expression is modified by
Expand Down Expand Up @@ -722,6 +884,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
if (Extent)
markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);

BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>(*BR);
C.emitReport(std::move(BR));
}

Expand Down
150 changes: 149 additions & 1 deletion clang/test/Analysis/out-of-bounds.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify %s
// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify=expected,eagerlyassume %s
// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -analyzer-config eagerly-assume=false -verify %s

// Note that eagerly-assume=false is tested separately because the
// WeakLoopAssumption suppression heuristic uses different code paths to
// achieve the same result with and without eagerly-assume.

void clang_analyzer_eval(int);

Expand Down Expand Up @@ -194,3 +199,146 @@ char test_comparison_with_extent_symbol(struct incomplete *p) {
return ((char *)p)[-1]; // no-warning
}

// WeakLoopAssumption suppression
///////////////////////////////////////////////////////////////////////

int GlobalArray[100];
int loop_suppress_after_zero_iterations(unsigned len) {
for (unsigned i = 0; i < len; i++)
if (GlobalArray[i] > 0)
return GlobalArray[i];
// Previously this would have produced an overflow warning because splitting
// the state on the loop condition introduced an execution path where the
// analyzer thinks that len == 0.
// There are many situations where the programmer knows that an argument is
// positive, but this is not indicated in the source code, so we must avoid
// reporting errors (especially out of bounds errors) on these branches,
// because otherwise we'd get prohibitively many false positives.
return GlobalArray[len - 1]; // no-warning
}

void loop_report_in_second_iteration(int len) {
int buf[1] = {0};
for (int i = 0; i < len; i++) {
// When a programmer writes a loop, we may assume that they intended at
// least two iterations.
buf[i] = 1; // expected-warning{{Out of bound access to memory}}
}
}

void loop_suppress_in_third_iteration(int len) {
int buf[2] = {0};
for (int i = 0; i < len; i++) {
// We should suppress array bounds errors on the third and later iterations
// of loops, because sometimes programmers write a loop in sitiuations
// where they know that there will be at most two iterations, but the
// analyzer cannot deduce this property.
buf[i] = 1; // no-warning
}
}

int no_suppression_when_no_assumption_zero_iterations(unsigned len) {
if (len != 0) {
// This 'if' introduces a state split between len == 0 and len != 0.
}

// On the branch where we assumed that len is zero, this loop will be
// skipped. We (intentionally) don't suppress this execution path becasue
// here the analyzer doesn't assume anything new when it evaluates the loop
// condition.
for (unsigned i = 0; i < len; i++)
if (GlobalArray[i] > 0)
return GlobalArray[i];

return GlobalArray[len - 1]; // expected-warning{{Out of bound access to memory}}
}

void no_suppression_when_no_assumption_third_iteration(int len) {
if (len < 20) {
// This 'if' introduces a state split with len >= 20 on one branch.
}

int buf[2] = {0};
for (int i = 0; i < len; i++) {
// As in no_suppression_when_no_assumption_zero_iterations, the suppression
// only activates when the analyzer assumes something new in the loop
// condition. On the branch where `len >= 20` entering the third iteration
// doesn't involve a new assumption, so this warning is not suppressed:
buf[i] = 1; // expected-warning{{Out of bound access to memory}}
}
}

void loop_suppress_in_third_iteration_logical_and(int len, int flag) {
int buf[2] = {0};
for (int i = 0; i < len && flag; i++) {
// FIXME: In this case the checker should suppress the warning the same way
// as it's suppressed in loop_suppress_in_third_iteration, but the
// suppression is not activated because the terminator statement associated
// with the loop is just the expression 'flag', while 'i < len' is a
// separate terminator statement that's associated with the
// short-circuiting operator '&&'.
// I have seen a real-world FP that looks like this, but it is much rarer
// than the basic setup.
buf[i] = 1; // no-warning
}
}

void loop_suppress_in_third_iteration_logical_and_2(int len, int flag) {
int buf[2] = {0};
for (int i = 0; flag && i < len; i++) {
// If the two operands of '&&' are flipped, the suppression works, because
// then 'flag' is the terminator statement associated with '&&' (which
// determines whether the short-circuiting happens or not) and 'i < len' is
// the terminator statement of the loop itself.
buf[i] = 1; // no-warning
}
}

void loop_suppress_in_third_iteration_cast(int len) {
int buf[2] = {0};
for (int i = 0; (unsigned)(i < len); i++) {
// The behavior of this suppression is slightly different under
// eagerly-assume=true (the default) and eagerly-assume=false:
// * When eager assumptions are disabled, it's enough to look for cases
// where we get two non-null states from splitting the state over the
// 'SVal' that represents the full loop condition.
// * When eager assumptions are enabled, we also accept situations when the
// loop condition expression triggers an eager state split and therefore
// we won't see a state split at the "normal" point because it's executed
// on two already separated execution paths.
// However, for the sake of simplicity we don't activate the suppression in
// cases when _a subexpression_ of the loop condition triggers an eager
// assumption. There are already many differences between analysis with and
// without eager assumptions, so it would be pointless to write more
// complicated code to eliminate these rare differences.
buf[i] = 1; // no-warning
}
}

int coinflip(void);
int do_while_report_after_one_iteration(void) {
int i = 0;
do {
i++;
} while (coinflip());
// Unlike `loop_suppress_after_zero_iterations`, running just one iteration
// in a do-while is not a corner case that would produce too many false
// positives, so don't suppress bounds errors in these situations.
return GlobalArray[i-2]; // expected-warning{{Out of bound access to memory}}
}

void do_while_report_in_second_iteration(int len) {
int buf[1] = {0};
int i = 0;
do {
buf[i] = 1; // expected-warning{{Out of bound access to memory}}
} while (i++ < len);
}

void do_while_suppress_in_third_iteration(int len) {
int buf[2] = {0};
int i = 0;
do {
buf[i] = 1; // no-warning
} while (i++ < len);
}