-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[analyzer] Switch to PostStmt callbacks in ArrayBoundV2 #72107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ab102e9
819a21b
5ae2f66
4044f86
4fb7355
9c85c7e
017b1d6
a422b32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
//===----------------------------------------------------------------------===// | ||
|
||
#include "clang/AST/CharUnits.h" | ||
#include "clang/AST/ParentMapContext.h" | ||
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" | ||
#include "clang/StaticAnalyzer/Checkers/Taint.h" | ||
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" | ||
|
@@ -34,20 +35,46 @@ using llvm::formatv; | |
namespace { | ||
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; | ||
|
||
class ArrayBoundCheckerV2 : | ||
public Checker<check::Location> { | ||
struct Messages { | ||
std::string Short, Full; | ||
}; | ||
|
||
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` | ||
// instead of `PreStmt` because the current implementation passes the whole | ||
// expression to `CheckerContext::getSVal()` which only works after the | ||
// symbolic evaluation of the expression. (To turn them into `PreStmt` | ||
// callbacks, we'd need to duplicate the logic that evaluates these | ||
// expressions.) The `MemberExpr` callback would work as `PreStmt` but it's | ||
// defined as `PostStmt` for the sake of consistency with the other callbacks. | ||
class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>, | ||
check::PostStmt<UnaryOperator>, | ||
check::PostStmt<MemberExpr>> { | ||
BugType BT{this, "Out-of-bound access"}; | ||
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; | ||
|
||
void performCheck(const Expr *E, CheckerContext &C) const; | ||
|
||
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, | ||
NonLoc Offset, std::string RegName, std::string Msg) const; | ||
NonLoc Offset, Messages Msgs) const; | ||
|
||
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); | ||
|
||
static bool isInAddressOf(const Stmt *S, ASTContext &AC); | ||
|
||
public: | ||
void checkLocation(SVal l, bool isLoad, const Stmt *S, | ||
CheckerContext &C) const; | ||
void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const { | ||
performCheck(E, C); | ||
} | ||
void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const { | ||
if (E->getOpcode() == UO_Deref) | ||
performCheck(E, C); | ||
} | ||
void checkPostStmt(const MemberExpr *E, CheckerContext &C) const { | ||
if (E->isArrow()) | ||
performCheck(E->getBase(), C); | ||
} | ||
}; | ||
|
||
} // anonymous namespace | ||
|
||
/// For a given Location that can be represented as a symbolic expression | ||
|
@@ -149,9 +176,11 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, | |
// where the first one corresponds to "value below threshold" and the second | ||
// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in | ||
// the case when the evaluation fails. | ||
// If the optional argument CheckEquality is true, then use BO_EQ instead of | ||
// the default BO_LT after consistently applying the same simplification steps. | ||
static std::pair<ProgramStateRef, ProgramStateRef> | ||
compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, | ||
SValBuilder &SVB) { | ||
SValBuilder &SVB, bool CheckEquality = false) { | ||
if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { | ||
std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); | ||
} | ||
|
@@ -167,8 +196,10 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, | |
return {nullptr, State}; | ||
} | ||
} | ||
const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT; | ||
auto BelowThreshold = | ||
SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>(); | ||
SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType()) | ||
.getAs<NonLoc>(); | ||
|
||
if (BelowThreshold) | ||
return State->assume(*BelowThreshold); | ||
|
@@ -217,16 +248,19 @@ static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { | |
return formatv(ShortMsgTemplates[Kind], RegName); | ||
} | ||
|
||
static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) { | ||
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { | ||
std::string RegName = getRegionName(Region); | ||
SmallString<128> Buf; | ||
llvm::raw_svector_ostream Out(Buf); | ||
Out << "Access of " << RegName << " at negative byte offset"; | ||
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>()) | ||
Out << ' ' << ConcreteIdx->getValue(); | ||
return std::string(Buf); | ||
return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)}; | ||
} | ||
static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName, | ||
NonLoc Offset, NonLoc Extent, SVal Location) { | ||
|
||
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, | ||
NonLoc Offset, NonLoc Extent, SVal Location) { | ||
std::string RegName = getRegionName(Region); | ||
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); | ||
assert(EReg && "this checker only handles element access"); | ||
QualType ElemType = EReg->getElementType(); | ||
|
@@ -273,20 +307,18 @@ static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName, | |
Out << "s"; | ||
} | ||
|
||
return std::string(Buf); | ||
} | ||
static std::string getTaintMsg(std::string RegName) { | ||
SmallString<128> Buf; | ||
llvm::raw_svector_ostream Out(Buf); | ||
Out << "Access of " << RegName | ||
<< " with a tainted offset that may be too large"; | ||
return std::string(Buf); | ||
return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)}; | ||
} | ||
|
||
void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, | ||
const Stmt *LoadS, | ||
CheckerContext &C) const { | ||
// Builds the pair of report messages used for taint-based reports about | ||
// Region. Both strings name the region via getRegionName() and refer to the | ||
// tainted quantity as OffsetName (the visible caller passes "offset" or | ||
// "index"). | ||
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { | ||
std::string RegName = getRegionName(Region); | ||
// The first string initializes Messages::Short, the second Messages::Full. | ||
return {formatv("Potential out of bound access to {0} with tainted {1}", | ||
RegName, OffsetName), | ||
formatv("Access of {0} with a tainted {1} that may be too large", | ||
RegName, OffsetName)}; | ||
} | ||
|
||
void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { | ||
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping | ||
// some new logic here that reasons directly about memory region extents. | ||
// Once that logic is more mature, we can bring it back to assumeInBound() | ||
|
@@ -297,12 +329,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, | |
// have some flexibility in defining the base region, we can achieve | ||
// various levels of conservatism in our buffer overflow checking. | ||
|
||
const SVal Location = C.getSVal(E); | ||
|
||
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as | ||
// #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) | ||
// and incomplete analysis of these leads to false positives. As even | ||
// accurate reports would be confusing for the users, just disable reports | ||
// from these macros: | ||
if (isFromCtypeMacro(LoadS, C.getASTContext())) | ||
if (isFromCtypeMacro(E, C.getASTContext())) | ||
return; | ||
|
||
ProgramStateRef State = C.getState(); | ||
|
@@ -331,9 +365,8 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, | |
|
||
if (PrecedesLowerBound && !WithinLowerBound) { | ||
// We know that the index definitely precedes the lower bound. | ||
std::string RegName = getRegionName(Reg); | ||
std::string Msg = getPrecedesMsg(RegName, ByteOffset); | ||
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg); | ||
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); | ||
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs); | ||
return; | ||
} | ||
|
||
|
@@ -350,17 +383,38 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, | |
if (ExceedsUpperBound) { | ||
if (!WithinUpperBound) { | ||
// We know that the index definitely exceeds the upper bound. | ||
std::string RegName = getRegionName(Reg); | ||
std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset, | ||
*KnownSize, Location); | ||
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg); | ||
if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { | ||
// ...but this is within an addressof expression, so we need to check | ||
// for the exceptional case that `&array[size]` is valid. | ||
auto [EqualsToThreshold, NotEqualToThreshold] = | ||
compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, | ||
SVB, /*CheckEquality=*/true); | ||
if (EqualsToThreshold && !NotEqualToThreshold) { | ||
// We are definitely in the exceptional case, so return early | ||
// instead of reporting a bug. | ||
C.addTransition(EqualsToThreshold); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this transition needed? This checker should not add assumptions to the state, only check for conditions and add only error transitions. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're right that […]. On the other hand, the variable […]. I think it's valuable to record these assumptions with a state transition, because they improve the accuracy of the modeling. (Otherwise the analyzer could produce bug reports that rely on assumptions that contradict each other.) Currently the assumptions of this checker are added silently, but I'll add note tags like "Assuming index is non-negative" for them in a followup commit. |
||
return; | ||
} | ||
} | ||
Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, | ||
*KnownSize, Location); | ||
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); | ||
return; | ||
} | ||
if (isTainted(State, ByteOffset)) { | ||
// Both cases are possible, but the index is tainted, so report. | ||
// Both cases are possible, but the offset is tainted, so report. | ||
std::string RegName = getRegionName(Reg); | ||
std::string Msg = getTaintMsg(RegName); | ||
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg); | ||
|
||
// Diagnostic detail: "tainted offset" is always correct, but the | ||
// common case is that 'idx' is tainted in 'arr[idx]' and then it's | ||
// nicer to say "tainted index". | ||
const char *OffsetName = "offset"; | ||
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) | ||
if (isTainted(State, ASE->getIdx(), C.getLocationContext())) | ||
OffsetName = "index"; | ||
|
||
Messages Msgs = getTaintMsgs(Reg, OffsetName); | ||
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); | ||
return; | ||
} | ||
} | ||
|
@@ -374,17 +428,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, | |
|
||
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, | ||
ProgramStateRef ErrorState, OOB_Kind Kind, | ||
NonLoc Offset, std::string RegName, | ||
std::string Msg) const { | ||
NonLoc Offset, Messages Msgs) const { | ||
|
||
ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); | ||
if (!ErrorNode) | ||
return; | ||
|
||
std::string ShortMsg = getShortMsg(Kind, RegName); | ||
|
||
auto BR = std::make_unique<PathSensitiveBugReport>( | ||
Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode); | ||
Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); | ||
|
||
// Track back the propagation of taintedness. | ||
if (Kind == OOB_Taint) | ||
|
@@ -413,6 +464,18 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { | |
(MacroName == "isupper") || (MacroName == "isxdigit")); | ||
} | ||
|
||
// Returns true if the nearest ancestor of S that is neither a ParenExpr nor | ||
// an ImplicitCastExpr is a unary address-of operator (UO_AddrOf). Returns | ||
// false if a node with no parents is reached or if that ancestor is anything | ||
// else. | ||
bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) { | ||
ParentMapContext &ParentCtx = ACtx.getParentMapContext(); | ||
// Walk upwards, skipping parentheses and implicit casts around S. | ||
do { | ||
const DynTypedNodeList Parents = ParentCtx.getParents(*S); | ||
if (Parents.empty()) | ||
return false; | ||
// Only the first parent is followed; the result is treated as possibly | ||
// null by the *_and_nonnull / *_or_null casts below. | ||
S = Parents[0].get<Stmt>(); | ||
} while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S)); | ||
const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S); | ||
return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf; | ||
} | ||
|
||
// Registers ArrayBoundCheckerV2 with the given CheckerManager. | ||
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { | ||
mgr.registerChecker<ArrayBoundCheckerV2>(); | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.