-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[analyzer][NFC] Simplifications in ArrayBoundV2 #67572
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-static-analyzer-1 @llvm/pr-subscribers-clang ChangesI'm planning to improve diagnostics generation in Changes in this commit:
Full diff: https://github.com/llvm/llvm-project/pull/67572.diff 1 Files Affected:
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index db4a2fcea9b2cdd..8c26649621fe431 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -32,15 +32,13 @@ using namespace taint;
namespace {
class ArrayBoundCheckerV2 :
public Checker<check::Location> {
- mutable std::unique_ptr<BugType> BT;
- mutable std::unique_ptr<BugType> TaintBT;
+ BugType BT{this, "Out-of-bound access"};
+ BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
- enum OOB_Kind { OOB_Precedes, OOB_Excedes };
+ enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
- void reportOOB(CheckerContext &C, ProgramStateRef errorState,
- OOB_Kind kind) const;
- void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
- SVal TaintedSVal) const;
+ void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
+ SVal TaintedSVal = UnknownVal()) const;
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
@@ -49,25 +47,56 @@ class ArrayBoundCheckerV2 :
CheckerContext &C) const;
};
-// FIXME: Eventually replace RegionRawOffset with this class.
-class RegionRawOffsetV2 {
-private:
- const SubRegion *baseRegion;
- NonLoc byteOffset;
+/// For a given Location that can be represented as a symbolic expression
+/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
+/// Arr and the distance of Location from the beginning of Arr (expressed in a
+/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
+/// cannot be determined.
+std::optional<std::pair<const SubRegion *, NonLoc>>
+computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
+ QualType T = SVB.getArrayIndexType();
+ auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
+ // We will use this utility to add and multiply values.
+ return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
+ };
-public:
- RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
- : baseRegion(base), byteOffset(offset) { assert(base); }
+ const auto *Region = Location.getAsRegion()->getAs<SubRegion>();
+ std::optional<NonLoc> Offset = std::nullopt;
+
+ while (const auto *ERegion = dyn_cast_or_null<ElementRegion>(Region)) {
+ const auto Index = ERegion->getIndex().getAs<NonLoc>();
+ if (!Index)
+ return std::nullopt;
+
+ QualType ElemType = ERegion->getElementType();
+ // If the element is an incomplete type, go no further.
+ if (ElemType->isIncompleteType())
+ return std::nullopt;
+
+ // Calculate Delta = Index * sizeof(ElemType).
+ NonLoc Size = SVB.makeArrayIndex(
+ SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
+ auto Delta = Calc(BO_Mul, *Index, Size);
+ if (!Delta)
+ return std::nullopt;
+
+ // Perform Offset += Delta, handling the initial nullopt as 0.
+ if (!Offset) {
+ Offset = Delta;
+ } else {
+ Offset = Calc(BO_Add, *Offset, *Delta);
+ if (!Offset)
+ return std::nullopt;
+ }
- NonLoc getByteOffset() const { return byteOffset; }
- const SubRegion *getRegion() const { return baseRegion; }
+ // Continute the offset calculations with the SuperRegion.
+ Region = ERegion->getSuperRegion()->getAs<SubRegion>();
+ }
- static std::optional<RegionRawOffsetV2>
- computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
+ if (Region && Offset)
+ return std::make_pair(Region, *Offset);
- void dump() const;
- void dumpToStream(raw_ostream &os) const;
-};
+ return std::nullopt;
}
// TODO: once the constraint manager is smart enough to handle non simplified
@@ -86,8 +115,8 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
APSIntType(extent.getValue()).convert(SIE->getRHS());
switch (SIE->getOpcode()) {
case BO_Mul:
- // The constant should never be 0 here, since it the result of scaling
- // based on the size of a type which is never 0.
+ // The constant should never be 0 here, becasue multiplication by zero
+ // is simplified by the engine.
if ((extent.getValue() % constant) != 0)
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
else
@@ -163,16 +192,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
return;
ProgramStateRef state = checkerContext.getState();
-
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
- const std::optional<RegionRawOffsetV2> &RawOffset =
- RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+
+ const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
+ computeOffset(state, svalBuilder, location);
if (!RawOffset)
return;
- NonLoc ByteOffset = RawOffset->getByteOffset();
- const SubRegion *Reg = RawOffset->getRegion();
+ auto [Reg, ByteOffset] = *RawOffset;
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace();
@@ -207,12 +235,13 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
if (state_exceedsUpperBound) {
if (!state_withinUpperBound) {
// We know that the index definitely exceeds the upper bound.
- reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+ reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
return;
}
if (isTainted(state, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
- reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
+ reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
+ ByteOffset);
return;
}
}
@@ -224,58 +253,40 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
checkerContext.addTransition(state);
}
-void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
- ProgramStateRef errorState,
- SVal TaintedSVal) const {
- ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
- if (!errorNode)
- return;
-
- if (!TaintBT)
- TaintBT.reset(
- new BugType(this, "Out-of-bound access", categories::TaintedData));
-
- SmallString<256> buf;
- llvm::raw_svector_ostream os(buf);
- os << "Out of bound memory access (index is tainted)";
- auto BR =
- std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);
-
- // Track back the propagation of taintedness.
- for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
- BR->markInteresting(Sym);
- }
-
- checkerContext.emitReport(std::move(BR));
-}
-
-void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
- ProgramStateRef errorState,
- OOB_Kind kind) const {
+void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
+ ProgramStateRef ErrorState, OOB_Kind Kind,
+ SVal TaintedSVal) const {
- ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
- if (!errorNode)
+ ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
+ if (!ErrorNode)
return;
- if (!BT)
- BT.reset(new BugType(this, "Out-of-bound access"));
+ // FIXME: This diagnostics are preliminary, and they'll be replaced soon by a
+ // follow-up commit.
- // FIXME: This diagnostics are preliminary. We should get far better
- // diagnostics for explaining buffer overruns.
+ SmallString<128> Buf;
+ llvm::raw_svector_ostream Out(Buf);
+ Out << "Out of bound memory access ";
- SmallString<256> buf;
- llvm::raw_svector_ostream os(buf);
- os << "Out of bound memory access ";
- switch (kind) {
+ switch (Kind) {
case OOB_Precedes:
- os << "(accessed memory precedes memory block)";
+ Out << "(accessed memory precedes memory block)";
+ break;
+ case OOB_Exceeds:
+ Out << "(access exceeds upper limit of memory block)";
break;
- case OOB_Excedes:
- os << "(access exceeds upper limit of memory block)";
+ case OOB_Taint:
+ Out << "(index is tainted)";
break;
}
- auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
- checkerContext.emitReport(std::move(BR));
+ auto BR = std::make_unique<PathSensitiveBugReport>(
+ Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
+ // Track back the propagation of taintedness, or do nothing if TaintedSVal is
+ // the default UnknownVal().
+ for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
+ BR->markInteresting(Sym);
+ }
+ C.emitReport(std::move(BR));
}
bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
@@ -296,67 +307,7 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
(MacroName == "ispunct") || (MacroName == "isspace") ||
(MacroName == "isupper") || (MacroName == "isxdigit"));
}
-
-#ifndef NDEBUG
-LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
- dumpToStream(llvm::errs());
-}
-
-void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
- os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
-}
-#endif
-
-/// For a given Location that can be represented as a symbolic expression
-/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
-/// Arr and the distance of Location from the beginning of Arr (expressed in a
-/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
-/// cannot be determined.
-std::optional<RegionRawOffsetV2>
-RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
- SVal Location) {
- QualType T = SVB.getArrayIndexType();
- auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
- // We will use this utility to add and multiply values.
- return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
- };
-
- const MemRegion *Region = Location.getAsRegion();
- NonLoc Offset = SVB.makeZeroArrayIndex();
-
- while (Region) {
- if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
- if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
- QualType ElemType = ERegion->getElementType();
- // If the element is an incomplete type, go no further.
- if (ElemType->isIncompleteType())
- return std::nullopt;
-
- // Perform Offset += Index * sizeof(ElemType); then continue the offset
- // calculations with SuperRegion:
- NonLoc Size = SVB.makeArrayIndex(
- SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
- if (auto Delta = Calc(BO_Mul, *Index, Size)) {
- if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
- Offset = *NewOffset;
- Region = ERegion->getSuperRegion();
- continue;
- }
- }
- }
- } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
- // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
- // to see a MemSpaceRegion at this point.
- // FIXME: We may return with {<Region>, 0} even if we didn't handle any
- // ElementRegion layers. I think that this behavior was introduced
- // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
- // it may be useful to review it in the future.
- return RegionRawOffsetV2(SRegion, Offset);
- }
- return std::nullopt;
- }
- return std::nullopt;
-}
+} // anonymous namespace
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundCheckerV2>();
|
2b6a23c
to
448999e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I remember I looked this once. I postponed my comments because I was expecting some numbers or confirmation of that this patch is indeed NFC, thus no report or notes change. I'm not sure if this actually holds, given the changes, but let me know in any case. BTW I'm fine with slight modifications if we call it NFCI or just without NFC. If really nothing changes, consider this approved :) |
I'm confident that this patch is NFC, but my claim is based on theoretical reasoning (a.k.a. "I think I didn't make a mistake"). I have a background in theoretical mathematics, so for me "I have a rough a proof in my head" is intuitively stronger than "I verified it on some arbitrary input"; but you're right that this change is so complex that it deserves an experimental check. I started some analysis on open source projects and I'll give results on Monday (edit: Tuesday). |
Oh I see. It makes sense to me. And on second thought, it shouldn't be much of a hassle doing it anyways. |
448999e
to
583b6c3
Compare
I'm planning to improve diagnostics generation in `ArrayBoundCheckerV2` but before that I'm refactoring the source code to clean up some over-complicated code and an inaccurate comment. Changes in this commit: - Remove the `mutable std::unique_ptr<BugType>` boilerplate, because it's no longer needed. - Remove the code duplication between the methods `reportOOB()` and `reportTaintedOOB()`. - Eliminate the class `RegionRawOffsetV2` because it's just a "reinvent the wheel" version of `std::pair` and it was used only once, as a temporary object that was immediately decomposed. (I suspect that `RegionRawOffset` in MemRegion.cpp could also be eliminated.) - Flatten the code of `computeOffset()` which had contained six nested indentation levels before this commit. - Ensure that `computeOffset()` returns `std::nullopt` instead of a `{Region, <zero array index>}` pair in the case when it encounters a `Location` that is not an `ElementRegion`. This ensures that the `checkLocation` callback returns early when it handles a memory access where it has "nothing to do" (no subscript operation or equivalent pointer arithmetic). (Note that this is still NFC because zero is a valid index everywhere, so the old logic without this shortcut eventually reached the same conclusion.) - Correct a wrong explanation comment in `getSimplifiedOffsets()`.
583b6c3
to
7bd280e
Compare
After some delays (I had a short vacation, then forgot about this patch) I finally got some test results... and they revealed that it was causing lots of segfaults. @steakhal thanks for asking for the experimental confirmation, it was useful! After fixing the buggy line and pushing the update, I re-analyzed the open source projects, and this produced the intended results: the analysis did not crash and the set of the reports did not change. For reference, I insert the table of the test results, but it contains no differences:
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@steakhal Could you briefly look at this commit? I'm open to modifying it if you say that it's necessary, but if I could merge it, then I'd be able to rebase and finalize my next commit (which adds detailed diagnostic messages to ArrayBoundV2). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No blocking comments.
I'll merge this change soon, when I'll start to work on rebasing the followup commit (unless there is additional feedback until then). Thanks for the reviews! |
I'm planning to improve diagnostics generation in
ArrayBoundCheckerV2
but before that I'm refactoring the source code to clean up some over-complicated code and an inaccurate comment.Changes in this commit:
mutable std::unique_ptr<BugType>
boilerplate, because it's no longer needed.reportOOB()
andreportTaintedOOB()
.RegionRawOffsetV2
because it's just a "reinvent the wheel" version ofstd::pair
and it was used only once, as a temporary object that was immediately decomposed. (I suspect thatRegionRawOffset
in MemRegion.cpp could also be eliminated.)computeOffset()
which had contained six nested indentation levels before this commit.computeOffset()
returnsstd::nullopt
instead of a{Region, <zero array index>}
pair in the case when it encounters aLocation
that is not anElementRegion
. This ensures that thecheckLocation
callback returns early when it handles a memory access where it has "nothing to do" (no subscript operation or equivalent pointer arithmetic). (Note that this is still NFC because zero is a valid index everywhere, so the old logic without this shortcut eventually reached the same conclusion.)getSimplifiedOffsets()
.