Skip to content

Commit aceb34c

Browse files
authored
[analyzer][NFC] Simplifications in ArrayBoundV2 (#67572)
I'm planning to improve diagnostics generation in `ArrayBoundCheckerV2` but before that I'm refactoring the source code to clean up some over-complicated code and an inaccurate comment. Changes in this commit: - Remove the `mutable std::unique_ptr<BugType>` boilerplate, because it's no longer needed. - Remove the code duplication between the methods `reportOOB()` and `reportTaintedOOB()`. - Eliminate the class `RegionRawOffsetV2` because it's just a "reinvent the wheel" version of `std::pair` and it was used only once, as a temporary object that was immediately decomposed. (I suspect that `RegionRawOffset` in MemRegion.cpp could also be eliminated.) - Flatten the code of `computeOffset()` which had contained six nested indentation levels before this commit. - Ensure that `computeOffset()` returns `std::nullopt` instead of a `{Region, <zero array index>}` pair in the case when it encounters a `Location` that is not an `ElementRegion`. This ensures that the `checkLocation` callback returns early when it handles a memory access where it has "nothing to do" (no subscript operation or equivalent pointer arithmetic). Note that this is still NFC because zero is a valid index everywhere, so the old logic without this shortcut eventually reached the same conclusion. - Correct a wrong explanation comment in `getSimplifiedOffsets()`.
1 parent b796eac commit aceb34c

File tree

1 file changed

+91
-136
lines changed

1 file changed

+91
-136
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp

Lines changed: 91 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -32,42 +32,76 @@ using namespace taint;
3232
namespace {
3333
class ArrayBoundCheckerV2 :
3434
public Checker<check::Location> {
35-
mutable std::unique_ptr<BugType> BT;
36-
mutable std::unique_ptr<BugType> TaintBT;
35+
BugType BT{this, "Out-of-bound access"};
36+
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
3737

38-
enum OOB_Kind { OOB_Precedes, OOB_Excedes };
38+
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
3939

40-
void reportOOB(CheckerContext &C, ProgramStateRef errorState,
41-
OOB_Kind kind) const;
42-
void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
43-
SVal TaintedSVal) const;
40+
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
41+
SVal TaintedSVal = UnknownVal()) const;
4442

4543
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
4644

4745
public:
4846
void checkLocation(SVal l, bool isLoad, const Stmt *S,
4947
CheckerContext &C) const;
5048
};
49+
} // anonymous namespace
5150

52-
// FIXME: Eventually replace RegionRawOffset with this class.
53-
class RegionRawOffsetV2 {
54-
private:
55-
const SubRegion *baseRegion;
56-
NonLoc byteOffset;
57-
58-
public:
59-
RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
60-
: baseRegion(base), byteOffset(offset) { assert(base); }
51+
/// For a given Location that can be represented as a symbolic expression
52+
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
53+
/// Arr and the distance of Location from the beginning of Arr (expressed in a
54+
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
55+
/// cannot be determined.
56+
std::optional<std::pair<const SubRegion *, NonLoc>>
57+
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
58+
QualType T = SVB.getArrayIndexType();
59+
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
60+
// We will use this utility to add and multiply values.
61+
return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
62+
};
6163

62-
NonLoc getByteOffset() const { return byteOffset; }
63-
const SubRegion *getRegion() const { return baseRegion; }
64+
const SubRegion *OwnerRegion = nullptr;
65+
std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
66+
67+
const ElementRegion *CurRegion =
68+
dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
69+
70+
while (CurRegion) {
71+
const auto Index = CurRegion->getIndex().getAs<NonLoc>();
72+
if (!Index)
73+
return std::nullopt;
74+
75+
QualType ElemType = CurRegion->getElementType();
76+
77+
// FIXME: The following early return was presumably added to safeguard the
78+
// getTypeSizeInChars() call (which doesn't accept an incomplete type), but
79+
// it seems that `ElemType` cannot be incomplete at this point.
80+
if (ElemType->isIncompleteType())
81+
return std::nullopt;
82+
83+
// Calculate Delta = Index * sizeof(ElemType).
84+
NonLoc Size = SVB.makeArrayIndex(
85+
SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
86+
auto Delta = EvalBinOp(BO_Mul, *Index, Size);
87+
if (!Delta)
88+
return std::nullopt;
89+
90+
// Perform Offset += Delta.
91+
Offset = EvalBinOp(BO_Add, *Offset, *Delta);
92+
if (!Offset)
93+
return std::nullopt;
94+
95+
OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
96+
// When this is just another ElementRegion layer, we need to continue the
97+
// offset calculations:
98+
CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
99+
}
64100

65-
static std::optional<RegionRawOffsetV2>
66-
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
101+
if (OwnerRegion)
102+
return std::make_pair(OwnerRegion, *Offset);
67103

68-
void dump() const;
69-
void dumpToStream(raw_ostream &os) const;
70-
};
104+
return std::nullopt;
71105
}
72106

73107
// TODO: once the constraint manager is smart enough to handle non simplified
@@ -86,8 +120,8 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
86120
APSIntType(extent.getValue()).convert(SIE->getRHS());
87121
switch (SIE->getOpcode()) {
88122
case BO_Mul:
89-
// The constant should never be 0 here, since it the result of scaling
90-
// based on the size of a type which is never 0.
123+
// The constant should never be 0 here, becasue multiplication by zero
124+
// is simplified by the engine.
91125
if ((extent.getValue() % constant) != 0)
92126
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
93127
else
@@ -163,16 +197,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
163197
return;
164198

165199
ProgramStateRef state = checkerContext.getState();
166-
167200
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
168-
const std::optional<RegionRawOffsetV2> &RawOffset =
169-
RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
201+
202+
const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
203+
computeOffset(state, svalBuilder, location);
170204

171205
if (!RawOffset)
172206
return;
173207

174-
NonLoc ByteOffset = RawOffset->getByteOffset();
175-
const SubRegion *Reg = RawOffset->getRegion();
208+
auto [Reg, ByteOffset] = *RawOffset;
176209

177210
// CHECK LOWER BOUND
178211
const MemSpaceRegion *Space = Reg->getMemorySpace();
@@ -207,12 +240,13 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
207240
if (state_exceedsUpperBound) {
208241
if (!state_withinUpperBound) {
209242
// We know that the index definitely exceeds the upper bound.
210-
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
243+
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
211244
return;
212245
}
213246
if (isTainted(state, ByteOffset)) {
214247
// Both cases are possible, but the index is tainted, so report.
215-
reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
248+
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
249+
ByteOffset);
216250
return;
217251
}
218252
}
@@ -224,58 +258,40 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
224258
checkerContext.addTransition(state);
225259
}
226260

227-
void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
228-
ProgramStateRef errorState,
229-
SVal TaintedSVal) const {
230-
ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
231-
if (!errorNode)
232-
return;
233-
234-
if (!TaintBT)
235-
TaintBT.reset(
236-
new BugType(this, "Out-of-bound access", categories::TaintedData));
237-
238-
SmallString<256> buf;
239-
llvm::raw_svector_ostream os(buf);
240-
os << "Out of bound memory access (index is tainted)";
241-
auto BR =
242-
std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);
243-
244-
// Track back the propagation of taintedness.
245-
for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
246-
BR->markInteresting(Sym);
247-
}
248-
249-
checkerContext.emitReport(std::move(BR));
250-
}
251-
252-
void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
253-
ProgramStateRef errorState,
254-
OOB_Kind kind) const {
261+
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
262+
ProgramStateRef ErrorState, OOB_Kind Kind,
263+
SVal TaintedSVal) const {
255264

256-
ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
257-
if (!errorNode)
265+
ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
266+
if (!ErrorNode)
258267
return;
259268

260-
if (!BT)
261-
BT.reset(new BugType(this, "Out-of-bound access"));
269+
// FIXME: These diagnostics are preliminary, and they'll be replaced soon by
270+
// a follow-up commit.
262271

263-
// FIXME: This diagnostics are preliminary. We should get far better
264-
// diagnostics for explaining buffer overruns.
272+
SmallString<128> Buf;
273+
llvm::raw_svector_ostream Out(Buf);
274+
Out << "Out of bound memory access ";
265275

266-
SmallString<256> buf;
267-
llvm::raw_svector_ostream os(buf);
268-
os << "Out of bound memory access ";
269-
switch (kind) {
276+
switch (Kind) {
270277
case OOB_Precedes:
271-
os << "(accessed memory precedes memory block)";
278+
Out << "(accessed memory precedes memory block)";
279+
break;
280+
case OOB_Exceeds:
281+
Out << "(access exceeds upper limit of memory block)";
272282
break;
273-
case OOB_Excedes:
274-
os << "(access exceeds upper limit of memory block)";
283+
case OOB_Taint:
284+
Out << "(index is tainted)";
275285
break;
276286
}
277-
auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
278-
checkerContext.emitReport(std::move(BR));
287+
auto BR = std::make_unique<PathSensitiveBugReport>(
288+
Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
289+
// Track back the propagation of taintedness, or do nothing if TaintedSVal is
290+
// the default UnknownVal().
291+
for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
292+
BR->markInteresting(Sym);
293+
}
294+
C.emitReport(std::move(BR));
279295
}
280296

281297
bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
@@ -297,67 +313,6 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
297313
(MacroName == "isupper") || (MacroName == "isxdigit"));
298314
}
299315

300-
#ifndef NDEBUG
301-
LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
302-
dumpToStream(llvm::errs());
303-
}
304-
305-
void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
306-
os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
307-
}
308-
#endif
309-
310-
/// For a given Location that can be represented as a symbolic expression
311-
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
312-
/// Arr and the distance of Location from the beginning of Arr (expressed in a
313-
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
314-
/// cannot be determined.
315-
std::optional<RegionRawOffsetV2>
316-
RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
317-
SVal Location) {
318-
QualType T = SVB.getArrayIndexType();
319-
auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
320-
// We will use this utility to add and multiply values.
321-
return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
322-
};
323-
324-
const MemRegion *Region = Location.getAsRegion();
325-
NonLoc Offset = SVB.makeZeroArrayIndex();
326-
327-
while (Region) {
328-
if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
329-
if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
330-
QualType ElemType = ERegion->getElementType();
331-
// If the element is an incomplete type, go no further.
332-
if (ElemType->isIncompleteType())
333-
return std::nullopt;
334-
335-
// Perform Offset += Index * sizeof(ElemType); then continue the offset
336-
// calculations with SuperRegion:
337-
NonLoc Size = SVB.makeArrayIndex(
338-
SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
339-
if (auto Delta = Calc(BO_Mul, *Index, Size)) {
340-
if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
341-
Offset = *NewOffset;
342-
Region = ERegion->getSuperRegion();
343-
continue;
344-
}
345-
}
346-
}
347-
} else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
348-
// NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
349-
// to see a MemSpaceRegion at this point.
350-
// FIXME: We may return with {<Region>, 0} even if we didn't handle any
351-
// ElementRegion layers. I think that this behavior was introduced
352-
// accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
353-
// it may be useful to review it in the future.
354-
return RegionRawOffsetV2(SRegion, Offset);
355-
}
356-
return std::nullopt;
357-
}
358-
return std::nullopt;
359-
}
360-
361316
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
362317
mgr.registerChecker<ArrayBoundCheckerV2>();
363318
}

0 commit comments

Comments
 (0)