Skip to content

Commit ee68c3e

Browse files
NagyDonatyuxuanchen1997
authored andcommitted
[analyzer] Improve bug report hashing, merge similar reports (#98621)
Summary: Previously there were certain situations where alpha.security.ArrayBoundV2 produced lots of very similar and redundant reports that only differed in their full `Description` that contained the (negative) byte offset value. (See #86969 for details.) This change updates the `Profile()` method of `PathSensitiveBugReport` to ensure that it uses `getShortDescription()` instead of the full `Description` so the standard report deduplication eliminates most of these redundant reports. Note that the effects of this change are very limited because there are very few checkers that specify a separate short description, and so `getShortDescription()` practically always defaults to returning the full `Description`. For the sake of consistency `BasicBugReport::Profile()` is also updated to use the short description. (Right now there are no checkers that use `BasicBugReport` with separate long and short descriptions.) This commit also includes some small code quality improvements in `ArrayBoundV2` that are IMO too trivial to be moved into a separate commit. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251171
1 parent 6637a70 commit ee68c3e

File tree

3 files changed

+32
-11
lines changed

3 files changed

+32
-11
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -373,14 +373,14 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
373373
}
374374

375375
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
376-
std::string RegName = getRegionName(Region);
377-
SmallString<128> Buf;
378-
llvm::raw_svector_ostream Out(Buf);
379-
Out << "Access of " << RegName << " at negative byte offset";
380-
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
381-
Out << ' ' << ConcreteIdx->getValue();
382-
return {formatv("Out of bound access to memory preceding {0}", RegName),
383-
std::string(Buf)};
376+
std::string RegName = getRegionName(Region), OffsetStr = "";
377+
378+
if (auto ConcreteOffset = getConcreteValue(Offset))
379+
OffsetStr = formatv(" {0}", ConcreteOffset);
380+
381+
return {
382+
formatv("Out of bound access to memory preceding {0}", RegName),
383+
formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
384384
}
385385

386386
/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
@@ -609,7 +609,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
609609
// CHECK UPPER BOUND
610610
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
611611
if (auto KnownSize = Size.getAs<NonLoc>()) {
612-
// In a situation where both overflow and overflow are possible (but the
612+
// In a situation where both underflow and overflow are possible (but the
613613
// index is either tainted or known to be invalid), the logic of this
614614
// checker will first assume that the offset is non-negative, and then
615615
// (with this additional assumption) it will detect an overflow error.

clang/lib/StaticAnalyzer/Core/BugReporter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,7 +2198,7 @@ const Decl *PathSensitiveBugReport::getDeclWithIssue() const {
21982198
void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const {
21992199
hash.AddInteger(static_cast<int>(getKind()));
22002200
hash.AddPointer(&BT);
2201-
hash.AddString(Description);
2201+
hash.AddString(getShortDescription());
22022202
assert(Location.isValid());
22032203
Location.Profile(hash);
22042204

@@ -2213,7 +2213,7 @@ void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const {
22132213
void PathSensitiveBugReport::Profile(llvm::FoldingSetNodeID &hash) const {
22142214
hash.AddInteger(static_cast<int>(getKind()));
22152215
hash.AddPointer(&BT);
2216-
hash.AddString(Description);
2216+
hash.AddString(getShortDescription());
22172217
PathDiagnosticLocation UL = getUniqueingLocation();
22182218
if (UL.isValid()) {
22192219
UL.Profile(hash);

clang/test/Analysis/out-of-bounds-diagnostics.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,27 @@ int underflowWithDeref(void) {
1717
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
1818
}
1919

20+
int rng(void);
21+
int getIndex(void) {
22+
switch (rng()) {
23+
case 1: return -152;
24+
case 2: return -160;
25+
case 3: return -168;
26+
default: return -172;
27+
}
28+
}
29+
30+
void gh86959(void) {
31+
// Previously code like this produced many almost-identical bug reports that
32+
// only differed in the offset value. Verify that now we only see one report.
33+
34+
// expected-note@+1 {{Entering loop body}}
35+
while (rng())
36+
TenElements[getIndex()] = 10;
37+
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
38+
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
39+
}
40+
2041
int scanf(const char *restrict fmt, ...);
2142

2243
void taintedIndex(void) {

0 commit comments

Comments
 (0)