Skip to content

Commit efec163

Browse files
committed
[analyzer] Bug identification
This patch adds hashes to the plist and html output to be able to identify bugs for suppressing false positives or diff results against a baseline. This hash aims to be resilient to code evolution and is usable to identify bugs in two different snapshots of the same software. One missing piece however is a permanent unique identifier of the checker that produces the warning. Once that issue is resolved, the hashes generated are going to change. Until that point this feature is marked experimental, but it is suitable for early adoption. Differential Revision: http://reviews.llvm.org/D10305 Original patch by: Bence Babati! llvm-svn: 251011
1 parent b91bee0 commit efec163

40 files changed

+11868
-8422
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
//===---------- IssueHash.h - Generate identification hashes ----*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
#ifndef LLVM_CLANG_STATICANALYZER_CORE_ISSUE_HASH_H
10+
#define LLVM_CLANG_STATICANALYZER_CORE_ISSUE_HASH_H
11+
12+
#include "llvm/ADT/SmallString.h"
13+
14+
namespace clang {
15+
class Decl;
16+
class SourceManager;
17+
class FullSourceLoc;
18+
19+
/// \brief Get an MD5 hash to help identify bugs.
20+
///
21+
/// This function returns a hash that helps identify bugs within a source file.
22+
/// This identification can be utilized to diff diagnostic results on different
23+
/// snapshots of a project, or maintain a database of suppressed diagnostics.
24+
///
25+
/// The hash contains the normalized text of the location associated with the
26+
/// diagnostic. Normalization means removing the whitespaces. The associated
27+
/// location is either the last location of a diagnostic path or a uniqueing
28+
/// location. The bug type and the name of the checker are also part of the hash.
29+
/// The last component is the string representation of the enclosing declaration
30+
/// of the associated location.
31+
///
32+
/// In case a new hash is introduced, the old one should still be maintained for
33+
/// a while. One should not introduce a new hash for every change, it is
34+
/// possible to introduce experimental hashes that may change in the future.
35+
/// Such hashes should be marked as experimental using a comment in the plist
36+
/// files.
37+
llvm::SmallString<32> GetIssueHash(const SourceManager &SM,
38+
FullSourceLoc &IssueLoc,
39+
llvm::StringRef CheckerName,
40+
llvm::StringRef BugType, const Decl *D);
41+
42+
/// \brief Get the string representation of issue hash. See GetIssueHash() for
43+
/// more information.
44+
std::string GetIssueString(const SourceManager &SM, FullSourceLoc &IssueLoc,
45+
llvm::StringRef CheckerName, llvm::StringRef BugType,
46+
const Decl *D);
47+
} // namespace clang
48+
49+
#endif

clang/lib/StaticAnalyzer/Checkers/Checkers.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,4 +591,8 @@ def ExplodedGraphViewer : Checker<"ViewExplodedGraph">,
591591
HelpText<"View Exploded Graphs using GraphViz">,
592592
DescFile<"DebugCheckers.cpp">;
593593

594+
def BugHashDumper : Checker<"DumpBugHash">,
595+
HelpText<"Dump the bug hash for all statements.">,
596+
DescFile<"DebugCheckers.cpp">;
597+
594598
} // end "debug"

clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
#include "clang/Analysis/Analyses/LiveVariables.h"
1717
#include "clang/Analysis/CallGraph.h"
1818
#include "clang/StaticAnalyzer/Core/Checker.h"
19+
#include "clang/StaticAnalyzer/Core/IssueHash.h"
20+
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
1921
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
22+
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
2023
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
2124
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
2225
#include "llvm/Support/Process.h"
@@ -209,3 +212,35 @@ class ExplodedGraphViewer : public Checker< check::EndAnalysis > {
209212
void ento::registerExplodedGraphViewer(CheckerManager &mgr) {
210213
mgr.registerChecker<ExplodedGraphViewer>();
211214
}
215+
216+
//===----------------------------------------------------------------------===//
217+
// DumpBugHash
218+
//===----------------------------------------------------------------------===//
219+
220+
namespace {
221+
class BugHashDumper : public Checker<check::PostStmt<Stmt>> {
222+
public:
223+
mutable std::unique_ptr<BugType> BT;
224+
225+
void checkPostStmt(const Stmt *S, CheckerContext &C) const {
226+
if (!BT)
227+
BT.reset(new BugType(this, "Dump hash components", "debug"));
228+
229+
ExplodedNode *N = C.generateNonFatalErrorNode();
230+
if (!N)
231+
return;
232+
233+
const SourceManager &SM = C.getSourceManager();
234+
FullSourceLoc FL(S->getLocStart(), SM);
235+
std::string HashContent =
236+
GetIssueString(SM, FL, getCheckName().getName(), BT->getCategory(),
237+
C.getLocationContext()->getDecl());
238+
239+
C.emitReport(llvm::make_unique<BugReport>(*BT, HashContent, N));
240+
}
241+
};
242+
}
243+
244+
void ento::registerBugHashDumper(CheckerManager &mgr) {
245+
mgr.registerChecker<BugHashDumper>();
246+
}

clang/lib/StaticAnalyzer/Core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_clang_library(clangStaticAnalyzerCore
66
AnalyzerOptions.cpp
77
BasicValueFactory.cpp
88
BlockCounter.cpp
9+
IssueHash.cpp
910
BugReporter.cpp
1011
BugReporterVisitors.cpp
1112
CallEvent.cpp

clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
1514
#include "clang/AST/ASTContext.h"
1615
#include "clang/AST/Decl.h"
1716
#include "clang/Basic/FileManager.h"
@@ -22,6 +21,8 @@
2221
#include "clang/Rewrite/Core/Rewriter.h"
2322
#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
2423
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
24+
#include "clang/StaticAnalyzer/Core/IssueHash.h"
25+
#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
2526
#include "llvm/Support/Errc.h"
2627
#include "llvm/Support/FileSystem.h"
2728
#include "llvm/Support/MemoryBuffer.h"
@@ -236,6 +237,13 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
236237
if (!BugType.empty())
237238
os << "\n<!-- BUGTYPE " << BugType << " -->\n";
238239

240+
PathDiagnosticLocation UPDLoc = D.getUniqueingLoc();
241+
FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid()
242+
? UPDLoc.asLocation()
243+
: D.getLocation().asLocation()),
244+
SMgr);
245+
const Decl *DeclWithIssue = D.getDeclWithIssue();
246+
239247
StringRef BugCategory = D.getCategory();
240248
if (!BugCategory.empty())
241249
os << "\n<!-- BUGCATEGORY " << BugCategory << " -->\n";
@@ -246,6 +254,10 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
246254

247255
os << "\n<!-- FUNCTIONNAME " << declName << " -->\n";
248256

257+
os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT "
258+
<< GetIssueHash(SMgr, L, D.getCheckName(), D.getBugType(), DeclWithIssue)
259+
<< " -->\n";
260+
249261
os << "\n<!-- BUGLINE "
250262
<< LineNumber
251263
<< " -->\n";
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
//===---------- IssueHash.cpp - Generate identification hashes --*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
#include "clang/StaticAnalyzer/Core/IssueHash.h"
10+
#include "clang/AST/ASTContext.h"
11+
#include "clang/AST/Decl.h"
12+
#include "clang/AST/DeclCXX.h"
13+
#include "clang/Basic/SourceManager.h"
14+
#include "clang/Basic/Specifiers.h"
15+
#include "clang/Lex/Lexer.h"
16+
#include "llvm/ADT/SmallVector.h"
17+
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/ADT/Twine.h"
19+
#include "llvm/Support/LineIterator.h"
20+
#include "llvm/Support/MD5.h"
21+
#include "llvm/Support/Path.h"
22+
23+
#include <functional>
24+
#include <sstream>
25+
#include <string>
26+
27+
using namespace clang;
28+
29+
// Get a string representation of the parts of the signature that can be
30+
// overloaded on.
31+
static std::string GetSignature(const FunctionDecl *Target) {
32+
if (!Target)
33+
return "";
34+
std::string Signature;
35+
36+
if (!isa<CXXConstructorDecl>(Target) && !isa<CXXDestructorDecl>(Target) &&
37+
!isa<CXXConversionDecl>(Target))
38+
Signature.append(Target->getReturnType().getAsString()).append(" ");
39+
Signature.append(Target->getQualifiedNameAsString()).append("(");
40+
41+
for (int i = 0, paramsCount = Target->getNumParams(); i < paramsCount; ++i) {
42+
if (i)
43+
Signature.append(", ");
44+
Signature.append(Target->getParamDecl(i)->getType().getAsString());
45+
}
46+
47+
if (Target->isVariadic())
48+
Signature.append(", ...");
49+
Signature.append(")");
50+
51+
const auto *TargetT =
52+
llvm::dyn_cast_or_null<FunctionType>(Target->getType().getTypePtr());
53+
54+
if (!TargetT)
55+
return Signature;
56+
57+
if (TargetT->isConst())
58+
Signature.append(" const");
59+
if (TargetT->isVolatile())
60+
Signature.append(" volatile");
61+
if (TargetT->isRestrict())
62+
Signature.append(" restrict");
63+
64+
if (const auto *TargetPT =
65+
dyn_cast_or_null<FunctionProtoType>(Target->getType().getTypePtr())) {
66+
switch (TargetPT->getRefQualifier()) {
67+
case RQ_LValue:
68+
Signature.append(" &");
69+
break;
70+
case RQ_RValue:
71+
Signature.append(" &&");
72+
break;
73+
default:
74+
break;
75+
}
76+
}
77+
78+
return Signature;
79+
}
80+
81+
static std::string GetEnclosingDeclContextSignature(const Decl *D) {
82+
if (!D)
83+
return "";
84+
85+
if (const auto *ND = dyn_cast<NamedDecl>(D)) {
86+
std::string DeclName;
87+
88+
switch (ND->getKind()) {
89+
case Decl::Namespace:
90+
case Decl::Record:
91+
case Decl::CXXRecord:
92+
case Decl::Enum:
93+
DeclName = ND->getQualifiedNameAsString();
94+
break;
95+
case Decl::CXXConstructor:
96+
case Decl::CXXDestructor:
97+
case Decl::CXXConversion:
98+
case Decl::CXXMethod:
99+
case Decl::Function:
100+
DeclName = GetSignature(dyn_cast_or_null<FunctionDecl>(ND));
101+
break;
102+
case Decl::ObjCMethod:
103+
// ObjC Methods can not be overloaded, qualified name uniquely identifies
104+
// the method.
105+
DeclName = ND->getQualifiedNameAsString();
106+
break;
107+
default:
108+
break;
109+
}
110+
111+
return DeclName;
112+
}
113+
114+
return "";
115+
}
116+
117+
// Return the text of line number Line of Buffer, or an empty string when
// Buffer is null. If the iterator reaches the end of the buffer before the
// requested line number is seen, whatever the exhausted iterator dereferences
// to is returned.
static StringRef GetNthLineOfFile(llvm::MemoryBuffer *Buffer, int Line) {
  if (Buffer == nullptr)
    return "";

  // NOTE(review): the `false` argument is presumably the skip-blanks flag, so
  // blank lines are visited and counted like any other line -- confirm.
  llvm::line_iterator LineIt(*Buffer, false);
  while (!LineIt.is_at_eof() && LineIt.line_number() != Line)
    ++LineIt;

  return *LineIt;
}
127+
128+
static std::string NormalizeLine(const SourceManager &SM, FullSourceLoc &L,
129+
const Decl *D) {
130+
static StringRef Whitespaces = " \t\n";
131+
132+
const LangOptions &Opts = D->getASTContext().getLangOpts();
133+
StringRef Str = GetNthLineOfFile(SM.getBuffer(L.getFileID(), L),
134+
L.getExpansionLineNumber());
135+
unsigned col = Str.find_first_not_of(Whitespaces);
136+
137+
SourceLocation StartOfLine =
138+
SM.translateLineCol(SM.getFileID(L), L.getExpansionLineNumber(), col);
139+
llvm::MemoryBuffer *Buffer =
140+
SM.getBuffer(SM.getFileID(StartOfLine), StartOfLine);
141+
if (!Buffer)
142+
return {};
143+
144+
const char *BufferPos = SM.getCharacterData(StartOfLine);
145+
146+
Token Token;
147+
Lexer Lexer(SM.getLocForStartOfFile(SM.getFileID(StartOfLine)), Opts,
148+
Buffer->getBufferStart(), BufferPos, Buffer->getBufferEnd());
149+
150+
size_t NextStart = 0;
151+
std::ostringstream LineBuff;
152+
while (!Lexer.LexFromRawLexer(Token) && NextStart < 2) {
153+
if (Token.isAtStartOfLine() && NextStart++ > 0)
154+
continue;
155+
LineBuff << std::string(SM.getCharacterData(Token.getLocation()),
156+
Token.getLength());
157+
}
158+
159+
return LineBuff.str();
160+
}
161+
162+
// Compute the MD5 digest of Content and return it in the string form produced
// by llvm::MD5::stringifyResult().
static llvm::SmallString<32> GetHashOfContent(StringRef Content) {
  llvm::MD5 Hasher;
  Hasher.update(Content);

  llvm::MD5::MD5Result Digest;
  Hasher.final(Digest);

  SmallString<32> DigestText;
  llvm::MD5::stringifyResult(Digest, DigestText);
  return DigestText;
}
173+
174+
std::string clang::GetIssueString(const SourceManager &SM,
175+
FullSourceLoc &IssueLoc,
176+
StringRef CheckerName, StringRef BugType,
177+
const Decl *D) {
178+
static StringRef Delimiter = "$";
179+
180+
return (llvm::Twine(CheckerName) + Delimiter +
181+
GetEnclosingDeclContextSignature(D) + Delimiter +
182+
std::to_string(IssueLoc.getExpansionColumnNumber()) + Delimiter +
183+
NormalizeLine(SM, IssueLoc, D) + Delimiter + BugType)
184+
.str();
185+
}
186+
187+
// Hash the issue string built by GetIssueString() for the same arguments and
// return the result of GetHashOfContent() on it.
SmallString<32> clang::GetIssueHash(const SourceManager &SM,
                                    FullSourceLoc &IssueLoc,
                                    StringRef CheckerName, StringRef BugType,
                                    const Decl *D) {
  const std::string IssueString =
      GetIssueString(SM, IssueLoc, CheckerName, BugType, D);
  return GetHashOfContent(IssueString);
}

0 commit comments

Comments
 (0)