Skip to content

Commit 9eeafc6

Browse files
authored
Implement -dump-minimization-hints flag. (#133910)
This PR implements a CC1 flag `-dump-minimization-hints`. The flag allows specifying a file path to dump ranges of deserialized declarations in `ASTReader`. Example usage: ``` clang -Xclang=-dump-minimization-hints=/tmp/decls -c file.cc -o file.o ``` Example output: ``` // /tmp/decls { "required_ranges": [ { "file": "foo.h", "range": [ { "from": { "line": 26, "column": 1 }, "to": { "line": 27, "column": 77 } } ] }, { "file": "bar.h", "range": [ { "from": { "line": 30, "column": 1 }, "to": { "line": 35, "column": 1 } }, { "from": { "line": 92, "column": 1 }, "to": { "line": 95, "column": 1 } } ] } ] } ``` Specifying the flag creates an instance of `DeserializedDeclsSourceRangePrinter`, which dumps ranges of deserialized declarations to aid debugging and bug minimization (we use it as input to [C-Vise](https://github.com/emaxx-google/cvise/tree/multifile-hints)). Required ranges are computed from source ranges of Decls. `TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for the sake of this PR. Technical details: * `DeserializedDeclsSourceRangePrinter` implements `ASTConsumer` and `ASTDeserializationListener`, so that an object of `DeserializedDeclsSourceRangePrinter` registers as its own listener. * `ASTDeserializationListener` interface provides the `DeclRead` callback that we use to collect the deserialized Decls. Printing or otherwise processing them at this point is dangerous, since that could trigger additional deserialization and crash compilation. * The collected Decls are processed in the `HandleTranslationUnit` method of `ASTConsumer`. This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been deserialized. * In case our processing causes further deserialization, `DeclRead` from the listener might be called again. However, at that point we don't accept any more Decls for processing.
1 parent 4cde945 commit 9eeafc6

File tree

4 files changed

+293
-5
lines changed

4 files changed

+293
-5
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8045,6 +8045,10 @@ def print_dependency_directives_minimized_source : Flag<["-"],
80458045
"print-dependency-directives-minimized-source">,
80468046
HelpText<"Print the output of the dependency directives source minimizer">;
80478047
}
8048+
def dump_minimization_hints : Joined<["-"],
8049+
"dump-minimization-hints=">,
8050+
HelpText<"Dump ranges of deserialized declarations to use as bug minimization hints">,
8051+
MarshallingInfoString<FrontendOpts<"DumpMinimizationHintsPath">>;
80488052

80498053
defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
80508054
CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,

clang/include/clang/Frontend/FrontendOptions.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,10 @@ class FrontendOptions {
530530
/// Output Path for module output file.
531531
std::string ModuleOutputPath;
532532

533+
/// Output path to dump ranges of deserialized declarations to use as
534+
/// minimization hints.
535+
std::string DumpMinimizationHintsPath;
536+
533537
public:
534538
FrontendOptions()
535539
: DisableFree(false), RelocatablePCH(false), ShowHelp(false),

clang/lib/Frontend/FrontendAction.cpp

Lines changed: 206 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,13 @@
1313
#include "clang/Basic/Builtins.h"
1414
#include "clang/Basic/DiagnosticOptions.h"
1515
#include "clang/Basic/FileEntry.h"
16+
#include "clang/Basic/LangOptions.h"
1617
#include "clang/Basic/LangStandard.h"
1718
#include "clang/Basic/Sarif.h"
19+
#include "clang/Basic/SourceLocation.h"
20+
#include "clang/Basic/SourceManager.h"
1821
#include "clang/Basic/Stack.h"
22+
#include "clang/Basic/TokenKinds.h"
1923
#include "clang/Frontend/ASTUnit.h"
2024
#include "clang/Frontend/CompilerInstance.h"
2125
#include "clang/Frontend/FrontendDiagnostic.h"
@@ -35,6 +39,7 @@
3539
#include "clang/Serialization/ASTReader.h"
3640
#include "clang/Serialization/GlobalModuleIndex.h"
3741
#include "llvm/ADT/ScopeExit.h"
42+
#include "llvm/ADT/StringRef.h"
3843
#include "llvm/Support/BuryPointer.h"
3944
#include "llvm/Support/ErrorHandling.h"
4045
#include "llvm/Support/FileSystem.h"
@@ -49,6 +54,185 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
4954

5055
namespace {
5156

57+
/// DeserializedDeclsSourceRangePrinter dumps ranges of deserialized declarations
58+
/// to aid debugging and bug minimization. It implements ASTConsumer and
59+
/// ASTDeserializationListener, so that an object of
60+
/// DeserializedDeclsSourceRangePrinter registers as its own listener. The
61+
/// ASTDeserializationListener interface provides the DeclRead callback that we
62+
/// use to collect the deserialized Decls. Note that printing or otherwise
63+
/// processing them at this point is dangerous, since that could trigger
64+
/// additional deserialization and crash compilation. Therefore, we process the
65+
/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a
66+
/// safe point, since we know that by this point all the Decls needed by the
67+
/// compiler frontend have been deserialized. In case our processing causes
68+
/// further deserialization, DeclRead from the listener might be called again.
69+
/// However, at that point we don't accept any more Decls for processing.
70+
class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
71+
ASTDeserializationListener {
72+
public:
73+
explicit DeserializedDeclsSourceRangePrinter(
74+
SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
75+
: ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
76+
77+
ASTDeserializationListener *GetASTDeserializationListener() override {
78+
return this;
79+
}
80+
81+
void DeclRead(GlobalDeclID ID, const Decl *D) override {
82+
if (!IsCollectingDecls)
83+
return;
84+
if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
85+
isa<NamespaceDecl>(D)) {
86+
// These decls cover a lot of nested declarations that might not be used,
87+
// reducing the granularity and making the output less useful.
88+
return;
89+
}
90+
if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
91+
// We choose to work at namespace level to reduce complexity and the
92+
// number of cases we care about.
93+
return;
94+
}
95+
PendingDecls.push_back(D);
96+
}
97+
98+
struct Position {
99+
unsigned Line;
100+
unsigned Column;
101+
102+
bool operator<(const Position &other) const {
103+
if (Line < other.Line)
104+
return true;
105+
if (Line > other.Line)
106+
return false;
107+
return Column < other.Column;
108+
}
109+
110+
static Position GetBeginSpelling(const SourceManager &SM,
111+
const CharSourceRange &R) {
112+
SourceLocation Begin = R.getBegin();
113+
return {SM.getSpellingLineNumber(Begin),
114+
SM.getSpellingColumnNumber(Begin)};
115+
}
116+
117+
static Position GetEndSpelling(const SourceManager &SM,
118+
const CharSourceRange &Range,
119+
const LangOptions &LangOpts) {
120+
// For token ranges, compute end location for end character of the range.
121+
CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts);
122+
SourceLocation End = R.getEnd();
123+
// Relex the token past the end location of the last token in the source
124+
// range. If it's a semicolon, advance the location by one token.
125+
Token PossiblySemi;
126+
Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true);
127+
if (PossiblySemi.is(tok::semi))
128+
End = End.getLocWithOffset(1);
129+
// Column number of the returned end position is exclusive.
130+
return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
131+
}
132+
};
133+
134+
struct RequiredRanges {
135+
StringRef Filename;
136+
std::vector<std::pair<Position, Position>> FromTo;
137+
};
138+
void HandleTranslationUnit(ASTContext &Context) override {
139+
assert(IsCollectingDecls && "HandleTranslationUnit called twice?");
140+
IsCollectingDecls = false;
141+
142+
// Merge ranges in each of the files.
143+
struct FileData {
144+
std::vector<std::pair<Position, Position>> FromTo;
145+
OptionalFileEntryRef Ref;
146+
};
147+
llvm::DenseMap<const FileEntry *, FileData> FileToRanges;
148+
for (const Decl *D : PendingDecls) {
149+
CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
150+
if (!R.isValid())
151+
continue;
152+
153+
auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
154+
if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
155+
// Such cases are rare and difficult to handle.
156+
continue;
157+
}
158+
159+
auto &Data = FileToRanges[F];
160+
if (!Data.Ref)
161+
Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
162+
Data.FromTo.push_back(
163+
{Position::GetBeginSpelling(SM, R),
164+
Position::GetEndSpelling(SM, R, D->getLangOpts())});
165+
}
166+
167+
// To simplify output, merge consecutive and intersecting ranges.
168+
std::vector<RequiredRanges> Result;
169+
for (auto &[F, Data] : FileToRanges) {
170+
auto &FromTo = Data.FromTo;
171+
assert(!FromTo.empty());
172+
173+
if (!Data.Ref)
174+
continue;
175+
176+
llvm::sort(FromTo);
177+
178+
std::vector<std::pair<Position, Position>> MergedRanges;
179+
MergedRanges.push_back(FromTo.front());
180+
for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
181+
if (MergedRanges.back().second < It->first) {
182+
MergedRanges.push_back(*It);
183+
continue;
184+
}
185+
if (MergedRanges.back().second < It->second)
186+
MergedRanges.back().second = It->second;
187+
}
188+
Result.push_back({Data.Ref->getName(), MergedRanges});
189+
}
190+
printJson(Result);
191+
}
192+
193+
private:
194+
std::vector<const Decl *> PendingDecls;
195+
bool IsCollectingDecls = true;
196+
const SourceManager &SM;
197+
std::unique_ptr<llvm::raw_ostream> OS;
198+
199+
void printJson(llvm::ArrayRef<RequiredRanges> Result) {
200+
*OS << "{\n";
201+
*OS << R"( "required_ranges": [)" << "\n";
202+
for (size_t I = 0; I < Result.size(); ++I) {
203+
auto &F = Result[I].Filename;
204+
auto &MergedRanges = Result[I].FromTo;
205+
*OS << R"( {)" << "\n";
206+
*OS << R"( "file": ")" << F << "\"," << "\n";
207+
*OS << R"( "range": [)" << "\n";
208+
for (size_t J = 0; J < MergedRanges.size(); ++J) {
209+
auto &From = MergedRanges[J].first;
210+
auto &To = MergedRanges[J].second;
211+
*OS << R"( {)" << "\n";
212+
*OS << R"( "from": {)" << "\n";
213+
*OS << R"( "line": )" << From.Line << ",\n";
214+
*OS << R"( "column": )" << From.Column << "\n"
215+
<< R"( },)" << "\n";
216+
*OS << R"( "to": {)" << "\n";
217+
*OS << R"( "line": )" << To.Line << ",\n";
218+
*OS << R"( "column": )" << To.Column << "\n"
219+
<< R"( })" << "\n";
220+
*OS << R"( })";
221+
if (J < MergedRanges.size() - 1) {
222+
*OS << ",";
223+
}
224+
*OS << "\n";
225+
}
226+
*OS << " ]" << "\n" << " }";
227+
if (I < Result.size() - 1)
228+
*OS << ",";
229+
*OS << "\n";
230+
}
231+
*OS << " ]\n";
232+
*OS << "}\n";
233+
}
234+
};
235+
52236
/// Dumps deserialized declarations.
53237
class DeserializedDeclsDumper : public DelegatingDeserializationListener {
54238
public:
@@ -121,6 +305,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
121305
if (!Consumer)
122306
return nullptr;
123307

308+
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
309+
llvm::StringRef DumpDeserializedDeclarationRangesPath =
310+
CI.getFrontendOpts().DumpMinimizationHintsPath;
311+
if (!DumpDeserializedDeclarationRangesPath.empty()) {
312+
std::error_code ErrorCode;
313+
auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
314+
DumpDeserializedDeclarationRangesPath, ErrorCode,
315+
llvm::sys::fs::OF_None);
316+
if (!ErrorCode) {
317+
Consumers.push_back(std::make_unique<DeserializedDeclsSourceRangePrinter>(
318+
CI.getSourceManager(), std::move(FileStream)));
319+
} else {
320+
llvm::errs() << "Failed to create output file for "
321+
"-dump-minimization-hints flag, file path: "
322+
<< DumpDeserializedDeclarationRangesPath
323+
<< ", error: " << ErrorCode.message() << "\n";
324+
}
325+
}
326+
124327
// Validate -add-plugin args.
125328
bool FoundAllPlugins = true;
126329
for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
@@ -138,17 +341,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
138341
if (!FoundAllPlugins)
139342
return nullptr;
140343

141-
// If there are no registered plugins we don't need to wrap the consumer
142-
if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
143-
return Consumer;
144-
145344
// If this is a code completion run, avoid invoking the plugin consumers
146345
if (CI.hasCodeCompletionConsumer())
147346
return Consumer;
148347

149348
// Collect the list of plugins that go before the main action (in Consumers)
150349
// or after it (in AfterConsumers)
151-
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
152350
std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
153351
for (const FrontendPluginRegistry::entry &Plugin :
154352
FrontendPluginRegistry::entries()) {
@@ -191,6 +389,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
191389
Consumers.push_back(std::move(C));
192390
}
193391

392+
assert(Consumers.size() >= 1 && "should have added the main consumer");
393+
if (Consumers.size() == 1)
394+
return std::move(Consumers.front());
194395
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
195396
}
196397

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// RUN: rm -rf %t
2+
// RUN: mkdir -p %t
3+
// RUN: split-file %s %t
4+
// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
5+
// RUN: %clang_cc1 -xc++ -fmodules -dump-minimization-hints=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
6+
// RUN: cat %t/decls
7+
// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
8+
// RANGE:{
9+
// RANGE-NEXT: "required_ranges": [
10+
// RANGE-NEXT: {
11+
// RANGE-NEXT: "file": "{{.+}}foo.h",
12+
// RANGE-NEXT: "range": [
13+
// RANGE-NEXT: {
14+
// RANGE-NEXT: "from": {
15+
// RANGE-NEXT: "line": 1,
16+
// RANGE-NEXT: "column": 1
17+
// RANGE-NEXT: },
18+
// RANGE-NEXT: "to": {
19+
// RANGE-NEXT: "line": 9,
20+
// RANGE-NEXT: "column": 3
21+
// RANGE-NEXT: }
22+
// RANGE-NEXT: },
23+
// RANGE-NEXT: {
24+
// RANGE-NEXT: "from": {
25+
// RANGE-NEXT: "line": 11,
26+
// RANGE-NEXT: "column": 1
27+
// RANGE-NEXT: },
28+
// RANGE-NEXT: "to": {
29+
// RANGE-NEXT: "line": 11,
30+
// RANGE-NEXT: "column": 25
31+
// RANGE-NEXT: }
32+
// RANGE-NEXT: },
33+
// RANGE-NEXT: {
34+
// RANGE-NEXT: "from": {
35+
// RANGE-NEXT: "line": 13,
36+
// RANGE-NEXT: "column": 1
37+
// RANGE-NEXT: },
38+
// RANGE-NEXT: "to": {
39+
// RANGE-NEXT: "line": 15,
40+
// RANGE-NEXT: "column": 2
41+
// RANGE-NEXT: }
42+
// RANGE-NEXT: }
43+
// RANGE-NEXT: ]
44+
// RANGE-NEXT: }
45+
// RANGE-NEXT: ]
46+
// RANGE-NEXT:}
47+
48+
//--- foo.cppmap
49+
module foo {
50+
header "foo.h"
51+
export *
52+
}
53+
54+
//--- foo.h
55+
class MyData {
56+
public:
57+
MyData(int val): value_(val) {}
58+
int getValue() const {
59+
return 5;
60+
}
61+
private:
62+
int value_;
63+
};
64+
65+
extern int global_value;
66+
67+
int multiply(int a, int b) {
68+
return a * b;
69+
}
70+
71+
//--- foo.cpp
72+
#include "foo.h"
73+
int global_value = 5;
74+
int main() {
75+
MyData data(5);
76+
int current_value = data.getValue();
77+
int doubled_value = multiply(current_value, 2);
78+
int final_result = doubled_value + global_value;
79+
}

0 commit comments

Comments
 (0)