Skip to content

Implement -dump-minimization-hints flag. #133910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 11, 2025
Merged
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"],
"print-dependency-directives-minimized-source">,
HelpText<"Print the output of the dependency directives source minimizer">;
}
def dump_minimization_hints : Joined<["-"],
"dump-minimization-hints=">,
HelpText<"Dump ranges of deserialized declarations to use as bug minimization hints">,
MarshallingInfoString<FrontendOpts<"DumpMinimizationHintsPath">>;

defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Frontend/FrontendOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,10 @@ class FrontendOptions {
/// Output Path for module output file.
std::string ModuleOutputPath;

/// Output path to dump ranges of deserialized declarations to use as
/// minimization hints.
std::string DumpMinimizationHintsPath;

public:
FrontendOptions()
: DisableFree(false), RelocatablePCH(false), ShowHelp(false),
Expand Down
211 changes: 206 additions & 5 deletions clang/lib/Frontend/FrontendAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/Sarif.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Stack.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
Expand All @@ -35,6 +39,7 @@
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
Expand All @@ -49,6 +54,185 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)

namespace {

/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations
/// to aid debugging and bug minimization. It implements ASTConsumer and
/// ASTDeserializationListener, so that an object of
/// DeserializedDeclsLineRangePrinter registers as its own listener. The
/// ASTDeserializationListener interface provides the DeclRead callback that we
/// use to collect the deserialized Decls. Note that printing or otherwise
/// processing them as this point is dangerous, since that could trigger
/// additional deserialization and crash compilation. Therefore, we process the
/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a
/// safe point, since we know that by this point all the Decls needed by the
/// compiler frontend have been deserialized. In case our processing causes
/// further deserialization, DeclRead from the listener might be called again.
/// However, at that point we don't accept any more Decls for processing.
class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
ASTDeserializationListener {
public:
explicit DeserializedDeclsSourceRangePrinter(
SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
: ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}

ASTDeserializationListener *GetASTDeserializationListener() override {
return this;
}

void DeclRead(GlobalDeclID ID, const Decl *D) override {
if (!IsCollectingDecls)
return;
if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
isa<NamespaceDecl>(D)) {
// These decls cover a lot of nested declarations that might not be used,
// reducing the granularity and making the output less useful.
return;
}
if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
// We choose to work at namespace level to reduce complexity and the
// number of cases we care about.
return;
}
PendingDecls.push_back(D);
}

struct Position {
unsigned Line;
unsigned Column;

bool operator<(const Position &other) const {
if (Line < other.Line)
return true;
if (Line > other.Line)
return false;
return Column < other.Column;
}

static Position GetBeginSpelling(const SourceManager &SM,
const CharSourceRange &R) {
SourceLocation Begin = R.getBegin();
return {SM.getSpellingLineNumber(Begin),
SM.getSpellingColumnNumber(Begin)};
}

static Position GetEndSpelling(const SourceManager &SM,
const CharSourceRange &Range,
const LangOptions &LangOpts) {
// For token ranges, compute end location for end character of the range.
CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts);
SourceLocation End = R.getEnd();
// Relex the token past the end location of the last token in the source
// range. If it's a semicolon, advance the location by one token.
Token PossiblySemi;
Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true);
if (PossiblySemi.is(tok::semi))
End = End.getLocWithOffset(1);
// Column number of the returned end position is exclusive.
return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
}
};

struct RequiredRanges {
StringRef Filename;
std::vector<std::pair<Position, Position>> FromTo;
};
void HandleTranslationUnit(ASTContext &Context) override {
assert(IsCollectingDecls && "HandleTranslationUnit called twice?");
IsCollectingDecls = false;

// Merge ranges in each of the files.
struct FileData {
std::vector<std::pair<Position, Position>> FromTo;
OptionalFileEntryRef Ref;
};
llvm::DenseMap<const FileEntry *, FileData> FileToRanges;
for (const Decl *D : PendingDecls) {
CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
if (!R.isValid())
continue;

auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
// Such cases are rare and difficult to handle.
continue;
}

auto &Data = FileToRanges[F];
if (!Data.Ref)
Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
Data.FromTo.push_back(
{Position::GetBeginSpelling(SM, R),
Position::GetEndSpelling(SM, R, D->getLangOpts())});
}

// To simplify output, merge consecutive and intersecting ranges.
std::vector<RequiredRanges> Result;
for (auto &[F, Data] : FileToRanges) {
auto &FromTo = Data.FromTo;
assert(!FromTo.empty());

if (!Data.Ref)
continue;

llvm::sort(FromTo);

std::vector<std::pair<Position, Position>> MergedRanges;
MergedRanges.push_back(FromTo.front());
for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
if (MergedRanges.back().second < It->first) {
MergedRanges.push_back(*It);
continue;
}
if (MergedRanges.back().second < It->second)
MergedRanges.back().second = It->second;
}
Result.push_back({Data.Ref->getName(), MergedRanges});
}
printJson(Result);
}

private:
std::vector<const Decl *> PendingDecls;
bool IsCollectingDecls = true;
const SourceManager &SM;
std::unique_ptr<llvm::raw_ostream> OS;

void printJson(llvm::ArrayRef<RequiredRanges> Result) {
*OS << "{\n";
*OS << R"( "required_ranges": [)" << "\n";
for (size_t I = 0; I < Result.size(); ++I) {
auto &F = Result[I].Filename;
auto &MergedRanges = Result[I].FromTo;
*OS << R"( {)" << "\n";
*OS << R"( "file": ")" << F << "\"," << "\n";
*OS << R"( "range": [)" << "\n";
for (size_t J = 0; J < MergedRanges.size(); ++J) {
auto &From = MergedRanges[J].first;
auto &To = MergedRanges[J].second;
*OS << R"( {)" << "\n";
*OS << R"( "from": {)" << "\n";
*OS << R"( "line": )" << From.Line << ",\n";
*OS << R"( "column": )" << From.Column << "\n"
<< R"( },)" << "\n";
*OS << R"( "to": {)" << "\n";
*OS << R"( "line": )" << To.Line << ",\n";
*OS << R"( "column": )" << To.Column << "\n"
<< R"( })" << "\n";
*OS << R"( })";
if (J < MergedRanges.size() - 1) {
*OS << ",";
}
*OS << "\n";
}
*OS << " ]" << "\n" << " }";
if (I < Result.size() - 1)
*OS << ",";
*OS << "\n";
}
*OS << " ]\n";
*OS << "}\n";
}
};

/// Dumps deserialized declarations.
class DeserializedDeclsDumper : public DelegatingDeserializationListener {
public:
Expand Down Expand Up @@ -121,6 +305,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!Consumer)
return nullptr;

std::vector<std::unique_ptr<ASTConsumer>> Consumers;
llvm::StringRef DumpDeserializedDeclarationRangesPath =
CI.getFrontendOpts().DumpMinimizationHintsPath;
if (!DumpDeserializedDeclarationRangesPath.empty()) {
std::error_code ErrorCode;
auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
DumpDeserializedDeclarationRangesPath, ErrorCode,
llvm::sys::fs::OF_None);
if (!ErrorCode) {
Consumers.push_back(std::make_unique<DeserializedDeclsSourceRangePrinter>(
CI.getSourceManager(), std::move(FileStream)));
} else {
llvm::errs() << "Failed to create output file for "
"-dump-minimization-hints flag, file path: "
<< DumpDeserializedDeclarationRangesPath
<< ", error: " << ErrorCode.message() << "\n";
}
}

// Validate -add-plugin args.
bool FoundAllPlugins = true;
for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
Expand All @@ -138,17 +341,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!FoundAllPlugins)
return nullptr;

// If there are no registered plugins we don't need to wrap the consumer
if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
return Consumer;

// If this is a code completion run, avoid invoking the plugin consumers
if (CI.hasCodeCompletionConsumer())
return Consumer;

// Collect the list of plugins that go before the main action (in Consumers)
// or after it (in AfterConsumers)
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
for (const FrontendPluginRegistry::entry &Plugin :
FrontendPluginRegistry::entries()) {
Expand Down Expand Up @@ -191,6 +389,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
Consumers.push_back(std::move(C));
}

assert(Consumers.size() >= 1 && "should have added the main consumer");
if (Consumers.size() == 1)
return std::move(Consumers.front());
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
}

Expand Down
79 changes: 79 additions & 0 deletions clang/test/Frontend/dump-minimization-hints.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// RUN: rm -rf %t
// RUN: mkdir -p %t
// RUN: split-file %s %t
// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
// RUN: %clang_cc1 -xc++ -fmodules -dump-minimization-hints=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
// RUN: cat %t/decls
// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
// RANGE:{
// RANGE-NEXT: "required_ranges": [
// RANGE-NEXT: {
// RANGE-NEXT: "file": "{{.+}}foo.h",
// RANGE-NEXT: "range": [
// RANGE-NEXT: {
// RANGE-NEXT: "from": {
// RANGE-NEXT: "line": 1,
// RANGE-NEXT: "column": 1
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 9,
// RANGE-NEXT: "column": 3
// RANGE-NEXT: }
// RANGE-NEXT: },
// RANGE-NEXT: {
// RANGE-NEXT: "from": {
// RANGE-NEXT: "line": 11,
// RANGE-NEXT: "column": 1
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 11,
// RANGE-NEXT: "column": 25
// RANGE-NEXT: }
// RANGE-NEXT: },
// RANGE-NEXT: {
// RANGE-NEXT: "from": {
// RANGE-NEXT: "line": 13,
// RANGE-NEXT: "column": 1
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 15,
// RANGE-NEXT: "column": 2
// RANGE-NEXT: }
// RANGE-NEXT: }
// RANGE-NEXT: ]
// RANGE-NEXT: }
// RANGE-NEXT: ]
// RANGE-NEXT:}

//--- foo.cppmap
module foo {
header "foo.h"
export *
}

//--- foo.h
class MyData {
public:
MyData(int val): value_(val) {}
int getValue() const {
return 5;
}
private:
int value_;
};

extern int global_value;

int multiply(int a, int b) {
return a * b;
}

//--- foo.cpp
#include "foo.h"
int global_value = 5;
int main() {
MyData data(5);
int current_value = data.getValue();
int doubled_value = multiply(current_value, 2);
int final_result = doubled_value + global_value;
}