Skip to content

[clang][ScanDeps] Canonicalize -D and -U flags #82298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ enum class ScanningOptimizations {
/// Remove unused -ivfsoverlay arguments.
VFS = 4,

DSS_LAST_BITMASK_ENUM(VFS),
/// Canonicalize -D and -U options.
Macros = 8,

DSS_LAST_BITMASK_ENUM(Macros),
Default = All
};

Expand Down
74 changes: 74 additions & 0 deletions clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,78 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
DiagOpts.IgnoreWarnings = true;
}

// Clang implements -D and -U by splatting text into a predefines buffer. This
// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
// define the same macro, or adding C++ style comments before the macro name.
//
// This function checks that the first non-space characters in the macro
// obviously form an identifier that can be uniqued on without lexing. Failing
// to do this could lead to changing the final definition of a macro.
//
// We could set up a preprocessor and actually lex the name, but that's very
// heavyweight for a situation that will almost never happen in practice.
static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
StringRef Name = Macro.split("=").first.ltrim(" \t");
std::size_t I = 0;

auto FinishName = [&]() -> std::optional<StringRef> {
StringRef SimpleName = Name.slice(0, I);
if (SimpleName.empty())
return std::nullopt;
return SimpleName;
};

for (; I != Name.size(); ++I) {
switch (Name[I]) {
case '(': // Start of macro parameter list
case ' ': // End of macro name
case '\t':
return FinishName();
case '_':
continue;
default:
if (llvm::isAlnum(Name[I]))
continue;
return std::nullopt;
}
}
return FinishName();
}

static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
using MacroOpt = std::pair<StringRef, std::size_t>;
std::vector<MacroOpt> SimpleNames;
SimpleNames.reserve(PPOpts.Macros.size());
std::size_t Index = 0;
for (const auto &M : PPOpts.Macros) {
auto SName = getSimpleMacroName(M.first);
// Skip optimizing if we can't guarantee we can preserve relative order.
if (!SName)
return;
SimpleNames.emplace_back(*SName, Index);
++Index;
}

llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
return A.first < B.first;
});
// Keep the last instance of each macro name by going in reverse
auto NewEnd = std::unique(
SimpleNames.rbegin(), SimpleNames.rend(),
[](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
SimpleNames.erase(SimpleNames.begin(), NewEnd.base());

// Apply permutation.
decltype(PPOpts.Macros) NewMacros;
NewMacros.reserve(SimpleNames.size());
for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
std::size_t OriginalIndex = SimpleNames[I].second;
// We still emit undefines here as they may be undefining a predefined macro
NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
}
std::swap(PPOpts.Macros, NewMacros);
}

/// A clang tool that runs the preprocessor in a mode that's optimized for
/// dependency scanning for the given compiler invocation.
class DependencyScanningAction : public tooling::ToolAction {
Expand All @@ -203,6 +275,8 @@ class DependencyScanningAction : public tooling::ToolAction {
CompilerInvocation OriginalInvocation(*Invocation);
// Restore the value of DisableFree, which may be modified by Tooling.
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
if (any(OptimizeArgs & ScanningOptimizations::Macros))
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());

if (Scanned) {
// Scanning runs once for the first -cc1 invocation in a chain of driver
Expand Down
87 changes: 87 additions & 0 deletions clang/test/ClangScanDeps/optimize-canonicalize-macros.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// This test verifies that command lines with equivalent -D and -U arguments
// are canonicalized to the same module variant.

// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t

// Verify that there are only two variants and that the expected merges have
// happened.

// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK-NEXT: "clang-modulemap-file":
// CHECK-NEXT: "command-line": [
// CHECK-NOT: "J=1"
// CHECK-NOT: "J"
// CHECK-NOT: "K"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK: ],
// CHECK-NEXT: "name": "A"
// CHECK-NEXT: },
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK-NEXT: "clang-modulemap-file":
// CHECK-NEXT: "command-line": [
// CHECK: "Fඞ"
// CHECK: "F\\u{0D9E}"
// CHECK: "K"
// CHECK: "K"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK: ],
// CHECK-NEXT: "name": "A"
// CHECK-NEXT: }
// CHECK-NEXT: ],
// CHECK-NEXT: "translation-units": [
// CHECK: ]
// CHECK: }


//--- build/compile-commands.json.in

[
{
"directory": "DIR",
"command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
"file": "DIR/tu0.m"
},
{
"directory": "DIR",
"command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
"file": "DIR/tu1.m"
},
{
"directory": "DIR",
"command": "clang -c DIR/tu2.m -I modules/A -DFඞ '-DF\\u{0D9E}' -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
"file": "DIR/tu2.m"
}
]

//--- modules/A/module.modulemap

module A {
umbrella header "A.h"
}

//--- modules/A/A.h

//--- tu0.m

#include <A.h>

//--- tu1.m

#include <A.h>

//--- tu2.m

#include <A.h>
1 change: 1 addition & 0 deletions clang/tools/clang-scan-deps/ClangScanDeps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) {
.Case("header-search", ScanningOptimizations::HeaderSearch)
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
.Case("vfs", ScanningOptimizations::VFS)
.Case("canonicalize-macros", ScanningOptimizations::Macros)
.Case("all", ScanningOptimizations::All)
.Default(std::nullopt);
if (!Optimization) {
Expand Down