Skip to content

Commit b60972e

Browse files
committed
Canonicalize -D and -U flags
Canonicalize `-D` and `-U` flags by sorting them and only keeping the last instance of a given name. This optimization will only fire if all `-D` and `-U` flags start with a simple identifier that we can guarantee a simple analysis of can determine if two flags refer to the same identifier or not. See the comment on `getSimpleMacroName()` for details of what the issues are.
1 parent 54b014b commit b60972e

File tree

4 files changed

+166
-1
lines changed

4 files changed

+166
-1
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ enum class ScanningOptimizations {
6060
/// Remove unused -ivfsoverlay arguments.
6161
VFS = 4,
6262

63-
DSS_LAST_BITMASK_ENUM(VFS),
63+
/// Canonicalize -D and -U options.
64+
Macros = 8,
65+
66+
DSS_LAST_BITMASK_ENUM(Macros),
6467
Default = All
6568
};
6669

clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,78 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
179179
DiagOpts.IgnoreWarnings = true;
180180
}
181181

182+
// Clang implements -D and -U by splatting text into a predefines buffer. This
183+
// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
184+
// define the same macro, or adding C++ style comments before the macro name.
185+
//
186+
// This function checks that the first non-space characters in the macro
187+
// obviously form an identifier that can be uniqued on without lexing. Failing
188+
// to do this could lead to changing the final definition of a macro.
189+
//
190+
// We could set up a preprocessor and actually lex the name, but that's very
191+
// heavyweight for a situation that will almost never happen in practice.
192+
static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
193+
StringRef Name = Macro.split("=").first.ltrim(" \t");
194+
std::size_t I = 0;
195+
196+
auto FinishName = [&]() -> std::optional<StringRef> {
197+
StringRef SimpleName = Name.slice(0, I);
198+
if (SimpleName.empty())
199+
return std::nullopt;
200+
return SimpleName;
201+
};
202+
203+
for (; I != Name.size(); ++I) {
204+
switch (Name[I]) {
205+
case '(': // Start of macro parameter list
206+
case ' ': // End of macro name
207+
case '\t':
208+
return FinishName();
209+
case '_':
210+
continue;
211+
default:
212+
if (llvm::isAlnum(Name[I]))
213+
continue;
214+
return std::nullopt;
215+
}
216+
}
217+
return FinishName();
218+
}
219+
220+
static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
221+
using MacroOpt = std::pair<StringRef, std::size_t>;
222+
std::vector<MacroOpt> SimpleNames;
223+
SimpleNames.reserve(PPOpts.Macros.size());
224+
std::size_t Index = 0;
225+
for (const auto &M : PPOpts.Macros) {
226+
auto SName = getSimpleMacroName(M.first);
227+
// Skip optimizing if we can't guarantee we can preserve relative order.
228+
if (!SName)
229+
return;
230+
SimpleNames.emplace_back(*SName, Index);
231+
++Index;
232+
}
233+
234+
llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
235+
return A.first < B.first;
236+
});
237+
// Keep the last instance of each macro name by going in reverse
238+
auto NewEnd = std::unique(
239+
SimpleNames.rbegin(), SimpleNames.rend(),
240+
[](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
241+
SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
242+
243+
// Apply permutation.
244+
decltype(PPOpts.Macros) NewMacros;
245+
NewMacros.reserve(SimpleNames.size());
246+
for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
247+
std::size_t OriginalIndex = SimpleNames[I].second;
248+
// We still emit undefines here as they may be undefining a predefined macro
249+
NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
250+
}
251+
std::swap(PPOpts.Macros, NewMacros);
252+
}
253+
182254
/// A clang tool that runs the preprocessor in a mode that's optimized for
183255
/// dependency scanning for the given compiler invocation.
184256
class DependencyScanningAction : public tooling::ToolAction {
@@ -203,6 +275,8 @@ class DependencyScanningAction : public tooling::ToolAction {
203275
CompilerInvocation OriginalInvocation(*Invocation);
204276
// Restore the value of DisableFree, which may be modified by Tooling.
205277
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
278+
if (any(OptimizeArgs & ScanningOptimizations::Macros))
279+
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());
206280

207281
if (Scanned) {
208282
// Scanning runs once for the first -cc1 invocation in a chain of driver
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// This test verifies that command lines with equivalent -D and -U arguments
2+
// are canonicalized to the same module variant.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
7+
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
8+
// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
9+
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
10+
11+
// Verify that there are only two variants and that the expected merges have
12+
// happened.
13+
14+
// CHECK: {
15+
// CHECK-NEXT: "modules": [
16+
// CHECK-NEXT: {
17+
// CHECK-NEXT: "clang-module-deps": [],
18+
// CHECK-NEXT: "clang-modulemap-file":
19+
// CHECK-NEXT: "command-line": [
20+
// CHECK-NOT: "J=1"
21+
// CHECK-NOT: "J"
22+
// CHECK-NOT: "K"
23+
// CHECK: ],
24+
// CHECK-NEXT: "context-hash": "{{.*}}",
25+
// CHECK-NEXT: "file-deps": [
26+
// CHECK: ],
27+
// CHECK-NEXT: "name": "A"
28+
// CHECK-NEXT: },
29+
// CHECK-NEXT: {
30+
// CHECK-NEXT: "clang-module-deps": [],
31+
// CHECK-NEXT: "clang-modulemap-file":
32+
// CHECK-NEXT: "command-line": [
33+
// CHECK: "Fඞ"
34+
// CHECK: "F\\u{0D9E}"
35+
// CHECK: "K"
36+
// CHECK: "K"
37+
// CHECK: ],
38+
// CHECK-NEXT: "context-hash": "{{.*}}",
39+
// CHECK-NEXT: "file-deps": [
40+
// CHECK: ],
41+
// CHECK-NEXT: "name": "A"
42+
// CHECK-NEXT: }
43+
// CHECK-NEXT: ],
44+
// CHECK-NEXT: "translation-units": [
45+
// CHECK: ]
46+
// CHECK: }
47+
48+
49+
//--- build/compile-commands.json.in
50+
51+
[
52+
{
53+
"directory": "DIR",
54+
"command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
55+
"file": "DIR/tu0.m"
56+
},
57+
{
58+
"directory": "DIR",
59+
"command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
60+
"file": "DIR/tu1.m"
61+
},
62+
{
63+
"directory": "DIR",
64+
"command": "clang -c DIR/tu2.m -I modules/A -DFඞ '-DF\\u{0D9E}' -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
65+
"file": "DIR/tu2.m"
66+
}
67+
]
68+
69+
//--- modules/A/module.modulemap
70+
71+
module A {
72+
umbrella header "A.h"
73+
}
74+
75+
//--- modules/A/A.h
76+
77+
//--- tu0.m
78+
79+
#include <A.h>
80+
81+
//--- tu1.m
82+
83+
#include <A.h>
84+
85+
//--- tu2.m
86+
87+
#include <A.h>

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) {
157157
.Case("header-search", ScanningOptimizations::HeaderSearch)
158158
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
159159
.Case("vfs", ScanningOptimizations::VFS)
160+
.Case("canonicalize-macros", ScanningOptimizations::Macros)
160161
.Case("all", ScanningOptimizations::All)
161162
.Default(std::nullopt);
162163
if (!Optimization) {

0 commit comments

Comments
 (0)