Skip to content

Commit c89bcfd

Browse files
committed
Canonicalize -D and -U flags
Canonicalize `-D` and `-U` flags by sorting them and only keeping the last instance of a given name. This optimization will only fire if all `-D` and `-U` flags start with a simple identifier that we can guarantee a simple analysis of can determine if two flags refer to the same identifier or not. See the comment on `getSimpleMacroName()` for details of what the issues are.
1 parent 61ae7e4 commit c89bcfd

File tree

4 files changed

+161
-1
lines changed

4 files changed

+161
-1
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ enum class ScanningOptimizations {
6060
/// Remove unused -ivfsoverlay arguments.
6161
VFS = 4,
6262

63-
DSS_LAST_BITMASK_ENUM(VFS),
63+
/// Canonicalize -D and -U options.
64+
Macros = 8,
65+
66+
DSS_LAST_BITMASK_ENUM(Macros),
6467
Default = All
6568
};
6669

clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,73 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
179179
DiagOpts.IgnoreWarnings = true;
180180
}
181181

182+
// Clang implements -D and -U by splatting text into a predefines buffer. This
183+
// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
184+
// define the same macro, or adding C++ style comments before the macro name.
185+
//
186+
// This function checks that the first non-space characters in the macro
187+
// obviously form an identifier that can be uniqued on without lexing. Failing
188+
// to do this could lead to changing the final definition of a macro.
189+
//
190+
// We could set up a preprocessor and actually lex the name, but that's very
191+
// heavyweight for a situation that will almost never happen in practice.
192+
static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
193+
StringRef Name = Macro.split("=").first.trim(" \t");
194+
std::size_t I = 0;
195+
for (; I != Name.size(); ++I) {
196+
switch (Name[I]) {
197+
case '(': // Start of macro parameter list
198+
case ' ': // End of macro name
199+
case '\t':
200+
goto EndOfMacro;
201+
case '_':
202+
continue;
203+
default:
204+
if (llvm::isAlnum(Name[I]))
205+
continue;
206+
return std::nullopt;
207+
}
208+
}
209+
EndOfMacro:
210+
StringRef SimpleName = Name.slice(0, I);
211+
if (SimpleName.empty())
212+
return std::nullopt;
213+
return SimpleName;
214+
}
215+
216+
static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
217+
using MacroOpt = std::pair<StringRef, std::size_t>;
218+
std::vector<MacroOpt> SimpleNames;
219+
SimpleNames.reserve(PPOpts.Macros.size());
220+
std::size_t Index = 0;
221+
for (const auto &M : PPOpts.Macros) {
222+
auto SName = getSimpleMacroName(M.first);
223+
// Skip optimizing if we can't guarantee we can preserve relative order.
224+
if (!SName)
225+
return;
226+
SimpleNames.emplace_back(*SName, Index);
227+
++Index;
228+
}
229+
230+
llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
231+
return A.first < B.first;
232+
});
233+
// Keep the last instance of each macro name by going in reverse
234+
auto NewEnd = std::unique(SimpleNames.rbegin(), SimpleNames.rend(), [](const MacroOpt &A, const MacroOpt &B) {
235+
return A.first == B.first;
236+
});
237+
SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
238+
239+
// Apply permutation.
240+
decltype(PPOpts.Macros) NewMacros;
241+
NewMacros.reserve(SimpleNames.size());
242+
for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
243+
std::size_t OriginalIndex = SimpleNames[I].second;
244+
NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
245+
}
246+
std::swap(PPOpts.Macros, NewMacros);
247+
}
248+
182249
/// A clang tool that runs the preprocessor in a mode that's optimized for
183250
/// dependency scanning for the given compiler invocation.
184251
class DependencyScanningAction : public tooling::ToolAction {
@@ -203,6 +270,8 @@ class DependencyScanningAction : public tooling::ToolAction {
203270
CompilerInvocation OriginalInvocation(*Invocation);
204271
// Restore the value of DisableFree, which may be modified by Tooling.
205272
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
273+
if (any(OptimizeArgs & ScanningOptimizations::Macros))
274+
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());
206275

207276
if (Scanned) {
208277
// Scanning runs once for the first -cc1 invocation in a chain of driver
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// This test verifies that command lines with equivalent -D and -U arguments
2+
// are canonicalized to the same module variant.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
7+
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
8+
// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
9+
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
10+
11+
// Verify that there are only two variants and that the expected merges have
12+
// happened.
13+
14+
// CHECK: {
15+
// CHECK-NEXT: "modules": [
16+
// CHECK-NEXT: {
17+
// CHECK-NEXT: "clang-module-deps": [],
18+
// CHECK-NEXT: "clang-modulemap-file":
19+
// CHECK-NEXT: "command-line": [
20+
// CHECK-NOT: "J=1"
21+
// CHECK-NOT: "J"
22+
// CHECK-NOT: "K"
23+
// CHECK: ],
24+
// CHECK-NEXT: "context-hash": "{{.*}}",
25+
// CHECK-NEXT: "file-deps": [
26+
// CHECK: ],
27+
// CHECK-NEXT: "name": "A"
28+
// CHECK-NEXT: },
29+
// CHECK-NEXT: {
30+
// CHECK-NEXT: "clang-module-deps": [],
31+
// CHECK-NEXT: "clang-modulemap-file":
32+
// CHECK-NEXT: "command-line": [
33+
// CHECK: "Fඞ"
34+
// CHECK: "F\\u{0D9E}"
35+
// CHECK: "K"
36+
// CHECK: "K"
37+
// CHECK: ],
38+
// CHECK-NEXT: "context-hash": "{{.*}}",
39+
// CHECK-NEXT: "file-deps": [
40+
// CHECK: ],
41+
// CHECK-NEXT: "name": "A"
42+
// CHECK-NEXT: }
43+
// CHECK-NEXT: ],
44+
// CHECK-NEXT: "translation-units": [
45+
// CHECK: ]
46+
// CHECK: }
47+
48+
49+
//--- build/compile-commands.json.in
50+
51+
[
52+
{
53+
"directory": "DIR",
54+
"command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
55+
"file": "DIR/tu0.m"
56+
},
57+
{
58+
"directory": "DIR",
59+
"command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
60+
"file": "DIR/tu1.m"
61+
},
62+
{
63+
"directory": "DIR",
64+
"command": "clang -c DIR/tu2.m -I modules/A -DFඞ '-DF\\u{0D9E}' -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
65+
"file": "DIR/tu2.m"
66+
}
67+
]
68+
69+
//--- modules/A/module.modulemap
70+
71+
module A {
72+
umbrella header "A.h"
73+
}
74+
75+
//--- modules/A/A.h
76+
77+
//--- tu0.m
78+
79+
#include <A.h>
80+
81+
//--- tu1.m
82+
83+
#include <A.h>
84+
85+
//--- tu2.m
86+
87+
#include <A.h>

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) {
157157
.Case("header-search", ScanningOptimizations::HeaderSearch)
158158
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
159159
.Case("vfs", ScanningOptimizations::VFS)
160+
.Case("canonicalize-macros", ScanningOptimizations::Macros)
160161
.Case("all", ScanningOptimizations::All)
161162
.Default(std::nullopt);
162163
if (!Optimization) {

0 commit comments

Comments
 (0)