Skip to content

Commit 8dea05d

Browse files
committed
[clang][ScanDeps] Canonicalize -D and -U flags
Canonicalize `-D` and `-U` flags by sorting them and only keeping the last instance of a given name. This optimization will only fire if all `-D` and `-U` flags start with a simple identifier that we can guarantee a simple analysis of can determine if two flags refer to the same identifier or not. See the comment on `getSimpleMacroName()` for details of what the issues are. Cherry pick of: llvm#82568
1 parent e27fdb1 commit 8dea05d

File tree

4 files changed

+166
-1
lines changed

4 files changed

+166
-1
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,10 @@ enum class ScanningOptimizations {
7676
/// Remove unused -ivfsoverlay arguments.
7777
VFS = 4,
7878

79-
DSS_LAST_BITMASK_ENUM(VFS),
79+
/// Canonicalize -D and -U options.
80+
Macros = 8,
81+
82+
DSS_LAST_BITMASK_ENUM(Macros),
8083
Default = All
8184
};
8285

clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,78 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
197197
DiagOpts.IgnoreWarnings = true;
198198
}
199199

200+
// Clang implements -D and -U by splatting text into a predefines buffer. This
201+
// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
202+
// define the same macro, or adding C++ style comments before the macro name.
203+
//
204+
// This function checks that the first non-space characters in the macro
205+
// obviously form an identifier that can be uniqued on without lexing. Failing
206+
// to do this could lead to changing the final definition of a macro.
207+
//
208+
// We could set up a preprocessor and actually lex the name, but that's very
209+
// heavyweight for a situation that will almost never happen in practice.
210+
static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
211+
StringRef Name = Macro.split("=").first.ltrim(" \t");
212+
std::size_t I = 0;
213+
214+
auto FinishName = [&]() -> std::optional<StringRef> {
215+
StringRef SimpleName = Name.slice(0, I);
216+
if (SimpleName.empty())
217+
return std::nullopt;
218+
return SimpleName;
219+
};
220+
221+
for (; I != Name.size(); ++I) {
222+
switch (Name[I]) {
223+
case '(': // Start of macro parameter list
224+
case ' ': // End of macro name
225+
case '\t':
226+
return FinishName();
227+
case '_':
228+
continue;
229+
default:
230+
if (llvm::isAlnum(Name[I]))
231+
continue;
232+
return std::nullopt;
233+
}
234+
}
235+
return FinishName();
236+
}
237+
238+
static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
239+
using MacroOpt = std::pair<StringRef, std::size_t>;
240+
std::vector<MacroOpt> SimpleNames;
241+
SimpleNames.reserve(PPOpts.Macros.size());
242+
std::size_t Index = 0;
243+
for (const auto &M : PPOpts.Macros) {
244+
auto SName = getSimpleMacroName(M.first);
245+
// Skip optimizing if we can't guarantee we can preserve relative order.
246+
if (!SName)
247+
return;
248+
SimpleNames.emplace_back(*SName, Index);
249+
++Index;
250+
}
251+
252+
llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
253+
return A.first < B.first;
254+
});
255+
// Keep the last instance of each macro name by going in reverse
256+
auto NewEnd = std::unique(
257+
SimpleNames.rbegin(), SimpleNames.rend(),
258+
[](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
259+
SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
260+
261+
// Apply permutation.
262+
decltype(PPOpts.Macros) NewMacros;
263+
NewMacros.reserve(SimpleNames.size());
264+
for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
265+
std::size_t OriginalIndex = SimpleNames[I].second;
266+
// We still emit undefines here as they may be undefining a predefined macro
267+
NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
268+
}
269+
std::swap(PPOpts.Macros, NewMacros);
270+
}
271+
200272
/// Builds a dependency file after reversing prefix mappings. This allows
201273
/// emitting a .d file that has real paths where they would otherwise be
202274
/// canonicalized.
@@ -322,6 +394,8 @@ class DependencyScanningAction : public tooling::ToolAction {
322394
CompilerInvocation OriginalInvocation(*Invocation);
323395
// Restore the value of DisableFree, which may be modified by Tooling.
324396
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
397+
if (any(OptimizeArgs & ScanningOptimizations::Macros))
398+
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());
325399

326400
if (Scanned) {
327401
// Scanning runs once for the first -cc1 invocation in a chain of driver
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// This test verifies that command lines with equivalent -D and -U arguments
2+
// are canonicalized to the same module variant.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
7+
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
8+
// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
9+
// RUN: cat %t/deps.db | FileCheck %s -DPREFIX=%/t
10+
11+
// Verify that there are only two variants and that the expected merges have
12+
// happened.
13+
14+
// CHECK: {
15+
// CHECK-NEXT: "modules": [
16+
// CHECK-NEXT: {
17+
// CHECK-NEXT: "clang-module-deps": [],
18+
// CHECK-NEXT: "clang-modulemap-file":
19+
// CHECK-NEXT: "command-line": [
20+
// CHECK-NOT: "J=1"
21+
// CHECK-NOT: "J"
22+
// CHECK-NOT: "K"
23+
// CHECK: ],
24+
// CHECK-NEXT: "context-hash": "{{.*}}",
25+
// CHECK-NEXT: "file-deps": [
26+
// CHECK: ],
27+
// CHECK-NEXT: "name": "A"
28+
// CHECK-NEXT: },
29+
// CHECK-NEXT: {
30+
// CHECK-NEXT: "clang-module-deps": [],
31+
// CHECK-NEXT: "clang-modulemap-file":
32+
// CHECK-NEXT: "command-line": [
33+
// CHECK: "Fඞ"
34+
// CHECK: 0D9E
35+
// CHECK: "K"
36+
// CHECK: "K"
37+
// CHECK: ],
38+
// CHECK-NEXT: "context-hash": "{{.*}}",
39+
// CHECK-NEXT: "file-deps": [
40+
// CHECK: ],
41+
// CHECK-NEXT: "name": "A"
42+
// CHECK-NEXT: }
43+
// CHECK-NEXT: ],
44+
// CHECK-NEXT: "translation-units": [
45+
// CHECK: ]
46+
// CHECK: }
47+
48+
49+
//--- build/compile-commands.json.in
50+
51+
[
52+
{
53+
"directory": "DIR",
54+
"command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
55+
"file": "DIR/tu0.m"
56+
},
57+
{
58+
"directory": "DIR",
59+
"command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
60+
"file": "DIR/tu1.m"
61+
},
62+
{
63+
"directory": "DIR",
64+
"command": "clang -c DIR/tu2.m -I modules/A -DFඞ \"-DF\\\\u{0D9E}\" -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps -v",
65+
"file": "DIR/tu2.m"
66+
}
67+
]
68+
69+
//--- modules/A/module.modulemap
70+
71+
module A {
72+
umbrella header "A.h"
73+
}
74+
75+
//--- modules/A/A.h
76+
77+
//--- tu0.m
78+
79+
#include <A.h>
80+
81+
//--- tu1.m
82+
83+
#include <A.h>
84+
85+
//--- tu2.m
86+
87+
#include <A.h>

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ static void ParseArgs(int argc, char **argv) {
178178
.Case("header-search", ScanningOptimizations::HeaderSearch)
179179
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
180180
.Case("vfs", ScanningOptimizations::VFS)
181+
.Case("canonicalize-macros", ScanningOptimizations::Macros)
181182
.Case("all", ScanningOptimizations::All)
182183
.Default(std::nullopt);
183184
if (!Optimization) {

0 commit comments

Comments
 (0)