Skip to content

Commit d42de86

Browse files
authored
reland: [clang][ScanDeps] Canonicalize -D and -U flags (llvm#82568)
Canonicalize `-D` and `-U` flags by sorting them and only keeping the last instance of a given name. This optimization will only fire if all `-D` and `-U` flags start with a simple identifier that we can guarantee a simple analysis of can determine if two flags refer to the same identifier or not. See the comment on `getSimpleMacroName()` for details of what the issues are. Previous version of this had issues with sed differences between macOS, Linux, and Windows. This test doesn't check paths, so just don't run sed. Other tests should use `sed -E 's:\\\\?:/:g'` to get portable behavior. Windows has different command line parsing behavior than Linux for compilation databases, so the test has been adjusted to ignore that difference.
1 parent 8f2bd8a commit d42de86

File tree

4 files changed

+166
-1
lines changed

4 files changed

+166
-1
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ enum class ScanningOptimizations {
6060
/// Remove unused -ivfsoverlay arguments.
6161
VFS = 4,
6262

63-
DSS_LAST_BITMASK_ENUM(VFS),
63+
/// Canonicalize -D and -U options.
64+
Macros = 8,
65+
66+
DSS_LAST_BITMASK_ENUM(Macros),
6467
Default = All
6568
};
6669

clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,78 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
179179
DiagOpts.IgnoreWarnings = true;
180180
}
181181

182+
// Clang implements -D and -U by splatting text into a predefines buffer. This
183+
// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
184+
// define the same macro, or adding C++ style comments before the macro name.
185+
//
186+
// This function checks that the first non-space characters in the macro
187+
// obviously form an identifier that can be uniqued on without lexing. Failing
188+
// to do this could lead to changing the final definition of a macro.
189+
//
190+
// We could set up a preprocessor and actually lex the name, but that's very
191+
// heavyweight for a situation that will almost never happen in practice.
192+
static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
193+
StringRef Name = Macro.split("=").first.ltrim(" \t");
194+
std::size_t I = 0;
195+
196+
auto FinishName = [&]() -> std::optional<StringRef> {
197+
StringRef SimpleName = Name.slice(0, I);
198+
if (SimpleName.empty())
199+
return std::nullopt;
200+
return SimpleName;
201+
};
202+
203+
for (; I != Name.size(); ++I) {
204+
switch (Name[I]) {
205+
case '(': // Start of macro parameter list
206+
case ' ': // End of macro name
207+
case '\t':
208+
return FinishName();
209+
case '_':
210+
continue;
211+
default:
212+
if (llvm::isAlnum(Name[I]))
213+
continue;
214+
return std::nullopt;
215+
}
216+
}
217+
return FinishName();
218+
}
219+
220+
static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
221+
using MacroOpt = std::pair<StringRef, std::size_t>;
222+
std::vector<MacroOpt> SimpleNames;
223+
SimpleNames.reserve(PPOpts.Macros.size());
224+
std::size_t Index = 0;
225+
for (const auto &M : PPOpts.Macros) {
226+
auto SName = getSimpleMacroName(M.first);
227+
// Skip optimizing if we can't guarantee we can preserve relative order.
228+
if (!SName)
229+
return;
230+
SimpleNames.emplace_back(*SName, Index);
231+
++Index;
232+
}
233+
234+
llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
235+
return A.first < B.first;
236+
});
237+
// Keep the last instance of each macro name by going in reverse
238+
auto NewEnd = std::unique(
239+
SimpleNames.rbegin(), SimpleNames.rend(),
240+
[](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
241+
SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
242+
243+
// Apply permutation.
244+
decltype(PPOpts.Macros) NewMacros;
245+
NewMacros.reserve(SimpleNames.size());
246+
for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
247+
std::size_t OriginalIndex = SimpleNames[I].second;
248+
// We still emit undefines here as they may be undefining a predefined macro
249+
NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
250+
}
251+
std::swap(PPOpts.Macros, NewMacros);
252+
}
253+
182254
/// A clang tool that runs the preprocessor in a mode that's optimized for
183255
/// dependency scanning for the given compiler invocation.
184256
class DependencyScanningAction : public tooling::ToolAction {
@@ -203,6 +275,8 @@ class DependencyScanningAction : public tooling::ToolAction {
203275
CompilerInvocation OriginalInvocation(*Invocation);
204276
// Restore the value of DisableFree, which may be modified by Tooling.
205277
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
278+
if (any(OptimizeArgs & ScanningOptimizations::Macros))
279+
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());
206280

207281
if (Scanned) {
208282
// Scanning runs once for the first -cc1 invocation in a chain of driver
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// This test verifies that command lines with equivalent -D and -U arguments
2+
// are canonicalized to the same module variant.
3+
4+
// RUN: rm -rf %t
5+
// RUN: split-file %s %t
6+
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
7+
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
8+
// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
9+
// RUN: cat %t/deps.db | FileCheck %s -DPREFIX=%/t
10+
11+
// Verify that there are only two variants and that the expected merges have
12+
// happened.
13+
14+
// CHECK: {
15+
// CHECK-NEXT: "modules": [
16+
// CHECK-NEXT: {
17+
// CHECK-NEXT: "clang-module-deps": [],
18+
// CHECK-NEXT: "clang-modulemap-file":
19+
// CHECK-NEXT: "command-line": [
20+
// CHECK-NOT: "J=1"
21+
// CHECK-NOT: "J"
22+
// CHECK-NOT: "K"
23+
// CHECK: ],
24+
// CHECK-NEXT: "context-hash": "{{.*}}",
25+
// CHECK-NEXT: "file-deps": [
26+
// CHECK: ],
27+
// CHECK-NEXT: "name": "A"
28+
// CHECK-NEXT: },
29+
// CHECK-NEXT: {
30+
// CHECK-NEXT: "clang-module-deps": [],
31+
// CHECK-NEXT: "clang-modulemap-file":
32+
// CHECK-NEXT: "command-line": [
33+
// CHECK: "Fඞ"
34+
// CHECK: 0D9E
35+
// CHECK: "K"
36+
// CHECK: "K"
37+
// CHECK: ],
38+
// CHECK-NEXT: "context-hash": "{{.*}}",
39+
// CHECK-NEXT: "file-deps": [
40+
// CHECK: ],
41+
// CHECK-NEXT: "name": "A"
42+
// CHECK-NEXT: }
43+
// CHECK-NEXT: ],
44+
// CHECK-NEXT: "translation-units": [
45+
// CHECK: ]
46+
// CHECK: }
47+
48+
49+
//--- build/compile-commands.json.in
50+
51+
[
52+
{
53+
"directory": "DIR",
54+
"command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
55+
"file": "DIR/tu0.m"
56+
},
57+
{
58+
"directory": "DIR",
59+
"command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
60+
"file": "DIR/tu1.m"
61+
},
62+
{
63+
"directory": "DIR",
64+
"command": "clang -c DIR/tu2.m -I modules/A -DFඞ \"-DF\\\\u{0D9E}\" -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
65+
"file": "DIR/tu2.m"
66+
}
67+
]
68+
69+
//--- modules/A/module.modulemap
70+
71+
module A {
72+
umbrella header "A.h"
73+
}
74+
75+
//--- modules/A/A.h
76+
77+
//--- tu0.m
78+
79+
#include <A.h>
80+
81+
//--- tu1.m
82+
83+
#include <A.h>
84+
85+
//--- tu2.m
86+
87+
#include <A.h>

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) {
157157
.Case("header-search", ScanningOptimizations::HeaderSearch)
158158
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
159159
.Case("vfs", ScanningOptimizations::VFS)
160+
.Case("canonicalize-macros", ScanningOptimizations::Macros)
160161
.Case("all", ScanningOptimizations::All)
161162
.Default(std::nullopt);
162163
if (!Optimization) {

0 commit comments

Comments
 (0)