Skip to content

Commit 54acda2

Browse files
authored
[clang module] Current Working Directory Pruning (#124786)
When computing the context hash, `clang` always includes the compiler's working directory. As a result, when the only difference between two compilations is the working directory, different module variants are still generated. These variants are redundant. This PR implements an optimization that ignores the working directory when computing the context hash, whenever it is safe to do so. Specifically, `clang` checks whether it is safe to ignore the working directory in `isSafeToIgnoreCWD`. The check goes through the compile command options to see whether any of the specified paths are relative. A path is considered relative here if it is not empty and `llvm::sys::path::is_absolute` returns false for it. If none of the examined paths is relative, `clang` considers it safe to ignore the current working directory and leaves it out of the context hash.
1 parent 6b3cbf2 commit 54acda2

File tree

5 files changed

+290
-9
lines changed

5 files changed

+290
-9
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,18 @@ enum class ScanningOptimizations {
5555
HeaderSearch = 1,
5656

5757
/// Remove warnings from system modules.
58-
SystemWarnings = 2,
58+
SystemWarnings = (1 << 1),
5959

6060
/// Remove unused -ivfsoverlay arguments.
61-
VFS = 4,
61+
VFS = (1 << 2),
6262

6363
/// Canonicalize -D and -U options.
64-
Macros = 8,
64+
Macros = (1 << 3),
6565

66-
DSS_LAST_BITMASK_ENUM(Macros),
66+
/// Ignore the compiler's working directory if it is safe.
67+
IgnoreCWD = (1 << 4),
68+
69+
DSS_LAST_BITMASK_ENUM(IgnoreCWD),
6770
Default = All
6871
};
6972

clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
394394
}
395395
}
396396

397+
static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
398+
// Check if the command line input uses relative paths.
399+
// It is not safe to ignore the current working directory if any of the
400+
// command line inputs use relative paths.
401+
#define IF_RELATIVE_RETURN_FALSE(PATH) \
402+
do { \
403+
if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \
404+
return false; \
405+
} while (0)
406+
407+
#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \
408+
do { \
409+
if (llvm::any_of(PATHS, [](const auto &P) { \
410+
return !P.empty() && !llvm::sys::path::is_absolute(P); \
411+
})) \
412+
return false; \
413+
} while (0)
414+
415+
// Header search paths.
416+
const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
417+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
418+
for (auto &Entry : HeaderSearchOpts.UserEntries)
419+
if (Entry.IgnoreSysRoot)
420+
IF_RELATIVE_RETURN_FALSE(Entry.Path);
421+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
422+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
423+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
424+
for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
425+
E = HeaderSearchOpts.PrebuiltModuleFiles.end();
426+
I != E;) {
427+
auto Current = I++;
428+
IF_RELATIVE_RETURN_FALSE(Current->second);
429+
}
430+
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
431+
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);
432+
433+
// Preprocessor options.
434+
const auto &PPOpts = CI.getPreprocessorOpts();
435+
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
436+
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
437+
IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);
438+
439+
// Frontend options.
440+
const auto &FrontendOpts = CI.getFrontendOpts();
441+
for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
442+
if (Input.isBuffer())
443+
continue; // FIXME: Can this happen when parsing command-line?
444+
445+
IF_RELATIVE_RETURN_FALSE(Input.getFile());
446+
}
447+
IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
448+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
449+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
450+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
451+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
452+
IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
453+
IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);
454+
455+
// Filesystem options.
456+
const auto &FileSystemOpts = CI.getFileSystemOpts();
457+
IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);
458+
459+
// Codegen options.
460+
const auto &CodeGenOpts = CI.getCodeGenOpts();
461+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
462+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);
463+
464+
// Sanitizer options.
465+
IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);
466+
467+
// Coverage mappings.
468+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
469+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
470+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);
471+
472+
// Dependency output options.
473+
for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
474+
IF_RELATIVE_RETURN_FALSE(ExtraDep.first);
475+
476+
return true;
477+
}
478+
397479
static std::string getModuleContextHash(const ModuleDeps &MD,
398480
const CowCompilerInvocation &CI,
399-
bool EagerLoadModules,
481+
bool EagerLoadModules, bool IgnoreCWD,
400482
llvm::vfs::FileSystem &VFS) {
401483
llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
402484
HashBuilder;
@@ -407,8 +489,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
407489
HashBuilder.add(getClangFullRepositoryVersion());
408490
HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
409491
llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
410-
if (CWD)
492+
auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
493+
if (CWD && !IgnoreCWD)
411494
HashBuilder.add(*CWD);
495+
else
496+
FSOpts.WorkingDir.clear();
412497

413498
// Hash the BuildInvocation without any input files.
414499
SmallString<0> ArgVec;
@@ -440,8 +525,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
440525

441526
void ModuleDepCollector::associateWithContextHash(
442527
const CowCompilerInvocation &CI, ModuleDeps &Deps) {
443-
Deps.ID.ContextHash = getModuleContextHash(
444-
Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
528+
bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
529+
isSafeToIgnoreCWD(CI);
530+
Deps.ID.ContextHash =
531+
getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
532+
ScanInstance.getVirtualFileSystem());
445533
bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
446534
(void)Inserted;
447535
assert(Inserted && "duplicate module mapping");
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
// Test current directory pruning when computing the context hash.
2+
3+
// REQUIRES: shell
4+
5+
// RUN: rm -rf %t
6+
// RUN: split-file %s %t
7+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
8+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
9+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json
10+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json
11+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json
12+
// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
13+
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
14+
// It is not a typo to use cdb1.json for result2. We intend to use the same
15+
// compilation database, but different clang-scan-deps optimize-args options.
16+
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
17+
// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json
18+
// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json
19+
// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json
20+
// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
21+
// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
22+
// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH
23+
// RUN: cat %t/result0.json %t/result5.json | FileCheck %s
24+
25+
//--- cdb0.json.in
26+
[{
27+
"directory": "DIR",
28+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
29+
"file": "DIR/tu.c"
30+
}]
31+
32+
//--- cdb1.json.in
33+
[{
34+
"directory": "DIR/a",
35+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
36+
"file": "DIR/tu.c"
37+
}]
38+
39+
// cdb2 is skipped because we reuse cdb1.
40+
41+
//--- cdb3.json.in
42+
[{
43+
"directory": "DIR",
44+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
45+
"file": "DIR/tu.c"
46+
}]
47+
48+
//--- cdb4.json.in
49+
[{
50+
"directory": "DIR/a/",
51+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
52+
"file": "DIR/tu.c"
53+
}]
54+
55+
//--- cdb5.json.in
56+
[{
57+
"directory": "DIR",
58+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o",
59+
"file": "DIR/tu.c"
60+
}]
61+
62+
//--- include/module.modulemap
63+
module mod {
64+
header "mod.h"
65+
}
66+
67+
//--- include/mod.h
68+
69+
//--- tu.c
70+
#include "mod.h"
71+
72+
// Check that result0 and result1/result5 compute the same hash with
73+
// optimization on. The only difference between result0 and result1/result5 is
74+
// the compiler's working directory.
75+
// CHECK: {
76+
// CHECK-NEXT: "modules": [
77+
// CHECK-NEXT: {
78+
// CHECK-NEXT: "clang-module-deps": [],
79+
// CHECK: "context-hash": "[[HASH:.*]]",
80+
// CHECK: }
81+
// CHECK: "translation-units": [
82+
// CHECK: {
83+
// CHECK: "commands": [
84+
// CHECK: {
85+
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
86+
// CHECK-NEXT: "clang-module-deps": [
87+
// CHECK-NEXT: {
88+
// CHECK-NEXT: "context-hash": "[[HASH]]",
89+
// CHECK-NEXT: "module-name": "mod"
90+
// CHECK: }
91+
// CHECK: ],
92+
// CHECK: {
93+
// CHECK-NEXT: "modules": [
94+
// CHECK-NEXT: {
95+
// CHECK-NEXT: "clang-module-deps": [],
96+
// CHECK: "context-hash": "[[HASH]]",
97+
// CHECK: }
98+
// CHECK: "translation-units": [
99+
// CHECK: {
100+
// CHECK: "commands": [
101+
// CHECK: {
102+
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
103+
// CHECK-NEXT: "clang-module-deps": [
104+
// CHECK-NEXT: {
105+
// CHECK-NEXT: "context-hash": "[[HASH]]",
106+
// CHECK-NEXT: "module-name": "mod"
107+
// CHECK: }
108+
// CHECK: ],
109+
110+
// Check that result0 and result2 compute different hashes because
111+
// the working directory optimization is turned off for result2.
112+
// SKIPOPT: {
113+
// SKIPOPT-NEXT: "modules": [
114+
// SKIPOPT-NEXT: {
115+
// SKIPOPT-NEXT: "clang-module-deps": [],
116+
// SKIPOPT: "context-hash": "[[HASH0:.*]]",
117+
// SKIPOPT: }
118+
// SKIPOPT: "translation-units": [
119+
// SKIPOPT: {
120+
// SKIPOPT: "commands": [
121+
// SKIPOPT: {
122+
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
123+
// SKIPOPT-NEXT: "clang-module-deps": [
124+
// SKIPOPT-NEXT: {
125+
// SKIPOPT-NEXT: "context-hash": "[[HASH0]]",
126+
// SKIPOPT-NEXT: "module-name": "mod"
127+
// SKIPOPT: }
128+
// SKIPOPT: ],
129+
// SKIPOPT: {
130+
// SKIPOPT-NEXT: "modules": [
131+
// SKIPOPT-NEXT: {
132+
// SKIPOPT-NEXT: "clang-module-deps": [],
133+
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
134+
// SKIPOPT: "context-hash": "[[HASH2:.*]]",
135+
// SKIPOPT: }
136+
// SKIPOPT: "translation-units": [
137+
// SKIPOPT: {
138+
// SKIPOPT: "commands": [
139+
// SKIPOPT: {
140+
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
141+
// SKIPOPT-NEXT: "clang-module-deps": [
142+
// SKIPOPT-NEXT: {
143+
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
144+
// SKIPOPT-NEXT: "context-hash": "[[HASH2]]"
145+
// SKIPOPT-NEXT: "module-name": "mod"
146+
// SKIPOPT: }
147+
// SKIPOPT: ],
148+
149+
// Check that result3 and result4 contain different hashes because
150+
// both have the same relative path as a command line input, and
151+
// they are produced using different compiler working directories.
152+
// RELPATH: {
153+
// RELPATH-NEXT: "modules": [
154+
// RELPATH-NEXT: {
155+
// RELPATH-NEXT: "clang-module-deps": [],
156+
// RELPATH: "context-hash": "[[HASH3:.*]]",
157+
// RELPATH: }
158+
// RELPATH: "translation-units": [
159+
// RELPATH: {
160+
// RELPATH: "commands": [
161+
// RELPATH: {
162+
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
163+
// RELPATH-NEXT: "clang-module-deps": [
164+
// RELPATH-NEXT: {
165+
// RELPATH-NEXT: "context-hash": "[[HASH3]]",
166+
// RELPATH-NEXT: "module-name": "mod"
167+
// RELPATH: }
168+
// RELPATH: ],
169+
// RELPATH: {
170+
// RELPATH-NEXT: "modules": [
171+
// RELPATH-NEXT: {
172+
// RELPATH-NEXT: "clang-module-deps": [],
173+
// RELPATH-NOT: "context-hash": "[[HASH3]]",
174+
// RELPATH: "context-hash": "[[HASH4:.*]]",
175+
// RELPATH: }
176+
// RELPATH: "translation-units": [
177+
// RELPATH: {
178+
// RELPATH: "commands": [
179+
// RELPATH: {
180+
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
181+
// RELPATH-NEXT: "clang-module-deps": [
182+
// RELPATH-NEXT: {
183+
// RELPATH-NOT: "context-hash": "[[HASH3]]",
184+
// RELPATH-NEXT: "context-hash": "[[HASH4]]"
185+
// RELPATH-NEXT: "module-name": "mod"
186+
// RELPATH: }
187+
// RELPATH: ],
188+

clang/test/ClangScanDeps/working-dir.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: split-file %s %t
33
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
44
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
5-
// RUN: -j 1 -format experimental-full --optimize-args=all > %t/deps.db
5+
// RUN: -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
66
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
77

88
// Check that there are two separate module hashes, one for each working dir.

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ static void ParseArgs(int argc, char **argv) {
167167
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
168168
.Case("vfs", ScanningOptimizations::VFS)
169169
.Case("canonicalize-macros", ScanningOptimizations::Macros)
170+
.Case("ignore-current-working-dir",
171+
ScanningOptimizations::IgnoreCWD)
170172
.Case("all", ScanningOptimizations::All)
171173
.Default(std::nullopt);
172174
if (!Optimization) {

0 commit comments

Comments
 (0)