Skip to content

Commit caca3cd

Browse files
qiongsiwu authored and cyndyishida committed
[clang module] Current Working Directory Pruning (llvm#124786)
When computing the context hash, `clang` always includes the compiler's working directory. As a result, when the only difference between two compilations is the working directory, different module variants are generated. These variants are redundant. This PR implements an optimization that ignores the working directory when computing the context hash, whenever it is safe to do so. Specifically, `clang` checks whether it is safe to ignore the working directory in `isSafeToIgnoreCWD`. The check goes through the compile command options to see if any of the specified paths are relative. The definition of a relative path used here is that the input path is not empty and `llvm::sys::path::is_absolute` returns false for it. If none of the examined paths is relative, `clang` considers it safe to ignore the current working directory and does not include it when computing the context hash. (cherry picked from commit 54acda2)
1 parent 496a44a commit caca3cd

File tree

5 files changed

+290
-10
lines changed

5 files changed

+290
-10
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,18 @@ enum class ScanningOptimizations {
7171
HeaderSearch = 1,
7272

7373
/// Remove warnings from system modules.
74-
SystemWarnings = 2,
74+
SystemWarnings = (1 << 1),
7575

7676
/// Remove unused -ivfsoverlay arguments.
77-
VFS = 4,
77+
VFS = (1 << 2),
7878

7979
/// Canonicalize -D and -U options.
80-
Macros = 8,
80+
Macros = (1 << 3),
8181

82-
DSS_LAST_BITMASK_ENUM(Macros),
82+
/// Ignore the compiler's working directory if it is safe.
83+
IgnoreCWD = (1 << 4),
84+
85+
DSS_LAST_BITMASK_ENUM(IgnoreCWD),
8386
Default = All,
8487
FullIncludeTreeIrrelevant = HeaderSearch | VFS,
8588
};

clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -423,9 +423,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
423423
}
424424
}
425425

426+
static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
427+
// Check if the command line input uses relative paths.
428+
// It is not safe to ignore the current working directory if any of the
429+
// command line inputs use relative paths.
430+
#define IF_RELATIVE_RETURN_FALSE(PATH) \
431+
do { \
432+
if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \
433+
return false; \
434+
} while (0)
435+
436+
#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \
437+
do { \
438+
if (llvm::any_of(PATHS, [](const auto &P) { \
439+
return !P.empty() && !llvm::sys::path::is_absolute(P); \
440+
})) \
441+
return false; \
442+
} while (0)
443+
444+
// Header search paths.
445+
const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
446+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
447+
for (auto &Entry : HeaderSearchOpts.UserEntries)
448+
if (Entry.IgnoreSysRoot)
449+
IF_RELATIVE_RETURN_FALSE(Entry.Path);
450+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
451+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
452+
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
453+
for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
454+
E = HeaderSearchOpts.PrebuiltModuleFiles.end();
455+
I != E;) {
456+
auto Current = I++;
457+
IF_RELATIVE_RETURN_FALSE(Current->second);
458+
}
459+
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
460+
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);
461+
462+
// Preprocessor options.
463+
const auto &PPOpts = CI.getPreprocessorOpts();
464+
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
465+
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
466+
IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);
467+
468+
// Frontend options.
469+
const auto &FrontendOpts = CI.getFrontendOpts();
470+
for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
471+
if (Input.isBuffer())
472+
continue; // FIXME: Can this happen when parsing command-line?
473+
474+
IF_RELATIVE_RETURN_FALSE(Input.getFile());
475+
}
476+
IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
477+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
478+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
479+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
480+
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
481+
IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
482+
IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);
483+
484+
// Filesystem options.
485+
const auto &FileSystemOpts = CI.getFileSystemOpts();
486+
IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);
487+
488+
// Codegen options.
489+
const auto &CodeGenOpts = CI.getCodeGenOpts();
490+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
491+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);
492+
493+
// Sanitizer options.
494+
IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);
495+
496+
// Coverage mappings.
497+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
498+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
499+
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);
500+
501+
// Dependency output options.
502+
for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
503+
IF_RELATIVE_RETURN_FALSE(ExtraDep.first);
504+
505+
return true;
506+
}
507+
426508
static std::string getModuleContextHash(const ModuleDeps &MD,
427509
const CowCompilerInvocation &CI,
428-
bool EagerLoadModules,
510+
bool EagerLoadModules, bool IgnoreCWD,
429511
llvm::vfs::FileSystem &VFS) {
430512
llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
431513
HashBuilder;
@@ -452,12 +534,14 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
452534
HashBuilder.add(getClangFullRepositoryVersion());
453535
HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
454536
llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
455-
if (CWD)
537+
auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
538+
if (CWD && !IgnoreCWD)
456539
HashBuilder.add(*CWD);
540+
else
541+
FSOpts.WorkingDir.clear();
457542

458543
// Save and restore options that should not affect the hash, e.g. the exact
459544
// contents of input files, or prefix mappings.
460-
auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
461545
auto &FEOpts = const_cast<FrontendOptions &>(CI.getFrontendOpts());
462546
auto &CASOpts = const_cast<CASOptions &>(CI.getCASOpts());
463547
llvm::SaveAndRestore RestoreCASFSRootID(FSOpts.CASFileSystemRootID, {});
@@ -511,8 +595,11 @@ static void checkCompileCacheKeyMatch(cas::ObjectStore &CAS,
511595

512596
void ModuleDepCollector::associateWithContextHash(
513597
const CowCompilerInvocation &CI, ModuleDeps &Deps) {
514-
Deps.ID.ContextHash = getModuleContextHash(
515-
Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
598+
bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
599+
isSafeToIgnoreCWD(CI);
600+
Deps.ID.ContextHash =
601+
getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
602+
ScanInstance.getVirtualFileSystem());
516603
bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
517604
(void)Inserted;
518605
assert(Inserted && "duplicate module mapping");
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
// Test current directory pruning when computing the context hash.
2+
3+
// REQUIRES: shell
4+
5+
// RUN: rm -rf %t
6+
// RUN: split-file %s %t
7+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
8+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
9+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json
10+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json
11+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json
12+
// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
13+
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
14+
// It is not a typo to use cdb1.json for result2. We intend to use the same
15+
// compilation database, but different clang-scan-deps optimize-args options.
16+
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
17+
// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json
18+
// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json
19+
// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json
20+
// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
21+
// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
22+
// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH
23+
// RUN: cat %t/result0.json %t/result5.json | FileCheck %s
24+
25+
//--- cdb0.json.in
26+
[{
27+
"directory": "DIR",
28+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
29+
"file": "DIR/tu.c"
30+
}]
31+
32+
//--- cdb1.json.in
33+
[{
34+
"directory": "DIR/a",
35+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
36+
"file": "DIR/tu.c"
37+
}]
38+
39+
// cdb2 is skipped because we reuse cdb1.
40+
41+
//--- cdb3.json.in
42+
[{
43+
"directory": "DIR",
44+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
45+
"file": "DIR/tu.c"
46+
}]
47+
48+
//--- cdb4.json.in
49+
[{
50+
"directory": "DIR/a/",
51+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
52+
"file": "DIR/tu.c"
53+
}]
54+
55+
//--- cdb5.json.in
56+
[{
57+
"directory": "DIR",
58+
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o",
59+
"file": "DIR/tu.c"
60+
}]
61+
62+
//--- include/module.modulemap
63+
module mod {
64+
header "mod.h"
65+
}
66+
67+
//--- include/mod.h
68+
69+
//--- tu.c
70+
#include "mod.h"
71+
72+
// Check that result0 and result1/result5 compute the same hash with
73+
// optimization on. The only difference between result0 and result1/result5 is
74+
// the compiler's working directory.
75+
// CHECK: {
76+
// CHECK-NEXT: "modules": [
77+
// CHECK-NEXT: {
78+
// CHECK-NEXT: "clang-module-deps": [],
79+
// CHECK: "context-hash": "[[HASH:.*]]",
80+
// CHECK: }
81+
// CHECK: "translation-units": [
82+
// CHECK: {
83+
// CHECK: "commands": [
84+
// CHECK: {
85+
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
86+
// CHECK-NEXT: "clang-module-deps": [
87+
// CHECK-NEXT: {
88+
// CHECK-NEXT: "context-hash": "[[HASH]]",
89+
// CHECK-NEXT: "module-name": "mod"
90+
// CHECK: }
91+
// CHECK: ],
92+
// CHECK: {
93+
// CHECK-NEXT: "modules": [
94+
// CHECK-NEXT: {
95+
// CHECK-NEXT: "clang-module-deps": [],
96+
// CHECK: "context-hash": "[[HASH]]",
97+
// CHECK: }
98+
// CHECK: "translation-units": [
99+
// CHECK: {
100+
// CHECK: "commands": [
101+
// CHECK: {
102+
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
103+
// CHECK-NEXT: "clang-module-deps": [
104+
// CHECK-NEXT: {
105+
// CHECK-NEXT: "context-hash": "[[HASH]]",
106+
// CHECK-NEXT: "module-name": "mod"
107+
// CHECK: }
108+
// CHECK: ],
109+
110+
// Check that result0 and result2 compute different hashes because
111+
// the working directory optimization is turned off for result2.
112+
// SKIPOPT: {
113+
// SKIPOPT-NEXT: "modules": [
114+
// SKIPOPT-NEXT: {
115+
// SKIPOPT-NEXT: "clang-module-deps": [],
116+
// SKIPOPT: "context-hash": "[[HASH0:.*]]",
117+
// SKIPOPT: }
118+
// SKIPOPT: "translation-units": [
119+
// SKIPOPT: {
120+
// SKIPOPT: "commands": [
121+
// SKIPOPT: {
122+
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
123+
// SKIPOPT-NEXT: "clang-module-deps": [
124+
// SKIPOPT-NEXT: {
125+
// SKIPOPT-NEXT: "context-hash": "[[HASH0]]",
126+
// SKIPOPT-NEXT: "module-name": "mod"
127+
// SKIPOPT: }
128+
// SKIPOPT: ],
129+
// SKIPOPT: {
130+
// SKIPOPT-NEXT: "modules": [
131+
// SKIPOPT-NEXT: {
132+
// SKIPOPT-NEXT: "clang-module-deps": [],
133+
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
134+
// SKIPOPT: "context-hash": "[[HASH2:.*]]",
135+
// SKIPOPT: }
136+
// SKIPOPT: "translation-units": [
137+
// SKIPOPT: {
138+
// SKIPOPT: "commands": [
139+
// SKIPOPT: {
140+
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
141+
// SKIPOPT-NEXT: "clang-module-deps": [
142+
// SKIPOPT-NEXT: {
143+
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
144+
// SKIPOPT-NEXT: "context-hash": "[[HASH2]]"
145+
// SKIPOPT-NEXT: "module-name": "mod"
146+
// SKIPOPT: }
147+
// SKIPOPT: ],
148+
149+
// Check that result3 and result4 contain different hashes because
150+
// both have the same relative path as a command line input, and
151+
// they are produced using different compiler working directories.
152+
// RELPATH: {
153+
// RELPATH-NEXT: "modules": [
154+
// RELPATH-NEXT: {
155+
// RELPATH-NEXT: "clang-module-deps": [],
156+
// RELPATH: "context-hash": "[[HASH3:.*]]",
157+
// RELPATH: }
158+
// RELPATH: "translation-units": [
159+
// RELPATH: {
160+
// RELPATH: "commands": [
161+
// RELPATH: {
162+
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
163+
// RELPATH-NEXT: "clang-module-deps": [
164+
// RELPATH-NEXT: {
165+
// RELPATH-NEXT: "context-hash": "[[HASH3]]",
166+
// RELPATH-NEXT: "module-name": "mod"
167+
// RELPATH: }
168+
// RELPATH: ],
169+
// RELPATH: {
170+
// RELPATH-NEXT: "modules": [
171+
// RELPATH-NEXT: {
172+
// RELPATH-NEXT: "clang-module-deps": [],
173+
// RELPATH-NOT: "context-hash": "[[HASH3]]",
174+
// RELPATH: "context-hash": "[[HASH4:.*]]",
175+
// RELPATH: }
176+
// RELPATH: "translation-units": [
177+
// RELPATH: {
178+
// RELPATH: "commands": [
179+
// RELPATH: {
180+
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
181+
// RELPATH-NEXT: "clang-module-deps": [
182+
// RELPATH-NEXT: {
183+
// RELPATH-NOT: "context-hash": "[[HASH3]]",
184+
// RELPATH-NEXT: "context-hash": "[[HASH4]]"
185+
// RELPATH-NEXT: "module-name": "mod"
186+
// RELPATH: }
187+
// RELPATH: ],
188+

clang/test/ClangScanDeps/working-dir.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: split-file %s %t
33
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
44
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
5-
// RUN: -j 1 -format experimental-full --optimize-args=all > %t/deps.db
5+
// RUN: -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
66
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
77

88
// Check that there are two separate modules hashes. One for each working dir.

clang/tools/clang-scan-deps/ClangScanDeps.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ static void ParseArgs(int argc, char **argv) {
183183
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
184184
.Case("vfs", ScanningOptimizations::VFS)
185185
.Case("canonicalize-macros", ScanningOptimizations::Macros)
186+
.Case("ignore-current-working-dir",
187+
ScanningOptimizations::IgnoreCWD)
186188
.Case("all", ScanningOptimizations::All)
187189
.Default(std::nullopt);
188190
if (!Optimization) {

0 commit comments

Comments
 (0)