Skip to content

Commit b3c5a68

Browse files
committed
[Serialization] Reduce file size by splitting dep dirnames from basenames
Dependency tracking for cached compiled modules (compiled from swiftinterfaces) can lead to a high percentage of the module being SDK-relative paths when -track-system-dependencies is on. Cut down on this by storing directory names in a separate record that gets referenced from each file dependency. (Since a lot of per-file dependencies are header files in a common directory, this is a win.) We can do something more clever in the future, but this is a reasonable start for, say, the overlays. rdar://problem/50449802
1 parent 9ba1d8a commit b3c5a68

File tree

4 files changed

+48
-4
lines changed

4 files changed

+48
-4
lines changed

include/swift/Serialization/ModuleFormat.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ const uint16_t SWIFTMODULE_VERSION_MAJOR = 0;
5252
/// describe what change you made. The content of this comment isn't important;
5353
/// it just ensures a conflict if two people change the module format.
5454
/// Don't worry about adhering to the 80-column limit for this line.
55-
const uint16_t SWIFTMODULE_VERSION_MINOR = 488; // assign_by_delegate
55+
const uint16_t SWIFTMODULE_VERSION_MINOR = 489; // dependency directories
5656

5757
using DeclIDField = BCFixed<31>;
5858

@@ -644,6 +644,7 @@ namespace input_block {
644644
MODULE_FLAGS, // [unused]
645645
SEARCH_PATH,
646646
FILE_DEPENDENCY,
647+
DEPENDENCY_DIRECTORY,
647648
PARSEABLE_INTERFACE_PATH
648649
};
649650

@@ -689,9 +690,15 @@ namespace input_block {
689690
FileModTimeOrContentHashField, // mtime or content hash (for validation)
690691
BCFixed<1>, // are we reading mtime (0) or hash (1)?
691692
BCFixed<1>, // SDK-relative?
693+
BCVBR<8>, // subpath-relative index (0=none)
692694
BCBlob // path
693695
>;
694696

697+
/// A directory path shared by one or more FILE_DEPENDENCY records.
///
/// FILE_DEPENDENCY records refer to these by 1-based index, counted in the
/// order the DEPENDENCY_DIRECTORY records appear in the input block (an index
/// of 0 means the dependency's path is not relative to any directory record).
/// A directory record is therefore written before the first file dependency
/// that refers to it.
using DependencyDirectoryLayout = BCRecordLayout<
698+
DEPENDENCY_DIRECTORY,
699+
BCBlob // directory path
700+
>;
701+
695702
using ParseableInterfaceLayout = BCRecordLayout<
696703
PARSEABLE_INTERFACE_PATH,
697704
BCBlob // file path

lib/Serialization/ModuleFile.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ validateControlBlock(llvm::BitstreamCursor &cursor,
239239
static bool validateInputBlock(
240240
llvm::BitstreamCursor &cursor, SmallVectorImpl<uint64_t> &scratch,
241241
SmallVectorImpl<SerializationOptions::FileDependency> &dependencies) {
242+
SmallVector<StringRef, 4> dependencyDirectories;
243+
SmallString<256> dependencyFullPathBuffer;
244+
242245
while (!cursor.AtEndOfStream()) {
243246
auto entry = cursor.advance();
244247
if (entry.Kind == llvm::BitstreamEntry::EndBlock)
@@ -255,17 +258,30 @@ static bool validateInputBlock(
255258
bool isHashBased = scratch[2] != 0;
256259
bool isSDKRelative = scratch[3] != 0;
257260

261+
StringRef path = blobData;
262+
size_t directoryIndex = scratch[4];
263+
if (directoryIndex != 0) {
264+
if (directoryIndex > dependencyDirectories.size())
265+
return true;
266+
dependencyFullPathBuffer = dependencyDirectories[directoryIndex-1];
267+
llvm::sys::path::append(dependencyFullPathBuffer, blobData);
268+
path = dependencyFullPathBuffer;
269+
}
270+
258271
if (isHashBased) {
259272
dependencies.push_back(
260273
SerializationOptions::FileDependency::hashBased(
261-
blobData, isSDKRelative, scratch[0], scratch[1]));
274+
path, isSDKRelative, scratch[0], scratch[1]));
262275
} else {
263276
dependencies.push_back(
264277
SerializationOptions::FileDependency::modTimeBased(
265-
blobData, isSDKRelative, scratch[0], scratch[1]));
278+
path, isSDKRelative, scratch[0], scratch[1]));
266279
}
267280
break;
268281
}
282+
case input_block::DEPENDENCY_DIRECTORY:
283+
dependencyDirectories.push_back(blobData);
284+
break;
269285
default:
270286
// Unknown metadata record, possibly for use by a future version of the
271287
// module format.

lib/Serialization/Serialization.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,7 @@ void Serializer::writeBlockInfoBlock() {
834834
BLOCK_RECORD(input_block, MODULE_FLAGS);
835835
BLOCK_RECORD(input_block, SEARCH_PATH);
836836
BLOCK_RECORD(input_block, FILE_DEPENDENCY);
837+
BLOCK_RECORD(input_block, DEPENDENCY_DIRECTORY);
837838
BLOCK_RECORD(input_block, PARSEABLE_INTERFACE_PATH);
838839

839840
BLOCK(DECLS_AND_TYPES_BLOCK);
@@ -1061,6 +1062,7 @@ void Serializer::writeInputBlock(const SerializationOptions &options) {
10611062
input_block::ImportedHeaderContentsLayout ImportedHeaderContents(Out);
10621063
input_block::SearchPathLayout SearchPath(Out);
10631064
input_block::FileDependencyLayout FileDependency(Out);
1065+
input_block::DependencyDirectoryLayout DependencyDirectory(Out);
10641066
input_block::ParseableInterfaceLayout ParseableInterface(Out);
10651067

10661068
if (options.SerializeOptionsForDebugging) {
@@ -1074,13 +1076,24 @@ void Serializer::writeInputBlock(const SerializationOptions &options) {
10741076
SearchPath.emit(ScratchRecord, /*framework=*/false, /*system=*/false, path);
10751077
}
10761078

1079+
// Note: We're not using StringMap here because we don't need to own the
1080+
// strings.
1081+
llvm::DenseMap<StringRef, unsigned> dependencyDirectories;
10771082
for (auto const &dep : options.Dependencies) {
1083+
StringRef directoryName = llvm::sys::path::parent_path(dep.getPath());
1084+
unsigned &dependencyDirectoryIndex = dependencyDirectories[directoryName];
1085+
if (!dependencyDirectoryIndex) {
1086+
// This name must be newly-added. Give it a new ID (and skip 0).
1087+
dependencyDirectoryIndex = dependencyDirectories.size();
1088+
DependencyDirectory.emit(ScratchRecord, directoryName);
1089+
}
10781090
FileDependency.emit(ScratchRecord,
10791091
dep.getSize(),
10801092
getRawModTimeOrHash(dep),
10811093
dep.isHashBased(),
10821094
dep.isSDKRelative(),
1083-
dep.getPath());
1095+
dependencyDirectoryIndex,
1096+
llvm::sys::path::filename(dep.getPath()));
10841097
}
10851098

10861099
if (!options.ParseableInterface.empty())

test/ParseableInterface/ModuleCache/SystemDependencies.swiftinterface

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
// RUN: echo 'import SystemDependencies' | %target-swift-frontend -typecheck - -I %S -sdk %t/mock-sdk -module-cache-path %t/MCP-system -emit-dependencies-path %t/dummy.d -track-system-dependencies
2323
// RUN: test -f %t/MCP-system/SystemDependencies*.swiftmodule
2424
// RUN: %FileCheck -check-prefix CHECK %s < %t/dummy.d
25+
// RUN: llvm-bcanalyzer -dump %t/MCP-system/SystemDependencies*.swiftmodule | %FileCheck -check-prefix CHECK-DUMP %s
2526
// RUN: %{python} %S/Inputs/make-old.py %t/MCP-system/SystemDependencies*.swiftmodule
2627

2728
// Baseline: running the same command again doesn't rebuild the cached module.
@@ -41,6 +42,13 @@
4142

4243
// NEGATIVE-NOT: SomeCModule.h
4344
// CHECK: SomeCModule.h
45+
46+
// CHECK-DUMP-NOT: usr/include
47+
// CHECK-DUMP: DEPENDENCY_DIRECTORY{{.+}}'usr/include'
48+
// CHECK-DUMP-NEXT: FILE_DEPENDENCY{{.+}}'module.modulemap'
49+
// CHECK-DUMP-NEXT: FILE_DEPENDENCY{{.+}}'SomeCModule.h'
50+
// CHECK-DUMP-NOT: usr/include
51+
4452
// MODULECACHE-COUNT-2: SystemDependencies-{{[^ ]+}}.swiftmodule
4553

4654
import SomeCModule

0 commit comments

Comments
 (0)