Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 7ec3b8d

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the svh is preserved to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 1065559 commit 7ec3b8d

File tree

8 files changed

+101
-28
lines changed

8 files changed

+101
-28
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,11 @@ fn link_rlib<'a>(
316316

317317
let trailing_metadata = match flavor {
318318
RlibFlavor::Normal => {
319-
let (metadata, metadata_position) =
320-
create_wrapper_file(sess, b".rmeta".to_vec(), codegen_results.metadata.raw_data());
319+
let (metadata, metadata_position) = create_wrapper_file(
320+
sess,
321+
b".rmeta".to_vec(),
322+
codegen_results.metadata.maybe_reference(),
323+
);
321324
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
322325
match metadata_position {
323326
MetadataPosition::First => {

compiler/rustc_codegen_ssa/src/back/metadata.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -528,8 +528,8 @@ pub fn create_compressed_metadata_file(
528528
symbol_name: &str,
529529
) -> Vec<u8> {
530530
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
531-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
532-
packed_metadata.extend(metadata.raw_data());
531+
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
532+
packed_metadata.extend(metadata.maybe_reference());
533533

534534
let Some(mut file) = create_object_file(sess) else {
535535
return packed_metadata.to_vec();

compiler/rustc_interface/src/tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ fn test_unstable_options_tracking_hash() {
702702
untracked!(shell_argfiles, true);
703703
untracked!(span_debug, true);
704704
untracked!(span_free_formats, true);
705+
untracked!(split_metadata, true);
705706
untracked!(temps_dir, Some(String::from("abc")));
706707
untracked!(threads, 99);
707708
untracked!(time_llvm_passes, true);

compiler/rustc_metadata/src/fs.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");
5455

5556
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5657
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6061
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6162
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6263
});
64+
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
65+
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
66+
});
6367
}
6468
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
69+
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
6670
}
6771
};
6872

@@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100104

101105
// Load metadata back to memory: codegen may need to include it in object files.
102106
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
107+
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
108+
.unwrap_or_else(|err| {
109+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
110+
});
106111

107112
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108113

compiler/rustc_metadata/src/locator.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,11 @@ impl<'a> CrateLocator<'a> {
576576
) {
577577
Ok(blob) => {
578578
if let Some(h) = self.crate_matches(&blob, &lib) {
579+
if blob.get_header().is_reference {
580+
if slot.is_none() {
581+
todo!("return error");
582+
}
583+
}
579584
(h, blob)
580585
} else {
581586
info!("metadata mismatch");

compiler/rustc_metadata/src/rmeta/encoder.rs

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
692692
triple: tcx.sess.opts.target_triple.clone(),
693693
hash: tcx.crate_hash(LOCAL_CRATE),
694694
is_proc_macro_crate: proc_macro_data.is_some(),
695+
is_reference: false,
695696
},
696697
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
697698
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2133,42 +2134,61 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
21332134
// generated regardless of trailing bytes that end up in it.
21342135

21352136
pub struct EncodedMetadata {
2136-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2137-
mmap: Option<Mmap>,
2137+
// The declaration order matters because `full_mmap` should be dropped
2138+
// before `_temp_dir`.
2139+
full_mmap: Option<Mmap>,
2140+
reference: Option<Vec<u8>>,
21382141
// We need to carry MaybeTempDir to avoid deleting the temporary
21392142
// directory while accessing the Mmap.
21402143
_temp_dir: Option<MaybeTempDir>,
21412144
}
21422145

21432146
impl EncodedMetadata {
21442147
#[inline]
2145-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2148+
pub fn from_path(
2149+
path: PathBuf,
2150+
reference_path: PathBuf,
2151+
temp_dir: Option<MaybeTempDir>,
2152+
) -> std::io::Result<Self> {
21462153
let file = std::fs::File::open(&path)?;
21472154
let file_metadata = file.metadata()?;
21482155
if file_metadata.len() == 0 {
2149-
return Ok(Self { mmap: None, _temp_dir: None });
2156+
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
21502157
}
2151-
let mmap = unsafe { Some(Mmap::map(file)?) };
2152-
Ok(Self { mmap, _temp_dir: temp_dir })
2158+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2159+
2160+
let reference = std::fs::read(reference_path)?;
2161+
let reference = if reference.is_empty() { None } else { Some(reference) };
2162+
2163+
Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
2164+
}
2165+
2166+
#[inline]
2167+
pub fn full(&self) -> &[u8] {
2168+
&self.full_mmap.as_deref().unwrap_or_default()
21532169
}
21542170

21552171
#[inline]
2156-
pub fn raw_data(&self) -> &[u8] {
2157-
self.mmap.as_deref().unwrap_or_default()
2172+
pub fn maybe_reference(&self) -> &[u8] {
2173+
self.reference.as_deref().unwrap_or(self.full())
21582174
}
21592175
}
21602176

21612177
impl<S: Encoder> Encodable<S> for EncodedMetadata {
21622178
fn encode(&self, s: &mut S) {
2163-
let slice = self.raw_data();
2179+
self.reference.encode(s);
2180+
2181+
let slice = self.full();
21642182
slice.encode(s)
21652183
}
21662184
}
21672185

21682186
impl<D: Decoder> Decodable<D> for EncodedMetadata {
21692187
fn decode(d: &mut D) -> Self {
2188+
let reference = <Option<Vec<u8>>>::decode(d);
2189+
21702190
let len = d.read_usize();
2171-
let mmap = if len > 0 {
2191+
let full_mmap = if len > 0 {
21722192
let mut mmap = MmapMut::map_anon(len).unwrap();
21732193
for _ in 0..len {
21742194
(&mut mmap[..]).write(&[d.read_u8()]).unwrap();
@@ -2179,11 +2199,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
21792199
None
21802200
};
21812201

2182-
Self { mmap, _temp_dir: None }
2202+
Self { full_mmap, reference, _temp_dir: None }
21832203
}
21842204
}
21852205

2186-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2206+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: &Path) {
21872207
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
21882208

21892209
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2197,6 +2217,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
21972217
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
21982218
}
21992219

2220+
with_encode_metadata_header(tcx, path, |ecx| {
2221+
// Encode all the entries and extra information in the crate,
2222+
// culminating in the `CrateRoot` which points to all of it.
2223+
let root = ecx.encode_crate_root();
2224+
2225+
// Flush buffer to ensure backing file has the correct size.
2226+
ecx.opaque.flush();
2227+
// Record metadata size for self-profiling
2228+
tcx.prof.artifact_size(
2229+
"crate_metadata",
2230+
"crate_metadata",
2231+
ecx.opaque.file().metadata().unwrap().len(),
2232+
);
2233+
2234+
root.position.get()
2235+
});
2236+
2237+
if tcx.sess.opts.unstable_opts.split_metadata
2238+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
2239+
{
2240+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2241+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2242+
name: tcx.crate_name(LOCAL_CRATE),
2243+
triple: tcx.sess.opts.target_triple.clone(),
2244+
hash: tcx.crate_hash(LOCAL_CRATE),
2245+
is_proc_macro_crate: false,
2246+
is_reference: true,
2247+
});
2248+
header.position.get()
2249+
});
2250+
}
2251+
}
2252+
2253+
pub fn with_encode_metadata_header(
2254+
tcx: TyCtxt<'_>,
2255+
path: &Path,
2256+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2257+
) {
22002258
let mut encoder = opaque::FileEncoder::new(path)
22012259
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22022260
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2231,9 +2289,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22312289
// Encode the rustc version string in a predictable location.
22322290
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
22332291

2234-
// Encode all the entries and extra information in the crate,
2235-
// culminating in the `CrateRoot` which points to all of it.
2236-
let root = ecx.encode_crate_root();
2292+
let root_position = f(&mut ecx);
22372293

22382294
// Make sure we report any errors from writing to the file.
22392295
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2243,12 +2299,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22432299
}
22442300

22452301
let file = ecx.opaque.file();
2246-
if let Err(err) = encode_root_position(file, root.position.get()) {
2302+
if let Err(err) = encode_root_position(file, root_position) {
22472303
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
22482304
}
2249-
2250-
// Record metadata size for self-profiling
2251-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
22522305
}
22532306

22542307
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

compiler/rustc_metadata/src/rmeta/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,10 @@ pub(crate) struct CrateHeader {
222222
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
223223
/// time ProcMacroData changes.
224224
pub(crate) is_proc_macro_crate: bool,
225+
/// Whether this header is a reference to a separate rmeta file.
226+
///
227+
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
228+
pub(crate) is_reference: bool,
225229
}
226230

227231
/// Serialized `.rmeta` data for a crate.

compiler/rustc_session/src/options.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,6 +1892,8 @@ written to standard error output)"),
18921892
by the linker"),
18931893
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
18941894
"enable LTO unit splitting (default: no)"),
1895+
split_metadata: bool = (false, parse_bool, [TRACKED],
1896+
"split metadata out of libraries into .rmeta files"),
18951897
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
18961898
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
18971899
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]

0 commit comments

Comments
 (0)