Skip to content

Preliminary work for incremental ThinLTO (CGU name edition) #53356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 95 additions & 2 deletions src/librustc/mir/mono.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use hir::def_id::DefId;
use hir::def_id::{DefId, CrateNum};
use syntax::ast::NodeId;
use syntax::symbol::InternedString;
use syntax::symbol::{Symbol, InternedString};
use ty::{Instance, TyCtxt};
use util::nodemap::FxHashMap;
use rustc_data_structures::base_n;
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
StableHasher};
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
use std::fmt;
use std::hash::Hash;

#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
Expand Down Expand Up @@ -239,3 +240,95 @@ impl Stats {
self.fn_stats.extend(stats.fn_stats);
}
}

/// Builds names for codegen units (CGUs).
///
/// Holds the type context used to look up crate names and crate
/// disambiguators, together with a per-crate cache of the
/// `<crate-name>.<crate-disambiguator>` prefix so that it is only
/// formatted once per crate.
pub struct CodegenUnitNameBuilder<'a, 'gcx: 'tcx, 'tcx: 'a> {
// Type context queried for `crate_name` and `crate_disambiguator`.
tcx: TyCtxt<'a, 'gcx, 'tcx>,
// Cached name prefix per crate, filled in lazily when a name is built.
cache: FxHashMap<CrateNum, String>,
}

impl<'a, 'gcx: 'tcx, 'tcx: 'a> CodegenUnitNameBuilder<'a, 'gcx, 'tcx> {

    /// Creates a name builder for `tcx` with an empty crate-prefix cache.
    pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>) -> Self {
        CodegenUnitNameBuilder {
            tcx,
            cache: FxHashMap(),
        }
    }

    /// Builds a CGU name from a crate, a list of components and an optional
    /// special suffix.
    ///
    /// CGU names should fulfill the following requirements:
    /// - They should be able to act as a file name on any kind of file system
    /// - They should not collide with other CGU names, even for different versions
    ///   of the same crate.
    ///
    /// Consequently, we don't use special characters except for '.' and '-' and we
    /// prefix each name with the crate-name and crate-disambiguator.
    ///
    /// This function will build CGU names of the form:
    ///
    /// ```text
    /// <crate-name>.<crate-disambiguator>(-<component>)*[.<special-suffix>]
    /// ```
    ///
    /// The '.' before `<special-suffix>` makes sure that names with a special
    /// suffix can never collide with a name built out of regular Rust
    /// identifiers (e.g. module paths).
    ///
    /// If the `human_readable_cgu_names` debugging option is set, the name is
    /// returned in the form shown above. Otherwise it is passed through
    /// `CodegenUnit::mangle_name()` first.
    pub fn build_cgu_name<I, C, S>(&mut self,
                                   cnum: CrateNum,
                                   components: I,
                                   special_suffix: Option<S>)
                                   -> InternedString
        where I: IntoIterator<Item=C>,
              C: fmt::Display,
              S: fmt::Display,
    {
        let readable_name = self.build_cgu_name_no_mangle(cnum,
                                                          components,
                                                          special_suffix);

        if self.tcx.sess.opts.debugging_opts.human_readable_cgu_names {
            return readable_name;
        }

        let mangled_name = CodegenUnit::mangle_name(&readable_name.as_str()[..]);
        Symbol::intern(&mangled_name).as_interned_str()
    }

    /// Same as `CodegenUnitNameBuilder::build_cgu_name()` but will never
    /// mangle the resulting name.
    pub fn build_cgu_name_no_mangle<I, C, S>(&mut self,
                                             cnum: CrateNum,
                                             components: I,
                                             special_suffix: Option<S>)
                                             -> InternedString
        where I: IntoIterator<Item=C>,
              C: fmt::Display,
              S: fmt::Display,
    {
        use std::fmt::Write;

        // Start out with the crate name and disambiguator. The prefix is
        // computed at most once per crate and then served from the cache.
        // `tcx` is copied into a local so the closure does not have to borrow
        // `self` while `self.cache` is mutably borrowed.
        let tcx = self.tcx;
        let crate_prefix = self.cache.entry(cnum).or_insert_with(|| {
            let crate_disambiguator = tcx.crate_disambiguator(cnum).to_string();
            // Using a shortened disambiguator of about 40 bits. The checked
            // `get` falls back to the full string instead of panicking should
            // the encoded disambiguator ever be shorter than 8 bytes.
            let short_disambiguator = crate_disambiguator
                .get(..8)
                .unwrap_or(crate_disambiguator.as_str());
            format!("{}.{}", tcx.crate_name(cnum), short_disambiguator)
        });

        let mut cgu_name = String::with_capacity(64);
        cgu_name.push_str(crate_prefix.as_str());

        // Add the components. Writing into a `String` cannot fail, so the
        // `unwrap()` calls below only guard against a broken `Display` impl.
        for component in components {
            write!(cgu_name, "-{}", component).unwrap();
        }

        if let Some(special_suffix) = special_suffix {
            // We add a dot in here so it cannot clash with anything in a regular
            // Rust identifier
            write!(cgu_name, ".{}", special_suffix).unwrap();
        }

        Symbol::intern(&cgu_name[..]).as_interned_str()
    }
}
10 changes: 10 additions & 0 deletions src/librustc/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use util::nodemap::{FxHashMap, FxHashSet};
use util::common::{duration_to_secs_str, ErrorReported};
use util::common::ProfileQueriesMsg;

use rustc_data_structures::base_n;
use rustc_data_structures::sync::{self, Lrc, Lock, LockCell, OneThread, Once, RwLock};

use syntax::ast::NodeId;
Expand All @@ -48,6 +49,7 @@ use std;
use std::cell::{self, Cell, RefCell};
use std::collections::HashMap;
use std::env;
use std::fmt;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::time::Duration;
Expand Down Expand Up @@ -1221,6 +1223,14 @@ impl CrateDisambiguator {
}
}

impl fmt::Display for CrateDisambiguator {
    /// Writes the disambiguator as a single number encoded with
    /// `base_n::CASE_INSENSITIVE`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The two halves returned by `as_value()` are combined into one
        // 128-bit value: the first half occupies the low 64 bits, the second
        // half the high 64 bits.
        let (lo, hi) = self.0.as_value();
        let combined = ((hi as u128) << 64) | lo as u128;
        let encoded = base_n::encode(combined, base_n::CASE_INSENSITIVE);
        f.write_str(&encoded)
    }
}

impl From<Fingerprint> for CrateDisambiguator {
fn from(fingerprint: Fingerprint) -> CrateDisambiguator {
CrateDisambiguator(fingerprint)
Expand Down
7 changes: 0 additions & 7 deletions src/librustc_codegen_llvm/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ use std::process::{Output, Stdio};
use std::str;
use syntax::attr;

/// The LLVM module name containing crate-metadata. This includes a `.` on
/// purpose, so it cannot clash with the name of a user-defined module.
pub const METADATA_MODULE_NAME: &'static str = "crate.metadata";

// same as for metadata above, but for allocator shim
pub const ALLOCATOR_MODULE_NAME: &'static str = "crate.allocator";

pub use rustc_codegen_utils::link::{find_crate_name, filename_for_input, default_output_for_target,
invalid_output_for_target, build_link_meta, out_filename,
check_file_is_writeable};
Expand Down
11 changes: 5 additions & 6 deletions src/librustc_codegen_llvm/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ fn fat_lto(cgcx: &CodegenContext,
let llvm = module.llvm().expect("can't lto pre-codegened modules");
(&llvm.llcx, llvm.llmod())
};
info!("using {:?} as a base module", module.llmod_id);
info!("using {:?} as a base module", module.name);

// The linking steps below may produce errors and diagnostics within LLVM
// which we'd like to handle and print, so set up our diagnostic handlers
Expand All @@ -257,7 +257,7 @@ fn fat_lto(cgcx: &CodegenContext,
for module in modules {
let llvm = module.llvm().expect("can't lto pre-codegened modules");
let buffer = ModuleBuffer::new(llvm.llmod());
let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
let llmod_id = CString::new(&module.name[..]).unwrap();
serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
}

Expand Down Expand Up @@ -384,9 +384,9 @@ fn thin_lto(diag_handler: &Handler,
// the most expensive portion of this small bit of global
// analysis!
for (i, module) in modules.iter().enumerate() {
info!("local module: {} - {}", i, module.llmod_id);
info!("local module: {} - {}", i, module.name);
let llvm = module.llvm().expect("can't lto precodegened module");
let name = CString::new(module.llmod_id.clone()).unwrap();
let name = CString::new(module.name.clone()).unwrap();
let buffer = ThinBuffer::new(llvm.llmod());
thin_modules.push(llvm::ThinLTOModule {
identifier: name.as_ptr(),
Expand All @@ -395,7 +395,7 @@ fn thin_lto(diag_handler: &Handler,
});
thin_buffers.push(buffer);
module_names.push(name);
timeline.record(&module.llmod_id);
timeline.record(&module.name);
}

// FIXME: All upstream crates are deserialized internally in the
Expand Down Expand Up @@ -668,7 +668,6 @@ impl ThinModule {
llcx,
tm,
}),
llmod_id: self.name().to_string(),
name: self.name().to_string(),
kind: ModuleKind::Regular,
};
Expand Down
3 changes: 1 addition & 2 deletions src/librustc_codegen_llvm/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ unsafe fn codegen(cgcx: &CodegenContext,

if config.emit_bc_compressed {
let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
let data = bytecode::encode(&module.llmod_id, data);
let data = bytecode::encode(&module.name, data);
if let Err(e) = fs::write(&dst, data) {
diag_handler.err(&format!("failed to write bytecode: {}", e));
}
Expand Down Expand Up @@ -1338,7 +1338,6 @@ fn execute_work_item(cgcx: &CodegenContext,
assert_eq!(bytecode_compressed.is_some(), config.emit_bc_compressed);

Ok(WorkItemResult::Compiled(CompiledModule {
llmod_id: module.llmod_id.clone(),
name: module_name,
kind: ModuleKind::Regular,
pre_existing: true,
Expand Down
67 changes: 24 additions & 43 deletions src/librustc_codegen_llvm/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use metadata;
use rustc::hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
use rustc::middle::lang_items::StartFnLangItem;
use rustc::middle::weak_lang_items;
use rustc::mir::mono::{Linkage, Visibility, Stats};
use rustc::mir::mono::{Linkage, Visibility, Stats, CodegenUnitNameBuilder};
use rustc::middle::cstore::{EncodedMetadata};
use rustc::ty::{self, Ty, TyCtxt};
use rustc::ty::layout::{self, Align, TyLayout, LayoutOf};
Expand Down Expand Up @@ -742,19 +742,23 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,

let crate_hash = tcx.crate_hash(LOCAL_CRATE);
let link_meta = link::build_link_meta(crate_hash);
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);

// Codegen the metadata.
tcx.sess.profiler(|p| p.start_activity(ProfileCategory::Codegen));
let llmod_id = "metadata";
let metadata_llvm_module = ModuleLlvm::new(tcx.sess, llmod_id);

let metadata_cgu_name = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
&["crate"],
Some("metadata")).as_str()
.to_string();
let metadata_llvm_module = ModuleLlvm::new(tcx.sess, &metadata_cgu_name);
let metadata = time(tcx.sess, "write metadata", || {
write_metadata(tcx, &metadata_llvm_module, &link_meta)
});
tcx.sess.profiler(|p| p.end_activity(ProfileCategory::Codegen));

let metadata_module = ModuleCodegen {
name: link::METADATA_MODULE_NAME.to_string(),
llmod_id: llmod_id.to_string(),
name: metadata_cgu_name,
source: ModuleSource::Codegened(metadata_llvm_module),
kind: ModuleKind::Metadata,
};
Expand Down Expand Up @@ -833,20 +837,22 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let allocator_module = if any_dynamic_crate {
None
} else if let Some(kind) = *tcx.sess.allocator_kind.get() {
unsafe {
let llmod_id = "allocator";
let modules = ModuleLlvm::new(tcx.sess, llmod_id);
time(tcx.sess, "write allocator module", || {
let llmod_id = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
&["crate"],
Some("allocator")).as_str()
.to_string();
let modules = ModuleLlvm::new(tcx.sess, &llmod_id);
time(tcx.sess, "write allocator module", || {
unsafe {
allocator::codegen(tcx, &modules, kind)
});
}
});

Some(ModuleCodegen {
name: link::ALLOCATOR_MODULE_NAME.to_string(),
llmod_id: llmod_id.to_string(),
source: ModuleSource::Codegened(modules),
kind: ModuleKind::Allocator,
})
}
Some(ModuleCodegen {
name: llmod_id,
source: ModuleSource::Codegened(modules),
kind: ModuleKind::Allocator,
})
} else {
None
};
Expand Down Expand Up @@ -889,21 +895,10 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// succeed it means that none of the dependencies has changed
// and we can safely re-use.
if let Some(dep_node_index) = tcx.dep_graph.try_mark_green(tcx, dep_node) {
// Append ".rs" to LLVM module identifier.
//
// LLVM code generator emits a ".file filename" directive
// for ELF backends. Value of the "filename" is set as the
// LLVM module identifier. Due to a LLVM MC bug[1], LLVM
// crashes if the module identifier is same as other symbols
// such as a function name in the module.
// 1. http://llvm.org/bugs/show_bug.cgi?id=11479
let llmod_id = format!("{}.rs", cgu.name());

let module = ModuleCodegen {
name: cgu.name().to_string(),
source: ModuleSource::Preexisting(buf),
kind: ModuleKind::Regular,
llmod_id,
};
tcx.dep_graph.mark_loaded_from_cache(dep_node_index, true);
write::submit_codegened_module_to_llvm(tcx, module, 0);
Expand Down Expand Up @@ -1212,21 +1207,8 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
{
let cgu_name = cgu.name().to_string();

// Append ".rs" to LLVM module identifier.
//
// LLVM code generator emits a ".file filename" directive
// for ELF backends. Value of the "filename" is set as the
// LLVM module identifier. Due to a LLVM MC bug[1], LLVM
// crashes if the module identifier is same as other symbols
// such as a function name in the module.
// 1. http://llvm.org/bugs/show_bug.cgi?id=11479
let llmod_id = format!("{}-{}.rs",
cgu.name(),
tcx.crate_disambiguator(LOCAL_CRATE)
.to_fingerprint().to_hex());

// Instantiate monomorphizations without filling out definitions yet...
let llvm_module = ModuleLlvm::new(tcx.sess, &llmod_id);
let llvm_module = ModuleLlvm::new(tcx.sess, &cgu_name);
let stats = {
let cx = CodegenCx::new(tcx, cgu, &llvm_module);
let mono_items = cx.codegen_unit
Expand Down Expand Up @@ -1282,7 +1264,6 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
name: cgu_name,
source: ModuleSource::Codegened(llvm_module),
kind: ModuleKind::Regular,
llmod_id,
})
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/librustc_codegen_llvm/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ struct ModuleCodegen {
/// unique amongst **all** crates. Therefore, it should contain
/// something unique to this crate (e.g., a module path) as well
/// as the crate name and disambiguator.
/// We currently generate these names via CodegenUnitNameBuilder::build_cgu_name().
name: String,
llmod_id: String,
source: ModuleSource,
kind: ModuleKind,
}
Expand Down Expand Up @@ -317,7 +317,6 @@ impl ModuleCodegen {
};

CompiledModule {
llmod_id: self.llmod_id,
name: self.name.clone(),
kind: self.kind,
pre_existing,
Expand All @@ -331,7 +330,6 @@ impl ModuleCodegen {
#[derive(Debug)]
struct CompiledModule {
name: String,
llmod_id: String,
kind: ModuleKind,
pre_existing: bool,
object: Option<PathBuf>,
Expand Down
Loading