Auto merge of #36524 - michaelwoerister:trans-inline-only-on-demand, r=nikomatsakis
trans: Only instantiate #[inline] functions in codegen units referencing them This PR changes how `#[inline]` functions are translated. Before, there was one "master instance" of the function with `external` linkage and a number of on-demand instances with `available_externally` linkage in each codegen unit that referenced the function. This had two downsides: * Public functions marked with `#[inline]` would be present in machine code of libraries unnecessarily (see #36280 for an example) * LLVM would crash on `i686-pc-windows-msvc` due to what I suspect to be a bug in LLVM's Win32 exception handling code, because it doesn't like `available_externally` there (#36309). This PR changes the behavior, so that there is no master instance and only on-demand instances with `internal` linkage. The downside of this is potential code-bloat if LLVM does not completely inline away the `internal` instances because then there'd be N instances of the function instead of 1. However, this can only become a problem when using more than one codegen unit per crate. cc @rust-lang/compiler
This commit is contained in:
commit
5cc6c6b1b7
7 changed files with 49 additions and 120 deletions
|
@ -1421,21 +1421,7 @@ fn internalize_symbols<'a, 'tcx>(sess: &Session,
|
|||
.iter()
|
||||
.cloned()
|
||||
.filter(|trans_item|{
|
||||
let def_id = match *trans_item {
|
||||
TransItem::DropGlue(..) => {
|
||||
return false
|
||||
},
|
||||
TransItem::Fn(ref instance) => {
|
||||
instance.def
|
||||
}
|
||||
TransItem::Static(node_id) => {
|
||||
tcx.map.local_def_id(node_id)
|
||||
}
|
||||
};
|
||||
|
||||
trans_item.explicit_linkage(tcx).is_some() ||
|
||||
attr::contains_extern_indicator(tcx.sess.diagnostic(),
|
||||
&tcx.get_attrs(def_id))
|
||||
trans_item.explicit_linkage(tcx).is_some()
|
||||
})
|
||||
.map(|trans_item| symbol_map.get_or_compute(scx, trans_item))
|
||||
.collect();
|
||||
|
@ -1591,7 +1577,11 @@ pub fn filter_reachable_ids(tcx: TyCtxt, reachable: NodeSet) -> NodeSet {
|
|||
node: hir::ImplItemKind::Method(..), .. }) => {
|
||||
let def_id = tcx.map.local_def_id(id);
|
||||
let generics = tcx.lookup_generics(def_id);
|
||||
generics.parent_types == 0 && generics.types.is_empty()
|
||||
let attributes = tcx.get_attrs(def_id);
|
||||
(generics.parent_types == 0 && generics.types.is_empty()) &&
|
||||
// Functions marked with #[inline] are only ever translated
|
||||
// with "internal" linkage and are never exported.
|
||||
!attr::requests_inline(&attributes[..])
|
||||
}
|
||||
|
||||
_ => false
|
||||
|
@ -1896,8 +1886,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>(scx: &SharedCrateContext<'a
|
|||
partitioning::partition(scx,
|
||||
items.iter().cloned(),
|
||||
strategy,
|
||||
&inlining_map,
|
||||
scx.reachable())
|
||||
&inlining_map)
|
||||
});
|
||||
|
||||
assert!(scx.tcx().sess.opts.cg.codegen_units == codegen_units.len() ||
|
||||
|
|
|
@ -401,7 +401,7 @@ fn record_inlining_canditates<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
|||
callees: &[TransItem<'tcx>],
|
||||
inlining_map: &mut InliningMap<'tcx>) {
|
||||
let is_inlining_candidate = |trans_item: &TransItem<'tcx>| {
|
||||
trans_item.is_from_extern_crate() || trans_item.requests_inline(tcx)
|
||||
trans_item.needs_local_copy(tcx)
|
||||
};
|
||||
|
||||
let inlining_candidates = callees.into_iter()
|
||||
|
|
|
@ -133,7 +133,7 @@ use symbol_map::SymbolMap;
|
|||
use syntax::ast::NodeId;
|
||||
use syntax::parse::token::{self, InternedString};
|
||||
use trans_item::TransItem;
|
||||
use util::nodemap::{FnvHashMap, FnvHashSet, NodeSet};
|
||||
use util::nodemap::{FnvHashMap, FnvHashSet};
|
||||
|
||||
pub enum PartitioningStrategy {
|
||||
/// Generate one codegen unit per source-level module.
|
||||
|
@ -254,25 +254,17 @@ const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
|
|||
pub fn partition<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
|
||||
trans_items: I,
|
||||
strategy: PartitioningStrategy,
|
||||
inlining_map: &InliningMap<'tcx>,
|
||||
reachable: &NodeSet)
|
||||
inlining_map: &InliningMap<'tcx>)
|
||||
-> Vec<CodegenUnit<'tcx>>
|
||||
where I: Iterator<Item = TransItem<'tcx>>
|
||||
{
|
||||
let tcx = scx.tcx();
|
||||
|
||||
if let PartitioningStrategy::FixedUnitCount(1) = strategy {
|
||||
// If there is only a single codegen-unit, we can use a very simple
|
||||
// scheme and don't have to bother with doing much analysis.
|
||||
return vec![single_codegen_unit(tcx, trans_items, reachable)];
|
||||
}
|
||||
|
||||
// In the first step, we place all regular translation items into their
|
||||
// respective 'home' codegen unit. Regular translation items are all
|
||||
// functions and statics defined in the local crate.
|
||||
let mut initial_partitioning = place_root_translation_items(scx,
|
||||
trans_items,
|
||||
reachable);
|
||||
trans_items);
|
||||
|
||||
debug_dump(tcx, "INITIAL PARTITONING:", initial_partitioning.codegen_units.iter());
|
||||
|
||||
|
@ -310,8 +302,7 @@ struct PreInliningPartitioning<'tcx> {
|
|||
struct PostInliningPartitioning<'tcx>(Vec<CodegenUnit<'tcx>>);
|
||||
|
||||
fn place_root_translation_items<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
|
||||
trans_items: I,
|
||||
_reachable: &NodeSet)
|
||||
trans_items: I)
|
||||
-> PreInliningPartitioning<'tcx>
|
||||
where I: Iterator<Item = TransItem<'tcx>>
|
||||
{
|
||||
|
@ -320,7 +311,7 @@ fn place_root_translation_items<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
|
|||
let mut codegen_units = FnvHashMap();
|
||||
|
||||
for trans_item in trans_items {
|
||||
let is_root = !trans_item.is_instantiated_only_on_demand();
|
||||
let is_root = !trans_item.is_instantiated_only_on_demand(tcx);
|
||||
|
||||
if is_root {
|
||||
let characteristic_def_id = characteristic_def_id_of_trans_item(scx, trans_item);
|
||||
|
@ -350,6 +341,10 @@ fn place_root_translation_items<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
|
|||
// This is a non-generic functions, we always
|
||||
// make it visible externally on the chance that
|
||||
// it might be used in another codegen unit.
|
||||
// Later on base::internalize_symbols() will
|
||||
// assign "internal" linkage to those symbols
|
||||
// that are not referenced from other codegen
|
||||
// units (and are not publicly visible).
|
||||
llvm::ExternalLinkage
|
||||
} else {
|
||||
// In the current setup, generic functions cannot
|
||||
|
@ -454,7 +449,6 @@ fn place_inlined_translation_items<'tcx>(initial_partitioning: PreInliningPartit
|
|||
// reliably in that case.
|
||||
new_codegen_unit.items.insert(trans_item, llvm::InternalLinkage);
|
||||
} else {
|
||||
assert!(trans_item.is_instantiated_only_on_demand());
|
||||
// We can't be sure if this will also be instantiated
|
||||
// somewhere else, so we add an instance here with
|
||||
// InternalLinkage so we don't get any conflicts.
|
||||
|
@ -550,68 +544,6 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
|||
return token::intern_and_get_ident(&mod_path[..]);
|
||||
}
|
||||
|
||||
fn single_codegen_unit<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
||||
trans_items: I,
|
||||
reachable: &NodeSet)
|
||||
-> CodegenUnit<'tcx>
|
||||
where I: Iterator<Item = TransItem<'tcx>>
|
||||
{
|
||||
let mut items = FnvHashMap();
|
||||
|
||||
for trans_item in trans_items {
|
||||
let linkage = trans_item.explicit_linkage(tcx).unwrap_or_else(|| {
|
||||
match trans_item {
|
||||
TransItem::Static(node_id) => {
|
||||
if reachable.contains(&node_id) {
|
||||
llvm::ExternalLinkage
|
||||
} else {
|
||||
llvm::PrivateLinkage
|
||||
}
|
||||
}
|
||||
TransItem::DropGlue(_) => {
|
||||
llvm::InternalLinkage
|
||||
}
|
||||
TransItem::Fn(instance) => {
|
||||
if trans_item.is_generic_fn() {
|
||||
// FIXME(mw): Assigning internal linkage to all
|
||||
// monomorphizations is potentially a waste of space
|
||||
// since monomorphizations could be shared between
|
||||
// crates. The main reason for making them internal is
|
||||
// a limitation in MingW's binutils that cannot deal
|
||||
// with COFF object that have more than 2^15 sections,
|
||||
// which is something that can happen for large programs
|
||||
// when every function gets put into its own COMDAT
|
||||
// section.
|
||||
llvm::InternalLinkage
|
||||
} else if trans_item.is_from_extern_crate() {
|
||||
// FIXME(mw): It would be nice if we could mark these as
|
||||
// `AvailableExternallyLinkage`, since they should have
|
||||
// been instantiated in the extern crate. But this
|
||||
// sometimes leads to crashes on Windows because LLVM
|
||||
// does not handle exception handling table instantiation
|
||||
// reliably in that case.
|
||||
llvm::InternalLinkage
|
||||
} else if reachable.contains(&tcx.map
|
||||
.as_local_node_id(instance.def)
|
||||
.unwrap()) {
|
||||
llvm::ExternalLinkage
|
||||
} else {
|
||||
// Functions that are not visible outside this crate can
|
||||
// be marked as internal.
|
||||
llvm::InternalLinkage
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
items.insert(trans_item, linkage);
|
||||
}
|
||||
|
||||
CodegenUnit::new(
|
||||
numbered_codegen_unit_name(&tcx.crate_name[..], 0),
|
||||
items)
|
||||
}
|
||||
|
||||
fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {
|
||||
token::intern_and_get_ident(&format!("{}{}{}",
|
||||
crate_name,
|
||||
|
|
|
@ -241,19 +241,6 @@ impl<'a, 'tcx> TransItem<'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn requests_inline(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
|
||||
match *self {
|
||||
TransItem::Fn(ref instance) => {
|
||||
instance.substs.types().next().is_some() || {
|
||||
let attributes = tcx.get_attrs(instance.def);
|
||||
attr::requests_inline(&attributes[..])
|
||||
}
|
||||
}
|
||||
TransItem::DropGlue(..) => true,
|
||||
TransItem::Static(..) => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_from_extern_crate(&self) -> bool {
|
||||
match *self {
|
||||
TransItem::Fn(ref instance) => !instance.def.is_local(),
|
||||
|
@ -262,10 +249,18 @@ impl<'a, 'tcx> TransItem<'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn is_instantiated_only_on_demand(&self) -> bool {
|
||||
/// True if the translation item should only be translated to LLVM IR if
|
||||
/// it is referenced somewhere (like inline functions, for example).
|
||||
pub fn is_instantiated_only_on_demand(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
|
||||
if self.explicit_linkage(tcx).is_some() {
|
||||
return false;
|
||||
}
|
||||
|
||||
match *self {
|
||||
TransItem::Fn(ref instance) => {
|
||||
!instance.def.is_local() || instance.substs.types().next().is_some()
|
||||
!instance.def.is_local() ||
|
||||
instance.substs.types().next().is_some() ||
|
||||
attr::requests_inline(&tcx.get_attrs(instance.def)[..])
|
||||
}
|
||||
TransItem::DropGlue(..) => true,
|
||||
TransItem::Static(..) => false,
|
||||
|
@ -282,6 +277,18 @@ impl<'a, 'tcx> TransItem<'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns true if there has to be a local copy of this TransItem in every
|
||||
/// codegen unit that references it (as with inlined functions, for example)
|
||||
pub fn needs_local_copy(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
|
||||
// Currently everything that is instantiated only on demand is done so
|
||||
// with "internal" linkage, so we need a copy to be present in every
|
||||
// codegen unit.
|
||||
// This is coincidental: We could also instantiate something only if it
|
||||
// is referenced (e.g. a regular, private function) but place it in its
|
||||
// own codegen unit with "external" linkage.
|
||||
self.is_instantiated_only_on_demand(tcx)
|
||||
}
|
||||
|
||||
pub fn explicit_linkage(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<llvm::Linkage> {
|
||||
let def_id = match *self {
|
||||
TransItem::Fn(ref instance) => instance.def,
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
mod inline {
|
||||
|
||||
// Important: This function should show up in all codegen units where it is inlined
|
||||
//~ TRANS_ITEM fn local_inlining::inline[0]::inlined_function[0] @@ local_inlining-inline[External] local_inlining-user1[Available] local_inlining-user2[Available]
|
||||
//~ TRANS_ITEM fn local_inlining::inline[0]::inlined_function[0] @@ local_inlining-user1[Internal] local_inlining-user2[Internal]
|
||||
#[inline(always)]
|
||||
pub fn inlined_function()
|
||||
{
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
mod inline {
|
||||
|
||||
//~ TRANS_ITEM fn local_transitive_inlining::inline[0]::inlined_function[0] @@ local_transitive_inlining-inline[External] local_transitive_inlining-direct_user[Available] local_transitive_inlining-indirect_user[Available]
|
||||
//~ TRANS_ITEM fn local_transitive_inlining::inline[0]::inlined_function[0] @@ local_transitive_inlining-indirect_user[Internal]
|
||||
#[inline(always)]
|
||||
pub fn inlined_function()
|
||||
{
|
||||
|
@ -29,7 +29,7 @@ mod inline {
|
|||
mod direct_user {
|
||||
use super::inline;
|
||||
|
||||
//~ TRANS_ITEM fn local_transitive_inlining::direct_user[0]::foo[0] @@ local_transitive_inlining-direct_user[External] local_transitive_inlining-indirect_user[Available]
|
||||
//~ TRANS_ITEM fn local_transitive_inlining::direct_user[0]::foo[0] @@ local_transitive_inlining-indirect_user[Internal]
|
||||
#[inline(always)]
|
||||
pub fn foo() {
|
||||
inline::inlined_function();
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
-include ../tools.mk
|
||||
|
||||
# Test that #[inline(always)] functions still get inlined across compilation
|
||||
# unit boundaries. Compilation should produce three IR files, with each one
|
||||
# containing a definition of the inlined function. Also, the non-#[inline]
|
||||
# function should be defined in only one compilation unit.
|
||||
# Test that #[inline] functions still get inlined across compilation unit
|
||||
# boundaries. Compilation should produce three IR files, but only the two
|
||||
# compilation units that have a usage of the #[inline] function should
|
||||
# contain a definition. Also, the non-#[inline] function should be defined
|
||||
# in only one compilation unit.
|
||||
|
||||
all:
|
||||
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
|
||||
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "1" ]
|
||||
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ available_externally\ i32\ .*inlined)" -eq "2" ]
|
||||
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
|
||||
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
|
||||
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*normal)" -eq "1" ]
|
||||
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c declare\ i32\ .*normal)" -eq "2" ]
|
||||
|
|
Loading…
Add table
Reference in a new issue