Auto merge of #132566 - saethlin:querify-mir-collection, r=cjgillot

Querify MonoItem collection

Factored out of https://github.com/rust-lang/rust/pull/131650. These changes are required for post-mono MIR opts, because the previous implementation would load the MIR for every Instance that we traverse (as well as invoke queries on it). The cost of that would grow massively with post-mono MIR opts because we'll need to load new MIR for every Instance, instead of re-using the `optimized_mir` for every Instance with the same DefId.

So the approach here is to add two new queries, `items_of_instance` and `size_estimate`, which contain the specific information about an Instance's MIR that MirUsedCollector and CGU partitioning need, respectively. Caching these significantly increases the size of the query cache, but that's justified by our improved incrementality (I'm sure walking all the MIR for a huge crate scales quite poorly).

This also changes `MonoItems` into a type that will retain the traversal order (otherwise we perturb a bunch of diagnostics), and will also eliminate duplicate findings. Eliminating duplicates removes about a quarter of the query cache size growth.

The perf improvements in this PR are inflated because rustc-perf uses `-Zincremental-verify-ich`, which makes loading MIR a lot slower because MIR contains a lot of Spans and computing the stable hash of a Span is slow. And the primary goal of this PR is to load less MIR. Some squinting at `collector profile_local perf-record +stage1` runs suggests the magnitude of the improvements in this PR would be decreased by between a third and a half if that flag weren't being used. Though this effect may apply to the regressions too since most are incr-full and this change also causes such builds to encode more Spans.
This commit is contained in:
bors 2024-11-17 06:39:47 +00:00
commit ee4a56e353
9 changed files with 165 additions and 67 deletions

View file

@ -46,7 +46,7 @@ pub enum InstantiationMode {
LocalCopy,
}
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, HashStable)]
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, HashStable, TyEncodable, TyDecodable)]
pub enum MonoItem<'tcx> {
Fn(Instance<'tcx>),
Static(DefId),
@ -66,20 +66,7 @@ impl<'tcx> MonoItem<'tcx> {
// change NON_INCR_MIN_CGU_SIZE as well.
pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize {
match *self {
MonoItem::Fn(instance) => {
match instance.def {
// "Normal" functions size estimate: the number of
// statements, plus one for the terminator.
InstanceKind::Item(..)
| InstanceKind::DropGlue(..)
| InstanceKind::AsyncDropGlueCtorShim(..) => {
let mir = tcx.instance_mir(instance.def);
mir.basic_blocks.iter().map(|bb| bb.statements.len() + 1).sum()
}
// Other compiler-generated shims size estimate: 1
_ => 1,
}
}
MonoItem::Fn(instance) => tcx.size_estimate(instance),
// Conservatively estimate the size of a static declaration or
// assembly item to be 1.
MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
@ -556,3 +543,21 @@ impl<'tcx> CodegenUnitNameBuilder<'tcx> {
Symbol::intern(&cgu_name)
}
}
/// See module-level docs of `rustc_monomorphize::collector` on some context for "mentioned" items.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, HashStable)]
pub enum CollectionMode {
/// Collect items that are used, i.e., actually needed for codegen.
///
/// Which items are used can depend on optimization levels, as MIR optimizations can remove
/// uses.
UsedItems,
/// Collect items that are mentioned. The goal of this mode is that it is independent of
/// optimizations: the set of "mentioned" items is computed before optimizations are run.
///
/// The exact contents of this set are *not* a stable guarantee. (For instance, it is currently
/// computed after drop-elaboration. If we ever do some optimizations even in debug builds, we
/// might decide to run them before computing mentioned items.) The key property of this set is
/// that it is optimization-independent.
MentionedItems,
}

View file

@ -216,6 +216,10 @@ impl<T0, T1> EraseType for (&'_ T0, &'_ [T1]) {
type Result = [u8; size_of::<(&'static (), &'static [()])>()];
}
impl<T0, T1> EraseType for (&'_ [T0], &'_ [T1]) {
type Result = [u8; size_of::<(&'static [()], &'static [()])>()];
}
impl<T0> EraseType for (&'_ T0, Result<(), ErrorGuaranteed>) {
type Result = [u8; size_of::<(&'static (), Result<(), ErrorGuaranteed>)>()];
}

View file

@ -7,6 +7,7 @@ use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{DUMMY_SP, Span};
use crate::infer::canonical::CanonicalQueryInput;
use crate::mir::mono::CollectionMode;
use crate::ty::fast_reject::SimplifiedType;
use crate::ty::layout::{TyAndLayout, ValidityRequirement};
use crate::ty::{self, GenericArg, GenericArgsRef, Ty, TyCtxt};
@ -590,3 +591,11 @@ impl<'tcx> Key for (ValidityRequirement, ty::ParamEnvAnd<'tcx, Ty<'tcx>>) {
}
}
}
impl<'tcx> Key for (ty::Instance<'tcx>, CollectionMode) {
type Cache<V> = DefaultCache<Self, V>;
fn default_span(&self, tcx: TyCtxt<'_>) -> Span {
self.0.default_span(tcx)
}
}

View file

@ -40,6 +40,7 @@ use rustc_session::cstore::{
};
use rustc_session::lint::LintExpectationId;
use rustc_span::def_id::LOCAL_CRATE;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::Symbol;
use rustc_span::{DUMMY_SP, Span};
use rustc_target::spec::PanicStrategy;
@ -59,7 +60,7 @@ use crate::mir::interpret::{
EvalStaticInitializerRawResult, EvalToAllocationRawResult, EvalToConstValueResult,
EvalToValTreeResult, GlobalId, LitToConstError, LitToConstInput,
};
use crate::mir::mono::CodegenUnit;
use crate::mir::mono::{CodegenUnit, CollectionMode, MonoItem};
use crate::query::erase::{Erase, erase, restore};
use crate::query::plumbing::{
CyclePlaceholder, DynamicQuery, query_ensure, query_ensure_error_guaranteed, query_get_at,
@ -2339,6 +2340,16 @@ rustc_queries! {
arena_cache
desc { "functions to skip for move-size check" }
}
query items_of_instance(key: (ty::Instance<'tcx>, CollectionMode)) -> (&'tcx [Spanned<MonoItem<'tcx>>], &'tcx [Spanned<MonoItem<'tcx>>]) {
desc { "collecting items used by `{}`", key.0 }
cache_on_disk_if { true }
}
query size_estimate(key: ty::Instance<'tcx>) -> usize {
desc { "estimating codegen size of `{}`", key }
cache_on_disk_if { true }
}
}
rustc_query_append! { define_callbacks! }

View file

@ -12,6 +12,7 @@ use rustc_index::{Idx, IndexVec};
use rustc_macros::{Decodable, Encodable};
use rustc_middle::dep_graph::{DepNodeIndex, SerializedDepNodeIndex};
use rustc_middle::mir::interpret::{AllocDecodingSession, AllocDecodingState};
use rustc_middle::mir::mono::MonoItem;
use rustc_middle::mir::{self, interpret};
use rustc_middle::ty::codec::{RefDecodable, TyDecoder, TyEncoder};
use rustc_middle::ty::{self, Ty, TyCtxt};
@ -22,7 +23,7 @@ use rustc_session::Session;
use rustc_span::hygiene::{
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
};
use rustc_span::source_map::SourceMap;
use rustc_span::source_map::{SourceMap, Spanned};
use rustc_span::{
BytePos, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span,
SpanDecoder, SpanEncoder, StableSourceFileId, Symbol,
@ -773,6 +774,13 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for &'tcx [rustc_ast::InlineAsm
}
}
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for &'tcx [Spanned<MonoItem<'tcx>>] {
#[inline]
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
RefDecodable::decode(d)
}
}
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>>
for &'tcx crate::traits::specialization_graph::Graph
{

View file

@ -13,9 +13,11 @@ use std::marker::DiscriminantKind;
use rustc_abi::{FieldIdx, VariantIdx};
use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def_id::LocalDefId;
use rustc_middle::mir::mono::MonoItem;
use rustc_middle::ty::TyCtxt;
use rustc_serialize::{Decodable, Encodable};
use rustc_span::Span;
use rustc_span::source_map::Spanned;
pub use rustc_type_ir::{TyDecoder, TyEncoder};
use crate::arena::ArenaAllocatable;
@ -397,6 +399,15 @@ impl<'tcx, D: TyDecoder<I = TyCtxt<'tcx>>> RefDecodable<'tcx, D>
}
}
impl<'tcx, D: TyDecoder<I = TyCtxt<'tcx>>> RefDecodable<'tcx, D> for [Spanned<MonoItem<'tcx>>] {
fn decode(decoder: &mut D) -> &'tcx Self {
decoder
.interner()
.arena
.alloc_from_iter((0..decoder.read_usize()).map(|_| Decodable::decode(decoder)))
}
}
impl<'tcx, D: TyDecoder<I = TyCtxt<'tcx>>> RefDecodable<'tcx, D>
for ty::List<ty::BoundVariableKind>
{

View file

@ -207,6 +207,7 @@
use std::path::PathBuf;
use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::sync::{LRef, MTLock, par_for_each_in};
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_hir as hir;
@ -215,7 +216,7 @@ use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
use rustc_hir::lang_items::LangItem;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir::interpret::{AllocId, ErrorHandled, GlobalAlloc, Scalar};
use rustc_middle::mir::mono::{InstantiationMode, MonoItem};
use rustc_middle::mir::mono::{CollectionMode, InstantiationMode, MonoItem};
use rustc_middle::mir::visit::Visitor as MirVisitor;
use rustc_middle::mir::{self, Location, MentionedItem, traversal};
use rustc_middle::query::TyCtxtAt;
@ -243,16 +244,6 @@ pub(crate) enum MonoItemCollectionStrategy {
Lazy,
}
pub(crate) struct UsageMap<'tcx> {
// Maps every mono item to the mono items used by it.
used_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
// Maps every mono item to the mono items that use it.
user_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
}
type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
/// The state that is shared across the concurrent threads that are doing collection.
struct SharedState<'tcx> {
/// Items that have been or are currently being recursively collected.
@ -264,22 +255,12 @@ struct SharedState<'tcx> {
usage_map: MTLock<UsageMap<'tcx>>,
}
/// See module-level docs on some contect for "mentioned" items.
#[derive(Copy, Clone, Debug, PartialEq)]
enum CollectionMode {
/// Collect items that are used, i.e., actually needed for codegen.
///
/// Which items are used can depend on optimization levels, as MIR optimizations can remove
/// uses.
UsedItems,
/// Collect items that are mentioned. The goal of this mode is that it is independent of
/// optimizations: the set of "mentioned" items is computed before optimizations are run.
///
/// The exact contents of this set are *not* a stable guarantee. (For instance, it is currently
/// computed after drop-elaboration. If we ever do some optimizations even in debug builds, we
/// might decide to run them before computing mentioned items.) The key property of this set is
/// that it is optimization-independent.
MentionedItems,
pub(crate) struct UsageMap<'tcx> {
// Maps every mono item to the mono items used by it.
used_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
// Maps every mono item to the mono items that use it.
user_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
}
impl<'tcx> UsageMap<'tcx> {
@ -287,19 +268,15 @@ impl<'tcx> UsageMap<'tcx> {
UsageMap { used_map: Default::default(), user_map: Default::default() }
}
fn record_used<'a>(
&mut self,
user_item: MonoItem<'tcx>,
used_items: &'a [Spanned<MonoItem<'tcx>>],
) where
fn record_used<'a>(&mut self, user_item: MonoItem<'tcx>, used_items: &'a MonoItems<'tcx>)
where
'tcx: 'a,
{
let used_items: Vec<_> = used_items.iter().map(|item| item.node).collect();
for &used_item in used_items.iter() {
for used_item in used_items.items() {
self.user_map.entry(used_item).or_default().push(user_item);
}
assert!(self.used_map.insert(user_item, used_items).is_none());
assert!(self.used_map.insert(user_item, used_items.items().collect()).is_none());
}
pub(crate) fn get_user_items(&self, item: MonoItem<'tcx>) -> &[MonoItem<'tcx>] {
@ -325,6 +302,52 @@ impl<'tcx> UsageMap<'tcx> {
}
}
struct MonoItems<'tcx> {
// We want a set of MonoItem + Span where trying to re-insert a MonoItem with a different Span
// is ignored. Map does that, but it looks odd.
items: FxIndexMap<MonoItem<'tcx>, Span>,
}
impl<'tcx> MonoItems<'tcx> {
fn new() -> Self {
Self { items: FxIndexMap::default() }
}
fn is_empty(&self) -> bool {
self.items.is_empty()
}
fn push(&mut self, item: Spanned<MonoItem<'tcx>>) {
// Insert only if the entry does not exist. A normal insert would stomp the first span that
// got inserted.
self.items.entry(item.node).or_insert(item.span);
}
fn items(&self) -> impl Iterator<Item = MonoItem<'tcx>> + '_ {
self.items.keys().cloned()
}
}
impl<'tcx> IntoIterator for MonoItems<'tcx> {
type Item = Spanned<MonoItem<'tcx>>;
type IntoIter = impl Iterator<Item = Spanned<MonoItem<'tcx>>>;
fn into_iter(self) -> Self::IntoIter {
self.items.into_iter().map(|(item, span)| respan(span, item))
}
}
impl<'tcx> Extend<Spanned<MonoItem<'tcx>>> for MonoItems<'tcx> {
fn extend<I>(&mut self, iter: I)
where
I: IntoIterator<Item = Spanned<MonoItem<'tcx>>>,
{
for item in iter {
self.push(item)
}
}
}
/// Collect all monomorphized items reachable from `starting_point`, and emit a note diagnostic if a
/// post-monomorphization error is encountered during a collection step.
///
@ -443,13 +466,9 @@ fn collect_items_rec<'tcx>(
));
rustc_data_structures::stack::ensure_sufficient_stack(|| {
collect_items_of_instance(
tcx,
instance,
&mut used_items,
&mut mentioned_items,
mode,
)
let (used, mentioned) = tcx.items_of_instance((instance, mode));
used_items.extend(used.into_iter().copied());
mentioned_items.extend(mentioned.into_iter().copied());
});
}
MonoItem::GlobalAsm(item_id) => {
@ -1171,14 +1190,12 @@ fn collect_alloc<'tcx>(tcx: TyCtxt<'tcx>, alloc_id: AllocId, output: &mut MonoIt
/// Scans the MIR in order to find function calls, closures, and drop-glue.
///
/// Anything that's found is added to `output`. Furthermore the "mentioned items" of the MIR are returned.
#[instrument(skip(tcx, used_items, mentioned_items), level = "debug")]
#[instrument(skip(tcx), level = "debug")]
fn collect_items_of_instance<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
used_items: &mut MonoItems<'tcx>,
mentioned_items: &mut MonoItems<'tcx>,
mode: CollectionMode,
) {
) -> (MonoItems<'tcx>, MonoItems<'tcx>) {
// This item is getting monomorphized, do mono-time checks.
tcx.ensure().check_mono_item(instance);
@ -1193,11 +1210,13 @@ fn collect_items_of_instance<'tcx>(
// mentioned item. So instead we collect all pre-monomorphized `MentionedItem` that were already
// added to `used_items` in a hash set, which can efficiently query in the
// `body.mentioned_items` loop below without even having to monomorphize the item.
let mut used_items = MonoItems::new();
let mut mentioned_items = MonoItems::new();
let mut used_mentioned_items = Default::default();
let mut collector = MirUsedCollector {
tcx,
body,
used_items,
used_items: &mut used_items,
used_mentioned_items: &mut used_mentioned_items,
instance,
};
@ -1212,7 +1231,7 @@ fn collect_items_of_instance<'tcx>(
// them errors.
for const_op in body.required_consts() {
if let Some(val) = collector.eval_constant(const_op) {
collect_const_value(tcx, val, mentioned_items);
collect_const_value(tcx, val, &mut mentioned_items);
}
}
@ -1221,9 +1240,23 @@ fn collect_items_of_instance<'tcx>(
for item in body.mentioned_items() {
if !collector.used_mentioned_items.contains(&item.node) {
let item_mono = collector.monomorphize(item.node);
visit_mentioned_item(tcx, &item_mono, item.span, mentioned_items);
visit_mentioned_item(tcx, &item_mono, item.span, &mut mentioned_items);
}
}
(used_items, mentioned_items)
}
fn items_of_instance<'tcx>(
tcx: TyCtxt<'tcx>,
(instance, mode): (Instance<'tcx>, CollectionMode),
) -> (&'tcx [Spanned<MonoItem<'tcx>>], &'tcx [Spanned<MonoItem<'tcx>>]) {
let (used_items, mentioned_items) = collect_items_of_instance(tcx, instance, mode);
let used_items = tcx.arena.alloc_from_iter(used_items);
let mentioned_items = tcx.arena.alloc_from_iter(mentioned_items);
(used_items, mentioned_items)
}
/// `item` must be already monomorphized.
@ -1304,7 +1337,7 @@ fn collect_const_value<'tcx>(
#[instrument(skip(tcx, mode), level = "debug")]
fn collect_roots(tcx: TyCtxt<'_>, mode: MonoItemCollectionStrategy) -> Vec<MonoItem<'_>> {
debug!("collecting roots");
let mut roots = Vec::new();
let mut roots = MonoItems::new();
{
let entry_fn = tcx.entry_fn(());
@ -1596,4 +1629,5 @@ pub(crate) fn collect_crate_mono_items<'tcx>(
pub(crate) fn provide(providers: &mut Providers) {
providers.hooks.should_codegen_locally = should_codegen_locally;
providers.items_of_instance = items_of_instance;
}

View file

@ -2,6 +2,7 @@
#![feature(array_windows)]
#![feature(file_buffered)]
#![feature(if_let_guard)]
#![feature(impl_trait_in_assoc_type)]
#![feature(let_chains)]
#![warn(unreachable_pub)]
// tidy-alphabetical-end

View file

@ -1319,5 +1319,20 @@ pub(crate) fn provide(providers: &mut Providers) {
.unwrap_or_else(|| panic!("failed to find cgu with name {name:?}"))
};
providers.size_estimate = |tcx, instance| {
match instance.def {
// "Normal" functions size estimate: the number of
// statements, plus one for the terminator.
InstanceKind::Item(..)
| InstanceKind::DropGlue(..)
| InstanceKind::AsyncDropGlueCtorShim(..) => {
let mir = tcx.instance_mir(instance.def);
mir.basic_blocks.iter().map(|bb| bb.statements.len() + 1).sum()
}
// Other compiler-generated shims size estimate: 1
_ => 1,
}
};
collector::provide(providers);
}