Initial support for recording query keys in self-profiling data.

This commit is contained in:
Michael Woerister 2019-12-17 14:44:07 +01:00
parent 996511a456
commit b8ead417a6
9 changed files with 354 additions and 52 deletions

View file

@ -1995,9 +1995,9 @@ dependencies = [
[[package]]
name = "measureme"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36dcc09c1a633097649f7d48bde3d8a61d2a43c01ce75525e31fbbc82c0fccf4"
checksum = "ebefdcb02b2ddeee50178a218aeaf6d752d0777cd07914542f202cb7440e6e38"
dependencies = [
"byteorder",
"memmap",

View file

@ -36,6 +36,6 @@ parking_lot = "0.9"
byteorder = { version = "1.3" }
chalk-engine = { version = "0.9.0", default-features=false }
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
measureme = "0.6.0"
measureme = "0.7.0"
rustc_error_codes = { path = "../librustc_error_codes" }
rustc_session = { path = "../librustc_session" }

View file

@ -30,7 +30,7 @@ pub struct DepGraph {
/// This field is used for assigning DepNodeIndices when running in
/// non-incremental mode. Even in non-incremental mode we make sure that
/// each task as a `DepNodeIndex` that uniquely identifies it. This unique
/// each task has a `DepNodeIndex` that uniquely identifies it. This unique
/// ID is used for self-profiling.
virtual_dep_node_index: Lrc<AtomicU32>,
}

View file

@ -81,6 +81,9 @@ pub(crate) use self::config::QueryDescription;
mod on_disk_cache;
pub use self::on_disk_cache::OnDiskCache;
mod profiling_support;
pub use self::profiling_support::{IntoSelfProfilingString, QueryKeyStringBuilder};
// Each of these queries corresponds to a function pointer field in the
// `Providers` struct for requesting a value of that type, and a method
// on `tcx: TyCtxt` (and `tcx.at(span)`) for doing that request in a way

View file

@ -701,42 +701,6 @@ macro_rules! define_queries_inner {
}
}
/// All self-profiling events generated by the query engine use a
/// virtual `StringId`s for their `event_id`. This method makes all
/// those virtual `StringId`s point to actual strings.
///
/// If we are recording only summary data, the ids will point to
/// just the query names. If we are recording query keys too, we
/// allocate the corresponding strings here. (The latter is not yet
/// implemented.)
pub fn allocate_self_profile_query_strings(
&self,
profiler: &rustc_data_structures::profiling::SelfProfiler
) {
// Walk the entire query cache and allocate the appropriate
// string representation. Each cache entry is uniquely
// identified by its dep_node_index.
$({
let query_name_string_id =
profiler.get_or_alloc_cached_string(stringify!($name));
let result_cache = self.$name.lock_shards();
for shard in result_cache.iter() {
let query_invocation_ids = shard
.results
.values()
.map(|v| v.index)
.map(|dep_node_index| dep_node_index.into());
profiler.bulk_map_query_invocation_id_to_single_string(
query_invocation_ids,
query_name_string_id
);
}
})*
}
#[cfg(parallel_compiler)]
pub fn collect_active_jobs(&self) -> Vec<Lrc<QueryJob<$tcx>>> {
let mut jobs = Vec::new();
@ -1040,6 +1004,35 @@ macro_rules! define_queries_inner {
pub fn $name(self, key: $K) -> $V {
self.at(DUMMY_SP).$name(key)
})*
/// All self-profiling events generated by the query engine use
/// virtual `StringId`s for their `event_id`. This method makes all
/// those virtual `StringId`s point to actual strings.
///
/// If we are recording only summary data, the ids will point to
/// just the query names. If we are recording query keys too, we
/// allocate the corresponding strings here.
pub fn alloc_self_profile_query_strings(self) {
use crate::ty::query::profiling_support::{
alloc_self_profile_query_strings_for_query_cache,
QueryKeyStringCache,
};
if !self.prof.enabled() {
return;
}
let mut string_cache = QueryKeyStringCache::new();
$({
alloc_self_profile_query_strings_for_query_cache(
self,
stringify!($name),
&self.queries.$name,
&mut string_cache,
);
})*
}
}
impl TyCtxtAt<$tcx> {

View file

@ -0,0 +1,276 @@
use crate::hir::def_id::{CRATE_DEF_INDEX, CrateNum, DefId, DefIndex, LOCAL_CRATE};
use crate::hir::map::definitions::DefPathData;
use crate::ty::context::TyCtxt;
use crate::ty::query::config::QueryConfig;
use crate::ty::query::plumbing::QueryCache;
use measureme::{StringId, StringComponent};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::profiling::SelfProfiler;
use rustc_data_structures::sharded::Sharded;
use std::fmt::Debug;
use std::io::Write;
pub struct QueryKeyStringCache {
def_id_cache: FxHashMap<DefId, StringId>,
}
impl QueryKeyStringCache {
pub fn new() -> QueryKeyStringCache {
QueryKeyStringCache {
def_id_cache: Default::default(),
}
}
}
pub struct QueryKeyStringBuilder<'p, 'c, 'tcx> {
profiler: &'p SelfProfiler,
tcx: TyCtxt<'tcx>,
string_cache: &'c mut QueryKeyStringCache,
}
impl<'p, 'c, 'tcx> QueryKeyStringBuilder<'p, 'c, 'tcx> {
pub fn new(
profiler: &'p SelfProfiler,
tcx: TyCtxt<'tcx>,
string_cache: &'c mut QueryKeyStringCache,
) -> QueryKeyStringBuilder<'p, 'c, 'tcx> {
QueryKeyStringBuilder {
profiler,
tcx,
string_cache,
}
}
// The current implementation is rather crude. In the future it might be a
// good idea to base this on `ty::print` in order to get nicer and more
// efficient query keys.
fn def_id_to_string_id(&mut self, def_id: DefId) -> StringId {
if let Some(&string_id) = self.string_cache.def_id_cache.get(&def_id) {
return string_id;
}
let def_key = self.tcx.def_key(def_id);
let (parent_string_id, start_index) = match def_key.parent {
Some(parent_index) => {
let parent_def_id = DefId {
index: parent_index,
krate: def_id.krate,
};
(self.def_id_to_string_id(parent_def_id), 0)
}
None => {
(StringId::INVALID, 2)
}
};
let dis_buffer = &mut [0u8; 16];
let name;
let dis;
let end_index;
match def_key.disambiguated_data.data {
DefPathData::CrateRoot => {
name = self.tcx.original_crate_name(def_id.krate).as_str();
dis = "";
end_index = 3;
}
other => {
name = other.as_symbol().as_str();
if def_key.disambiguated_data.disambiguator == 0 {
dis = "";
end_index = 3;
} else {
write!(&mut dis_buffer[..],
"[{}]",
def_key.disambiguated_data.disambiguator
).unwrap();
let end_of_dis = dis_buffer.iter().position(|&c| c == b']').unwrap();
dis = std::str::from_utf8(&dis_buffer[.. end_of_dis + 1]).unwrap();
end_index = 4;
}
}
}
let components = [
StringComponent::Ref(parent_string_id),
StringComponent::Value("::"),
StringComponent::Value(&name[..]),
StringComponent::Value(dis)
];
let string_id = self.profiler.alloc_string(
&components[start_index .. end_index]
);
self.string_cache.def_id_cache.insert(def_id, string_id);
string_id
}
}
pub trait IntoSelfProfilingString {
fn to_self_profile_string(
&self,
builder: &mut QueryKeyStringBuilder<'_, '_, '_>
) -> StringId;
}
// The default implementation of `IntoSelfProfilingString` just uses `Debug`
// which is slow and causes lots of duplication of string data.
// The specialized impls below take care of making the `DefId` case more
// efficient.
impl<T: Debug> IntoSelfProfilingString for T {
default fn to_self_profile_string(
&self,
builder: &mut QueryKeyStringBuilder<'_, '_, '_>
) -> StringId {
let s = format!("{:?}", self);
builder.profiler.alloc_string(&s[..])
}
}
impl IntoSelfProfilingString for DefId {
fn to_self_profile_string(
&self,
builder: &mut QueryKeyStringBuilder<'_, '_, '_>
) -> StringId {
builder.def_id_to_string_id(*self)
}
}
impl IntoSelfProfilingString for CrateNum {
fn to_self_profile_string(
&self,
builder: &mut QueryKeyStringBuilder<'_, '_, '_>
) -> StringId {
builder.def_id_to_string_id(DefId {
krate: *self,
index: CRATE_DEF_INDEX,
})
}
}
impl IntoSelfProfilingString for DefIndex {
fn to_self_profile_string(
&self,
builder: &mut QueryKeyStringBuilder<'_, '_, '_>
) -> StringId {
builder.def_id_to_string_id(DefId {
krate: LOCAL_CRATE,
index: *self,
})
}
}
impl<T0, T1> IntoSelfProfilingString for (T0, T1)
where T0: IntoSelfProfilingString+Debug,
T1: IntoSelfProfilingString+Debug,
{
default fn to_self_profile_string(
&self,
builder: &mut QueryKeyStringBuilder<'_, '_, '_>
) -> StringId {
let val0 = self.0.to_self_profile_string(builder);
let val1 = self.1.to_self_profile_string(builder);
let components = &[
StringComponent::Value("("),
StringComponent::Ref(val0),
StringComponent::Value(","),
StringComponent::Ref(val1),
StringComponent::Value(")"),
];
builder.profiler.alloc_string(components)
}
}
/// Allocate the self-profiling query strings for a single query cache. This
/// method is called from `alloc_self_profile_query_strings` which knows all
/// the queries via macro magic.
pub(super) fn alloc_self_profile_query_strings_for_query_cache<'tcx, Q>(
tcx: TyCtxt<'tcx>,
query_name: &'static str,
query_cache: &Sharded<QueryCache<'tcx, Q>>,
string_cache: &mut QueryKeyStringCache,
) where Q: QueryConfig<'tcx> {
tcx.prof.with_profiler(|profiler| {
let event_id_builder = profiler.event_id_builder();
// Walk the entire query cache and allocate the appropriate
// string representations. Each cache entry is uniquely
// identified by its dep_node_index.
if profiler.query_key_recording_enabled() {
let mut query_string_builder =
QueryKeyStringBuilder::new(profiler, tcx, string_cache);
let query_name = profiler.get_or_alloc_cached_string(query_name);
// Since building the string representation of query keys might
// need to invoke queries itself, we cannot keep the query caches
// locked while doing so. Instead we copy out the
// `(query_key, dep_node_index)` pairs and release the lock again.
let query_keys_and_indices = {
let shards = query_cache.lock_shards();
let len = shards.iter().map(|shard| shard.results.len()).sum();
let mut query_keys_and_indices = Vec::with_capacity(len);
for shard in &shards {
query_keys_and_indices.extend(shard.results.iter().map(|(q_key, q_val)| {
(q_key.clone(), q_val.index)
}));
}
query_keys_and_indices
};
// Now actually allocate the strings. If allocating the strings
// generates new entries in the query cache, we'll miss them but
// we don't actually care.
for (query_key, dep_node_index) in query_keys_and_indices {
// Translate the DepNodeIndex into a QueryInvocationId
let query_invocation_id = dep_node_index.into();
// Create the string version of the query-key
let query_key = query_key.to_self_profile_string(&mut query_string_builder);
let event_id = event_id_builder.from_label_and_arg(query_name, query_key);
// Doing this in bulk might be a good idea:
profiler.map_query_invocation_id_to_string(
query_invocation_id,
event_id.to_string_id(),
);
}
} else {
// In this branch we don't allocate query keys
let query_name = profiler.get_or_alloc_cached_string(query_name);
let event_id = event_id_builder.from_label(query_name).to_string_id();
let shards = query_cache.lock_shards();
for shard in shards.iter() {
let query_invocation_ids = shard
.results
.values()
.map(|v| v.index)
.map(|dep_node_index| dep_node_index.into());
profiler.bulk_map_query_invocation_id_to_single_string(
query_invocation_ids,
event_id,
);
}
}
});
}

View file

@ -722,10 +722,10 @@ fn finalize_tcx(tcx: TyCtxt<'_>) {
// We assume that no queries are run past here. If there are new queries
// after this point, they'll show up as "<unknown>" in self-profiling data.
tcx.prof.with_profiler(|profiler| {
{
let _prof_timer = tcx.prof.generic_activity("self_profile_alloc_query_strings");
tcx.queries.allocate_self_profile_query_strings(profiler);
});
tcx.alloc_self_profile_query_strings();
}
}
impl CrateInfo {

View file

@ -26,7 +26,7 @@ rustc-hash = "1.0.1"
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
rustc_index = { path = "../librustc_index", package = "rustc_index" }
bitflags = "1.2.1"
measureme = "0.6.0"
measureme = "0.7.0"
[dependencies.parking_lot]
version = "0.9"

View file

@ -39,7 +39,7 @@
//! ## `event_id` Assignment
//!
//! As far as `measureme` is concerned, `event_id`s are just strings. However,
//! it would incur way too much overhead to generate and persist each `event_id`
//! it would incur too much overhead to generate and persist each `event_id`
//! string at the point where the event is recorded. In order to make this more
//! efficient `measureme` has two features:
//!
@ -56,7 +56,7 @@
//! ID be mapped to an actual string. This is used to cheaply generate
//! `event_id`s while the events actually occur, causing little timing
//! distortion, and then later map those `StringId`s, in bulk, to actual
//! `event_id` strings. This way the largest part of tracing overhead is
//! `event_id` strings. This way the largest part of the tracing overhead is
//! localized to one contiguous chunk of time.
//!
//! How are these `event_id`s generated in the compiler? For things that occur
@ -92,7 +92,7 @@ use std::thread::ThreadId;
use std::time::{Duration, Instant};
use std::u32;
use measureme::StringId;
use measureme::{EventId, EventIdBuilder, SerializableString, StringId};
use parking_lot::RwLock;
/// MmapSerializatioSink is faster on macOS and Linux
@ -123,6 +123,8 @@ bitflags::bitflags! {
const QUERY_BLOCKED = 1 << 3;
const INCR_CACHE_LOADS = 1 << 4;
const QUERY_KEYS = 1 << 5;
const DEFAULT = Self::GENERIC_ACTIVITIES.bits |
Self::QUERY_PROVIDERS.bits |
Self::QUERY_BLOCKED.bits |
@ -142,6 +144,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
("query-blocked", EventFilter::QUERY_BLOCKED),
("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
("query-keys", EventFilter::QUERY_KEYS),
];
fn thread_id_to_u32(tid: ThreadId) -> u32 {
@ -253,6 +256,7 @@ impl SelfProfilerRef {
pub fn generic_activity(&self, event_id: &'static str) -> TimingGuard<'_> {
self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
let event_id = profiler.get_or_alloc_cached_string(event_id);
let event_id = EventId::from_label(event_id);
TimingGuard::start(
profiler,
profiler.generic_activity_event_kind,
@ -266,7 +270,7 @@ impl SelfProfilerRef {
#[inline(always)]
pub fn query_provider(&self) -> TimingGuard<'_> {
self.exec(EventFilter::QUERY_PROVIDERS, |profiler| {
TimingGuard::start(profiler, profiler.query_event_kind, StringId::INVALID)
TimingGuard::start(profiler, profiler.query_event_kind, EventId::INVALID)
})
}
@ -289,7 +293,7 @@ impl SelfProfilerRef {
TimingGuard::start(
profiler,
profiler.query_blocked_event_kind,
StringId::INVALID,
EventId::INVALID,
)
})
}
@ -303,7 +307,7 @@ impl SelfProfilerRef {
TimingGuard::start(
profiler,
profiler.incremental_load_result_event_kind,
StringId::INVALID,
EventId::INVALID,
)
})
}
@ -319,7 +323,11 @@ impl SelfProfilerRef {
let event_id = StringId::new_virtual(query_invocation_id.0);
let thread_id = thread_id_to_u32(std::thread::current().id());
profiler.profiler.record_instant_event(event_kind(profiler), event_id, thread_id);
profiler.profiler.record_instant_event(
event_kind(profiler),
EventId::from_virtual(event_id),
thread_id,
);
TimingGuard::none()
}));
@ -330,6 +338,10 @@ impl SelfProfilerRef {
f(&profiler)
}
}
pub fn enabled(&self) -> bool {
self.profiler.is_some()
}
}
pub struct SelfProfiler {
@ -409,6 +421,15 @@ impl SelfProfiler {
})
}
/// Allocates a new string in the profiling data. Does not do any caching
/// or deduplication.
pub fn alloc_string<STR: SerializableString + ?Sized>(&self, s: &STR) -> StringId {
self.profiler.alloc_string(s)
}
/// Gets a `StringId` for the given string. This method makes sure that
/// any strings going through it will only be allocated once in the
/// profiling data.
pub fn get_or_alloc_cached_string(&self, s: &'static str) -> StringId {
// Only acquire a read-lock first since we assume that the string is
// already present in the common case.
@ -445,6 +466,14 @@ impl SelfProfiler {
let from = from.map(|qid| StringId::new_virtual(qid.0));
self.profiler.bulk_map_virtual_to_single_concrete_string(from, to);
}
pub fn query_key_recording_enabled(&self) -> bool {
self.event_filter_mask.contains(EventFilter::QUERY_KEYS)
}
pub fn event_id_builder(&self) -> EventIdBuilder<'_, SerializationSink> {
EventIdBuilder::new(&self.profiler)
}
}
#[must_use]
@ -455,7 +484,7 @@ impl<'a> TimingGuard<'a> {
pub fn start(
profiler: &'a SelfProfiler,
event_kind: StringId,
event_id: StringId,
event_id: EventId,
) -> TimingGuard<'a> {
let thread_id = thread_id_to_u32(std::thread::current().id());
let raw_profiler = &profiler.profiler;
@ -468,6 +497,7 @@ impl<'a> TimingGuard<'a> {
pub fn finish_with_query_invocation_id(self, query_invocation_id: QueryInvocationId) {
if let Some(guard) = self.0 {
let event_id = StringId::new_virtual(query_invocation_id.0);
let event_id = EventId::from_virtual(event_id);
guard.finish_with_override_event_id(event_id);
}
}