use preds to serialize just what we need

This massively speeds up serialization. It also
seems to produce deterministic metadata hashes
(before I was seeing inconsistent results).

Fixes #35232.
Niko Matsakis 2016-08-05 20:14:47 -04:00
parent 9978cbc8f4
commit 02a47032dd
16 changed files with 177 additions and 214 deletions
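For orientation before the diffs: the core idea is to stop serializing the full dep-graph and instead compute, for each significant output node (a work-product or an exported metadata item), the set of hashable input nodes that can reach it, then serialize only those reduced input -> output edges plus the hashes of the inputs. Below is a minimal, self-contained sketch of that reduction; the types and names (Node, reduce) are made up for illustration and are not the actual rustc API. In the diff itself this role is played by Predecessors::new(&query, &mut hcx) from the new preds module.

use std::collections::{HashMap, HashSet};

// Stand-ins for the real DepNode variants (hypothetical, for illustration).
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Node {
    Hir(u32),         // a hashable input (HIR item or foreign metadata)
    Other(u32),       // an intermediate node that need not be kept
    WorkProduct(u32), // a significant output with state saved to disk
}

fn is_input(n: Node) -> bool {
    matches!(n, Node::Hir(_))
}

fn is_output(n: Node) -> bool {
    matches!(n, Node::WorkProduct(_))
}

// For each significant output, collect the hashable inputs that can reach
// it. Only these reduced (input, output) edges need to be serialized.
fn reduce(edges: &[(Node, Node)]) -> HashMap<Node, HashSet<Node>> {
    // target -> direct predecessors
    let mut preds: HashMap<Node, Vec<Node>> = HashMap::new();
    for &(src, tgt) in edges {
        preds.entry(tgt).or_insert_with(Vec::new).push(src);
    }

    let mut reduced: HashMap<Node, HashSet<Node>> = HashMap::new();
    for &(_, tgt) in edges {
        if !is_output(tgt) || reduced.contains_key(&tgt) {
            continue;
        }
        // Walk backwards from the output, keeping only hashable inputs.
        let mut inputs = HashSet::new();
        let mut seen = HashSet::new();
        let mut stack = vec![tgt];
        while let Some(n) = stack.pop() {
            if !seen.insert(n) {
                continue;
            }
            if is_input(n) {
                inputs.insert(n);
            }
            if let Some(ps) = preds.get(&n) {
                stack.extend(ps.iter().copied());
            }
        }
        reduced.insert(tgt, inputs);
    }
    reduced
}

fn main() {
    let edges = [
        (Node::Hir(1), Node::Other(10)),
        (Node::Other(10), Node::WorkProduct(100)),
        (Node::Hir(2), Node::WorkProduct(100)),
    ];
    // WorkProduct(100) is reachable from Hir(1) and Hir(2); Other(10) drops out.
    println!("{:?}", reduce(&edges));
}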

View file

@@ -19,7 +19,6 @@ use super::directory::DefPathIndex;
/// Data for use when recompiling the **current crate**.
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedDepGraph {
pub nodes: Vec<DepNode<DefPathIndex>>,
pub edges: Vec<SerializedEdge>,
/// These are hashes of two things:
@@ -44,15 +43,22 @@ pub struct SerializedDepGraph {
pub hashes: Vec<SerializedHash>,
}
/// Represents a "reduced" dependency edge. Unlike the full dep-graph,
/// the dep-graph we serialize contains only edges `S -> T` where the
/// source `S` is something hashable (a HIR node or foreign metadata)
/// and the target `T` is something significant, like a work-product.
/// Normally, significant nodes are only those that have saved data on
/// disk, but in unit-testing the set of significant nodes can be
/// increased.
pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>);
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedHash {
/// node being hashed; either a Hir or MetaData variant, in
/// practice
pub node: DepNode<DefPathIndex>,
/// def-id of thing being hashed
pub dep_node: DepNode<DefPathIndex>,
/// the hash itself, computed by `calculate_item_hash`
/// the hash as of previous compilation, computed by code in
/// `hash` module
pub hash: u64,
}

View file

@@ -28,7 +28,7 @@ use super::dirty_clean;
use super::hash::*;
use super::util::*;
type DirtyNodes = FnvHashSet<DepNode<DefId>>;
type DirtyNodes = FnvHashSet<DepNode<DefPathIndex>>;
type CleanEdges = Vec<(DepNode<DefId>, DepNode<DefId>)>;
@@ -110,157 +110,106 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// Retrace the paths in the directory to find their current location (if any).
let retraced = directory.retrace(tcx);
// Compute the set of Hir nodes whose data has changed.
let mut dirty_nodes =
initial_dirty_nodes(tcx, &serialized_dep_graph.hashes, &retraced);
// TODO -- this could be more efficient if we integrated the `DefIdDirectory` and
// pred set more deeply
debug!("decode_dep_graph: initial dirty_nodes = {:#?}", dirty_nodes);
// Compute the set of Hir nodes whose data has changed or which have been removed.
let dirty_raw_source_nodes = dirty_nodes(tcx, &serialized_dep_graph.hashes, &retraced);
// Find all DepNodes reachable from that core set. This loop
// iterates repeatedly over the list of edges whose source is not
// known to be dirty (`clean_edges`). If it finds an edge whose
// source is dirty, it removes it from that list and adds the
// target to `dirty_nodes`. It stops when it reaches a fixed
// point.
let clean_edges = compute_clean_edges(tcx,
&directory,
&serialized_dep_graph.edges,
&retraced,
&mut dirty_nodes);
// Create a (maybe smaller) list of edges, retaining only those whose
// target node still exists in the current compilation.
let retraced_edges: Vec<_> =
serialized_dep_graph.edges.iter()
.filter_map(|&(ref raw_source_node, ref raw_target_node)| {
retraced.map(raw_target_node)
.map(|target_node| (raw_source_node, target_node))
})
.collect();
// Add synthetic `foo->foo` edges for each clean node `foo` that
// we had before. This is sort of a hack to create clean nodes in
// the graph, since the existence of a node is a signal that the
// work it represents need not be repeated.
let clean_nodes =
serialized_dep_graph.nodes
.iter()
.filter_map(|node| retraced.map(node))
.filter(|node| !dirty_nodes.contains(node))
.map(|node| (node.clone(), node));
// Compute which work-products have changed.
let mut dirty_target_nodes = FnvHashSet();
for &(raw_source_node, ref target_node) in &retraced_edges {
if dirty_raw_source_nodes.contains(raw_source_node) {
if !dirty_target_nodes.contains(target_node) {
dirty_target_nodes.insert(target_node.clone());
// Add nodes and edges that are not dirty into our main graph.
if tcx.sess.opts.debugging_opts.incremental_info {
// It'd be nice to pretty-print these paths better than just
// using the `Debug` impls, but wev.
println!("module {:?} is dirty because {:?} changed or was removed",
target_node,
raw_source_node.map_def(|&index| {
Some(directory.def_path_string(tcx, index))
}).unwrap());
}
}
}
}
// For work-products that are still clean, add their deps into the
// graph. This is needed because later we will have to save this
// back out again!
let dep_graph = tcx.dep_graph.clone();
for (source, target) in clean_edges.into_iter().chain(clean_nodes) {
debug!("decode_dep_graph: clean edge: {:?} -> {:?}", source, target);
for (raw_source_node, target_node) in retraced_edges {
if dirty_target_nodes.contains(&target_node) {
continue;
}
let _task = dep_graph.in_task(target);
dep_graph.read(source);
let source_node = retraced.map(raw_source_node).unwrap();
debug!("decode_dep_graph: clean edge: {:?} -> {:?}", source_node, target_node);
let _task = dep_graph.in_task(target_node);
dep_graph.read(source_node);
}
// Add in work-products that are still clean, and delete those that are
// dirty.
let mut work_product_decoder = Decoder::new(work_products_data, 0);
let work_products = try!(<Vec<SerializedWorkProduct>>::decode(&mut work_product_decoder));
reconcile_work_products(tcx, work_products, &dirty_nodes);
reconcile_work_products(tcx, work_products, &dirty_target_nodes);
Ok(())
}
fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
hashes: &[SerializedHash],
retraced: &RetracedDefIdDirectory)
-> DirtyNodes {
/// Computes which of the original set of def-ids are dirty. Stored in
/// a bit vector where the index is the DefPathIndex.
fn dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
hashes: &[SerializedHash],
retraced: &RetracedDefIdDirectory)
-> DirtyNodes {
let mut hcx = HashContext::new(tcx);
let mut items_removed = false;
let mut dirty_nodes = FnvHashSet();
for hash in hashes {
match hash.node.map_def(|&i| retraced.def_id(i)) {
Some(dep_node) => {
let (_, current_hash) = hcx.hash(&dep_node).unwrap();
if current_hash != hash.hash {
debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}",
dep_node.map_def(|&def_id| Some(tcx.def_path(def_id))).unwrap(),
current_hash,
hash.hash);
dirty_nodes.insert(dep_node);
}
}
None => {
items_removed = true;
}
}
}
// If any of the items in the krate have changed, then we consider
// the meta-node `Krate` to be dirty, since that means something
// which (potentially) read the contents of every single item.
if items_removed || !dirty_nodes.is_empty() {
dirty_nodes.insert(DepNode::Krate);
for hash in hashes {
if let Some(dep_node) = retraced.map(&hash.dep_node) {
let (_, current_hash) = hcx.hash(&dep_node).unwrap();
if current_hash == hash.hash {
continue;
}
debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}",
dep_node.map_def(|&def_id| Some(tcx.def_path(def_id))).unwrap(),
current_hash,
hash.hash);
} else {
debug!("initial_dirty_nodes: {:?} is dirty as it was removed",
hash.dep_node);
}
dirty_nodes.insert(hash.dep_node.clone());
}
dirty_nodes
}
fn compute_clean_edges(tcx: TyCtxt,
directory: &DefIdDirectory,
serialized_edges: &[(SerializedEdge)],
retraced: &RetracedDefIdDirectory,
dirty_nodes: &mut DirtyNodes)
-> CleanEdges {
// Build up an initial list of edges. Include an edge (source,
// target) if neither node has been removed. If the source has
// been removed, add target to the list of dirty nodes.
let mut clean_edges = Vec::with_capacity(serialized_edges.len());
for &(ref serialized_source, ref serialized_target) in serialized_edges {
if let Some(target) = retraced.map(serialized_target) {
if let Some(source) = retraced.map(serialized_source) {
clean_edges.push((source, target))
} else {
// source removed, target must be dirty
debug!("compute_clean_edges: {:?} dirty because {:?} no longer exists",
target,
serialized_source.map_def(|&index| {
Some(directory.def_path_string(tcx, index))
}).unwrap());
dirty_nodes.insert(target);
}
} else {
// target removed, ignore the edge
}
}
debug!("compute_clean_edges: dirty_nodes={:#?}", dirty_nodes);
// Propagate dirty marks by iterating repeatedly over
// `clean_edges`. If we find an edge `(source, target)` where
// `source` is dirty, add `target` to the list of dirty nodes and
// remove it. Keep doing this until we find no more dirty nodes.
let mut previous_size = 0;
while dirty_nodes.len() > previous_size {
debug!("compute_clean_edges: previous_size={}", previous_size);
previous_size = dirty_nodes.len();
let mut i = 0;
while i < clean_edges.len() {
if dirty_nodes.contains(&clean_edges[i].0) {
let (source, target) = clean_edges.swap_remove(i);
debug!("compute_clean_edges: dirty source {:?} -> {:?}",
source, target);
dirty_nodes.insert(target);
} else if dirty_nodes.contains(&clean_edges[i].1) {
let (source, target) = clean_edges.swap_remove(i);
debug!("compute_clean_edges: dirty target {:?} -> {:?}",
source, target);
} else {
i += 1;
}
}
}
clean_edges
}
/// Go through the list of work-products produced in the previous run.
/// Delete any whose nodes have been found to be dirty or which are
/// otherwise no longer applicable.
fn reconcile_work_products<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
work_products: Vec<SerializedWorkProduct>,
dirty_nodes: &DirtyNodes) {
dirty_target_nodes: &FnvHashSet<DepNode<DefId>>) {
debug!("reconcile_work_products({:?})", work_products);
for swp in work_products {
let dep_node = DepNode::WorkProduct(swp.id.clone());
if dirty_nodes.contains(&dep_node) {
if dirty_target_nodes.contains(&DepNode::WorkProduct(swp.id.clone())) {
debug!("reconcile_work_products: dep-node for {:?} is dirty", swp);
delete_dirty_work_product(tcx, swp);
} else {
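To summarize the new logic in this hunk: the fixed-point propagation over clean_edges is gone. Because every serialized edge now runs directly from a hashable input to a significant target, a single pass over the retraced edges is enough to find the dirty work-products. A minimal sketch with made-up string node names (the real code works with DepNode<DefPathIndex> for raw nodes and DepNode<DefId> after retracing):

use std::collections::HashSet;

// A target is dirty if any input feeding it changed hash or was removed.
fn dirty_targets<'a>(edges: &[(&'a str, &'a str)],
                     dirty_inputs: &HashSet<&'a str>)
                     -> HashSet<&'a str> {
    edges.iter()
         .filter(|&&(src, _)| dirty_inputs.contains(src))
         .map(|&(_, tgt)| tgt)
         .collect()
}

fn main() {
    let edges = [("Hir(bar)", "WorkProduct(krate.o)"),
                 ("Hir(baz)", "WorkProduct(krate.o)"),
                 ("Hir(baz)", "MetaData(baz)")];
    let dirty_inputs: HashSet<_> = ["Hir(baz)"].iter().copied().collect();
    // Both targets are dirty because `baz` changed.
    println!("{:?}", dirty_targets(&edges, &dirty_inputs));
}

Edges whose target is not dirty are then replayed into the fresh dep-graph (the dep_graph.in_task / dep_graph.read calls above), so clean work-products keep their dependencies for the next save.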

View file

@@ -9,11 +9,12 @@
// except according to those terms.
use rbml::opaque::Encoder;
use rustc::dep_graph::{DepGraphQuery, DepNode};
use rustc::dep_graph::DepNode;
use rustc::hir::def_id::DefId;
use rustc::middle::cstore::LOCAL_CRATE;
use rustc::session::Session;
use rustc::ty::TyCtxt;
use rustc_data_structures::fnv::FnvHashMap;
use rustc_serialize::Encodable as RustcEncodable;
use std::hash::{Hash, Hasher, SipHasher};
use std::io::{self, Cursor, Write};
@@ -23,6 +24,7 @@ use std::path::PathBuf;
use super::data::*;
use super::directory::*;
use super::hash::*;
use super::preds::*;
use super::util::*;
pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {
@@ -35,12 +37,13 @@ pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {
let mut hcx = HashContext::new(tcx);
let mut builder = DefIdDirectoryBuilder::new(tcx);
let query = tcx.dep_graph.query();
let preds = Predecessors::new(&query, &mut hcx);
save_in(sess,
dep_graph_path(tcx),
|e| encode_dep_graph(&mut hcx, &mut builder, &query, e));
|e| encode_dep_graph(&preds, &mut builder, e));
save_in(sess,
metadata_hash_path(tcx, LOCAL_CRATE),
|e| encode_metadata_hashes(&mut hcx, &mut builder, &query, e));
|e| encode_metadata_hashes(tcx, &preds, &mut builder, e));
}
pub fn save_work_products(sess: &Session, local_crate_name: &str) {
@@ -98,38 +101,37 @@ fn save_in<F>(sess: &Session, opt_path_buf: Option<PathBuf>, encode: F)
}
}
pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
builder: &mut DefIdDirectoryBuilder,
query: &DepGraphQuery<DefId>,
encoder: &mut Encoder)
-> io::Result<()> {
let (nodes, edges) = (query.nodes(), query.edges());
// Create hashes for inputs.
let hashes = nodes.iter()
.filter_map(|dep_node| {
hcx.hash(dep_node)
.map(|(_, hash)| {
let node = builder.map(dep_node);
SerializedHash {
node: node,
hash: hash,
}
})
})
.collect();
pub fn encode_dep_graph(preds: &Predecessors,
builder: &mut DefIdDirectoryBuilder,
encoder: &mut Encoder)
-> io::Result<()> {
// Create a flat list of (Input, WorkProduct) edges for
// serialization.
let mut edges = vec![];
for (&target, sources) in &preds.inputs {
match *target {
DepNode::MetaData(_) => continue, // see encode_metadata_hashes instead
_ => (),
}
let target = builder.map(target);
for &source in sources {
let source = builder.map(source);
edges.push((source, target.clone()));
}
}
// Create the serialized dep-graph.
let graph = SerializedDepGraph {
nodes: nodes.iter().map(|node| builder.map(node)).collect(),
edges: edges.iter()
.map(|&(ref source_node, ref target_node)| {
let source = builder.map(source_node);
let target = builder.map(target_node);
(source, target)
edges: edges,
hashes: preds.hashes
.iter()
.map(|(&dep_node, &hash)| {
SerializedHash {
dep_node: builder.map(dep_node),
hash: hash,
}
})
.collect(),
hashes: hashes,
};
debug!("graph = {:#?}", graph);
@@ -141,24 +143,37 @@ pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
Ok(())
}
pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
builder: &mut DefIdDirectoryBuilder,
query: &DepGraphQuery<DefId>,
encoder: &mut Encoder)
-> io::Result<()> {
let tcx = hcx.tcx;
pub fn encode_metadata_hashes(tcx: TyCtxt,
preds: &Predecessors,
builder: &mut DefIdDirectoryBuilder,
encoder: &mut Encoder)
-> io::Result<()> {
let mut def_id_hashes = FnvHashMap();
let mut def_id_hash = |def_id: DefId| -> u64 {
*def_id_hashes.entry(def_id)
.or_insert_with(|| {
let index = builder.add(def_id);
let path = builder.lookup_def_path(index);
path.deterministic_hash(tcx)
})
};
let serialized_hashes = {
// Identify the `MetaData(X)` nodes where `X` is local. These are
// the metadata items we export. Downstream crates will want to
// see a hash that tells them whether we might have changed the
// metadata for a given item since they last compiled.
let meta_data_def_ids = query.nodes()
.into_iter()
.filter_map(|dep_node| match *dep_node {
DepNode::MetaData(def_id) if def_id.is_local() => Some(def_id),
_ => None,
});
// For each `MetaData(X)` node where `X` is local, accumulate a
// hash. These are the metadata items we export. Downstream
// crates will want to see a hash that tells them whether we might
// have changed the metadata for a given item since they last
// compiled.
//
// (I initially wrote this with an iterator, but it seemed harder to read.)
let mut serialized_hashes = SerializedMetadataHashes { hashes: vec![] };
for (&target, sources) in &preds.inputs {
let def_id = match *target {
DepNode::MetaData(def_id) => {
assert!(def_id.is_local());
def_id
}
_ => continue,
};
// To create the hash for each item `X`, we don't hash the raw
// bytes of the metadata (though in principle we
@@ -166,45 +181,32 @@ pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
// from the dep-graph. This corresponds to all the inputs that
// were read to construct the metadata. To create the hash for
// the metadata, we hash (the hash of) all of those inputs.
let hashes = meta_data_def_ids.map(|def_id| {
assert!(def_id.is_local());
let dep_node = DepNode::MetaData(def_id);
let mut state = SipHasher::new();
debug!("save: computing metadata hash for {:?}", dep_node);
debug!("save: computing metadata hash for {:?}", def_id);
let predecessors = query.transitive_predecessors(&dep_node);
let mut hashes: Vec<_> = predecessors.iter()
.filter_map(|node| hcx.hash(&node))
.map(|(def_id, hash)| {
let index = builder.add(def_id);
let path = builder.lookup_def_path(index);
(path.to_string(tcx), hash) // (*)
})
.collect();
// Create a vector containing a pair of (source-id, hash).
// The source-id is stored as a `DepNode<u64>`, where the u64
// is the det. hash of the def-path. This is convenient
// because we can sort this to get a table ordering across
// compilations, even if the def-ids themselves have changed.
let mut hashes: Vec<(DepNode<u64>, u64)> = sources.iter()
.map(|dep_node| {
let hash_dep_node = dep_node.map_def(|&def_id| Some(def_id_hash(def_id))).unwrap();
let hash = preds.hashes[dep_node];
(hash_dep_node, hash)
})
.collect();
// (*) creating a `String` from each def-path is a bit inefficient,
// but it's the easiest way to get a deterministic ord/hash.
hashes.sort();
let mut state = SipHasher::new();
hashes.hash(&mut state);
let hash = state.finish();
hashes.sort();
state.write_usize(hashes.len());
for (path, hash) in hashes {
debug!("save: predecessor {:?} has hash {}", path, hash);
path.hash(&mut state);
state.write_u64(hash.to_le());
}
let hash = state.finish();
debug!("save: metadata hash for {:?} is {}", dep_node, hash);
SerializedMetadataHash {
def_index: def_id.index,
hash: hash,
}
debug!("save: metadata hash for {:?} is {}", def_id, hash);
serialized_hashes.hashes.push(SerializedMetadataHash {
def_index: def_id.index,
hash: hash,
});
// Collect these up into a vector.
SerializedMetadataHashes { hashes: hashes.collect() }
};
}
// Encode everything.
try!(serialized_hashes.encode(encoder));
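The ordering trick described in the comments above boils down to a few lines. This is a sketch under simplifying assumptions: DefaultHasher stands in for the SipHasher used in the real code, and a plain u64 key stands in for the deterministic def-path hash produced by the def_id_hash closure. The point is only that sorting by a stable key makes the combined hash independent of discovery order and of the numeric def-ids.

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// `preds` pairs a stable key per predecessor (in the real code, the
// deterministic hash of its def-path) with that predecessor's item hash.
fn metadata_hash(mut preds: Vec<(u64, u64)>) -> u64 {
    // Sorting by the stable key gives the same order across compilations,
    // even if def-ids were renumbered in between.
    preds.sort();
    let mut state = DefaultHasher::new(); // stand-in for SipHasher
    state.write_usize(preds.len());
    for (key, hash) in preds {
        key.hash(&mut state);
        state.write_u64(hash.to_le());
    }
    state.finish()
}

fn main() {
    // Discovery order does not matter; the combined hash is identical.
    let a = metadata_hash(vec![(1, 0xAAAA), (2, 0xBBBB)]);
    let b = metadata_hash(vec![(2, 0xBBBB), (1, 0xAAAA)]);
    assert_eq!(a, b);
}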

View file

@@ -10,6 +10,7 @@
// aux-build:a.rs
// revisions:rpass1 rpass2
// compile-flags:-Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -9,6 +9,7 @@
// except according to those terms.
// revisions: rpass1 cfail2
// compile-flags: -Z query-dep-graph
#![allow(warnings)]
#![feature(rustc_attrs)]

View file

@@ -9,6 +9,7 @@
// except according to those terms.
// revisions: rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![allow(warnings)]
#![feature(rustc_attrs)]

View file

@@ -16,7 +16,7 @@
// aux-build:a.rs
// revisions:rpass1 rpass2 rpass3
// no-prefer-dynamic
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -35,14 +35,10 @@ mod x {
X { x: 11, y: 11 }
}
#[rustc_dirty(label="TypeckItemBody", cfg="rpass2")]
#[rustc_clean(label="ItemSignature", cfg="rpass2")]
pub fn new() -> X {
make()
}
#[rustc_clean(label="TypeckItemBody", cfg="rpass2")]
#[rustc_clean(label="ItemSignature", cfg="rpass2")]
pub fn sum(x: &X) -> u32 {
x.x + x.y
}
@@ -51,7 +47,6 @@ mod x {
mod y {
use x;
#[rustc_clean(label="TypeckItemBody", cfg="rpass2")]
pub fn assert_sum() -> bool {
let x = x::new();
x::sum(&x) == 22

View file

@@ -9,6 +9,7 @@
// except according to those terms.
// revisions: rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![allow(warnings)]
#![feature(rustc_attrs)]

View file

@@ -12,6 +12,7 @@
// in between revisions (hashing should be stable).
// revisions:rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -12,6 +12,7 @@
// in between revisions (hashing should be stable).
// revisions:rpass1 cfail2
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -12,6 +12,7 @@
// in between revisions (hashing should be stable).
// revisions:rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -10,6 +10,7 @@
// aux-build:a.rs
// revisions:rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -12,6 +12,7 @@
// in between revisions (hashing should be stable).
// revisions:rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -12,6 +12,7 @@
// in between revisions (hashing should be stable).
// revisions:rpass1 rpass2
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]

View file

@@ -10,6 +10,7 @@
// aux-build:a.rs
// revisions:rpass1 rpass2 rpass3
// compile-flags: -Z query-dep-graph
#![feature(rustc_attrs)]