rustc: Add a flag for specifying dependencies

This commit implements a new flag, --extern, which is used to specify where a
crate is located. The purpose of this flag is to bypass the normal crate
loading/matching of the compiler to point it directly at the right file.

This flag takes the form `--extern foo=bar` where `foo` is the name of a crate
and `bar` is the location at which to find the crate. Multiple `--extern`
directives are allowed with the same crate name to specify the rlib/dylib pair
for a crate. It is invalid to specify more than one rlib or more than one dylib,
and it's required that the crates are valid rust crates.

I have also added some extensive documentation to metadata::loader about how
crate loading should work.

RFC: 0035-remove-crate-id
This commit is contained in:
Alex Crichton 2014-07-01 08:37:54 -07:00
parent df4ea9c39a
commit cc3c8bbfaf
17 changed files with 444 additions and 15 deletions

View file

@ -588,10 +588,11 @@ pub fn find_crate_name(sess: Option<&Session>,
}), None)
}
pub fn build_link_meta(krate: &ast::Crate, name: String) -> LinkMeta {
pub fn build_link_meta(sess: &Session, krate: &ast::Crate,
name: String) -> LinkMeta {
let r = LinkMeta {
crate_name: name,
crate_hash: Svh::calculate(krate),
crate_hash: Svh::calculate(sess, krate),
};
info!("{}", r);
return r;

View file

@ -53,6 +53,8 @@ use std::iter::range_step;
use syntax::ast;
use syntax::visit;
use driver::session::Session;
#[deriving(Clone, PartialEq)]
pub struct Svh {
hash: String,
@ -68,7 +70,7 @@ impl Svh {
self.hash.as_slice()
}
pub fn calculate(krate: &ast::Crate) -> Svh {
pub fn calculate(sess: &Session, krate: &ast::Crate) -> Svh {
// FIXME (#14132): This is better than it used to be, but it is still not
// ideal. We now attempt to hash only the relevant portions of the
// Crate AST as well as the top-level crate attributes. (However,
@ -80,6 +82,10 @@ impl Svh {
// avoid collisions.
let mut state = SipState::new();
for data in sess.opts.cg.metadata.iter() {
data.hash(&mut state);
}
{
let mut visit = svh_visitor::make(&mut state);
visit::walk_crate(&mut visit, krate, ());

View file

@ -30,7 +30,7 @@ use syntax::diagnostic::{ColorConfig, Auto, Always, Never};
use syntax::parse;
use syntax::parse::token::InternedString;
use std::collections::HashSet;
use std::collections::{HashSet, HashMap};
use getopts::{optopt, optmulti, optflag, optflagopt};
use getopts;
use lib::llvm::llvm;
@ -95,6 +95,7 @@ pub struct Options {
pub print_metas: (bool, bool),
pub cg: CodegenOptions,
pub color: ColorConfig,
pub externs: HashMap<String, Vec<String>>,
}
/// Some reasonable defaults
@ -120,6 +121,7 @@ pub fn basic_options() -> Options {
print_metas: (false, false),
cg: basic_codegen_options(),
color: Auto,
externs: HashMap::new(),
}
}
@ -551,7 +553,9 @@ pub fn optgroups() -> Vec<getopts::OptGroup> {
optopt("", "color", "Configure coloring of output:
auto = colorize, if output goes to a tty (default);
always = always colorize output;
never = never colorize output", "auto|always|never")
never = never colorize output", "auto|always|never"),
optmulti("", "extern", "Specify where an external rust library is located",
"PATH"),
)
}
@ -730,6 +734,21 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options {
}
};
let mut externs = HashMap::new();
for arg in matches.opt_strs("extern").iter() {
let mut parts = arg.as_slice().splitn('=', 1);
let name = match parts.next() {
Some(s) => s,
None => early_error("--extern value must not be empty"),
};
let location = match parts.next() {
Some(s) => s,
None => early_error("--extern value must be of the format `foo=bar`"),
};
let locs = externs.find_or_insert(name.to_string(), Vec::new());
locs.push(location.to_string());
}
Options {
crate_types: crate_types,
gc: gc,
@ -750,7 +769,8 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options {
write_dependency_info: write_dependency_info,
print_metas: print_metas,
cg: cg,
color: color
color: color,
externs: externs,
}
}

View file

@ -280,12 +280,32 @@ fn existing_match(e: &Env, name: &str,
hash: Option<&Svh>) -> Option<ast::CrateNum> {
let mut ret = None;
e.sess.cstore.iter_crate_data(|cnum, data| {
if data.name().as_slice() == name {
let other_hash = data.hash();
match hash {
Some(hash) if *hash != other_hash => {}
Some(..) | None => { ret = Some(cnum); }
if data.name().as_slice() != name { return }
match hash {
Some(hash) if *hash == data.hash() => { ret = Some(cnum); return }
Some(..) => return,
None => {}
}
// When the hash is None we're dealing with a top-level dependency in
// which case we may have a specification on the command line for this
// library. Even though an upstream library may have loaded something of
// the same name, we have to make sure it was loaded from the exact same
// location as well.
let source = e.sess.cstore.get_used_crate_source(cnum).unwrap();
let dylib = source.dylib.as_ref().map(|p| p.as_vec());
let rlib = source.rlib.as_ref().map(|p| p.as_vec());
match e.sess.opts.externs.find_equiv(&name) {
Some(locs) => {
let found = locs.iter().any(|l| {
Some(l.as_bytes()) == dylib || Some(l.as_bytes()) == rlib
});
if found {
ret = Some(cnum);
}
}
None => ret = Some(cnum),
}
});
return ret;
@ -361,6 +381,7 @@ fn resolve_crate<'a>(e: &mut Env,
root: root,
rejected_via_hash: vec!(),
rejected_via_triple: vec!(),
should_match_name: true,
};
let library = load_ctxt.load_library_crate();
register_crate(e, root, ident, name, span, library)
@ -422,6 +443,7 @@ impl<'a> PluginMetadataReader<'a> {
root: &None,
rejected_via_hash: vec!(),
rejected_via_triple: vec!(),
should_match_name: true,
};
let library = match load_ctxt.maybe_load_library_crate() {
Some (l) => l,

View file

@ -9,6 +9,208 @@
// except according to those terms.
//! Finds crate binaries and loads their metadata
//!
//! Might I be the first to welcome you to a world of platform differences,
//! version requirements, dependency graphs, conflicting desires, and fun! This
//! is the major guts (along with metadata::creader) of the compiler for loading
//! crates and resolving dependencies. Let's take a tour!
//!
//! # The problem
//!
//! Each invocation of the compiler is immediately concerned with one primary
//! problem, to connect a set of crates to resolved crates on the filesystem.
//! Concretely speaking, the compiler follows roughly these steps to get here:
//!
//! 1. Discover a set of `extern crate` statements.
//! 2. Transform these directives into crate names. If the directive does not
//! have an explicit name, then the identifier is the name.
//! 3. For each of these crate names, find a corresponding crate on the
//! filesystem.
//!
//! Sounds easy, right? Let's walk into some of the nuances.
//!
//! ## Transitive Dependencies
//!
//! Let's say we've got three crates: A, B, and C. A depends on B, and B depends
//! on C. When we're compiling A, we primarily need to find and locate B, but we
//! also end up needing to find and locate C as well.
//!
//! The reason for this is that any of B's types could be composed of C's types,
//! any function in B could return a type from C, etc. To be able to guarantee
//! that we can always typecheck/translate any function, we have to have
//! complete knowledge of the whole ecosystem, not just our immediate
//! dependencies.
//!
//! So now as part of the "find a corresponding crate on the filesystem" step
//! above, this involves also finding all crates for *all upstream
//! dependencies*. This includes all dependencies transitively.
//!
//! ## Rlibs and Dylibs
//!
//! The compiler has two forms of intermediate dependencies. These are dubbed
//! rlibs and dylibs for the static and dynamic variants, respectively. An rlib
//! is a rustc-defined file format (currently just an ar archive) while a dylib
//! is a platform-defined dynamic library. Each library has metadata somewhere
//! inside of it.
//!
//! When translating a crate name to a crate on the filesystem, we all of a
//! sudden need to take into account both rlibs and dylibs! Linkage later on may
//! use either one of these files, as each has their pros/cons. The job of crate
//! loading is to discover what's possible by finding all candidates.
//!
//! Most parts of this loading system keep the dylib/rlib as just separate
//! variables.
//!
//! ## Where to look?
//!
//! We can't exactly scan your whole hard drive when looking for dependencies,
//! so we need places to look. Currently the compiler will implicitly add the
//! target lib search path ($prefix/lib/rustlib/$target/lib) to any compilation,
//! and otherwise all -L flags are added to the search paths.
//!
//! ## What criterion to select on?
//!
//! This is a pretty tricky area of loading crates. Given a file, how do we know
//! whether it's the right crate? Currently, the rules look along these lines:
//!
//! 1. Does the filename match an rlib/dylib pattern? That is to say, does the
//! filename have the right prefix/suffix?
//! 2. Does the filename have the right prefix for the crate name being queried?
//! This is filtering for files like `libfoo*.rlib` and such.
//! 3. Is the file an actual rust library? This is done by loading the metadata
//! from the library and making sure it's actually there.
//! 4. Does the name in the metadata agree with the name of the library?
//! 5. Does the target in the metadata agree with the current target?
//! 6. Does the SVH match? (more on this later)
//!
//! If the file answers `yes` to all these questions, then the file is
//! considered a *candidate* for being accepted. It is illegal to have
//! more than two candidates as the compiler has no method by which to resolve
//! this conflict. Additionally, rlib/dylib candidates are considered
//! separately.
//!
//! After all this has happened, we have one or two files as candidates. These
//! represent the rlib/dylib file found for a library, and they're returned as
//! being found.
//!
//! ### What about versions?
//!
//! A lot of effort has been put forth to remove versioning from the compiler.
//! There have been forays in the past to have versioning baked in, but it was
//! largely always deemed insufficient to the point that it was recognized that
//! it's probably something the compiler shouldn't do anyway due to its
//! complicated nature and the state of the half-baked solutions.
//!
//! With a departure from versioning, the primary criterion for loading crates
//! is just the name of a crate. If we stopped here, it would imply that you
//! could never link two crates of the same name from different sources
//! together, which is clearly a bad state to be in.
//!
//! To resolve this problem, we come to the next section!
//!
//! # Expert Mode
//!
//! A number of flags have been added to the compiler to solve the "version
//! problem" in the previous section, as well as generally enabling more
//! powerful usage of the crate loading system of the compiler. The goal of
//! these flags and options is to enable third-party tools to drive the
//! compiler with prior knowledge about how the world should look.
//!
//! ## The `--extern` flag
//!
//! The compiler accepts a flag of this form a number of times:
//!
//! ```notrust
//! --extern crate-name=path/to/the/crate.rlib
//! ```
//!
//! This flag is basically the following letter to the compiler:
//!
//! > Dear rustc,
//! >
//! > When you are attempting to load the immediate dependency `crate-name`, I
//! > would like you to assume that the library is located at
//! > `path/to/the/crate.rlib`, and look nowhere else. Also, please do not
//! > assume that the path I specified has the name `crate-name`.
//!
//! This flag basically overrides most matching logic except for validating that
//! the file is indeed a rust library. The same `crate-name` can be specified
//! twice to specify the rlib/dylib pair.
//!
//! ## Enabling "multiple versions"
//!
//! This basically boils down to the ability to specify arbitrary packages to
//! the compiler. For example, if crate A wanted to use Bv1 and Bv2, then it
//! would look something like:
//!
//! ```ignore
//! extern crate b1;
//! extern crate b2;
//!
//! fn main() {}
//! ```
//!
//! and the compiler would be invoked as:
//!
//! ```notrust
//! rustc a.rs --extern b1=path/to/libb1.rlib --extern b2=path/to/libb2.rlib
//! ```
//!
//! In this scenario there are two crates named `b` and the compiler must be
//! manually driven to be informed where each crate is.
//!
//! ## Frobbing symbols
//!
//! One of the immediate problems with linking the same library together twice
//! in the same program is dealing with duplicate symbols. The primary way to
//! deal with this in rustc is to add hashes to the end of each symbol.
//!
//! In order to force hashes to change between versions of a library, if
//! desired, the compiler exposes an option `-C metadata=foo`, which is used to
//! initially seed each symbol hash. The string `foo` is prepended to each
//! string-to-hash to ensure that symbols change over time.
//!
//! ## Loading transitive dependencies
//!
//! Dealing with same-named-but-distinct crates is not just a local problem, but
//! one that also needs to be dealt with for transitive dependencies. Note that
//! in the letter above `--extern` flags only apply to the *local* set of
//! dependencies, not the upstream transitive dependencies. Consider this
//! dependency graph:
//!
//! ```notrust
//! A.1 A.2
//! | |
//! | |
//! B C
//! \ /
//! \ /
//! D
//! ```
//!
//! In this scenario, when we compile `D`, we need to be able to distinctly
//! resolve `A.1` and `A.2`, but an `--extern` flag cannot apply to these
//! transitive dependencies.
//!
//! Note that the key idea here is that `B` and `C` are both *already compiled*.
//! That is, they have already resolved their dependencies. Due to unrelated
//! technical reasons, when a library is compiled, it is only compatible with
//! the *exact same* version of the upstream libraries it was compiled against.
//! We use the "Strict Version Hash" to identify the exact copy of an upstream
//! library.
//!
//! With this knowledge, we know that `B` and `C` will depend on `A` with
//! different SVH values, so we crawl the normal `-L` paths looking for
//! `liba*.rlib` and filter based on the contained SVH.
//!
//! In the end, this ends up not needing `--extern` to specify upstream
//! transitive dependencies.
//!
//! # Wrapping up
//!
//! That's the general overview of loading crates in the compiler, but it's by
//! no means all of the necessary details. Take a look at the rest of
//! metadata::loader or metadata::creader for all the juicy details!
use back::archive::{ArchiveRO, METADATA_FILENAME};
use back::svh::Svh;
@ -67,6 +269,7 @@ pub struct Context<'a> {
pub root: &'a Option<CratePaths>,
pub rejected_via_hash: Vec<CrateMismatch>,
pub rejected_via_triple: Vec<CrateMismatch>,
pub should_match_name: bool,
}
pub struct Library {
@ -164,6 +367,17 @@ impl<'a> Context<'a> {
}
fn find_library_crate(&mut self) -> Option<Library> {
// If an SVH is specified, then this is a transitive dependency that
// must be loaded via -L plus some filtering.
if self.hash.is_none() {
self.should_match_name = false;
match self.find_commandline_library() {
Some(l) => return Some(l),
None => {}
}
self.should_match_name = true;
}
let dypair = self.dylibname();
// want: crate_name.dir_part() + prefix + crate_name.file_part + "-"
@ -348,9 +562,11 @@ impl<'a> Context<'a> {
}
fn crate_matches(&mut self, crate_data: &[u8], libpath: &Path) -> bool {
match decoder::maybe_get_crate_name(crate_data) {
Some(ref name) if self.crate_name == name.as_slice() => {}
_ => { info!("Rejecting via crate name"); return false }
if self.should_match_name {
match decoder::maybe_get_crate_name(crate_data) {
Some(ref name) if self.crate_name == name.as_slice() => {}
_ => { info!("Rejecting via crate name"); return false }
}
}
let hash = match decoder::maybe_get_crate_hash(crate_data) {
Some(hash) => hash, None => {
@ -403,6 +619,68 @@ impl<'a> Context<'a> {
}
}
/// Tries to resolve `self.crate_name` from the locations recorded in
/// `self.sess.opts.externs` (populated from `--extern name=path`).
///
/// Returns `None` when no entry exists for this crate name, when no usable
/// rlib/dylib was extracted, or when no metadata was produced. Otherwise
/// returns a `Library` holding the extracted rlib/dylib pair and metadata.
fn find_commandline_library(&mut self) -> Option<Library> {
// No entry for this crate name means there is nothing to load from the
// command line; the caller is left to try something else.
let locs = match self.sess.opts.externs.find_equiv(&self.crate_name) {
Some(s) => s,
None => return None,
};
// First, filter out all libraries that look suspicious. We only accept
// files which actually exist that have the correct naming scheme for
// rlibs/dylibs.
let sess = self.sess;
let dylibname = self.dylibname();
// The filter closure reports an error through `sess.err` for every location
// it rejects. It runs as the `for` loop below consumes the iterator.
let mut locs = locs.iter().map(|l| Path::new(l.as_slice())).filter(|loc| {
if !loc.exists() {
sess.err(format!("extern location does not exist: {}",
loc.display()).as_slice());
return false;
}
// NOTE(review): presumably `filename_str()` is `None` for a path without a
// UTF-8 file name, in which case this `unwrap` would fail — confirm.
let file = loc.filename_str().unwrap();
// rlibs are recognized by the fixed `lib` prefix and `.rlib` suffix;
// dylibs by the prefix/suffix pair from `self.dylibname()`, when present.
if file.starts_with("lib") && file.ends_with(".rlib") {
return true
} else {
match dylibname {
Some((prefix, suffix)) => {
if file.starts_with(prefix) && file.ends_with(suffix) {
return true
}
}
None => {}
}
}
// Neither naming scheme matched.
sess.err(format!("extern location is of an unknown type: {}",
loc.display()).as_slice());
false
});
// Now that we have an iterator of good candidates, make sure there's at
// most one rlib and at most one dylib.
// Collecting into sets means the same path given more than once is stored
// only once.
let mut rlibs = HashSet::new();
let mut dylibs = HashSet::new();
for loc in locs {
// Anything that passed the filter and is not an `.rlib` is treated as a
// dylib.
if loc.filename_str().unwrap().ends_with(".rlib") {
rlibs.insert(loc.clone());
} else {
dylibs.insert(loc.clone());
}
}
// Extract the rlib/dylib pair.
// NOTE(review): `extract_one` is defined outside this view; presumably it
// picks the single candidate of each kind and fills in `metadata` — confirm.
let mut metadata = None;
let rlib = self.extract_one(rlibs, "rlib", &mut metadata);
let dylib = self.extract_one(dylibs, "dylib", &mut metadata);
if rlib.is_none() && dylib.is_none() { return None }
// A `Library` is only produced when metadata was actually obtained.
match metadata {
Some(metadata) => Some(Library {
dylib: dylib,
rlib: rlib,
metadata: metadata,
}),
None => None,
}
}
}
pub fn note_crate_name(diag: &SpanHandler, name: &str) {

View file

@ -2309,7 +2309,7 @@ pub fn trans_crate(krate: ast::Crate,
}
}
let link_meta = link::build_link_meta(&krate, name);
let link_meta = link::build_link_meta(&tcx.sess, &krate, name);
// Append ".rs" to crate name as LLVM module identifier.
//

View file

@ -0,0 +1,24 @@
-include ../tools.mk
# Attempt to build this dependency tree:
#
# A.1 A.2
# |\ |
# | \ |
# B \ C
# \ | /
# \|/
# D
#
# Note that A.1 and A.2 are crates with the same name.
#
# Both copies of `a` are built from a.rs and differ only in their
# `-C metadata` and `-C extra-filename` values. b is pointed at liba-1.rlib and
# c at liba-2.rlib through --extern. d is then built and run twice against
# liba-1.rlib: once with `--cfg before` and once with `--cfg after`.
all:
$(RUSTC) -C metadata=1 -C extra-filename=-1 a.rs
$(RUSTC) -C metadata=2 -C extra-filename=-2 a.rs
$(RUSTC) b.rs --extern a=$(TMPDIR)/liba-1.rlib
$(RUSTC) c.rs --extern a=$(TMPDIR)/liba-2.rlib
$(RUSTC) --cfg before d.rs --extern a=$(TMPDIR)/liba-1.rlib
$(call RUN,d)
$(RUSTC) --cfg after d.rs --extern a=$(TMPDIR)/liba-1.rlib
$(call RUN,d)

View file

@ -0,0 +1,6 @@
#![crate_name = "a"]
#![crate_type = "rlib"]
// Private static; callers only ever see it through the reference returned by
// `token`.
static FOO: uint = 3;
/// Returns a `'static` reference to this crate's `FOO`.
pub fn token() -> &'static uint { &FOO }

View file

@ -0,0 +1,9 @@
#![crate_name = "b"]
#![crate_type = "rlib"]
extern crate a;
// Private static local to this crate.
static FOO: uint = 3;
/// Returns a `'static` reference to this crate's own `FOO`.
pub fn token() -> &'static uint { &FOO }
/// Forwards to `a::token()`, exposing the token of whichever crate `a` this
/// crate was compiled against.
pub fn a_token() -> &'static uint { a::token() }

View file

@ -0,0 +1,9 @@
#![crate_name = "c"]
#![crate_type = "rlib"]
extern crate a;
// Private static local to this crate.
static FOO: uint = 3;
/// Returns a `'static` reference to this crate's own `FOO`.
pub fn token() -> &'static uint { &FOO }
/// Forwards to `a::token()`, exposing the token of whichever crate `a` this
/// crate was compiled against.
pub fn a_token() -> &'static uint { a::token() }

View file

@ -0,0 +1,11 @@
#[cfg(before)] extern crate a;
extern crate b;
extern crate c;
#[cfg(after)] extern crate a;
// Converts a reference into its address as an integer, so that two references
// can be compared by location rather than by value.
fn t(a: &'static uint) -> uint { a as *const _ as uint }
// Checks, by address, which copy of `a` each dependency sees: the token from
// the directly linked `a` must be the same one `b` reports and a different one
// from what `c` reports.
fn main() {
assert!(t(a::token()) == t(b::a_token()));
assert!(t(a::token()) != t(c::a_token()));
}

View file

@ -0,0 +1,16 @@
-include ../tools.mk
# Exercises argument handling for --extern.
#
# bar.rs is first built twice as an rlib, the second time with
# `-C extra-filename=-a`, giving libbar.rlib and libbar-a.rlib.
#
# Commands ending in `&& exit 1 || exit 0` invert the result: the step passes
# only if rustc fails. Those cover a value without `=`, a location that does
# not exist, a location that is not named like a library, and two different
# rlibs given for the same crate name.
#
# The last two commands must succeed: the same rlib given twice, and a single
# rlib.
all:
$(RUSTC) bar.rs --crate-type=rlib
$(RUSTC) bar.rs --crate-type=rlib -C extra-filename=-a
$(RUSTC) foo.rs --extern hello && exit 1 || exit 0
$(RUSTC) foo.rs --extern bar=no-exist && exit 1 || exit 0
$(RUSTC) foo.rs --extern bar=foo.rs && exit 1 || exit 0
$(RUSTC) foo.rs \
--extern bar=$(TMPDIR)/libbar.rlib \
--extern bar=$(TMPDIR)/libbar-a.rlib \
&& exit 1 || exit 0
$(RUSTC) foo.rs \
--extern bar=$(TMPDIR)/libbar.rlib \
--extern bar=$(TMPDIR)/libbar.rlib
$(RUSTC) foo.rs --extern bar=$(TMPDIR)/libbar.rlib

View file

View file

@ -0,0 +1,3 @@
extern crate bar;
// Empty entry point: nothing runs here, so presumably this file only exists to
// exercise how its `extern crate` declaration is resolved — confirm against the
// accompanying Makefile.
fn main() {}

View file

@ -0,0 +1,10 @@
-include ../tools.mk
# Builds foo.rs twice with different `-C metadata` / `-C extra-filename`
# values, producing libfoo-a.rlib and libfoo-b.rlib. bar.rs is then compiled
# with both attached under the distinct names `foo1` and `foo2` via --extern,
# and the resulting binary is run.
all:
$(RUSTC) foo.rs -C metadata=a -C extra-filename=-a
$(RUSTC) foo.rs -C metadata=b -C extra-filename=-b
$(RUSTC) bar.rs \
--extern foo1=$(TMPDIR)/libfoo-a.rlib \
--extern foo2=$(TMPDIR)/libfoo-b.rlib \
-Z print-link-args
$(call RUN,bar)

View file

@ -0,0 +1,8 @@
extern crate foo1;
extern crate foo2;
// Calls `foo()` from both `foo1` and `foo2` and asserts that the two returned
// references point at different addresses.
fn main() {
let a = foo1::foo();
let b = foo2::foo();
// Compare by pointer, not by value.
assert!(a as *const _ != b as *const _);
}

View file

@ -0,0 +1,6 @@
#![crate_name = "foo"]
#![crate_type = "rlib"]
// Private static; callers only ever see it through the reference returned by
// `foo`.
static FOO: uint = 3;
/// Returns a `'static` reference to this crate's `FOO`.
pub fn foo() -> &'static uint { &FOO }