auto merge of #9851 : alexcrichton/rust/include_bin, r=huonw

Previously an ExprLit was created *per byte* causing a huge increase in memory
bloat. This adds a new `lit_binary` to contain a literal of binary data, which
is currently only used by the include_bin! syntax extension. This massively
speeds up compilation times of the shootout-k-nucleotide-pipes test

    before:
        time: 469s
        memory: 6GB
        assertion failure in LLVM (section too large)

    after:
        time: 2.50s
        memory: 124MB

Closes #2598
This commit is contained in:
bors 2013-10-18 09:41:33 -07:00
commit d052912297
11 changed files with 62 additions and 18 deletions

View file

@ -1589,7 +1589,7 @@ explain, here's a few use cases and what they would entail.
* A crate needs a global available "helper module" to itself, but it doesn't
want to expose the helper module as a public API. To accomplish this, the root
of the crate's hierarchy would have a private module which then internally has
a "public api". Because the entire crate is an ancestor of the root, then the
a "public api". Because the entire crate is a descendant of the root, then the
entire local crate can access this private module through the second case.
* When writing unit tests for a module, it's often a common idiom to have an
@ -1602,7 +1602,10 @@ In the second case, it mentions that a private item "can be accessed" by the
current module and its descendants, but the exact meaning of accessing an item
depends on what the item is. Accessing a module, for example, would mean looking
inside of it (to import more items). On the other hand, accessing a function
would mean that it is invoked.
would mean that it is invoked. Additionally, path expressions and import
statements are considered to access an item in the sense that the
import/expression is only valid if the destination is in the current visibility
scope.
Here's an example of a program which exemplifies the three cases outlined above.

View file

@ -297,6 +297,7 @@ pub enum const_val {
const_int(i64),
const_uint(u64),
const_str(@str),
const_binary(@[u8]),
const_bool(bool)
}
@ -476,6 +477,7 @@ pub fn eval_const_expr_partial<T: ty::ExprTyProvider>(tcx: &T, e: &Expr)
pub fn lit_to_const(lit: &lit) -> const_val {
match lit.node {
lit_str(s, _) => const_str(s),
lit_binary(data) => const_binary(data),
lit_char(n) => const_uint(n as u64),
lit_int(n, _) => const_int(n),
lit_uint(n, _) => const_uint(n),

View file

@ -917,6 +917,24 @@ pub fn C_estr_slice(cx: &mut CrateContext, s: @str) -> ValueRef {
}
}
pub fn C_binary_slice(cx: &mut CrateContext, data: &[u8]) -> ValueRef {
unsafe {
let len = data.len();
let lldata = C_bytes(data);
let gsym = token::gensym("binary");
let g = do format!("binary{}", gsym).with_c_str |buf| {
llvm::LLVMAddGlobal(cx.llmod, val_ty(lldata).to_ref(), buf)
};
llvm::LLVMSetInitializer(g, lldata);
llvm::LLVMSetGlobalConstant(g, True);
lib::llvm::SetLinkage(g, lib::llvm::InternalLinkage);
let cs = llvm::LLVMConstPointerCast(g, Type::i8p().to_ref());
C_struct([cs, C_uint(cx, len)], false)
}
}
pub fn C_zero_byte_arr(size: uint) -> ValueRef {
unsafe {
let mut i = 0u;

View file

@ -71,7 +71,8 @@ pub fn const_lit(cx: &mut CrateContext, e: &ast::Expr, lit: ast::lit)
}
ast::lit_bool(b) => C_bool(b),
ast::lit_nil => C_nil(),
ast::lit_str(s, _) => C_estr_slice(cx, s)
ast::lit_str(s, _) => C_estr_slice(cx, s),
ast::lit_binary(data) => C_binary_slice(cx, data),
}
}

View file

@ -4442,6 +4442,12 @@ pub fn eval_repeat_count<T: ExprTyProvider>(tcx: &T, count_expr: &ast::Expr) ->
repeat count but found boolean");
return 0;
}
const_eval::const_binary(_) => {
tcx.ty_ctxt().sess.span_err(count_expr.span,
"expected positive integer for \
repeat count but found binary array");
return 0;
}
},
Err(*) => {
tcx.ty_ctxt().sess.span_err(count_expr.span,

View file

@ -1020,6 +1020,10 @@ pub fn check_lit(fcx: @mut FnCtxt, lit: @ast::lit) -> ty::t {
match lit.node {
ast::lit_str(*) => ty::mk_estr(tcx, ty::vstore_slice(ty::re_static)),
ast::lit_binary(*) => {
ty::mk_evec(tcx, ty::mt{ ty: ty::mk_u8(), mutbl: ast::MutImmutable },
ty::vstore_slice(ty::re_static))
}
ast::lit_char(_) => ty::mk_char(),
ast::lit_int(_, t) => ty::mk_mach_int(t),
ast::lit_uint(_, t) => ty::mk_mach_uint(t),

View file

@ -1121,6 +1121,7 @@ impl ToSource for syntax::codemap::Span {
fn lit_to_str(lit: &ast::lit) -> ~str {
match lit.node {
ast::lit_str(st, _) => st.to_owned(),
ast::lit_binary(data) => format!("{:?}", data.as_slice()),
ast::lit_char(c) => ~"'" + std::char::from_u32(c).unwrap().to_str() + "'",
ast::lit_int(i, _t) => i.to_str(),
ast::lit_uint(u, _t) => u.to_str(),

View file

@ -691,6 +691,7 @@ pub type lit = Spanned<lit_>;
#[deriving(Clone, Eq, Encodable, Decodable, IterBytes)]
pub enum lit_ {
lit_str(@str, StrStyle),
lit_binary(@[u8]),
lit_char(u32),
lit_int(i64, int_ty),
lit_uint(u64, uint_ty),

View file

@ -101,16 +101,19 @@ pub fn expand_include_str(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree])
}
pub fn expand_include_bin(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree])
-> base::MacResult {
-> base::MacResult
{
use std::at_vec;
let file = get_single_str_from_tts(cx, sp, tts, "include_bin!");
match io::read_whole_file(&res_rel_file(cx, sp, &Path::new(file))) {
result::Ok(src) => {
let u8_exprs: ~[@ast::Expr] = src.iter().map(|char| cx.expr_u8(sp, *char)).collect();
base::MRExpr(cx.expr_vec(sp, u8_exprs))
}
result::Err(ref e) => {
cx.parse_sess().span_diagnostic.handler().fatal((*e))
}
result::Ok(src) => {
let v = at_vec::to_managed_move(src);
base::MRExpr(cx.expr_lit(sp, ast::lit_binary(v)))
}
result::Err(ref e) => {
cx.parse_sess().span_diagnostic.handler().fatal((*e))
}
}
}

View file

@ -2092,6 +2092,14 @@ pub fn print_literal(s: @ps, lit: &ast::lit) {
ast::lit_bool(val) => {
if val { word(s.s, "true"); } else { word(s.s, "false"); }
}
ast::lit_binary(arr) => {
ibox(s, indent_unit);
word(s.s, "[");
commasep_cmnt(s, inconsistent, arr, |s, u| word(s.s, format!("{}", *u)),
|_| lit.span);
word(s.s, "]");
end(s);
}
}
}

View file

@ -156,17 +156,14 @@ fn make_sequence_processor(sz: uint,
// given a FASTA file on stdin, process sequence THREE
fn main() {
use std::rt::io::{Reader, Open};
use std::rt::io::file::FileInfo;
use std::rt::io::Reader;
use std::rt::io::native::stdio;
use std::rt::io::mem::MemReader;
use std::rt::io::buffered::BufferedReader;
let rdr = if os::getenv("RUST_BENCH").is_some() {
// FIXME: Using this compile-time env variable is a crummy way to
// get to this massive data set, but include_bin! chokes on it (#2598)
let mut path = Path::new(env!("CFG_SRC_DIR"));
path.push("src/test/bench/shootout-k-nucleotide.data");
~path.open_reader(Open).unwrap() as ~Reader
let foo = include_bin!("shootout-k-nucleotide.data");
~MemReader::new(foo.to_owned()) as ~Reader
} else {
~stdio::stdin() as ~Reader
};