Auto merge of #118636 - h1467792822:dev, r=michaelwoerister
Add the unstable option to reduce the binary size of dynamic library… # Motivation The average length of symbol names in the Rust standard library is about 100 bytes, while the average length of symbol names in the C++ standard library is about 65 bytes. In some embedded environments where dynamic libraries are widely used, the space taken by Rust dynamic library symbol names has become one of the key bottlenecks of applications, especially when an existing C/C++ module is rewritten as a Rust module. The unstable value `-C symbol-mangling-version=hashed` (which requires `-Z unstable-options`) is added to solve the bottleneck caused by overly long dynamic library symbol names. ## Test data The following is a set of test data collected in an Ubuntu 18.04 LTS environment. With this option, the space saving rate of dynamic libraries can reach about 20%. The test objects are the Rust standard library (built with Xargo), the tokio crate, and the hyper crate. The contents of the Cargo.toml file in the build project of the three dynamic libraries are as follows: ```txt # Cargo.toml [profile.release] panic = "abort" opt-level="z" codegen-units=1 strip=true debug=true ``` The `.rustc` section, which is not needed at run time, is also removed from each built dynamic library before the sizes are compared. The detailed data is as follows: 1. libstd.so > | symbol_mangling_version | size | saving rate | > | --- | --- | --- | > | legacy | 804896 || > | hashed | 608288 | 0.244 | > | v0 | 858144 || > | hashed | 608288 | 0.291 | 2. libhyper.so > | symbol_mangling_version(libhyper.so) | symbol_mangling_version(libstd.so) | size | saving rate | > | --- | --- | --- | --- | > | legacy | legacy | 866312 || > | hashed | legacy | 645128 |0.255| > | legacy | hashed | 854024 || > | hashed | hashed | 632840 |0.259|
This commit is contained in:
commit
04521fd10e
15 changed files with 211 additions and 49 deletions
|
@ -347,6 +347,7 @@ impl SwitchWithOptPath {
|
|||
pub enum SymbolManglingVersion {
|
||||
Legacy,
|
||||
V0,
|
||||
Hashed,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Hash)]
|
||||
|
@ -2692,6 +2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
|
|||
match cg.symbol_mangling_version {
|
||||
// Stable values:
|
||||
None | Some(SymbolManglingVersion::V0) => {}
|
||||
|
||||
// Unstable values:
|
||||
Some(SymbolManglingVersion::Legacy) => {
|
||||
if !unstable_opts.unstable_options {
|
||||
|
@ -2700,6 +2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
|
|||
);
|
||||
}
|
||||
}
|
||||
Some(SymbolManglingVersion::Hashed) => {
|
||||
if !unstable_opts.unstable_options {
|
||||
early_dcx.early_fatal(
|
||||
"`-C symbol-mangling-version=hashed` requires `-Z unstable-options`",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for unstable values of `-C instrument-coverage`.
|
||||
|
@ -2741,6 +2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
|
|||
);
|
||||
}
|
||||
Some(SymbolManglingVersion::V0) => {}
|
||||
Some(SymbolManglingVersion::Hashed) => {
|
||||
early_dcx.early_warn(
|
||||
"-C instrument-coverage requires symbol mangling version `v0`, \
|
||||
but `-C symbol-mangling-version=hashed` was specified",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -407,7 +407,8 @@ mod desc {
|
|||
pub const parse_switch_with_opt_path: &str =
|
||||
"an optional path to the profiling data output directory";
|
||||
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
|
||||
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
|
||||
pub const parse_symbol_mangling_version: &str =
|
||||
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
|
||||
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
|
||||
pub const parse_relocation_model: &str =
|
||||
"one of supported relocation models (`rustc --print relocation-models`)";
|
||||
|
@ -1180,6 +1181,7 @@ mod parse {
|
|||
*slot = match v {
|
||||
Some("legacy") => Some(SymbolManglingVersion::Legacy),
|
||||
Some("v0") => Some(SymbolManglingVersion::V0),
|
||||
Some("hashed") => Some(SymbolManglingVersion::Hashed),
|
||||
_ => return false,
|
||||
};
|
||||
true
|
||||
|
@ -1504,7 +1506,7 @@ options! {
|
|||
"tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"),
|
||||
symbol_mangling_version: Option<SymbolManglingVersion> = (None,
|
||||
parse_symbol_mangling_version, [TRACKED],
|
||||
"which mangling version to use for symbol names ('legacy' (default) or 'v0')"),
|
||||
"which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"),
|
||||
target_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
|
||||
"select target processor (`rustc --print target-cpus` for details)"),
|
||||
target_feature: String = (String::new(), parse_target_feature, [TRACKED],
|
||||
|
|
43
compiler/rustc_symbol_mangling/src/hashed.rs
Normal file
43
compiler/rustc_symbol_mangling/src/hashed.rs
Normal file
|
@ -0,0 +1,43 @@
|
|||
use crate::v0;
|
||||
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
|
||||
use rustc_hir::def_id::CrateNum;
|
||||
use rustc_middle::ty::{Instance, TyCtxt};
|
||||
|
||||
use std::fmt::Write;
|
||||
|
||||
pub(super) fn mangle<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
instance: Instance<'tcx>,
|
||||
instantiating_crate: Option<CrateNum>,
|
||||
full_mangling_name: impl FnOnce() -> String,
|
||||
) -> String {
|
||||
// The symbol of a generic function may be scattered in multiple downstream dylibs.
|
||||
// If the symbol of a generic function still contains `crate name`, hash conflicts between the
|
||||
// generic funcion and other symbols of the same `crate` cannot be detected in time during
|
||||
// construction. This symbol conflict is left over until it occurs during run time.
|
||||
// In this case, `instantiating-crate name` is used to replace `crate name` can completely
|
||||
// eliminate the risk of the preceding potential hash conflict.
|
||||
let crate_num =
|
||||
if let Some(krate) = instantiating_crate { krate } else { instance.def_id().krate };
|
||||
|
||||
let mut symbol = "_RNxC".to_string();
|
||||
v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol);
|
||||
|
||||
let hash = tcx.with_stable_hashing_context(|mut hcx| {
|
||||
let mut hasher = StableHasher::new();
|
||||
full_mangling_name().hash_stable(&mut hcx, &mut hasher);
|
||||
hasher.finish::<Hash64>().as_u64()
|
||||
});
|
||||
|
||||
push_hash64(hash, &mut symbol);
|
||||
|
||||
symbol
|
||||
}
|
||||
|
||||
// The hash is encoded based on `base-62` and the final terminator `_` is removed because it does
|
||||
// not help prevent hash collisions
|
||||
fn push_hash64(hash: u64, output: &mut String) {
|
||||
let hash = v0::encode_integer_62(hash);
|
||||
let hash_len = hash.len();
|
||||
let _ = write!(output, "{hash_len}H{}", &hash[..hash_len - 1]);
|
||||
}
|
|
@ -109,6 +109,7 @@ use rustc_middle::query::Providers;
|
|||
use rustc_middle::ty::{self, Instance, TyCtxt};
|
||||
use rustc_session::config::SymbolManglingVersion;
|
||||
|
||||
mod hashed;
|
||||
mod legacy;
|
||||
mod v0;
|
||||
|
||||
|
@ -263,6 +264,9 @@ fn compute_symbol_name<'tcx>(
|
|||
let symbol = match mangling_version {
|
||||
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
|
||||
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
|
||||
SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
|
||||
v0::mangle(tcx, instance, instantiating_crate)
|
||||
}),
|
||||
};
|
||||
|
||||
debug_assert!(
|
||||
|
|
|
@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
|
|||
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
|
||||
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
|
||||
fn push_integer_62(&mut self, x: u64) {
|
||||
if let Some(x) = x.checked_sub(1) {
|
||||
base_n::push_str(x as u128, 62, &mut self.out);
|
||||
}
|
||||
self.push("_");
|
||||
push_integer_62(x, &mut self.out)
|
||||
}
|
||||
|
||||
/// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
|
||||
|
@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
|
|||
}
|
||||
|
||||
fn push_ident(&mut self, ident: &str) {
|
||||
let mut use_punycode = false;
|
||||
for b in ident.bytes() {
|
||||
match b {
|
||||
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
|
||||
0x80..=0xff => use_punycode = true,
|
||||
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
|
||||
}
|
||||
}
|
||||
|
||||
let punycode_string;
|
||||
let ident = if use_punycode {
|
||||
self.push("u");
|
||||
|
||||
// FIXME(eddyb) we should probably roll our own punycode implementation.
|
||||
let mut punycode_bytes = match punycode::encode(ident) {
|
||||
Ok(s) => s.into_bytes(),
|
||||
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
|
||||
};
|
||||
|
||||
// Replace `-` with `_`.
|
||||
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
|
||||
*c = b'_';
|
||||
}
|
||||
|
||||
// FIXME(eddyb) avoid rechecking UTF-8 validity.
|
||||
punycode_string = String::from_utf8(punycode_bytes).unwrap();
|
||||
&punycode_string
|
||||
} else {
|
||||
ident
|
||||
};
|
||||
|
||||
let _ = write!(self.out, "{}", ident.len());
|
||||
|
||||
// Write a separating `_` if necessary (leading digit or `_`).
|
||||
if let Some('_' | '0'..='9') = ident.chars().next() {
|
||||
self.push("_");
|
||||
}
|
||||
|
||||
self.push(ident);
|
||||
push_ident(ident, &mut self.out)
|
||||
}
|
||||
|
||||
fn path_append_ns(
|
||||
|
@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
/// Push a `_`-terminated base 62 integer, using the format
|
||||
/// specified in the RFC as `<base-62-number>`, that is:
|
||||
/// * `x = 0` is encoded as just the `"_"` terminator
|
||||
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
|
||||
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
|
||||
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
|
||||
if let Some(x) = x.checked_sub(1) {
|
||||
base_n::push_str(x as u128, 62, output);
|
||||
}
|
||||
output.push('_');
|
||||
}
|
||||
|
||||
pub(crate) fn encode_integer_62(x: u64) -> String {
|
||||
let mut output = String::new();
|
||||
push_integer_62(x, &mut output);
|
||||
output
|
||||
}
|
||||
|
||||
pub(crate) fn push_ident(ident: &str, output: &mut String) {
|
||||
let mut use_punycode = false;
|
||||
for b in ident.bytes() {
|
||||
match b {
|
||||
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
|
||||
0x80..=0xff => use_punycode = true,
|
||||
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
|
||||
}
|
||||
}
|
||||
|
||||
let punycode_string;
|
||||
let ident = if use_punycode {
|
||||
output.push('u');
|
||||
|
||||
// FIXME(eddyb) we should probably roll our own punycode implementation.
|
||||
let mut punycode_bytes = match punycode::encode(ident) {
|
||||
Ok(s) => s.into_bytes(),
|
||||
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
|
||||
};
|
||||
|
||||
// Replace `-` with `_`.
|
||||
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
|
||||
*c = b'_';
|
||||
}
|
||||
|
||||
// FIXME(eddyb) avoid rechecking UTF-8 validity.
|
||||
punycode_string = String::from_utf8(punycode_bytes).unwrap();
|
||||
&punycode_string
|
||||
} else {
|
||||
ident
|
||||
};
|
||||
|
||||
let _ = write!(output, "{}", ident.len());
|
||||
|
||||
// Write a separating `_` if necessary (leading digit or `_`).
|
||||
if let Some('_' | '0'..='9') = ident.chars().next() {
|
||||
output.push('_');
|
||||
}
|
||||
|
||||
output.push_str(ident);
|
||||
}
|
||||
|
|
48
tests/run-make/symbol-mangling-hashed/Makefile
Normal file
48
tests/run-make/symbol-mangling-hashed/Makefile
Normal file
|
@ -0,0 +1,48 @@
|
|||
include ../tools.mk
|
||||
|
||||
# ignore-cross-compile
|
||||
# only-linux
|
||||
# only-x86_64
|
||||
|
||||
NM=nm -D
|
||||
RLIB_NAME=liba_rlib.rlib
|
||||
DYLIB_NAME=liba_dylib.so
|
||||
SO_NAME=libb_dylib.so
|
||||
BIN_NAME=b_bin
|
||||
|
||||
ifeq ($(UNAME),Darwin)
|
||||
NM=nm -gU
|
||||
RLIB_NAME=liba_rlib.rlib
|
||||
DYLIB_NAME=liba_dylib.dylib
|
||||
SO_NAME=libb_dylib.dylib
|
||||
BIN_NAME=b_bin
|
||||
endif
|
||||
|
||||
ifdef IS_WINDOWS
|
||||
NM=nm -g
|
||||
RLIB_NAME=liba_rlib.dll.a
|
||||
DYLIB_NAME=liba_dylib.dll
|
||||
SO_NAME=libb_dylib.dll
|
||||
BIN_NAME=b_bin.exe
|
||||
endif
|
||||
|
||||
all:
|
||||
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs
|
||||
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs
|
||||
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs
|
||||
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs
|
||||
|
||||
# Check hashed symbol name
|
||||
|
||||
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ]
|
||||
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ]
|
||||
|
||||
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ]
|
||||
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ]
|
||||
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
|
||||
|
||||
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ]
|
||||
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
|
||||
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ]
|
||||
|
||||
$(call RUN,$(BIN_NAME))
|
4
tests/run-make/symbol-mangling-hashed/a_dylib.rs
Normal file
4
tests/run-make/symbol-mangling-hashed/a_dylib.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
#![crate_type="dylib"]
|
||||
pub fn hello() {
|
||||
println!("hello dylib");
|
||||
}
|
5
tests/run-make/symbol-mangling-hashed/a_rlib.rs
Normal file
5
tests/run-make/symbol-mangling-hashed/a_rlib.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
#![crate_type="rlib"]
|
||||
|
||||
pub fn hello() {
|
||||
println!("hello rlib");
|
||||
}
|
9
tests/run-make/symbol-mangling-hashed/b_bin.rs
Normal file
9
tests/run-make/symbol-mangling-hashed/b_bin.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
extern crate a_rlib;
|
||||
extern crate a_dylib;
|
||||
extern crate b_dylib;
|
||||
|
||||
fn main() {
|
||||
a_rlib::hello();
|
||||
a_dylib::hello();
|
||||
b_dylib::hello();
|
||||
}
|
9
tests/run-make/symbol-mangling-hashed/b_dylib.rs
Normal file
9
tests/run-make/symbol-mangling-hashed/b_dylib.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
#![crate_type="dylib"]
|
||||
|
||||
extern crate a_rlib;
|
||||
extern crate a_dylib;
|
||||
|
||||
pub fn hello() {
|
||||
a_rlib::hello();
|
||||
a_dylib::hello();
|
||||
}
|
|
@ -1,2 +1,2 @@
|
|||
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
|
||||
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
|
||||
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
|
||||
error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
|
||||
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=<value>)
|
||||
error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=<value>)
|
||||
|
||||
|
|
2
tests/ui/symbol-mangling-version/unstable.hashed.stderr
Normal file
2
tests/ui/symbol-mangling-version/unstable.hashed.stderr
Normal file
|
@ -0,0 +1,2 @@
|
|||
error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options`
|
||||
|
|
@ -1,6 +1,9 @@
|
|||
// revisions: legacy legacy-ok
|
||||
// revisions: legacy legacy-ok hashed hashed-ok
|
||||
// [legacy] compile-flags: -Csymbol-mangling-version=legacy
|
||||
// [legacy-ok] check-pass
|
||||
// [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
|
||||
// [hashed] compile-flags: -Csymbol-mangling-version=hashed
|
||||
// [hashed-ok] check-pass
|
||||
// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed
|
||||
|
||||
fn main() {}
|
||||
|
|
Loading…
Add table
Reference in a new issue