Stabilize the size of incr comp object file names

This commit is contained in:
Ben Kimock 2024-04-03 14:09:21 -04:00
parent 290d792411
commit 6ee3713b08
9 changed files with 146 additions and 71 deletions

View file

@ -6,7 +6,8 @@ use gccjit::{
use rustc_codegen_ssa::base::wants_msvc_seh;
use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods};
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::mir::mono::CodegenUnit;
use rustc_middle::span_bug;
@ -621,7 +622,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix);
name.push_str(".");
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
name
}
}

View file

@ -11,7 +11,8 @@ use crate::value::Value;
use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId;
@ -1015,7 +1016,7 @@ impl CodegenCx<'_, '_> {
let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix);
name.push('.');
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
name
}
}

View file

@ -1,6 +1,7 @@
/// Converts unsigned integers into a string representation with some base.
/// Bases up to and including 36 can be used for case-insensitive things.
use std::str;
use std::ascii;
use std::fmt;
#[cfg(test)]
mod tests;
@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
pub const ALPHANUMERIC_ONLY: usize = 62;
pub const CASE_INSENSITIVE: usize = 36;
const BASE_64: &[u8; MAX_BASE] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
const BASE_64: [ascii::Char; MAX_BASE] = {
let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
let Some(ascii) = bytes.as_ascii() else { panic!() };
*ascii
};
#[inline]
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
debug_assert!(base >= 2 && base <= MAX_BASE);
let mut s = [0u8; 128];
let mut index = s.len();
pub struct BaseNString {
start: usize,
buf: [ascii::Char; 128],
}
let base = base as u128;
impl std::ops::Deref for BaseNString {
type Target = str;
loop {
index -= 1;
s[index] = BASE_64[(n % base) as usize];
n /= base;
fn deref(&self) -> &str {
self.buf[self.start..].as_str()
}
}
if n == 0 {
break;
}
impl AsRef<str> for BaseNString {
fn as_ref(&self) -> &str {
self.buf[self.start..].as_str()
}
}
impl fmt::Display for BaseNString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self)
}
}
// This trait just lets us reserve the exact right amount of space when doing fixed-length
// case-insensitve encoding. Add any impls you need.
pub trait ToBaseN: Into<u128> {
fn encoded_len(base: usize) -> usize;
fn to_base_fixed_len(self, base: usize) -> BaseNString {
let mut encoded = self.to_base(base);
encoded.start = encoded.buf.len() - Self::encoded_len(base);
encoded
}
output.push_str(unsafe {
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
str::from_utf8_unchecked(&s[index..])
});
fn to_base(self, base: usize) -> BaseNString {
let mut output = [ascii::Char::Digit0; 128];
let mut n: u128 = self.into();
let mut index = output.len();
loop {
index -= 1;
output[index] = BASE_64[(n % base as u128) as usize];
n /= base as u128;
if n == 0 {
break;
}
}
assert_eq!(n, 0);
BaseNString { start: index, buf: output }
}
}
#[inline]
pub fn encode(n: u128, base: usize) -> String {
let mut s = String::new();
push_str(n, base, &mut s);
s
impl ToBaseN for u128 {
fn encoded_len(base: usize) -> usize {
let mut max = u128::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u128;
}
len
}
}
impl ToBaseN for u64 {
fn encoded_len(base: usize) -> usize {
let mut max = u64::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u64;
}
len
}
}
impl ToBaseN for u32 {
fn encoded_len(base: usize) -> usize {
let mut max = u32::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u32;
}
len
}
}

View file

@ -1,9 +1,17 @@
use super::*;
#[test]
fn test_encode() {
fn limits() {
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
}
#[test]
fn test_to_base() {
fn test(n: u128, base: usize) {
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
}
for base in 2..37 {

View file

@ -16,6 +16,8 @@
#![doc(rust_logo)]
#![feature(allocator_api)]
#![feature(array_windows)]
#![feature(ascii_char)]
#![feature(ascii_char_variants)]
#![feature(auto_traits)]
#![feature(cfg_match)]
#![feature(core_intrinsics)]

View file

@ -104,10 +104,14 @@
//! implemented.
use crate::errors;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::flock;
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
use rustc_data_structures::svh::Svh;
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_data_structures::{base_n, flock};
use rustc_errors::ErrorGuaranteed;
use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy};
use rustc_session::config::CrateType;
@ -332,31 +336,24 @@ pub fn finalize_session_directory(sess: &Session, svh: Option<Svh>) {
debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
let old_sub_dir_name = incr_comp_session_dir
let mut sub_dir_name = incr_comp_session_dir
.file_name()
.unwrap()
.to_str()
.expect("malformed session dir name: contains non-Unicode characters");
.expect("malformed session dir name: contains non-Unicode characters")
.to_string();
// Keep the 's-{timestamp}-{random-number}' prefix, but replace the
// '-working' part with the SVH of the crate
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
if dash_indices.len() != 3 {
bug!(
"Encountered incremental compilation session directory with \
malformed name: {}",
incr_comp_session_dir.display()
)
}
// Keep the 's-{timestamp}-{random-number}' prefix, but replace "working" with the SVH of the crate
sub_dir_name.truncate(sub_dir_name.len() - "working".len());
// Double-check that we kept this: "s-{timestamp}-{random-number}-"
assert!(sub_dir_name.ends_with('-'), "{:?}", sub_dir_name);
assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3);
// State: "s-{timestamp}-{random-number}-"
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
// Append the svh
base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name);
// Append the SVH
sub_dir_name.push_str(&svh.as_u128().to_base_fixed_len(CASE_INSENSITIVE));
// Create the full path
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name);
debug!("finalize_session_directory() - new path: {}", new_path.display());
match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
@ -452,11 +449,11 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
let random_number = thread_rng().next_u32();
debug!("generate_session_dir_path: random_number = {}", random_number);
let directory_name = format!(
"s-{}-{}-working",
timestamp,
base_n::encode(random_number as u128, INT_ENCODE_BASE)
);
// Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while.
let (zeroes, timestamp) = timestamp.split_at(3);
assert_eq!(zeroes, "000");
let directory_name =
format!("s-{}-{}-working", timestamp, random_number.to_base_fixed_len(CASE_INSENSITIVE));
debug!("generate_session_dir_path: directory_name = {}", directory_name);
let directory_path = crate_dir.join(directory_name);
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
@ -587,10 +584,10 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime
string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
}
fn timestamp_to_string(timestamp: SystemTime) -> String {
fn timestamp_to_string(timestamp: SystemTime) -> BaseNString {
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
base_n::encode(micros as u128, INT_ENCODE_BASE)
micros.to_base_fixed_len(CASE_INSENSITIVE)
}
fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> {
@ -621,9 +618,8 @@ fn crate_path(sess: &Session) -> PathBuf {
sess.cfg_version,
);
let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE);
let crate_name = format!("{crate_name}-{stable_crate_id}");
let crate_name =
format!("{crate_name}-{}", stable_crate_id.as_u64().to_base_fixed_len(CASE_INSENSITIVE));
incr_dir.join(crate_name)
}

View file

@ -1,7 +1,9 @@
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt};
use rustc_attr::InlineAttr;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxIndexMap;
@ -334,14 +336,11 @@ impl<'tcx> CodegenUnit<'tcx> {
self.is_code_coverage_dead_code_cgu = true;
}
pub fn mangle_name(human_readable_name: &str) -> String {
// We generate a 80 bit hash from the name. This should be enough to
// avoid collisions and is still reasonably short for filenames.
pub fn mangle_name(human_readable_name: &str) -> BaseNString {
let mut hasher = StableHasher::new();
human_readable_name.hash(&mut hasher);
let hash: Hash128 = hasher.finish();
let hash = hash.as_u128() & ((1u128 << 80) - 1);
base_n::encode(hash, base_n::CASE_INSENSITIVE)
hash.as_u128().to_base_fixed_len(CASE_INSENSITIVE)
}
pub fn compute_size_estimate(&mut self) {

View file

@ -4,7 +4,9 @@
//!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653.
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fx::FxHashMap;
use rustc_hir as hir;
use rustc_middle::bug;
@ -736,7 +738,7 @@ fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
fn to_disambiguator(num: u64) -> String {
if let Some(num) = num.checked_sub(1) {
format!("s{}_", base_n::encode(num as u128, base_n::ALPHANUMERIC_ONLY))
format!("s{}_", num.to_base(ALPHANUMERIC_ONLY))
} else {
"s_".to_string()
}
@ -746,7 +748,7 @@ fn to_disambiguator(num: u64) -> String {
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
fn to_seq_id(num: usize) -> String {
if let Some(num) = num.checked_sub(1) {
base_n::encode(num as u128, base_n::CASE_INSENSITIVE).to_uppercase()
(num as u64).to_base(CASE_INSENSITIVE).to_uppercase()
} else {
"".to_string()
}

View file

@ -1,4 +1,4 @@
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::intern::Interned;
use rustc_hir as hir;
@ -831,7 +831,7 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
if let Some(x) = x.checked_sub(1) {
base_n::push_str(x as u128, base_n::ALPHANUMERIC_ONLY, output);
output.push_str(&x.to_base(62));
}
output.push('_');
}