Rollup merge of #109124 - ferrocene:pa-compression-mode, r=Mark-Simulacrum
Add `dist.compression-profile` option to control compression speed PR #108534 reduced the size of compressed archives, but (as expected) it also resulted in way longer compression times and memory usage during compression. It's desirable to keep the status quo (smaller archives but more CI usage), but it should also be configurable so that downstream users don't have to waste that much time on CI. As a data point, this resulted in doubling the time of Ferrocene's dist jobs, and required us to increase the RAM allocation for one of those jobs. This PR adds a new `config.toml` setting, `dist.compression-profile`. The values can be: * `fast`: equivalent to the gzip and xz preset of "1" * `balanced`: equivalent to the gzip and xz preset of "6" (the CLI defaults as far as I'm aware) * `best`: equivalent to the gzip preset of "9", and our custom xz profile The default has also been moved back to `balanced`, to try and avoid the compression time regression for downstream users. I don't feel too strongly on the default, and I'm open to changing it. Also, for the `best` profile the XZ settings do not match the "9" preset used by the CLI, and it might be confusing. Should we create a `custom-rustc-ci`/`ultra` profile for that? r? ``@Mark-Simulacrum``
This commit is contained in:
commit
09b1254eb2
9 changed files with 123 additions and 43 deletions
|
@ -806,3 +806,9 @@ changelog-seen = 2
|
|||
#
|
||||
# This list must be non-empty.
|
||||
#compression-formats = ["gz", "xz"]
|
||||
|
||||
# How much time should be spent compressing the tarballs. The better the
|
||||
# compression profile, the longer compression will take.
|
||||
#
|
||||
# Available options: fast, balanced, best
|
||||
#compression-profile = "fast"
|
||||
|
|
|
@ -191,6 +191,7 @@ pub struct Config {
|
|||
pub dist_sign_folder: Option<PathBuf>,
|
||||
pub dist_upload_addr: Option<String>,
|
||||
pub dist_compression_formats: Option<Vec<String>>,
|
||||
pub dist_compression_profile: String,
|
||||
pub dist_include_mingw_linker: bool,
|
||||
|
||||
// libstd features
|
||||
|
@ -703,6 +704,7 @@ define_config! {
|
|||
src_tarball: Option<bool> = "src-tarball",
|
||||
missing_tools: Option<bool> = "missing-tools",
|
||||
compression_formats: Option<Vec<String>> = "compression-formats",
|
||||
compression_profile: Option<String> = "compression-profile",
|
||||
include_mingw_linker: Option<bool> = "include-mingw-linker",
|
||||
}
|
||||
}
|
||||
|
@ -821,6 +823,7 @@ impl Config {
|
|||
config.deny_warnings = true;
|
||||
config.bindir = "bin".into();
|
||||
config.dist_include_mingw_linker = true;
|
||||
config.dist_compression_profile = "fast".into();
|
||||
|
||||
// set by build.rs
|
||||
config.build = TargetSelection::from_user(&env!("BUILD_TRIPLE"));
|
||||
|
@ -1308,6 +1311,7 @@ impl Config {
|
|||
config.dist_sign_folder = t.sign_folder.map(PathBuf::from);
|
||||
config.dist_upload_addr = t.upload_addr;
|
||||
config.dist_compression_formats = t.compression_formats;
|
||||
set(&mut config.dist_compression_profile, t.compression_profile);
|
||||
set(&mut config.rust_dist_src, t.src_tarball);
|
||||
set(&mut config.missing_tools, t.missing_tools);
|
||||
set(&mut config.dist_include_mingw_linker, t.include_mingw_linker)
|
||||
|
|
|
@ -11,3 +11,7 @@ extended = true
|
|||
[llvm]
|
||||
# Most users installing from source want to build all parts of the project from source, not just rustc itself.
|
||||
download-ci-llvm = false
|
||||
|
||||
[dist]
|
||||
# Use better compression when preparing tarballs.
|
||||
compression-profile = "balanced"
|
||||
|
|
|
@ -318,6 +318,7 @@ impl<'a> Tarball<'a> {
|
|||
assert!(!formats.is_empty(), "dist.compression-formats can't be empty");
|
||||
cmd.arg("--compression-formats").arg(formats.join(","));
|
||||
}
|
||||
cmd.args(&["--compression-profile", &self.builder.config.dist_compression_profile]);
|
||||
self.builder.run(&mut cmd);
|
||||
|
||||
// Ensure there are no symbolic links in the tarball. In particular,
|
||||
|
|
|
@ -58,6 +58,7 @@ RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-manage-submodules"
|
|||
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-locked-deps"
|
||||
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-cargo-native-static"
|
||||
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set rust.codegen-units-std=1"
|
||||
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set dist.compression-profile=best"
|
||||
|
||||
# Only produce xz tarballs on CI. gz tarballs will be generated by the release
|
||||
# process by recompressing the existing xz ones. This decreases the storage
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use super::Scripter;
|
||||
use super::Tarballer;
|
||||
use crate::{
|
||||
compression::{CompressionFormat, CompressionFormats},
|
||||
compression::{CompressionFormat, CompressionFormats, CompressionProfile},
|
||||
util::*,
|
||||
};
|
||||
use anyhow::{bail, Context, Result};
|
||||
|
@ -48,6 +48,10 @@ actor! {
|
|||
#[clap(value_name = "DIR")]
|
||||
output_dir: String = "./dist",
|
||||
|
||||
/// The profile used to compress the tarball.
|
||||
#[clap(value_name = "FORMAT", default_value_t)]
|
||||
compression_profile: CompressionProfile,
|
||||
|
||||
/// The formats used to compress the tarball
|
||||
#[clap(value_name = "FORMAT", default_value_t)]
|
||||
compression_formats: CompressionFormats,
|
||||
|
@ -153,6 +157,7 @@ impl Combiner {
|
|||
.work_dir(self.work_dir)
|
||||
.input(self.package_name)
|
||||
.output(path_to_str(&output)?.into())
|
||||
.compression_profile(self.compression_profile)
|
||||
.compression_formats(self.compression_formats.clone());
|
||||
tarballer.run()?;
|
||||
|
||||
|
|
|
@ -4,6 +4,37 @@ use rayon::prelude::*;
|
|||
use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr};
|
||||
use xz2::{read::XzDecoder, write::XzEncoder};
|
||||
|
||||
/// How much effort the encoders spend when compressing a tarball.
///
/// The profile is passed to `CompressionFormat::encode`, which maps each
/// variant to concrete gzip and xz settings. `Balanced` is the `Default`.
#[derive(Default, Debug, Copy, Clone)]
|
||||
pub enum CompressionProfile {
|
||||
/// Quickest profile: gzip uses `flate2::Compression::fast()` and xz uses preset 1.
Fast,
|
||||
/// Middle ground: gzip uses level 6 and xz uses preset 6.
#[default]
|
||||
Balanced,
|
||||
/// Smallest output: gzip uses `flate2::Compression::best()` and xz uses a
/// hand-tuned LZMA2 filter chain (64 MiB dictionary, `BinaryTree4` match
/// finder, `nice_len` 273, depth 1000) instead of a numeric preset.
Best,
|
||||
}
|
||||
|
||||
/// Parses a profile from its lowercase name: `fast`, `balanced` or `best`.
///
/// Matching is exact, so differently-cased or padded input is rejected.
impl FromStr for CompressionProfile {
|
||||
type Err = Error;
|
||||
|
||||
/// Returns the matching variant, or an error whose message is
/// `invalid compression profile: <input>` for any other string.
fn from_str(input: &str) -> Result<Self, Error> {
|
||||
Ok(match input {
|
||||
"fast" => Self::Fast,
|
||||
"balanced" => Self::Balanced,
|
||||
"best" => Self::Best,
|
||||
// `bail!` returns early from `from_str` with the error, so this arm never yields a value.
other => anyhow::bail!("invalid compression profile: {other}"),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Formats the profile as the same lowercase name that `FromStr` accepts,
/// so a formatted value parses back to the same variant.
// NOTE(review): presumably required by clap's `default_value_t` on the
// `compression_profile` arguments to render the default value; confirm.
impl fmt::Display for CompressionProfile {
|
||||
/// Writes `fast`, `balanced` or `best` to the formatter.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
CompressionProfile::Fast => f.write_str("fast"),
|
||||
CompressionProfile::Balanced => f.write_str("balanced"),
|
||||
CompressionProfile::Best => f.write_str("best"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub enum CompressionFormat {
|
||||
Gz,
|
||||
|
@ -26,7 +57,11 @@ impl CompressionFormat {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn encode(&self, path: impl AsRef<Path>) -> Result<Box<dyn Encoder>, Error> {
|
||||
pub(crate) fn encode(
|
||||
&self,
|
||||
path: impl AsRef<Path>,
|
||||
profile: CompressionProfile,
|
||||
) -> Result<Box<dyn Encoder>, Error> {
|
||||
let mut os = path.as_ref().as_os_str().to_os_string();
|
||||
os.push(format!(".{}", self.extension()));
|
||||
let path = Path::new(&os);
|
||||
|
@ -37,49 +72,64 @@ impl CompressionFormat {
|
|||
let file = crate::util::create_new_file(path)?;
|
||||
|
||||
Ok(match self {
|
||||
CompressionFormat::Gz => Box::new(GzEncoder::new(file, flate2::Compression::best())),
|
||||
CompressionFormat::Gz => Box::new(GzEncoder::new(
|
||||
file,
|
||||
match profile {
|
||||
CompressionProfile::Fast => flate2::Compression::fast(),
|
||||
CompressionProfile::Balanced => flate2::Compression::new(6),
|
||||
CompressionProfile::Best => flate2::Compression::best(),
|
||||
},
|
||||
)),
|
||||
CompressionFormat::Xz => {
|
||||
let mut filters = xz2::stream::Filters::new();
|
||||
// the preset is overridden by the other options so it doesn't matter
|
||||
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
|
||||
// This sets the overall dictionary size, which is also how much memory (baseline)
|
||||
// is needed for decompression.
|
||||
lzma_ops.dict_size(64 * 1024 * 1024);
|
||||
// Use the best match finder for compression ratio.
|
||||
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
|
||||
lzma_ops.mode(xz2::stream::Mode::Normal);
|
||||
// Set nice len to the maximum for best compression ratio
|
||||
lzma_ops.nice_len(273);
|
||||
// Set depth to a reasonable value, 0 means auto, 1000 is somewhat high but gives
|
||||
// good results.
|
||||
lzma_ops.depth(1000);
|
||||
// 2 is the default and does well for most files
|
||||
lzma_ops.position_bits(2);
|
||||
// 0 is the default and does well for most files
|
||||
lzma_ops.literal_position_bits(0);
|
||||
// 3 is the default and does well for most files
|
||||
lzma_ops.literal_context_bits(3);
|
||||
let encoder = match profile {
|
||||
CompressionProfile::Fast => {
|
||||
xz2::stream::MtStreamBuilder::new().threads(6).preset(1).encoder().unwrap()
|
||||
}
|
||||
CompressionProfile::Balanced => {
|
||||
xz2::stream::MtStreamBuilder::new().threads(6).preset(6).encoder().unwrap()
|
||||
}
|
||||
CompressionProfile::Best => {
|
||||
let mut filters = xz2::stream::Filters::new();
|
||||
// the preset is overridden by the other options so it doesn't matter
|
||||
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
|
||||
// This sets the overall dictionary size, which is also how much memory (baseline)
|
||||
// is needed for decompression.
|
||||
lzma_ops.dict_size(64 * 1024 * 1024);
|
||||
// Use the best match finder for compression ratio.
|
||||
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
|
||||
lzma_ops.mode(xz2::stream::Mode::Normal);
|
||||
// Set nice len to the maximum for best compression ratio
|
||||
lzma_ops.nice_len(273);
|
||||
// Set depth to a reasonable value, 0 means auto, 1000 is somewhat high but gives
|
||||
// good results.
|
||||
lzma_ops.depth(1000);
|
||||
// 2 is the default and does well for most files
|
||||
lzma_ops.position_bits(2);
|
||||
// 0 is the default and does well for most files
|
||||
lzma_ops.literal_position_bits(0);
|
||||
// 3 is the default and does well for most files
|
||||
lzma_ops.literal_context_bits(3);
|
||||
|
||||
filters.lzma2(&lzma_ops);
|
||||
filters.lzma2(&lzma_ops);
|
||||
|
||||
let mut builder = xz2::stream::MtStreamBuilder::new();
|
||||
builder.filters(filters);
|
||||
let mut builder = xz2::stream::MtStreamBuilder::new();
|
||||
builder.filters(filters);
|
||||
|
||||
// On 32-bit platforms limit ourselves to 3 threads, otherwise we exceed memory
|
||||
// usage this process can take. In the future we'll likely only do super-fast
|
||||
// compression in CI and move this heavyweight processing to promote-release (which
|
||||
// is always 64-bit and can run on big-memory machines) but for now this lets us
|
||||
// move forward.
|
||||
if std::mem::size_of::<usize>() == 4 {
|
||||
builder.threads(3);
|
||||
} else {
|
||||
builder.threads(6);
|
||||
}
|
||||
// On 32-bit platforms limit ourselves to 3 threads, otherwise we exceed memory
|
||||
// usage this process can take. In the future we'll likely only do super-fast
|
||||
// compression in CI and move this heavyweight processing to promote-release (which
|
||||
// is always 64-bit and can run on big-memory machines) but for now this lets us
|
||||
// move forward.
|
||||
if std::mem::size_of::<usize>() == 4 {
|
||||
builder.threads(3);
|
||||
} else {
|
||||
builder.threads(6);
|
||||
}
|
||||
builder.encoder().unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
let compressor = XzEncoder::new_stream(
|
||||
std::io::BufWriter::new(file),
|
||||
builder.encoder().unwrap(),
|
||||
);
|
||||
let compressor = XzEncoder::new_stream(std::io::BufWriter::new(file), encoder);
|
||||
Box::new(compressor)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use super::Scripter;
|
||||
use super::Tarballer;
|
||||
use crate::compression::CompressionFormats;
|
||||
use crate::compression::{CompressionFormats, CompressionProfile};
|
||||
use crate::util::*;
|
||||
use anyhow::{bail, format_err, Context, Result};
|
||||
use std::collections::BTreeSet;
|
||||
|
@ -54,6 +54,10 @@ actor! {
|
|||
#[clap(value_name = "DIR")]
|
||||
output_dir: String = "./dist",
|
||||
|
||||
/// The profile used to compress the tarball.
|
||||
#[clap(value_name = "FORMAT", default_value_t)]
|
||||
compression_profile: CompressionProfile,
|
||||
|
||||
/// The formats used to compress the tarball
|
||||
#[clap(value_name = "FORMAT", default_value_t)]
|
||||
compression_formats: CompressionFormats,
|
||||
|
@ -113,6 +117,7 @@ impl Generator {
|
|||
.work_dir(self.work_dir)
|
||||
.input(self.package_name)
|
||||
.output(path_to_str(&output)?.into())
|
||||
.compression_profile(self.compression_profile)
|
||||
.compression_formats(self.compression_formats.clone());
|
||||
tarballer.run()?;
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ use tar::{Builder, Header};
|
|||
use walkdir::WalkDir;
|
||||
|
||||
use crate::{
|
||||
compression::{CombinedEncoder, CompressionFormats},
|
||||
compression::{CombinedEncoder, CompressionFormats, CompressionProfile},
|
||||
util::*,
|
||||
};
|
||||
|
||||
|
@ -25,6 +25,10 @@ actor! {
|
|||
#[clap(value_name = "DIR")]
|
||||
work_dir: String = "./workdir",
|
||||
|
||||
/// The profile used to compress the tarball.
|
||||
#[clap(value_name = "FORMAT", default_value_t)]
|
||||
compression_profile: CompressionProfile,
|
||||
|
||||
/// The formats used to compress the tarball.
|
||||
#[clap(value_name = "FORMAT", default_value_t)]
|
||||
compression_formats: CompressionFormats,
|
||||
|
@ -38,7 +42,7 @@ impl Tarballer {
|
|||
let encoder = CombinedEncoder::new(
|
||||
self.compression_formats
|
||||
.iter()
|
||||
.map(|f| f.encode(&tarball_name))
|
||||
.map(|f| f.encode(&tarball_name, self.compression_profile))
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue