Rollup merge of #109124 - ferrocene:pa-compression-mode, r=Mark-Simulacrum

Add `dist.compression-profile` option to control compression speed

PR #108534 reduced the size of compressed archives, but (as expected) it also resulted in way longer compression times and memory usage during compression.

It's desirable to keep status quo (smaller archives but more CI usage), but it should also be configurable so that downstream users don't have to waste that much time on CI. As a data point, this resulted in doubling the time of Ferrocene's dist jobs, and required us to increase the RAM allocation for one of such jobs.

This PR adds a new `config.toml` setting, `dist.compression-profile`. The values can be:

* `fast`: equivalent to the gzip and xz preset of "1"
* `balanced`: equivalent to the gzip and xz preset of "6" (the CLI defaults as far as I'm aware)
* `best`: equivalent to the gzip present of "9", and our custom xz profile

The default has also been moved back to `balanced`, to try and avoid the compression time regression for downstream users. I don't feel too strongly on the default, and I'm open to changing it.

Also, for the `best` profile the XZ settings do not match the "9" preset used by the CLI, and it might be confusing. Should we create a `custom-rustc-ci`/`ultra` profile for that?

r? ``@Mark-Simulacrum``
This commit is contained in:
nils 2023-03-21 13:00:23 +01:00 committed by GitHub
commit 09b1254eb2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 123 additions and 43 deletions

View file

@ -806,3 +806,9 @@ changelog-seen = 2
#
# This list must be non-empty.
#compression-formats = ["gz", "xz"]
# How much time should be spent compressing the tarballs. The better the
# compression profile, the longer compression will take.
#
# Available options: fast, balanced, best
#compression-profile = "fast"

View file

@ -191,6 +191,7 @@ pub struct Config {
pub dist_sign_folder: Option<PathBuf>,
pub dist_upload_addr: Option<String>,
pub dist_compression_formats: Option<Vec<String>>,
pub dist_compression_profile: String,
pub dist_include_mingw_linker: bool,
// libstd features
@ -703,6 +704,7 @@ define_config! {
src_tarball: Option<bool> = "src-tarball",
missing_tools: Option<bool> = "missing-tools",
compression_formats: Option<Vec<String>> = "compression-formats",
compression_profile: Option<String> = "compression-profile",
include_mingw_linker: Option<bool> = "include-mingw-linker",
}
}
@ -821,6 +823,7 @@ impl Config {
config.deny_warnings = true;
config.bindir = "bin".into();
config.dist_include_mingw_linker = true;
config.dist_compression_profile = "fast".into();
// set by build.rs
config.build = TargetSelection::from_user(&env!("BUILD_TRIPLE"));
@ -1308,6 +1311,7 @@ impl Config {
config.dist_sign_folder = t.sign_folder.map(PathBuf::from);
config.dist_upload_addr = t.upload_addr;
config.dist_compression_formats = t.compression_formats;
set(&mut config.dist_compression_profile, t.compression_profile);
set(&mut config.rust_dist_src, t.src_tarball);
set(&mut config.missing_tools, t.missing_tools);
set(&mut config.dist_include_mingw_linker, t.include_mingw_linker)

View file

@ -11,3 +11,7 @@ extended = true
[llvm]
# Most users installing from source want to build all parts of the project from source, not just rustc itself.
download-ci-llvm = false
[dist]
# Use better compression when preparing tarballs.
compression-profile = "balanced"

View file

@ -318,6 +318,7 @@ impl<'a> Tarball<'a> {
assert!(!formats.is_empty(), "dist.compression-formats can't be empty");
cmd.arg("--compression-formats").arg(formats.join(","));
}
cmd.args(&["--compression-profile", &self.builder.config.dist_compression_profile]);
self.builder.run(&mut cmd);
// Ensure there are no symbolic links in the tarball. In particular,

View file

@ -58,6 +58,7 @@ RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-manage-submodules"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-locked-deps"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-cargo-native-static"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set rust.codegen-units-std=1"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set dist.compression-profile=best"
# Only produce xz tarballs on CI. gz tarballs will be generated by the release
# process by recompressing the existing xz ones. This decreases the storage

View file

@ -1,7 +1,7 @@
use super::Scripter;
use super::Tarballer;
use crate::{
compression::{CompressionFormat, CompressionFormats},
compression::{CompressionFormat, CompressionFormats, CompressionProfile},
util::*,
};
use anyhow::{bail, Context, Result};
@ -48,6 +48,10 @@ actor! {
#[clap(value_name = "DIR")]
output_dir: String = "./dist",
/// The profile used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_profile: CompressionProfile,
/// The formats used to compress the tarball
#[clap(value_name = "FORMAT", default_value_t)]
compression_formats: CompressionFormats,
@ -153,6 +157,7 @@ impl Combiner {
.work_dir(self.work_dir)
.input(self.package_name)
.output(path_to_str(&output)?.into())
.compression_profile(self.compression_profile)
.compression_formats(self.compression_formats.clone());
tarballer.run()?;

View file

@ -4,6 +4,37 @@ use rayon::prelude::*;
use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr};
use xz2::{read::XzDecoder, write::XzEncoder};
#[derive(Default, Debug, Copy, Clone)]
pub enum CompressionProfile {
Fast,
#[default]
Balanced,
Best,
}
impl FromStr for CompressionProfile {
type Err = Error;
fn from_str(input: &str) -> Result<Self, Error> {
Ok(match input {
"fast" => Self::Fast,
"balanced" => Self::Balanced,
"best" => Self::Best,
other => anyhow::bail!("invalid compression profile: {other}"),
})
}
}
impl fmt::Display for CompressionProfile {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CompressionProfile::Fast => f.write_str("fast"),
CompressionProfile::Balanced => f.write_str("balanced"),
CompressionProfile::Best => f.write_str("best"),
}
}
}
#[derive(Debug, Copy, Clone)]
pub enum CompressionFormat {
Gz,
@ -26,7 +57,11 @@ impl CompressionFormat {
}
}
pub(crate) fn encode(&self, path: impl AsRef<Path>) -> Result<Box<dyn Encoder>, Error> {
pub(crate) fn encode(
&self,
path: impl AsRef<Path>,
profile: CompressionProfile,
) -> Result<Box<dyn Encoder>, Error> {
let mut os = path.as_ref().as_os_str().to_os_string();
os.push(format!(".{}", self.extension()));
let path = Path::new(&os);
@ -37,49 +72,64 @@ impl CompressionFormat {
let file = crate::util::create_new_file(path)?;
Ok(match self {
CompressionFormat::Gz => Box::new(GzEncoder::new(file, flate2::Compression::best())),
CompressionFormat::Gz => Box::new(GzEncoder::new(
file,
match profile {
CompressionProfile::Fast => flate2::Compression::fast(),
CompressionProfile::Balanced => flate2::Compression::new(6),
CompressionProfile::Best => flate2::Compression::best(),
},
)),
CompressionFormat::Xz => {
let mut filters = xz2::stream::Filters::new();
// the preset is overridden by the other options so it doesn't matter
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
// This sets the overall dictionary size, which is also how much memory (baseline)
// is needed for decompression.
lzma_ops.dict_size(64 * 1024 * 1024);
// Use the best match finder for compression ratio.
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
lzma_ops.mode(xz2::stream::Mode::Normal);
// Set nice len to the maximum for best compression ratio
lzma_ops.nice_len(273);
// Set depth to a reasonable value, 0 means auto, 1000 is somwhat high but gives
// good results.
lzma_ops.depth(1000);
// 2 is the default and does well for most files
lzma_ops.position_bits(2);
// 0 is the default and does well for most files
lzma_ops.literal_position_bits(0);
// 3 is the default and does well for most files
lzma_ops.literal_context_bits(3);
let encoder = match profile {
CompressionProfile::Fast => {
xz2::stream::MtStreamBuilder::new().threads(6).preset(1).encoder().unwrap()
}
CompressionProfile::Balanced => {
xz2::stream::MtStreamBuilder::new().threads(6).preset(6).encoder().unwrap()
}
CompressionProfile::Best => {
let mut filters = xz2::stream::Filters::new();
// the preset is overridden by the other options so it doesn't matter
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
// This sets the overall dictionary size, which is also how much memory (baseline)
// is needed for decompression.
lzma_ops.dict_size(64 * 1024 * 1024);
// Use the best match finder for compression ratio.
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
lzma_ops.mode(xz2::stream::Mode::Normal);
// Set nice len to the maximum for best compression ratio
lzma_ops.nice_len(273);
// Set depth to a reasonable value, 0 means auto, 1000 is somwhat high but gives
// good results.
lzma_ops.depth(1000);
// 2 is the default and does well for most files
lzma_ops.position_bits(2);
// 0 is the default and does well for most files
lzma_ops.literal_position_bits(0);
// 3 is the default and does well for most files
lzma_ops.literal_context_bits(3);
filters.lzma2(&lzma_ops);
filters.lzma2(&lzma_ops);
let mut builder = xz2::stream::MtStreamBuilder::new();
builder.filters(filters);
let mut builder = xz2::stream::MtStreamBuilder::new();
builder.filters(filters);
// On 32-bit platforms limit ourselves to 3 threads, otherwise we exceed memory
// usage this process can take. In the future we'll likely only do super-fast
// compression in CI and move this heavyweight processing to promote-release (which
// is always 64-bit and can run on big-memory machines) but for now this lets us
// move forward.
if std::mem::size_of::<usize>() == 4 {
builder.threads(3);
} else {
builder.threads(6);
}
// On 32-bit platforms limit ourselves to 3 threads, otherwise we exceed memory
// usage this process can take. In the future we'll likely only do super-fast
// compression in CI and move this heavyweight processing to promote-release (which
// is always 64-bit and can run on big-memory machines) but for now this lets us
// move forward.
if std::mem::size_of::<usize>() == 4 {
builder.threads(3);
} else {
builder.threads(6);
}
builder.encoder().unwrap()
}
};
let compressor = XzEncoder::new_stream(
std::io::BufWriter::new(file),
builder.encoder().unwrap(),
);
let compressor = XzEncoder::new_stream(std::io::BufWriter::new(file), encoder);
Box::new(compressor)
}
})

View file

@ -1,6 +1,6 @@
use super::Scripter;
use super::Tarballer;
use crate::compression::CompressionFormats;
use crate::compression::{CompressionFormats, CompressionProfile};
use crate::util::*;
use anyhow::{bail, format_err, Context, Result};
use std::collections::BTreeSet;
@ -54,6 +54,10 @@ actor! {
#[clap(value_name = "DIR")]
output_dir: String = "./dist",
/// The profile used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_profile: CompressionProfile,
/// The formats used to compress the tarball
#[clap(value_name = "FORMAT", default_value_t)]
compression_formats: CompressionFormats,
@ -113,6 +117,7 @@ impl Generator {
.work_dir(self.work_dir)
.input(self.package_name)
.output(path_to_str(&output)?.into())
.compression_profile(self.compression_profile)
.compression_formats(self.compression_formats.clone());
tarballer.run()?;

View file

@ -6,7 +6,7 @@ use tar::{Builder, Header};
use walkdir::WalkDir;
use crate::{
compression::{CombinedEncoder, CompressionFormats},
compression::{CombinedEncoder, CompressionFormats, CompressionProfile},
util::*,
};
@ -25,6 +25,10 @@ actor! {
#[clap(value_name = "DIR")]
work_dir: String = "./workdir",
/// The profile used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_profile: CompressionProfile,
/// The formats used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_formats: CompressionFormats,
@ -38,7 +42,7 @@ impl Tarballer {
let encoder = CombinedEncoder::new(
self.compression_formats
.iter()
.map(|f| f.encode(&tarball_name))
.map(|f| f.encode(&tarball_name, self.compression_profile))
.collect::<Result<Vec<_>>>()?,
);