Auto merge of #86898 - the8472:path-cmp, r=dtolnay

Add fast path for Path::cmp that skips over long shared prefixes

```
# before
test path::tests::bench_path_cmp_fast_path_buf_sort               ... bench:      60,811 ns/iter (+/- 865)
test path::tests::bench_path_cmp_fast_path_long                   ... bench:       6,459 ns/iter (+/- 275)
test path::tests::bench_path_cmp_fast_path_short                  ... bench:       1,777 ns/iter (+/- 34)

# after
test path::tests::bench_path_cmp_fast_path_buf_sort               ... bench:      38,140 ns/iter (+/- 211)
test path::tests::bench_path_cmp_fast_path_long                   ... bench:       1,471 ns/iter (+/- 24)
test path::tests::bench_path_cmp_fast_path_short                  ... bench:       1,106 ns/iter (+/- 9)
```
This commit is contained in:
bors 2021-08-20 05:00:45 +00:00
commit bcfd3f7e88
2 changed files with 108 additions and 7 deletions

View file

@ -962,7 +962,7 @@ impl cmp::Eq for Components<'_> {}
impl<'a> cmp::PartialOrd for Components<'a> { impl<'a> cmp::PartialOrd for Components<'a> {
#[inline] #[inline]
fn partial_cmp(&self, other: &Components<'a>) -> Option<cmp::Ordering> { fn partial_cmp(&self, other: &Components<'a>) -> Option<cmp::Ordering> {
Iterator::partial_cmp(self.clone(), other.clone()) Some(compare_components(self.clone(), other.clone()))
} }
} }
@ -970,10 +970,43 @@ impl<'a> cmp::PartialOrd for Components<'a> {
impl cmp::Ord for Components<'_> { impl cmp::Ord for Components<'_> {
#[inline] #[inline]
fn cmp(&self, other: &Self) -> cmp::Ordering { fn cmp(&self, other: &Self) -> cmp::Ordering {
Iterator::cmp(self.clone(), other.clone()) compare_components(self.clone(), other.clone())
} }
} }
fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering {
// Fast path for long shared prefixes
//
// - compare raw bytes to find first mismatch
// - backtrack to find separator before mismatch to avoid ambiguous parsings of '.' or '..' characters
// - if found update state to only do a component-wise comparison on the remainder,
// otherwise do it on the full path
//
// The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
// the middle of one
if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
// this might benefit from a [u8]::first_mismatch simd implementation, if it existed
let first_difference =
match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
None => left.path.len().min(right.path.len()),
Some(diff) => diff,
};
if let Some(previous_sep) =
left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
{
let mismatched_component_start = previous_sep + 1;
left.path = &left.path[mismatched_component_start..];
left.front = State::Body;
right.path = &right.path[mismatched_component_start..];
right.front = State::Body;
}
}
Iterator::cmp(left, right)
}
/// An iterator over [`Path`] and its ancestors. /// An iterator over [`Path`] and its ancestors.
/// ///
/// This `struct` is created by the [`ancestors`] method on [`Path`]. /// This `struct` is created by the [`ancestors`] method on [`Path`].
@ -1704,7 +1737,7 @@ impl cmp::Eq for PathBuf {}
impl cmp::PartialOrd for PathBuf { impl cmp::PartialOrd for PathBuf {
#[inline] #[inline]
fn partial_cmp(&self, other: &PathBuf) -> Option<cmp::Ordering> { fn partial_cmp(&self, other: &PathBuf) -> Option<cmp::Ordering> {
self.components().partial_cmp(other.components()) Some(compare_components(self.components(), other.components()))
} }
} }
@ -1712,7 +1745,7 @@ impl cmp::PartialOrd for PathBuf {
impl cmp::Ord for PathBuf { impl cmp::Ord for PathBuf {
#[inline] #[inline]
fn cmp(&self, other: &PathBuf) -> cmp::Ordering { fn cmp(&self, other: &PathBuf) -> cmp::Ordering {
self.components().cmp(other.components()) compare_components(self.components(), other.components())
} }
} }
@ -2686,7 +2719,7 @@ impl fmt::Display for Display<'_> {
impl cmp::PartialEq for Path { impl cmp::PartialEq for Path {
#[inline] #[inline]
fn eq(&self, other: &Path) -> bool { fn eq(&self, other: &Path) -> bool {
self.components().eq(other.components()) self.components() == other.components()
} }
} }
@ -2706,7 +2739,7 @@ impl cmp::Eq for Path {}
impl cmp::PartialOrd for Path { impl cmp::PartialOrd for Path {
#[inline] #[inline]
fn partial_cmp(&self, other: &Path) -> Option<cmp::Ordering> { fn partial_cmp(&self, other: &Path) -> Option<cmp::Ordering> {
self.components().partial_cmp(other.components()) Some(compare_components(self.components(), other.components()))
} }
} }
@ -2714,7 +2747,7 @@ impl cmp::PartialOrd for Path {
impl cmp::Ord for Path { impl cmp::Ord for Path {
#[inline] #[inline]
fn cmp(&self, other: &Path) -> cmp::Ordering { fn cmp(&self, other: &Path) -> cmp::Ordering {
self.components().cmp(other.components()) compare_components(self.components(), other.components())
} }
} }

View file

@ -1,7 +1,9 @@
use super::*; use super::*;
use crate::collections::BTreeSet;
use crate::rc::Rc; use crate::rc::Rc;
use crate::sync::Arc; use crate::sync::Arc;
use core::hint::black_box;
macro_rules! t( macro_rules! t(
($path:expr, iter: $iter:expr) => ( ($path:expr, iter: $iter:expr) => (
@ -1392,3 +1394,69 @@ fn into_rc() {
assert_eq!(&*rc2, path); assert_eq!(&*rc2, path);
assert_eq!(&*arc2, path); assert_eq!(&*arc2, path);
} }
#[test]
fn test_ord() {
macro_rules! ord(
($ord:ident, $left:expr, $right:expr) => ( {
assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
});
);
ord!(Less, "1", "2");
ord!(Less, "/foo/bar", "/foo./bar");
ord!(Less, "foo/bar", "foo/bar.");
ord!(Equal, "foo/./bar", "foo/bar/");
ord!(Equal, "foo/bar", "foo/bar/");
ord!(Equal, "foo/bar", "foo/bar/.");
ord!(Equal, "foo/bar", "foo/bar//");
}
#[bench]
fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) {
let prefix = "my/home";
let mut paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
paths.sort();
b.iter(|| {
black_box(paths.as_mut_slice()).sort_unstable();
});
}
#[bench]
fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) {
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
let paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
let mut set = BTreeSet::new();
paths.iter().for_each(|p| {
set.insert(p.as_path());
});
b.iter(|| {
set.remove(paths[500].as_path());
set.insert(paths[500].as_path());
});
}
#[bench]
fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
let prefix = "my/home";
let paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
let mut set = BTreeSet::new();
paths.iter().for_each(|p| {
set.insert(p.as_path());
});
b.iter(|| {
set.remove(paths[500].as_path());
set.insert(paths[500].as_path());
});
}