Auto merge of #132883 - LaihoE:vectorized_is_sorted, r=thomcc
vectorize slice::is_sorted

Benchmarks using u32 slices:

| len | New | Old |
|--------|----------------------|----------------------|
| 2 | 1.1997 ns | 889.23 ps |
| 4 | 1.6479 ns | 1.5396 ns |
| 8 | 2.5764 ns | 2.5633 ns |
| 16 | 5.4750 ns | 4.7421 ns |
| 32 | 11.344 ns | 8.4634 ns |
| 64 | 12.105 ns | 18.104 ns |
| 128 | 17.263 ns | 33.185 ns |
| 256 | 29.465 ns | 60.928 ns |
| 512 | 48.926 ns | 116.19 ns |
| 1024 | 85.274 ns | 237.91 ns |
| 2048 | 160.94 ns | 469.53 ns |
| 4096 | 311.60 ns | 911.43 ns |
| 8192 | 615.89 ns | 2.2316 µs |
| 16384 | 1.2619 µs | 3.4871 µs |
| 32768 | 2.5245 µs | 6.9947 µs |
| 65536 | 5.2254 µs | 15.212 µs |

Seems to be a bit slower on small N but much faster on large N.

Godbolt: https://rust.godbolt.org/z/Txn5MdfKn
This commit is contained in:
commit
44f233f251
1 changed file with 17 additions and 1 deletion
|
@ -4097,7 +4097,23 @@ impl<T> [T] {
|
|||
where
|
||||
T: PartialOrd,
|
||||
{
|
||||
self.is_sorted_by(|a, b| a <= b)
|
||||
// This odd number works the best. 32 + 1 extra due to overlapping chunk boundaries.
|
||||
const CHUNK_SIZE: usize = 33;
|
||||
if self.len() < CHUNK_SIZE {
|
||||
return self.windows(2).all(|w| w[0] <= w[1]);
|
||||
}
|
||||
let mut i = 0;
|
||||
// Check in chunks for autovectorization.
|
||||
while i < self.len() - CHUNK_SIZE {
|
||||
let chunk = &self[i..i + CHUNK_SIZE];
|
||||
if !chunk.windows(2).fold(true, |acc, w| acc & (w[0] <= w[1])) {
|
||||
return false;
|
||||
}
|
||||
// We need to ensure that chunk boundaries are also sorted.
|
||||
// Overlap the next chunk with the last element of our last chunk.
|
||||
i += CHUNK_SIZE - 1;
|
||||
}
|
||||
self[i..].windows(2).all(|w| w[0] <= w[1])
|
||||
}
|
||||
|
||||
/// Checks if the elements of this slice are sorted using the given comparator function.
|
||||
|
|
Loading…
Add table
Reference in a new issue