Rollup merge of #131520 - zachs18:const-str-split, r=Noratrieb
Mark `str::is_char_boundary` and `str::split_at*` unstably `const`. Tracking issues: #131516, #131518. The first commit implements `const_is_char_boundary`; the second commit implements `const_str_split_at` (which depends on `const_is_char_boundary`). ~~I used `const_eval_select` for `is_char_boundary` since there is a comment about optimizations that would theoretically not happen with the simple `const`-compatible version (since `slice::get` is not `const`ifiable) cc #84751. I have not checked if this code difference is still required for the optimization, so it might not be worth the code complication, but 🤷.~~ This changes `str::split_at_checked` to use a new private helper function, `split_at_unchecked` (copied from `split_at_mut_unchecked`), that builds the two halves from raw pointers instead of calling `get_unchecked`, since `get_unchecked` is not currently `const`ifiable because it goes through the `SliceIndex` trait.
This commit is contained in:
commit
b496974c53
2 changed files with 38 additions and 12 deletions
|
@ -185,7 +185,9 @@
|
||||||
#![feature(cfg_target_has_atomic_equal_alignment)]
|
#![feature(cfg_target_has_atomic_equal_alignment)]
|
||||||
#![feature(cfg_ub_checks)]
|
#![feature(cfg_ub_checks)]
|
||||||
#![feature(const_for)]
|
#![feature(const_for)]
|
||||||
|
#![feature(const_is_char_boundary)]
|
||||||
#![feature(const_precise_live_drops)]
|
#![feature(const_precise_live_drops)]
|
||||||
|
#![feature(const_str_split_at)]
|
||||||
#![feature(decl_macro)]
|
#![feature(decl_macro)]
|
||||||
#![feature(deprecated_suggestion)]
|
#![feature(deprecated_suggestion)]
|
||||||
#![feature(doc_cfg)]
|
#![feature(doc_cfg)]
|
||||||
|
|
|
@ -185,8 +185,9 @@ impl str {
|
||||||
/// ```
|
/// ```
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "is_char_boundary", since = "1.9.0")]
|
#[stable(feature = "is_char_boundary", since = "1.9.0")]
|
||||||
|
#[rustc_const_unstable(feature = "const_is_char_boundary", issue = "131516")]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_char_boundary(&self, index: usize) -> bool {
|
pub const fn is_char_boundary(&self, index: usize) -> bool {
|
||||||
// 0 is always ok.
|
// 0 is always ok.
|
||||||
// Test for 0 explicitly so that it can optimize out the check
|
// Test for 0 explicitly so that it can optimize out the check
|
||||||
// easily and skip reading string data for that case.
|
// easily and skip reading string data for that case.
|
||||||
|
@ -195,8 +196,8 @@ impl str {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
match self.as_bytes().get(index) {
|
if index >= self.len() {
|
||||||
// For `None` we have two options:
|
// For `true` we have two options:
|
||||||
//
|
//
|
||||||
// - index == self.len()
|
// - index == self.len()
|
||||||
// Empty strings are valid, so return true
|
// Empty strings are valid, so return true
|
||||||
|
@ -205,9 +206,9 @@ impl str {
|
||||||
//
|
//
|
||||||
// The check is placed exactly here, because it improves generated
|
// The check is placed exactly here, because it improves generated
|
||||||
// code on higher opt-levels. See PR #84751 for more details.
|
// code on higher opt-levels. See PR #84751 for more details.
|
||||||
None => index == self.len(),
|
index == self.len()
|
||||||
|
} else {
|
||||||
Some(&b) => b.is_utf8_char_boundary(),
|
self.as_bytes()[index].is_utf8_char_boundary()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -637,7 +638,8 @@ impl str {
|
||||||
#[inline]
|
#[inline]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "str_split_at", since = "1.4.0")]
|
#[stable(feature = "str_split_at", since = "1.4.0")]
|
||||||
pub fn split_at(&self, mid: usize) -> (&str, &str) {
|
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
|
||||||
|
pub const fn split_at(&self, mid: usize) -> (&str, &str) {
|
||||||
match self.split_at_checked(mid) {
|
match self.split_at_checked(mid) {
|
||||||
None => slice_error_fail(self, 0, mid),
|
None => slice_error_fail(self, 0, mid),
|
||||||
Some(pair) => pair,
|
Some(pair) => pair,
|
||||||
|
@ -677,7 +679,8 @@ impl str {
|
||||||
#[inline]
|
#[inline]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "str_split_at", since = "1.4.0")]
|
#[stable(feature = "str_split_at", since = "1.4.0")]
|
||||||
pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
|
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
|
||||||
|
pub const fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
|
||||||
// is_char_boundary checks that the index is in [0, .len()]
|
// is_char_boundary checks that the index is in [0, .len()]
|
||||||
if self.is_char_boundary(mid) {
|
if self.is_char_boundary(mid) {
|
||||||
// SAFETY: just checked that `mid` is on a char boundary.
|
// SAFETY: just checked that `mid` is on a char boundary.
|
||||||
|
@ -716,11 +719,12 @@ impl str {
|
||||||
#[inline]
|
#[inline]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "split_at_checked", since = "1.80.0")]
|
#[stable(feature = "split_at_checked", since = "1.80.0")]
|
||||||
pub fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> {
|
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
|
||||||
|
pub const fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> {
|
||||||
// is_char_boundary checks that the index is in [0, .len()]
|
// is_char_boundary checks that the index is in [0, .len()]
|
||||||
if self.is_char_boundary(mid) {
|
if self.is_char_boundary(mid) {
|
||||||
// SAFETY: just checked that `mid` is on a char boundary.
|
// SAFETY: just checked that `mid` is on a char boundary.
|
||||||
Some(unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) })
|
Some(unsafe { self.split_at_unchecked(mid) })
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
@ -756,7 +760,9 @@ impl str {
|
||||||
#[inline]
|
#[inline]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "split_at_checked", since = "1.80.0")]
|
#[stable(feature = "split_at_checked", since = "1.80.0")]
|
||||||
pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> {
|
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
|
||||||
|
#[rustc_allow_const_fn_unstable(const_is_char_boundary)]
|
||||||
|
pub const fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> {
|
||||||
// is_char_boundary checks that the index is in [0, .len()]
|
// is_char_boundary checks that the index is in [0, .len()]
|
||||||
if self.is_char_boundary(mid) {
|
if self.is_char_boundary(mid) {
|
||||||
// SAFETY: just checked that `mid` is on a char boundary.
|
// SAFETY: just checked that `mid` is on a char boundary.
|
||||||
|
@ -772,7 +778,25 @@ impl str {
|
||||||
///
|
///
|
||||||
/// The caller must ensure that `mid` is a valid byte offset from the start
|
/// The caller must ensure that `mid` is a valid byte offset from the start
|
||||||
/// of the string and falls on the boundary of a UTF-8 code point.
|
/// of the string and falls on the boundary of a UTF-8 code point.
|
||||||
unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) {
|
const unsafe fn split_at_unchecked(&self, mid: usize) -> (&str, &str) {
|
||||||
|
let len = self.len();
|
||||||
|
let ptr = self.as_ptr();
|
||||||
|
// SAFETY: caller guarantees `mid` is on a char boundary.
|
||||||
|
unsafe {
|
||||||
|
(
|
||||||
|
from_utf8_unchecked(slice::from_raw_parts(ptr, mid)),
|
||||||
|
from_utf8_unchecked(slice::from_raw_parts(ptr.add(mid), len - mid)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Divides one string slice into two at an index.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The caller must ensure that `mid` is a valid byte offset from the start
|
||||||
|
/// of the string and falls on the boundary of a UTF-8 code point.
|
||||||
|
const unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) {
|
||||||
let len = self.len();
|
let len = self.len();
|
||||||
let ptr = self.as_mut_ptr();
|
let ptr = self.as_mut_ptr();
|
||||||
// SAFETY: caller guarantees `mid` is on a char boundary.
|
// SAFETY: caller guarantees `mid` is on a char boundary.
|
||||||
|
|
Loading…
Add table
Reference in a new issue