Auto merge of #114443 - tgross35:cstr-len, r=dtolnay

Implement `cstr_count_bytes`

This has not yet been approved via ACP, but it's simple enough to get started on.

- ACP: https://github.com/rust-lang/libs-team/issues/256
- Tracking issue: https://github.com/rust-lang/rust/issues/114441

`@rustbot` label +T-libs-api
This commit is contained in:
bors 2023-09-20 00:02:45 +00:00
commit 3b9e0feff4

View file

@ -214,6 +214,8 @@ impl CStr {
/// * The memory referenced by the returned `CStr` must not be mutated for
/// the duration of lifetime `'a`.
///
/// * The nul terminator must be within `isize::MAX` from `ptr`
///
/// > **Note**: This operation is intended to be a 0-cost cast but it is
/// > currently implemented with an up-front calculation of the length of
/// > the string. This is not guaranteed to always be the case.
@ -259,42 +261,16 @@ impl CStr {
#[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")]
pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr {
// SAFETY: The caller has provided a pointer that points to a valid C
// string with a NUL terminator of size less than `isize::MAX`, whose
// content remain valid and doesn't change for the lifetime of the
// returned `CStr`.
//
// Thus computing the length is fine (a NUL byte exists), the call to
// from_raw_parts is safe because we know the length is at most `isize::MAX`, meaning
// the call to `from_bytes_with_nul_unchecked` is correct.
// string with a NUL terminator less than `isize::MAX` from `ptr`.
let len = unsafe { const_strlen(ptr) };
// SAFETY: The caller has provided a valid pointer with length less than
// `isize::MAX`, so `from_raw_parts` is safe. The content remains valid
// and doesn't change for the lifetime of the returned `CStr`. This
// means the call to `from_bytes_with_nul_unchecked` is correct.
//
// The cast from c_char to u8 is ok because a c_char is always one byte.
unsafe {
const fn strlen_ct(s: *const c_char) -> usize {
let mut len = 0;
// SAFETY: Outer caller has provided a pointer to a valid C string.
while unsafe { *s.add(len) } != 0 {
len += 1;
}
len
}
// `inline` is necessary for codegen to see strlen.
#[inline]
fn strlen_rt(s: *const c_char) -> usize {
extern "C" {
/// Provided by libc or compiler_builtins.
fn strlen(s: *const c_char) -> usize;
}
// SAFETY: Outer caller has provided a pointer to a valid C string.
unsafe { strlen(s) }
}
let len = intrinsics::const_eval_select((ptr,), strlen_ct, strlen_rt);
Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr.cast(), len + 1))
}
unsafe { Self::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr.cast(), len + 1)) }
}
/// Creates a C string wrapper from a byte slice with any number of nuls.
@ -516,6 +492,34 @@ impl CStr {
self.inner.as_ptr()
}
/// Returns the length of `self`. Like C's `strlen`, this does not include the nul terminator.
///
/// > **Note**: This method is currently implemented as a constant-time
/// > cast, but it is planned to alter its definition in the future to
/// > perform the length calculation whenever this method is called.
///
/// # Examples
///
/// ```
/// #![feature(cstr_count_bytes)]
///
/// use std::ffi::CStr;
///
/// let cstr = CStr::from_bytes_with_nul(b"foo\0").unwrap();
/// assert_eq!(cstr.count_bytes(), 3);
///
/// let cstr = CStr::from_bytes_with_nul(b"\0").unwrap();
/// assert_eq!(cstr.count_bytes(), 0);
/// ```
#[inline]
#[must_use]
#[doc(alias("len", "strlen"))]
#[unstable(feature = "cstr_count_bytes", issue = "114441")]
#[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")]
pub const fn count_bytes(&self) -> usize {
self.inner.len() - 1
}
/// Returns `true` if `self.to_bytes()` has a length of 0.
///
/// # Examples
@ -682,3 +686,37 @@ impl AsRef<CStr> for CStr {
self
}
}
/// Calculate the length of a nul-terminated string. Defers to C's `strlen` when possible.
///
/// # Safety
///
/// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be
/// located within `isize::MAX` from `ptr`.
#[inline]
const unsafe fn const_strlen(ptr: *const c_char) -> usize {
const fn strlen_ct(s: *const c_char) -> usize {
let mut len = 0;
// SAFETY: Outer caller has provided a pointer to a valid C string.
while unsafe { *s.add(len) } != 0 {
len += 1;
}
len
}
#[inline]
fn strlen_rt(s: *const c_char) -> usize {
extern "C" {
/// Provided by libc or compiler_builtins.
fn strlen(s: *const c_char) -> usize;
}
// SAFETY: Outer caller has provided a pointer to a valid C string.
unsafe { strlen(s) }
}
// SAFETY: the two functions always provide equivalent functionality
unsafe { intrinsics::const_eval_select((ptr,), strlen_ct, strlen_rt) }
}