Auto merge of #109698 - epage:wtf, r=Amanieu

Allow limited access to `OsStr` bytes

`OsStr` has historically kept its implementation details private out of
concern for locking us into a specific encoding on Windows.

This is an alternative to rust-lang#95290, which proposed specifying the encoding on Windows. Instead, this
only specifies that for cross-platform code, `OsStr`'s encoding is a superset of UTF-8 and defines
rules for safely interacting with it.

At minimum, this can greatly simplify the `os_str_bytes` crate and every
arg parser that interacts with `OsStr` directly (which is most of those
that support invalid UTF-8).

Tracking issue: #111544
This commit is contained in:
bors 2023-05-30 21:26:22 +00:00
commit 9610dfe5a9
14 changed files with 159 additions and 83 deletions

View file

@ -127,6 +127,14 @@
//! trait, which provides a [`from_wide`] method to convert a native Windows
//! string (without the terminating nul character) to an [`OsString`].
//!
//! ## On all platforms
//!
//! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of
//! UTF-8; see [`OsString`] for more details on its encoding on different platforms.
//!
//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_os_str_bytes`] and
//! [`OsStr::from_os_str_bytes_unchecked`].
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//! [`env::set_var()`]: crate::env::set_var "env::set_var"

View file

@ -667,6 +667,51 @@ impl OsStr {
s.as_ref()
}
/// Converts a slice of bytes to an OS string slice without checking that the string contains
/// valid `OsStr`-encoded data.
///
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
/// ASCII.
///
/// See the [module's toplevel documentation about conversions][conversions] for safe,
/// cross-platform [conversions] from/to native representations.
///
/// # Safety
///
/// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
/// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same Rust version
/// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent
/// over the network or stored in a file will likely violate these safety rules.
///
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
/// split either immediately before or immediately after any valid non-empty UTF-8 substring.
///
/// # Examples
///
/// ```
/// #![feature(os_str_bytes)]
///
/// use std::ffi::OsStr;
///
/// let os_str = OsStr::new("Mary had a little lamb");
/// let bytes = os_str.as_os_str_bytes();
/// let words = bytes.split(|b| *b == b' ');
/// let words: Vec<&OsStr> = words.map(|word| {
///     // SAFETY:
///     // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
///     // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
///     unsafe { OsStr::from_os_str_bytes_unchecked(word) }
/// }).collect();
/// ```
///
/// [conversions]: super#conversions
#[inline]
#[unstable(feature = "os_str_bytes", issue = "111544")]
pub unsafe fn from_os_str_bytes_unchecked(bytes: &[u8]) -> &Self {
// No validation happens here: `bytes` is handed unchanged to the platform `Slice`
// constructor, so the caller's obligations from `# Safety` are the only guarantee.
Self::from_inner(Slice::from_os_str_bytes_unchecked(bytes))
}
#[inline]
fn from_inner(inner: &Slice) -> &OsStr {
// SAFETY: OsStr is just a wrapper of Slice,
@ -837,13 +882,24 @@ impl OsStr {
OsString { inner: Buf::from_box(boxed) }
}
/// Gets the underlying byte representation.
/// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS
/// string slice, use the [`OsStr::from_os_str_bytes_unchecked`] function.
///
/// Note: it is *crucial* that this API is not externally public, to avoid
/// revealing the internal, platform-specific encodings.
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
/// ASCII.
///
/// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
/// be treated as opaque and only comparable within the same Rust version built for the same
/// target platform. For example, sending the slice over the network or storing it in a file
/// will likely result in incompatible byte slices. See [`OsString`] for more encoding details
/// and [`std::ffi`] for platform-specific, specified conversions.
///
/// [`std::ffi`]: crate::ffi
#[inline]
pub(crate) fn bytes(&self) -> &[u8] {
unsafe { &*(&self.inner as *const _ as *const [u8]) }
#[unstable(feature = "os_str_bytes", issue = "111544")]
pub fn as_os_str_bytes(&self) -> &[u8] {
self.inner.as_os_str_bytes()
}
/// Converts this string to its ASCII lower case equivalent in-place.
@ -1131,7 +1187,7 @@ impl Default for &OsStr {
impl PartialEq for OsStr {
#[inline]
fn eq(&self, other: &OsStr) -> bool {
self.bytes().eq(other.bytes())
self.as_os_str_bytes().eq(other.as_os_str_bytes())
}
}
@ -1158,23 +1214,23 @@ impl Eq for OsStr {}
impl PartialOrd for OsStr {
#[inline]
fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> {
self.bytes().partial_cmp(other.bytes())
self.as_os_str_bytes().partial_cmp(other.as_os_str_bytes())
}
#[inline]
fn lt(&self, other: &OsStr) -> bool {
self.bytes().lt(other.bytes())
self.as_os_str_bytes().lt(other.as_os_str_bytes())
}
#[inline]
fn le(&self, other: &OsStr) -> bool {
self.bytes().le(other.bytes())
self.as_os_str_bytes().le(other.as_os_str_bytes())
}
#[inline]
fn gt(&self, other: &OsStr) -> bool {
self.bytes().gt(other.bytes())
self.as_os_str_bytes().gt(other.as_os_str_bytes())
}
#[inline]
fn ge(&self, other: &OsStr) -> bool {
self.bytes().ge(other.bytes())
self.as_os_str_bytes().ge(other.as_os_str_bytes())
}
}
@ -1193,7 +1249,7 @@ impl PartialOrd<str> for OsStr {
impl Ord for OsStr {
#[inline]
fn cmp(&self, other: &OsStr) -> cmp::Ordering {
self.bytes().cmp(other.bytes())
self.as_os_str_bytes().cmp(other.as_os_str_bytes())
}
}
@ -1243,7 +1299,7 @@ impl_cmp!(Cow<'a, OsStr>, OsString);
impl Hash for OsStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.bytes().hash(state)
self.as_os_str_bytes().hash(state)
}
}

View file

@ -193,7 +193,7 @@ impl<'a> Prefix<'a> {
fn len(&self) -> usize {
use self::Prefix::*;
fn os_str_len(s: &OsStr) -> usize {
s.bytes().len()
s.as_os_str_bytes().len()
}
match *self {
Verbatim(x) => 4 + os_str_len(x),
@ -299,20 +299,6 @@ where
}
}
unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
// SAFETY: See note at the top of this module to understand why this and
// `OsStr::bytes` are used:
//
// This casts are safe as OsStr is internally a wrapper around [u8] on all
// platforms.
//
// Note that currently this relies on the special knowledge that std has;
// these types are single-element structs but are not marked
// repr(transparent) or repr(C) which would make these casts not allowable
// outside std.
unsafe { &*(s as *const [u8] as *const OsStr) }
}
// Detect scheme on Redox
fn has_redox_scheme(s: &[u8]) -> bool {
cfg!(target_os = "redox") && s.contains(&b':')
@ -330,7 +316,7 @@ fn has_physical_root(s: &[u8], prefix: Option<Prefix<'_>>) -> bool {
// basic workhorse for splitting stem and extension
fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
if file.bytes() == b".." {
if file.as_os_str_bytes() == b".." {
return (Some(file), None);
}
@ -338,18 +324,23 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
// and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced
// only from ASCII-bounded slices of existing &OsStr values.
let mut iter = file.bytes().rsplitn(2, |b| *b == b'.');
let mut iter = file.as_os_str_bytes().rsplitn(2, |b| *b == b'.');
let after = iter.next();
let before = iter.next();
if before == Some(b"") {
(Some(file), None)
} else {
unsafe { (before.map(|s| u8_slice_as_os_str(s)), after.map(|s| u8_slice_as_os_str(s))) }
unsafe {
(
before.map(|s| OsStr::from_os_str_bytes_unchecked(s)),
after.map(|s| OsStr::from_os_str_bytes_unchecked(s)),
)
}
}
}
fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
let slice = file.bytes();
let slice = file.as_os_str_bytes();
if slice == b".." {
return (file, None);
}
@ -364,7 +355,12 @@ fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
};
let before = &slice[..i];
let after = &slice[i + 1..];
unsafe { (u8_slice_as_os_str(before), Some(u8_slice_as_os_str(after))) }
unsafe {
(
OsStr::from_os_str_bytes_unchecked(before),
Some(OsStr::from_os_str_bytes_unchecked(after)),
)
}
}
////////////////////////////////////////////////////////////////////////////////
@ -743,7 +739,7 @@ impl<'a> Components<'a> {
// separately via `include_cur_dir`
b".." => Some(Component::ParentDir),
b"" => None,
_ => Some(Component::Normal(unsafe { u8_slice_as_os_str(comp) })),
_ => Some(Component::Normal(unsafe { OsStr::from_os_str_bytes_unchecked(comp) })),
}
}
@ -900,7 +896,7 @@ impl<'a> Iterator for Components<'a> {
let raw = &self.path[..self.prefix_len()];
self.path = &self.path[self.prefix_len()..];
return Some(Component::Prefix(PrefixComponent {
raw: unsafe { u8_slice_as_os_str(raw) },
raw: unsafe { OsStr::from_os_str_bytes_unchecked(raw) },
parsed: self.prefix.unwrap(),
}));
}
@ -972,7 +968,7 @@ impl<'a> DoubleEndedIterator for Components<'a> {
State::Prefix if self.prefix_len() > 0 => {
self.back = State::Done;
return Some(Component::Prefix(PrefixComponent {
raw: unsafe { u8_slice_as_os_str(self.path) },
raw: unsafe { OsStr::from_os_str_bytes_unchecked(self.path) },
parsed: self.prefix.unwrap(),
}));
}
@ -1481,17 +1477,17 @@ impl PathBuf {
fn _set_extension(&mut self, extension: &OsStr) -> bool {
let file_stem = match self.file_stem() {
None => return false,
Some(f) => f.bytes(),
Some(f) => f.as_os_str_bytes(),
};
// truncate until right after the file stem
let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr();
let start = self.inner.bytes().as_ptr().addr();
let start = self.inner.as_os_str_bytes().as_ptr().addr();
let v = self.as_mut_vec();
v.truncate(end_file_stem.wrapping_sub(start));
// add the new extension, if any
let new = extension.bytes();
let new = extension.as_os_str_bytes();
if !new.is_empty() {
v.reserve_exact(new.len() + 1);
v.push(b'.');
@ -2011,11 +2007,11 @@ impl Path {
// The following (private!) function allows construction of a path from a u8
// slice, which is only safe when it is known to follow the OsStr encoding.
unsafe fn from_u8_slice(s: &[u8]) -> &Path {
unsafe { Path::new(u8_slice_as_os_str(s)) }
unsafe { Path::new(OsStr::from_os_str_bytes_unchecked(s)) }
}
// The following (private!) function reveals the byte encoding used for OsStr.
fn as_u8_slice(&self) -> &[u8] {
self.inner.bytes()
self.inner.as_os_str_bytes()
}
/// Directly wraps a string slice as a `Path` slice.

View file

@ -19,7 +19,7 @@ pub fn run_path_with_cstr<T, F>(path: &Path, f: F) -> io::Result<T>
where
F: FnOnce(&CStr) -> io::Result<T>,
{
run_with_cstr(path.as_os_str().bytes(), f)
run_with_cstr(path.as_os_str().as_os_str_bytes(), f)
}
#[inline]

View file

@ -8,7 +8,7 @@ use core::iter::repeat;
fn stack_allocation_works() {
let path = Path::new("abc");
let result = run_path_with_cstr(path, |p| {
assert_eq!(p, &*CString::new(path.as_os_str().bytes()).unwrap());
assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap());
Ok(42)
});
assert_eq!(result.unwrap(), 42);
@ -25,7 +25,7 @@ fn heap_allocation_works() {
let path = repeat("a").take(384).collect::<String>();
let path = Path::new(&path);
let result = run_path_with_cstr(path, |p| {
assert_eq!(p, &*CString::new(path.as_os_str().bytes()).unwrap());
assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap());
Ok(42)
});
assert_eq!(result.unwrap(), 42);

View file

@ -193,13 +193,18 @@ impl Buf {
impl Slice {
#[inline]
fn from_u8_slice(s: &[u8]) -> &Slice {
pub fn as_os_str_bytes(&self) -> &[u8] {
&self.inner
}
/// Reinterprets the byte slice `s` as a `Slice` without inspecting its contents.
///
/// # Safety
///
/// NOTE(review): the caller contract is presumably the one documented on
/// `OsStr::from_os_str_bytes_unchecked`, which delegates to a `Slice` function of this
/// name — confirm that nothing stricter is required here.
#[inline]
pub unsafe fn from_os_str_bytes_unchecked(s: &[u8]) -> &Slice {
// The cast presumes `Slice` is a thin wrapper around `[u8]` (`as_os_str_bytes` hands out
// `&self.inner` as `&[u8]`) — TODO confirm the layout guarantee that makes this sound.
unsafe { mem::transmute(s) }
}
#[inline]
pub fn from_str(s: &str) -> &Slice {
Slice::from_u8_slice(s.as_bytes())
unsafe { Slice::from_os_str_bytes_unchecked(s.as_bytes()) }
}
pub fn to_str(&self) -> Option<&str> {

View file

@ -2,7 +2,7 @@ use super::*;
#[test]
fn slice_debug_output() {
let input = Slice::from_u8_slice(b"\xF0hello,\tworld");
let input = unsafe { Slice::from_os_str_bytes_unchecked(b"\xF0hello,\tworld") };
let expected = r#""\xF0hello,\tworld""#;
let output = format!("{input:?}");
@ -11,8 +11,7 @@ fn slice_debug_output() {
#[test]
fn display() {
assert_eq!(
"Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(),
);
assert_eq!("Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye", unsafe {
Slice::from_os_str_bytes_unchecked(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string()
},);
}

View file

@ -30,7 +30,7 @@ pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
// Get the components, skipping the redundant leading "." component if it exists.
let mut components = path.strip_prefix(".").unwrap_or(path).components();
let path_os = path.as_os_str().bytes();
let path_os = path.as_os_str().as_os_str_bytes();
let mut normalized = if path.is_absolute() {
// "If a pathname begins with two successive <slash> characters, the

View file

@ -164,9 +164,9 @@ pub enum ProgramKind {
impl ProgramKind {
fn new(program: &OsStr) -> Self {
if program.bytes().starts_with(b"/") {
if program.as_os_str_bytes().starts_with(b"/") {
Self::Absolute
} else if program.bytes().contains(&b'/') {
} else if program.as_os_str_bytes().contains(&b'/') {
// If the program has more than one component in it, it is a relative path.
Self::Relative
} else {

View file

@ -226,7 +226,7 @@ pub(crate) fn append_arg(cmd: &mut Vec<u16>, arg: &Arg, force_quotes: bool) -> i
// that it actually gets passed through on the command line or otherwise
// it will be dropped entirely when parsed on the other end.
ensure_no_nuls(arg)?;
let arg_bytes = arg.bytes();
let arg_bytes = arg.as_os_str_bytes();
let (quote, escape) = match quote {
Quote::Always => (true, true),
Quote::Auto => {
@ -297,7 +297,9 @@ pub(crate) fn make_bat_command_line(
// * `|<>` pipe/redirect characters.
const SPECIAL: &[u8] = b"\t &()[]{}^=;!'+,`~%|<>";
let force_quotes = match arg {
Arg::Regular(arg) if !force_quotes => arg.bytes().iter().any(|c| SPECIAL.contains(c)),
Arg::Regular(arg) if !force_quotes => {
arg.as_os_str_bytes().iter().any(|c| SPECIAL.contains(c))
}
_ => force_quotes,
};
append_arg(&mut cmd, arg, force_quotes)?;

View file

@ -151,6 +151,16 @@ impl Buf {
}
impl Slice {
/// Returns the bytes backing this slice, as exposed by the inner value's `as_bytes`.
///
/// This only borrows; nothing is copied or re-encoded.
#[inline]
pub fn as_os_str_bytes(&self) -> &[u8] {
self.inner.as_bytes()
}
/// Reinterprets the byte slice `s` as a `Slice` without checking that it is valid WTF-8.
///
/// # Safety
///
/// `s` is passed to `Wtf8::from_bytes_unchecked` unvalidated, so it must already be
/// well-formed WTF-8.
#[inline]
pub unsafe fn from_os_str_bytes_unchecked(s: &[u8]) -> &Slice {
// The resulting `&Wtf8` is transmuted to `&Slice`; this presumes `Slice` is a thin wrapper
// around `Wtf8` — TODO confirm the layout guarantee.
mem::transmute(Wtf8::from_bytes_unchecked(s))
}
#[inline]
pub fn from_str(s: &str) -> &Slice {
unsafe { mem::transmute(Wtf8::from_str(s)) }

View file

@ -1,7 +1,6 @@
use super::{c, fill_utf16_buf, to_u16s};
use crate::ffi::{OsStr, OsString};
use crate::io;
use crate::mem;
use crate::path::{Path, PathBuf, Prefix};
use crate::ptr;
@ -11,16 +10,6 @@ mod tests;
pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';
/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
// &OsStr is layout compatible with &Slice, which is compatible with &Wtf8,
// which is compatible with &[u8].
mem::transmute(bytes)
}
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
b == b'/' || b == b'\\'
@ -33,12 +22,12 @@ pub fn is_verbatim_sep(b: u8) -> bool {
/// Returns true if `path` looks like a lone filename.
pub(crate) fn is_file_name(path: &OsStr) -> bool {
!path.bytes().iter().copied().any(is_sep_byte)
!path.as_os_str_bytes().iter().copied().any(is_sep_byte)
}
pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
let is_verbatim = path.bytes().starts_with(br"\\?\");
let is_verbatim = path.as_os_str_bytes().starts_with(br"\\?\");
let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
if let Some(&c) = path.bytes().last() { is_separator(c) } else { false }
if let Some(&c) = path.as_os_str_bytes().last() { is_separator(c) } else { false }
}
/// Appends a suffix to a path.
@ -60,7 +49,7 @@ impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
fn get_prefix(path: &OsStr) -> [u8; LEN] {
let mut prefix = [0; LEN];
// SAFETY: Only ASCII characters are modified.
for (i, &ch) in path.bytes().iter().take(LEN).enumerate() {
for (i, &ch) in path.as_os_str_bytes().iter().take(LEN).enumerate() {
prefix[i] = if ch == b'/' { b'\\' } else { ch };
}
prefix
@ -93,7 +82,7 @@ impl<'a> PrefixParserSlice<'a, '_> {
}
fn prefix_bytes(&self) -> &'a [u8] {
&self.path.bytes()[..self.index]
&self.path.as_os_str_bytes()[..self.index]
}
fn finish(self) -> &'a OsStr {
@ -101,7 +90,7 @@ impl<'a> PrefixParserSlice<'a, '_> {
// &[u8] and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced only
// from ASCII-bounded slices of existing &OsStr values.
unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) }
unsafe { OsStr::from_os_str_bytes_unchecked(&self.path.as_os_str_bytes()[self.index..]) }
}
}
@ -173,7 +162,7 @@ fn parse_drive(path: &OsStr) -> Option<u8> {
drive.is_ascii_alphabetic()
}
match path.bytes() {
match path.as_os_str_bytes() {
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
_ => None,
}
@ -182,7 +171,7 @@ fn parse_drive(path: &OsStr) -> Option<u8> {
// Parses a drive prefix exactly, e.g. "C:"
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
// only parse two bytes: the drive letter and the drive separator
if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
if path.as_os_str_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
parse_drive(path)
} else {
None
@ -196,21 +185,26 @@ fn parse_drive_exact(path: &OsStr) -> Option<u8> {
fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
match path.bytes().iter().position(|&x| separator(x)) {
match path.as_os_str_bytes().iter().position(|&x| separator(x)) {
Some(separator_start) => {
let separator_end = separator_start + 1;
let component = &path.bytes()[..separator_start];
let component = &path.as_os_str_bytes()[..separator_start];
// Panic safe
// The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
let path = &path.bytes()[separator_end..];
let path = &path.as_os_str_bytes()[separator_end..];
// SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
// is encoded in a single byte, therefore `bytes[separator_start]` and
// `bytes[separator_end]` must be code point boundaries and thus
// `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) }
unsafe {
(
OsStr::from_os_str_bytes_unchecked(component),
OsStr::from_os_str_bytes_unchecked(path),
)
}
}
None => (path, OsStr::new("")),
}
@ -329,7 +323,7 @@ pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
// Verbatim paths should not be modified.
if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
// NULs in verbatim paths are rejected for consistency.
if path.bytes().contains(&0) {
if path.as_os_str_bytes().contains(&0) {
return Err(io::const_io_error!(
io::ErrorKind::InvalidInput,
"strings passed to WinAPI cannot contain NULs",

View file

@ -395,7 +395,7 @@ fn resolve_exe<'a>(
// Test if the file name has the `exe` extension.
// This does a case-insensitive `ends_with`.
let has_exe_suffix = if exe_path.len() >= EXE_SUFFIX.len() {
exe_path.bytes()[exe_path.len() - EXE_SUFFIX.len()..]
exe_path.as_os_str_bytes()[exe_path.len() - EXE_SUFFIX.len()..]
.eq_ignore_ascii_case(EXE_SUFFIX.as_bytes())
} else {
false
@ -425,7 +425,7 @@ fn resolve_exe<'a>(
// From the `CreateProcessW` docs:
// > If the file name does not contain an extension, .exe is appended.
// Note that this rule only applies when searching paths.
let has_extension = exe_path.bytes().contains(&b'.');
let has_extension = exe_path.as_os_str_bytes().contains(&b'.');
// Search the directories given by `search_paths`.
let result = search_paths(parent_paths, child_paths, |mut path| {

View file

@ -570,7 +570,7 @@ impl Wtf8 {
/// Since the byte slice is not checked for valid WTF-8, this function is
/// marked unsafe.
#[inline]
unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
mem::transmute(value)
}
@ -614,6 +614,12 @@ impl Wtf8 {
Wtf8CodePoints { bytes: self.bytes.iter() }
}
/// Returns the raw bytes of this WTF-8 data as a borrowed slice.
///
/// Nothing is copied or validated; this simply borrows the `bytes` field.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
/// Tries to convert the string to UTF-8 and return a `&str` slice.
///
/// Returns `None` if the string contains surrogates.