From 8047f8fb519bb5d9bb3c29652bb77b30369de7b5 Mon Sep 17 00:00:00 2001
From: CAD97 <cad97@cad97.com>
Date: Tue, 12 Apr 2022 02:43:35 -0500
Subject: [PATCH 1/4] Add feature(str_from_utf16_endian)

---
 library/alloc/src/string.rs | 150 ++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)

diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index ed43244ebda..e29f5fb70ab 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -714,6 +714,156 @@ impl String {
             .collect()
     }
 
+    /// Decode a UTF-16LE–encoded slice `v` into a `String`, returning [`Err`]
+    /// if `v` contains any invalid data.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(str_from_utf16_endian)]
+    /// // 𝄞music
+    /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
+    ///           0x73, 0x00, 0x69, 0x00, 0x63, 0x00];
+    /// assert_eq!(String::from("𝄞music"),
+    ///            String::from_utf16le(v).unwrap());
+    ///
+    /// // 𝄞mu<invalid>ic
+    /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
+    ///           0x00, 0xD8, 0x69, 0x00, 0x63, 0x00];
+    /// assert!(String::from_utf16le(v).is_err());
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    pub fn from_utf16le(v: &[u8]) -> Result<String, FromUtf16Error> {
+        if v.len() % 2 != 0 {
+            return Err(FromUtf16Error(()));
+        }
+        match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
+            (true, (&[], v, &[])) => Self::from_utf16(v),
+            _ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_le_bytes))
+                .collect::<Result<_, _>>()
+                .map_err(|_| FromUtf16Error(())),
+        }
+    }
+
+    /// Decode a UTF-16LE–encoded slice `v` into a `String`, replacing
+    /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
+    ///
+    /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
+    /// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
+    /// conversion requires a memory allocation.
+    ///
+    /// [`from_utf8_lossy`]: String::from_utf8_lossy
+    /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
+    /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(str_from_utf16_endian)]
+    /// // 𝄞mus<invalid>ic<invalid>
+    /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
+    ///           0x73, 0x00, 0x1E, 0xDD, 0x69, 0x00, 0x63, 0x00,
+    ///           0x34, 0xD8];
+    ///
+    /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
+    ///            String::from_utf16le_lossy(v));
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    pub fn from_utf16le_lossy(v: &[u8]) -> String {
+        match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
+            (true, (&[], v, &[])) => Self::from_utf16_lossy(v),
+            (true, (&[], v, &[_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
+            _ => {
+                let mut iter = v.array_chunks::<2>();
+                let string = decode_utf16(iter.by_ref().copied().map(u16::from_le_bytes))
+                    .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
+                    .collect();
+                if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" }
+            }
+        }
+    }
+
+    /// Decode a UTF-16BE–encoded slice `v` into a `String`, returning [`Err`]
+    /// if `v` contains any invalid data.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(str_from_utf16_endian)]
+    /// // 𝄞music
+    /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
+    ///           0x00, 0x73, 0x00, 0x69, 0x00, 0x63];
+    /// assert_eq!(String::from("𝄞music"),
+    ///            String::from_utf16be(v).unwrap());
+    ///
+    /// // 𝄞mu<invalid>ic
+    /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
+    ///           0xD8, 0x00, 0x00, 0x69, 0x00, 0x63];
+    /// assert!(String::from_utf16be(v).is_err());
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    pub fn from_utf16be(v: &[u8]) -> Result<String, FromUtf16Error> {
+        if v.len() % 2 != 0 {
+            return Err(FromUtf16Error(()));
+        }
+        match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
+            (true, (&[], v, &[])) => Self::from_utf16(v),
+            _ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_be_bytes))
+                .collect::<Result<_, _>>()
+                .map_err(|_| FromUtf16Error(())),
+        }
+    }
+
+    /// Decode a UTF-16BE–encoded slice `v` into a `String`, replacing
+    /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
+    ///
+    /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
+    /// `from_utf16be_lossy` returns a `String` since the UTF-16 to UTF-8
+    /// conversion requires a memory allocation.
+    ///
+    /// [`from_utf8_lossy`]: String::from_utf8_lossy
+    /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
+    /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(str_from_utf16_endian)]
+    /// // 𝄞mus<invalid>ic<invalid>
+    /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
+    ///           0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00, 0x63,
+    ///           0xD8, 0x34];
+    ///
+    /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
+    ///            String::from_utf16be_lossy(v));
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    pub fn from_utf16be_lossy(v: &[u8]) -> String {
+        match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
+            (true, (&[], v, &[])) => Self::from_utf16_lossy(v),
+            (true, (&[], v, &[_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
+            _ => {
+                let mut iter = v.array_chunks::<2>();
+                let string = decode_utf16(iter.by_ref().copied().map(u16::from_be_bytes))
+                    .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
+                    .collect();
+                if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" }
+            }
+        }
+    }
+
     /// Decomposes a `String` into its raw components.
     ///
     /// Returns the raw pointer to the underlying data, the length of

From 3d448bd067234283c32792bf2a96d63d65fb5e91 Mon Sep 17 00:00:00 2001
From: Christopher Durham <cad97@cad97.com>
Date: Thu, 28 Sep 2023 23:18:55 -0400
Subject: [PATCH 2/4] style nits

Co-authored-by: David Tolnay <dtolnay@gmail.com>
---
 library/alloc/src/string.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index e29f5fb70ab..9a7d8213f42 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -741,7 +741,7 @@ impl String {
             return Err(FromUtf16Error(()));
         }
         match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
-            (true, (&[], v, &[])) => Self::from_utf16(v),
+            (true, ([], v, [])) => Self::from_utf16(v),
             _ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_le_bytes))
                 .collect::<Result<_, _>>()
                 .map_err(|_| FromUtf16Error(())),
@@ -777,8 +777,8 @@ impl String {
     #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
     pub fn from_utf16le_lossy(v: &[u8]) -> String {
         match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
-            (true, (&[], v, &[])) => Self::from_utf16_lossy(v),
-            (true, (&[], v, &[_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
+            (true, ([], v, [])) => Self::from_utf16_lossy(v),
+            (true, ([], v, [_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
             _ => {
                 let mut iter = v.array_chunks::<2>();
                 let string = decode_utf16(iter.by_ref().copied().map(u16::from_le_bytes))
@@ -816,7 +816,7 @@ impl String {
             return Err(FromUtf16Error(()));
         }
         match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
-            (true, (&[], v, &[])) => Self::from_utf16(v),
+            (true, ([], v, [])) => Self::from_utf16(v),
             _ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_be_bytes))
                 .collect::<Result<_, _>>()
                 .map_err(|_| FromUtf16Error(())),
@@ -852,8 +852,8 @@ impl String {
     #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
     pub fn from_utf16be_lossy(v: &[u8]) -> String {
         match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
-            (true, (&[], v, &[])) => Self::from_utf16_lossy(v),
-            (true, (&[], v, &[_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
+            (true, ([], v, [])) => Self::from_utf16_lossy(v),
+            (true, ([], v, [_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
             _ => {
                 let mut iter = v.array_chunks::<2>();
                 let string = decode_utf16(iter.by_ref().copied().map(u16::from_be_bytes))

From 1efea3138554e37874c098e497f59e8f955d2d3a Mon Sep 17 00:00:00 2001
From: Christopher Durham <cad97@cad97.com>
Date: Thu, 28 Sep 2023 23:44:39 -0400
Subject: [PATCH 3/4] add str_from_utf16_endian tracking issue

---
 library/alloc/src/string.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index 9a7d8213f42..4e4c358200e 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -735,7 +735,7 @@ impl String {
     /// assert!(String::from_utf16le(v).is_err());
     /// ```
     #[cfg(not(no_global_oom_handling))]
-    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    #[unstable(feature = "str_from_utf16_endian", issue = "116258")]
     pub fn from_utf16le(v: &[u8]) -> Result<String, FromUtf16Error> {
         if v.len() % 2 != 0 {
             return Err(FromUtf16Error(()));
@@ -774,7 +774,7 @@ impl String {
     ///            String::from_utf16le_lossy(v));
     /// ```
     #[cfg(not(no_global_oom_handling))]
-    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    #[unstable(feature = "str_from_utf16_endian", issue = "116258")]
     pub fn from_utf16le_lossy(v: &[u8]) -> String {
         match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
             (true, ([], v, [])) => Self::from_utf16_lossy(v),
@@ -810,7 +810,7 @@ impl String {
     /// assert!(String::from_utf16be(v).is_err());
     /// ```
     #[cfg(not(no_global_oom_handling))]
-    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    #[unstable(feature = "str_from_utf16_endian", issue = "116258")]
     pub fn from_utf16be(v: &[u8]) -> Result<String, FromUtf16Error> {
         if v.len() % 2 != 0 {
             return Err(FromUtf16Error(()));
@@ -849,7 +849,7 @@ impl String {
     ///            String::from_utf16be_lossy(v));
     /// ```
     #[cfg(not(no_global_oom_handling))]
-    #[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
+    #[unstable(feature = "str_from_utf16_endian", issue = "116258")]
     pub fn from_utf16be_lossy(v: &[u8]) -> String {
         match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
             (true, ([], v, [])) => Self::from_utf16_lossy(v),

From 5facc32e22e8843a8c276305fff4ec84d718e1c0 Mon Sep 17 00:00:00 2001
From: Christopher Durham <cad97@cad97.com>
Date: Fri, 29 Sep 2023 00:04:57 -0400
Subject: [PATCH 4/4] fix char imports

---
 library/alloc/src/string.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index 4e4c358200e..844441ba3a7 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -742,7 +742,7 @@ impl String {
         }
         match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
             (true, ([], v, [])) => Self::from_utf16(v),
-            _ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_le_bytes))
+            _ => char::decode_utf16(v.array_chunks::<2>().copied().map(u16::from_le_bytes))
                 .collect::<Result<_, _>>()
                 .map_err(|_| FromUtf16Error(())),
         }
@@ -781,8 +781,8 @@ impl String {
             (true, ([], v, [_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
             _ => {
                 let mut iter = v.array_chunks::<2>();
-                let string = decode_utf16(iter.by_ref().copied().map(u16::from_le_bytes))
-                    .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
+                let string = char::decode_utf16(iter.by_ref().copied().map(u16::from_le_bytes))
+                    .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
                     .collect();
                 if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" }
             }
@@ -817,7 +817,7 @@ impl String {
         }
         match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
             (true, ([], v, [])) => Self::from_utf16(v),
-            _ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_be_bytes))
+            _ => char::decode_utf16(v.array_chunks::<2>().copied().map(u16::from_be_bytes))
                 .collect::<Result<_, _>>()
                 .map_err(|_| FromUtf16Error(())),
         }
@@ -856,8 +856,8 @@ impl String {
             (true, ([], v, [_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
             _ => {
                 let mut iter = v.array_chunks::<2>();
-                let string = decode_utf16(iter.by_ref().copied().map(u16::from_be_bytes))
-                    .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
+                let string = char::decode_utf16(iter.by_ref().copied().map(u16::from_be_bytes))
+                    .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
                     .collect();
                 if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" }
             }