Rollup merge of #126980 - Borgerr:fix-extendfromslice-check, r=workingjubilee

set self.is_known_utf8 to false in extend_from_slice

try-job: x86_64-msvc

closes #126977
Related to #126885, #126333, and this conversation (commit aa46a3368e, review comment r143539097)
This commit is contained in:
Jacob Pratt 2024-06-27 02:06:20 -04:00 committed by GitHub
commit 8905be5ef3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 25 additions and 1 deletion

View file

@ -480,7 +480,7 @@ impl Wtf8Buf {
/// Appends the raw bytes of `other` to the end of this buffer.
///
/// The appended bytes are not inspected, so after this call the buffer is
/// no longer assumed to be valid UTF-8: `is_known_utf8` is cleared
/// unconditionally.
#[inline]
pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
    self.bytes.extend_from_slice(other);
    // Nothing is known about `other`, so conservatively drop the UTF-8
    // guarantee. A single unconditional store replaces the previous
    // surrogate scan, whose result was overwritten immediately anyway.
    self.is_known_utf8 = false;
}
}

View file

@ -725,3 +725,27 @@ fn wtf8_utf8_boundary_between_surrogates() {
string.push(CodePoint::from_u32(0xD800).unwrap());
check_utf8_boundary(&string, 3);
}
/// Appending raw bytes to a buffer that holds a lone surrogate must leave
/// the `is_known_utf8` flag cleared.
#[test]
fn wobbled_wtf8_plus_bytes_isnt_utf8() {
    // Three-byte WTF-8 encoding of the lone surrogate U+D800 (not valid UTF-8).
    let lone_surrogate: &[u8] = b"\xED\xA0\x80";
    // SAFETY: a lone surrogate encoded this way is well-formed WTF-8.
    let borrowed = unsafe { Wtf8::from_bytes_unchecked(lone_surrogate) };
    let mut buf: Wtf8Buf = borrowed.to_owned();
    assert!(!buf.is_known_utf8);

    buf.extend_from_slice(b"some utf-8");
    assert!(!buf.is_known_utf8);
}
/// Pushing a valid `str` onto a buffer that holds a lone surrogate must
/// leave the `is_known_utf8` flag cleared.
#[test]
fn wobbled_wtf8_plus_str_isnt_utf8() {
    // Three-byte WTF-8 encoding of the lone surrogate U+D800 (not valid UTF-8).
    let lone_surrogate: &[u8] = b"\xED\xA0\x80";
    // SAFETY: a lone surrogate encoded this way is well-formed WTF-8.
    let borrowed = unsafe { Wtf8::from_bytes_unchecked(lone_surrogate) };
    let mut buf: Wtf8Buf = borrowed.to_owned();
    assert!(!buf.is_known_utf8);

    buf.push_str("some utf-8");
    assert!(!buf.is_known_utf8);
}
/// A buffer built from a `str` stays flagged as known UTF-8 after more
/// `str` data is pushed onto it.
#[test]
fn unwobbly_wtf8_plus_utf8_is_utf8() {
    let mut buf: Wtf8Buf = Wtf8Buf::from_str("hello world");
    // Built from a `str`: the flag starts out set.
    assert!(buf.is_known_utf8);

    buf.push_str("some utf-8");
    // Appending a `str` keeps the flag set.
    assert!(buf.is_known_utf8);
}