Skip to content

Commit

Permalink
Unrolled build for rust-lang#137154
Browse the repository at this point in the history
Rollup merge of rust-lang#137154 - thaliaarchi:wtf8-fast-paths, r=ChrisDenton

Add UTF-8 validation fast paths in `Wtf8Buf`

This adds two more fast paths for UTF-8 validation in `Wtf8Buf`, making use of the `is_known_utf8` flag added in rust-lang#96869 (Optimize `Wtf8Buf::into_string` for the case where it contains UTF-8).

r? `@ChrisDenton`
  • Loading branch information
rust-timer authored Feb 26, 2025
2 parents a46c755 + eb14652 commit d59558c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
4 changes: 2 additions & 2 deletions library/std/src/sys/os_str/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ impl AsInner<Wtf8> for Buf {

/// Debug formatting for `Buf`, produced by delegating to the `fmt::Debug`
/// implementation of another value rather than formatting anything directly.
impl fmt::Debug for Buf {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// NOTE(review): the two delegating calls below appear back to back with no
// separator, which looks like the removed and the added line of a diff hunk
// shown together. Presumably only the `&self.inner` form is meant to remain;
// confirm against the real file.
fmt::Debug::fmt(self.as_slice(), f)
fmt::Debug::fmt(&self.inner, f)
}
}

/// Display formatting for `Buf`, produced by delegating to the `fmt::Display`
/// implementation of another value rather than formatting anything directly.
impl fmt::Display for Buf {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// NOTE(review): the two delegating calls below appear back to back with no
// separator, which looks like the removed and the added line of a diff hunk
// shown together. Presumably only the `&self.inner` form is meant to remain;
// confirm against the real file.
fmt::Display::fmt(self.as_slice(), f)
fmt::Display::fmt(&self.inner, f)
}
}

Expand Down
26 changes: 25 additions & 1 deletion library/std/src/sys_common/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,18 @@ impl fmt::Debug for Wtf8Buf {
}
}

/// Displays the string, with every unpaired surrogate rendered as the
/// replacement character, U+FFFD.
///
/// When `as_known_utf8` yields a `&str`, that string is handed to its own
/// `Display` implementation; otherwise formatting is delegated to the value
/// this buffer dereferences to.
impl fmt::Display for Wtf8Buf {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.as_known_utf8() {
            // Fast path: the contents are already available as a `&str`.
            Some(utf8) => fmt::Display::fmt(utf8, formatter),
            // Fallback: let the dereferenced value format itself.
            None => fmt::Display::fmt(&**self, formatter),
        }
    }
}

impl Wtf8Buf {
/// Creates a new, empty WTF-8 string.
#[inline]
Expand Down Expand Up @@ -262,6 +274,18 @@ impl Wtf8Buf {
unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
}

/// Borrows the contents as a `&str` without running UTF-8 validation.
///
/// Returns `Some` only when the `is_known_utf8` flag is set, i.e. the buffer
/// is already known to hold valid UTF-8; otherwise returns `None`.
#[inline]
fn as_known_utf8(&self) -> Option<&str> {
    if !self.is_known_utf8 {
        return None;
    }
    // SAFETY: `is_known_utf8` is set, so the buffer is known to be valid UTF-8.
    let text = unsafe { str::from_utf8_unchecked(self.as_bytes()) };
    Some(text)
}

/// Reserves capacity for at least `additional` more bytes to be inserted
/// in the given `Wtf8Buf`.
/// The collection may reserve more space to avoid frequent reallocations.
Expand Down Expand Up @@ -364,7 +388,7 @@ impl Wtf8Buf {
_ => {
// If we'll be pushing a string containing a surrogate, we may
// no longer have UTF-8.
if other.next_surrogate(0).is_some() {
if self.is_known_utf8 && other.next_surrogate(0).is_some() {
self.is_known_utf8 = false;
}

Expand Down

0 comments on commit d59558c

Please sign in to comment.