1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
//! The `compat` API flavor provides full compatibility with [`std::str::from_utf8()`] and detailed validation errors.
//!
//! In particular, [`from_utf8()`]
//! returns an [`Utf8Error`], which has the [`valid_up_to()`](Utf8Error#method.valid_up_to) and
//! [`error_len()`](Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The
//! second is useful e.g. for replacing invalid byte sequences with a replacement character.
//!
//! The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once
//! an invalid UTF-8 sequence is encountered, they return without processing the rest of the data.
//! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8.
use core::fmt::Display;
use core::fmt::Formatter;
use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut};
use crate::implementation::validate_utf8_compat;
/// Validation error modelled after [`std::str::Utf8Error`].
///
/// Records where in the input the validation error was encountered and, when
/// available, how many bytes the invalid UTF-8 sequence spans.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Utf8Error {
    /// Index reported by `valid_up_to()`; the `Display` output prints it as the
    /// index the offending sequence starts "from".
    pub(crate) valid_up_to: usize,
    /// Length in bytes of the invalid sequence. `None` is rendered by the
    /// `Display` implementation as an "incomplete" byte sequence.
    pub(crate) error_len: Option<u8>,
}
impl Utf8Error {
    /// Analogue to [`std::str::Utf8Error::valid_up_to()`](std::str::Utf8Error#method.valid_up_to).
    ///
    /// Returns the index stored in this error. The `Display` output reports the
    /// offending byte sequence as starting "from" this index.
    #[inline]
    #[must_use]
    #[allow(clippy::missing_const_for_fn)] // would not provide any benefit
    pub fn valid_up_to(&self) -> usize {
        self.valid_up_to
    }

    /// Analogue to [`std::str::Utf8Error::error_len()`](std::str::Utf8Error#method.error_len).
    ///
    /// Returns the stored length of the invalid sequence, widened from `u8` to
    /// `usize`, or `None` if no length was recorded. The `Display` output
    /// describes the `None` case as an incomplete byte sequence.
    #[inline]
    #[must_use]
    pub fn error_len(&self) -> Option<usize> {
        // `u8` -> `usize` can never lose information, so the infallible
        // conversion is used instead of a cast.
        self.error_len.map(usize::from)
    }
}
/// Human-readable rendering of the error.
///
/// An error with a known length is reported as an invalid sequence of that many
/// bytes; an error without a length is reported as an incomplete sequence. Both
/// messages include the index stored in `valid_up_to`.
impl Display for Utf8Error {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let start = self.valid_up_to;
        match self.error_len {
            Some(len) => write!(
                f,
                "invalid utf-8 sequence of {} bytes from index {}",
                len, start
            ),
            None => write!(f, "incomplete utf-8 byte sequence from index {}", start),
        }
    }
}
// Lets `Utf8Error` be used wherever a `std::error::Error` is expected. The trait
// comes from `std`, hence the impl is gated on the `std` feature. The body is
// empty, so every trait method keeps its default implementation.
#[cfg(feature = "std")]
impl std::error::Error for Utf8Error {}
/// Analogue to [`std::str::from_utf8()`].
///
/// Checks if the passed byte sequence is valid UTF-8 and returns a `&str`
/// reference to the passed byte slice wrapped in `Ok()` if it is.
///
/// # Errors
/// Will return `Err(`[`Utf8Error`]`)` with detailed error information if the
/// input contains invalid UTF-8.
#[inline]
pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> {
    // SAFETY: `from_utf8_unchecked()` requires its argument to be valid UTF-8.
    // It is only reached when `validate_utf8_compat()` did not return an error
    // for the very same slice, which cannot change in between because it is
    // borrowed immutably.
    // NOTE(review): `validate_utf8_compat()` is defined outside this file; this
    // assumes that a non-error result means all of `input` is valid UTF-8 and
    // that the call itself has no further preconditions — confirm at its
    // definition.
    unsafe {
        validate_utf8_compat(input)?;
        Ok(from_utf8_unchecked(input))
    }
}
/// Analogue to [`std::str::from_utf8_mut()`].
///
/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a
/// `&mut str` reference to the passed byte slice wrapped in `Ok()` if it is.
///
/// # Errors
/// Will return `Err(`[`Utf8Error`]`)` with detailed error information if the
/// input contains invalid UTF-8.
#[inline]
pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> {
    // SAFETY: `from_utf8_unchecked_mut()` requires its argument to be valid
    // UTF-8. It is only reached when `validate_utf8_compat()` did not return an
    // error for the very same slice, and `input` is an exclusive borrow, so
    // nothing else can modify the bytes between the check and the conversion.
    // NOTE(review): `validate_utf8_compat()` is defined outside this file; this
    // assumes that a non-error result means all of `input` is valid UTF-8 and
    // that the call itself has no further preconditions — confirm at its
    // definition.
    unsafe {
        validate_utf8_compat(input)?;
        Ok(from_utf8_unchecked_mut(input))
    }
}
/// Allows direct access to the platform-specific unsafe validation implementations.
///
/// Only compiled when the `public_imp` feature is enabled. Every leaf module
/// re-exports the `validate_utf8_compat` function of the corresponding
/// implementation module under the name `validate_utf8`.
#[cfg(feature = "public_imp")]
pub mod imp {
    /// Includes the x86/x86-64 SIMD implementations.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub mod x86 {
        /// Includes the validation implementation for AVX 2-compatible CPUs.
        pub mod avx2 {
            pub use crate::implementation::x86::avx2::validate_utf8_compat as validate_utf8;
        }

        /// Includes the validation implementation for SSE 4.2-compatible CPUs.
        pub mod sse42 {
            pub use crate::implementation::x86::sse42::validate_utf8_compat as validate_utf8;
        }
    }

    /// Includes the aarch64 SIMD implementations.
    ///
    /// Requires the `aarch64_neon` feature in addition to an aarch64 target.
    #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))]
    pub mod aarch64 {
        /// Includes the validation implementation for Neon SIMD.
        pub mod neon {
            pub use crate::implementation::aarch64::neon::validate_utf8_compat as validate_utf8;
        }
    }
}