1/// A type that provides a human readable debug impl for arbitrary bytes.
2///
3/// This generally works best when the bytes are presumed to be mostly UTF-8,
4/// but will work for anything.
5///
6/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
7pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
89impl<'a> core::fmt::Debug for Bytes<'a> {
10fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
11write!(f, "\"")?;
12// This is a sad re-implementation of a similar impl found in bstr.
13let mut bytes = self.0;
14while let Some(result) = utf8_decode(bytes) {
15let ch = match result {
16Ok(ch) => ch,
17Err(byte) => {
18write!(f, r"\x{:02x}", byte)?;
19 bytes = &bytes[1..];
20continue;
21 }
22 };
23 bytes = &bytes[ch.len_utf8()..];
24match ch {
25'\0' => write!(f, "\\0")?,
26// ASCII control characters except \0, \n, \r, \t
27'\x01'..='\x08'
28| '\x0b'
29| '\x0c'
30| '\x0e'..='\x19'
31| '\x7f' => {
32write!(f, "\\x{:02x}", u32::from(ch))?;
33 }
34_ => {
35write!(f, "{}", ch.escape_debug())?;
36 }
37 }
38 }
39write!(f, "\"")?;
40Ok(())
41 }
42}
/// Decodes a single UTF-8 encoded codepoint from the front of `bytes`.
///
/// On success the decoded `char` is returned in `Ok`. When the slice does not
/// start with a complete, valid UTF-8 sequence, the first byte of the slice
/// is handed back in `Err` so the caller can skip past it.
///
/// `None` is returned exactly when `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    // Maps a lead byte to the length of the sequence it announces. Stray
    // continuation bytes and bytes that can never start a sequence map to
    // `None`.
    fn sequence_len(lead: u8) -> Option<usize> {
        match lead {
            0x00..=0x7F => Some(1),
            0x80..=0xBF => None,
            0xC0..=0xDF => Some(2),
            0xE0..=0xEF => Some(3),
            0xF0..=0xF7 => Some(4),
            0xF8..=0xFF => None,
        }
    }

    let first = *bytes.first()?;
    let decoded = match sequence_len(first) {
        // ASCII needs no validation.
        Some(1) => Ok(char::from(first)),
        // Enough bytes are present; let the standard library validate the
        // whole sequence (rejects overlong forms, surrogates, etc.).
        Some(n) if n <= bytes.len() => core::str::from_utf8(&bytes[..n])
            .ok()
            .and_then(|s| s.chars().next())
            .ok_or(first),
        // Invalid lead byte, or the sequence is cut off by the end of input.
        _ => Err(first),
    };
    Some(decoded)
}