utf8/
lossy.rs

1use super::*;
2
3/// A push-based, lossy decoder for UTF-8.
4/// Errors are replaced with the U+FFFD replacement character.
5///
6/// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback.
7///
8/// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`)
9/// can be rewritten as:
10///
11/// ```rust
12/// fn string_from_utf8_lossy(input: &[u8]) -> String {
13///     let mut string = String::new();
14///     utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input);
15///     string
16/// }
17/// ```
18///
19/// **Note:** Dropping the decoder signals the end of the input:
20/// If the last input chunk ended with an incomplete byte sequence for a code point,
21/// this is an error and a replacement character is emitted.
22/// Use `std::mem::forget` to inhibit this behavior.
23pub struct LossyDecoder<F: FnMut(&str)> {
24    push_str: F,
25    incomplete: Incomplete,
26}
27
28impl<F: FnMut(&str)> LossyDecoder<F> {
29    /// Create a new decoder from a callback.
30    #[inline]
31    pub fn new(push_str: F) -> Self {
32        LossyDecoder {
33            push_str: push_str,
34            incomplete: Incomplete {
35                buffer: [0, 0, 0, 0],
36                buffer_len: 0,
37            },
38        }
39    }
40
41    /// Feed one chunk of input into the decoder.
42    ///
43    /// The input is decoded lossily
44    /// and the callback called once or more with `&str` string slices.
45    ///
46    /// If the UTF-8 byte sequence for one code point was split into this bytes chunk
47    /// and previous bytes chunks, it will be correctly pieced back together.
48    pub fn feed(&mut self, mut input: &[u8]) {
49        if self.incomplete.buffer_len > 0 {
50            match self.incomplete.try_complete(input) {
51                Some((Ok(s), remaining)) => {
52                    (self.push_str)(s);
53                    input = remaining
54                }
55                Some((Err(_), remaining)) => {
56                    (self.push_str)(REPLACEMENT_CHARACTER);
57                    input = remaining
58                }
59                None => {
60                    return
61                }
62            }
63        }
64        loop {
65            match decode(input) {
66                Ok(s) => {
67                    (self.push_str)(s);
68                    return
69                }
70                Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
71                    (self.push_str)(valid_prefix);
72                    self.incomplete = incomplete_suffix;
73                    return
74                }
75                Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
76                    (self.push_str)(valid_prefix);
77                    (self.push_str)(REPLACEMENT_CHARACTER);
78                    input = remaining_input
79                }
80            }
81        }
82    }
83}
84
85impl<F: FnMut(&str)> Drop for LossyDecoder<F> {
86    #[inline]
87    fn drop(&mut self) {
88        if self.incomplete.buffer_len > 0 {
89            (self.push_str)(REPLACEMENT_CHARACTER)
90        }
91    }
92}