ciborium_ll/
seg.rs

1use super::*;
2
3use ciborium_io::Read;
4
5use core::marker::PhantomData;
6
/// Converts the raw bytes of a segment into a typed item
pub trait Parser: Default {
    /// The (possibly unsized) type produced by a successful parse
    type Item: ?Sized;

    /// The error produced when the bytes cannot be parsed
    type Error;

    /// Parses `bytes` and returns a reference to the resulting item
    ///
    /// A parser is allowed to hold back bytes from one call and carry them
    /// over into the next one; `Parser::saved()` reports how many bytes it
    /// is currently holding. On the following call those bytes are written
    /// over the start of `bytes` before any processing happens, which lets
    /// the parser prepend them without allocating.
    ///
    /// Callers **MUST** therefore uphold two rules:
    ///
    /// 1. `bytes` is longer than the number of saved bytes.
    /// 2. New input is placed only after the first `saved()` bytes of
    ///    `bytes`; that prefix is reserved for the parser.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Returns how many bytes the parser is holding over from earlier calls
    ///
    /// The provided implementation reports `0`, which is the right answer
    /// for any parser that never keeps bytes between calls.
    fn saved(&self) -> usize {
        0
    }
}
39
/// The pass-through parser used for byte strings
///
/// Input bytes are handed back exactly as they were received, so there is
/// never anything to carry over between calls. The private unit field keeps
/// the type from being constructed by any means other than `Default`.
#[derive(Default)]
pub struct Bytes(());
46
47impl Parser for Bytes {
48    type Item = [u8];
49    type Error = core::convert::Infallible;
50
51    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52        Ok(bytes)
53    }
54}
55
/// The parser used for text strings
///
/// Input bytes are validated as UTF-8 and returned as a `str`. When a chunk
/// boundary lands in the middle of a multi-byte character, the undecodable
/// tail of the chunk is kept here so that it can be joined with the bytes
/// that arrive in the next chunk.
#[derive(Default)]
pub struct Text {
    /// How many bytes at the front of `buffer` are currently in use
    stored: usize,
    /// Holding area for a trailing partial character; three bytes suffice
    /// because a UTF-8 character is at most four bytes long
    buffer: [u8; 3],
}
66
67impl Parser for Text {
68    type Item = str;
69    type Error = core::str::Utf8Error;
70
71    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72        // If we cannot advance, return nothing.
73        if bytes.len() <= self.stored {
74            return Ok("");
75        }
76
77        // Copy previously invalid data into place.
78        bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79
80        Ok(match core::str::from_utf8(bytes) {
81            Ok(s) => s,
82            Err(e) => {
83                let valid_len = e.valid_up_to();
84                let invalid_len = bytes.len() - valid_len;
85
86                // If the size of the invalid UTF-8 is large enough to hold
87                // all valid UTF-8 characters, we have a syntax error.
88                if invalid_len > self.buffer.len() {
89                    return Err(e);
90                }
91
92                // Otherwise, store the invalid bytes for the next read cycle.
93                self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
94                self.stored = invalid_len;
95
96                // Decode the valid part of the string.
97                core::str::from_utf8(&bytes[..valid_len]).unwrap()
98            }
99        })
100    }
101
102    fn saved(&self) -> usize {
103        self.stored
104    }
105}
106
107/// A CBOR segment
108///
109/// This type represents a single bytes or text segment on the wire. It can be
110/// read out in parsed chunks based on the size of the input scratch buffer.
111pub struct Segment<'r, R: Read, P: Parser> {
112    reader: &'r mut Decoder<R>,
113    unread: usize,
114    offset: usize,
115    parser: P,
116}
117
118impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
119    /// Gets the number of unprocessed bytes
120    #[inline]
121    pub fn left(&self) -> usize {
122        self.unread + self.parser.saved()
123    }
124
125    /// Gets the next parsed chunk within the segment
126    ///
127    /// Returns `Ok(None)` when all chunks have been read.
128    #[inline]
129    pub fn pull<'a>(
130        &mut self,
131        buffer: &'a mut [u8],
132    ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
133        use core::cmp::min;
134
135        let prev = self.parser.saved();
136        match self.unread {
137            0 if prev == 0 => return Ok(None),
138            0 => return Err(Error::Syntax(self.offset)),
139            _ => (),
140        }
141
142        // Determine how many bytes to read.
143        let size = min(buffer.len(), prev + self.unread);
144        let full = &mut buffer[..size];
145        let next = &mut full[min(size, prev)..];
146
147        // Read additional bytes.
148        self.reader.read_exact(next)?;
149        self.unread -= next.len();
150
151        self.parser
152            .parse(full)
153            .or(Err(Error::Syntax(self.offset)))
154            .map(Some)
155    }
156}
157
158/// A sequence of CBOR segments
159///
160/// CBOR allows for bytes or text items to be segmented. This type represents
161/// the state of that segmented input stream.
162pub struct Segments<'r, R: Read, P: Parser> {
163    reader: &'r mut Decoder<R>,
164    finish: bool,
165    nested: usize,
166    parser: PhantomData<P>,
167    unwrap: fn(Header) -> Result<Option<usize>, ()>,
168}
169
170impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
171    #[inline]
172    pub(crate) fn new(
173        decoder: &'r mut Decoder<R>,
174        unwrap: fn(Header) -> Result<Option<usize>, ()>,
175    ) -> Self {
176        Self {
177            reader: decoder,
178            finish: false,
179            nested: 0,
180            parser: PhantomData,
181            unwrap,
182        }
183    }
184
185    /// Gets the next segment in the stream
186    ///
187    /// Returns `Ok(None)` at the conclusion of the stream.
188    #[inline]
189    pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
190        while !self.finish {
191            let offset = self.reader.offset();
192            match self.reader.pull()? {
193                Header::Break if self.nested == 1 => return Ok(None),
194                Header::Break if self.nested > 1 => self.nested -= 1,
195                header => match (self.unwrap)(header) {
196                    Err(..) => return Err(Error::Syntax(offset)),
197                    Ok(None) => self.nested += 1,
198                    Ok(Some(len)) => {
199                        self.finish = self.nested == 0;
200                        return Ok(Some(Segment {
201                            reader: self.reader,
202                            unread: len,
203                            offset,
204                            parser: P::default(),
205                        }));
206                    }
207                },
208            }
209        }
210
211        Ok(None)
212    }
213}