ciborium_ll/seg.rs
1use super::*;
2
3use ciborium_io::Read;
4
5use core::marker::PhantomData;
6
/// Converts the raw bytes of incoming segments into typed items
pub trait Parser: Default {
    /// The parsing error that may occur
    type Error;

    /// The (possibly unsized) type handed back by a successful parse
    type Item: ?Sized;

    /// Parses `bytes` and returns the item borrowed from that same slice
    ///
    /// A parser is allowed to hold on to bytes from an earlier parsing
    /// attempt; how many is reported by [`Parser::saved`]. Those bytes are
    /// written over the front of `bytes` before any processing starts, which
    /// lets the parser prepend its carried-over data without allocating.
    ///
    /// Callers therefore **MUST** uphold two requirements:
    ///
    /// 1. the slice passed in is longer than `saved()` bytes, and
    /// 2. fresh input is placed only after the first `saved()` bytes.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Reports how many bytes the parser is carrying over to the next call
    ///
    /// The default implementation returns zero, which suits parsers that
    /// never retain any input.
    fn saved(&self) -> usize {
        0
    }
}
39
40/// A bytes parser
41///
42/// No actual processing is performed and the input bytes are directly
43/// returned. This implies that this parser never saves any bytes internally.
44#[derive(Default)]
45pub struct Bytes(());
46
47impl Parser for Bytes {
48 type Item = [u8];
49 type Error = core::convert::Infallible;
50
51 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52 Ok(bytes)
53 }
54}
55
56/// A text parser
57///
58/// This parser converts the input bytes to a `str`. This parser preserves
59/// trailing invalid UTF-8 sequences in the case that chunking fell in the
60/// middle of a valid UTF-8 character.
61#[derive(Default)]
62pub struct Text {
63 stored: usize,
64 buffer: [u8; 3],
65}
66
67impl Parser for Text {
68 type Item = str;
69 type Error = core::str::Utf8Error;
70
71 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72 // If we cannot advance, return nothing.
73 if bytes.len() <= self.stored {
74 return Ok("");
75 }
76
77 // Copy previously invalid data into place.
78 bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79
80 Ok(match core::str::from_utf8(bytes) {
81 Ok(s) => s,
82 Err(e) => {
83 let valid_len = e.valid_up_to();
84 let invalid_len = bytes.len() - valid_len;
85
86 // If the size of the invalid UTF-8 is large enough to hold
87 // all valid UTF-8 characters, we have a syntax error.
88 if invalid_len > self.buffer.len() {
89 return Err(e);
90 }
91
92 // Otherwise, store the invalid bytes for the next read cycle.
93 self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
94 self.stored = invalid_len;
95
96 // Decode the valid part of the string.
97 core::str::from_utf8(&bytes[..valid_len]).unwrap()
98 }
99 })
100 }
101
102 fn saved(&self) -> usize {
103 self.stored
104 }
105}
106
107/// A CBOR segment
108///
109/// This type represents a single bytes or text segment on the wire. It can be
110/// read out in parsed chunks based on the size of the input scratch buffer.
111pub struct Segment<'r, R: Read, P: Parser> {
112 reader: &'r mut Decoder<R>,
113 unread: usize,
114 offset: usize,
115 parser: P,
116}
117
118impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
119 /// Gets the number of unprocessed bytes
120 #[inline]
121 pub fn left(&self) -> usize {
122 self.unread + self.parser.saved()
123 }
124
125 /// Gets the next parsed chunk within the segment
126 ///
127 /// Returns `Ok(None)` when all chunks have been read.
128 #[inline]
129 pub fn pull<'a>(
130 &mut self,
131 buffer: &'a mut [u8],
132 ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
133 use core::cmp::min;
134
135 let prev = self.parser.saved();
136 match self.unread {
137 0 if prev == 0 => return Ok(None),
138 0 => return Err(Error::Syntax(self.offset)),
139 _ => (),
140 }
141
142 // Determine how many bytes to read.
143 let size = min(buffer.len(), prev + self.unread);
144 let full = &mut buffer[..size];
145 let next = &mut full[min(size, prev)..];
146
147 // Read additional bytes.
148 self.reader.read_exact(next)?;
149 self.unread -= next.len();
150
151 self.parser
152 .parse(full)
153 .or(Err(Error::Syntax(self.offset)))
154 .map(Some)
155 }
156}
157
158/// A sequence of CBOR segments
159///
160/// CBOR allows for bytes or text items to be segmented. This type represents
161/// the state of that segmented input stream.
162pub struct Segments<'r, R: Read, P: Parser> {
163 reader: &'r mut Decoder<R>,
164 finish: bool,
165 nested: usize,
166 parser: PhantomData<P>,
167 unwrap: fn(Header) -> Result<Option<usize>, ()>,
168}
169
170impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
171 #[inline]
172 pub(crate) fn new(
173 decoder: &'r mut Decoder<R>,
174 unwrap: fn(Header) -> Result<Option<usize>, ()>,
175 ) -> Self {
176 Self {
177 reader: decoder,
178 finish: false,
179 nested: 0,
180 parser: PhantomData,
181 unwrap,
182 }
183 }
184
185 /// Gets the next segment in the stream
186 ///
187 /// Returns `Ok(None)` at the conclusion of the stream.
188 #[inline]
189 pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
190 while !self.finish {
191 let offset = self.reader.offset();
192 match self.reader.pull()? {
193 Header::Break if self.nested == 1 => return Ok(None),
194 Header::Break if self.nested > 1 => self.nested -= 1,
195 header => match (self.unwrap)(header) {
196 Err(..) => return Err(Error::Syntax(offset)),
197 Ok(None) => self.nested += 1,
198 Ok(Some(len)) => {
199 self.finish = self.nested == 0;
200 return Ok(Some(Segment {
201 reader: self.reader,
202 unread: len,
203 offset,
204 parser: P::default(),
205 }));
206 }
207 },
208 }
209 }
210
211 Ok(None)
212 }
213}