serde_qs/de/parse.rs
1use super::*;
2
3use serde::de;
4
5use std::borrow::Cow;
6use std::iter::Iterator;
7use std::slice::Iter;
8use std::str;
9
10macro_rules! tu {
11 ($x:expr) => {
12 match $x {
13 Some(x) => *x,
14 None => return Err(de::Error::custom("query string ended before expected")),
15 }
16 };
17}
18
19impl<'a> Level<'a> {
20 /// If this `Level` value is indeed a map, then attempt to insert
21 /// `value` for key `key`.
22 /// Returns error if `self` is not a map, or already has an entry for that
23 /// key.
24 fn insert_map_value(&mut self, key: Cow<'a, str>, value: Cow<'a, str>) {
25 if let Level::Nested(ref mut map) = *self {
26 match map.entry(key) {
27 Entry::Occupied(mut o) => {
28 // Throw away old result; map is now invalid anyway.
29 let _ = o.insert(Level::Invalid("Multiple values for one key"));
30 }
31 Entry::Vacant(vm) => {
32 // Map is empty, result is None
33 let _ = vm.insert(Level::Flat(value));
34 }
35 }
36 } else if let Level::Uninitialised = *self {
37 let mut map = BTreeMap::default();
38 let _ = map.insert(key, Level::Flat(value));
39 *self = Level::Nested(map);
40 } else {
41 *self = Level::Invalid(
42 "Attempted to insert map value into \
43 non-map structure",
44 );
45 }
46 }
47
48 /// If this `Level` value is indeed a seq, then push a new value
49 fn insert_ord_seq_value(&mut self, key: usize, value: Cow<'a, str>) {
50 if let Level::OrderedSeq(ref mut map) = *self {
51 match map.entry(key) {
52 Entry::Occupied(mut o) => {
53 // Throw away old result; map is now invalid anyway.
54 let _ = o.insert(Level::Invalid("Multiple values for one key"));
55 }
56 Entry::Vacant(vm) => {
57 // Map is empty, result is None
58 let _ = vm.insert(Level::Flat(value));
59 }
60 }
61 } else if let Level::Uninitialised = *self {
62 // To reach here, self is either an OrderedSeq or nothing.
63 let mut map = BTreeMap::default();
64 let _ = map.insert(key, Level::Flat(value));
65 *self = Level::OrderedSeq(map);
66 } else {
67 *self = Level::Invalid(
68 "Attempted to insert seq value into \
69 non-seq structure",
70 );
71 }
72 }
73
74 /// If this `Level` value is indeed a seq, then attempt to insert
75 /// `value` for key `key`.
76 /// Returns error if `self` is not a seq, or already has an entry for that
77 /// key.
78 fn insert_seq_value(&mut self, value: Cow<'a, str>) {
79 // Reached the end of the key string
80 if let Level::Sequence(ref mut seq) = *self {
81 seq.push(Level::Flat(value));
82 } else if let Level::Uninitialised = *self {
83 let seq = vec![Level::Flat(value)];
84 *self = Level::Sequence(seq);
85 } else {
86 *self = Level::Invalid(
87 "Attempted to insert seq value into \
88 non-seq structure",
89 );
90 }
91 }
92}
93
94/// The `Parser` struct is a stateful querystring parser.
95/// It iterates over a slice of bytes, with a range to track the current
96/// start/end points of a value.
97/// The parser additionally supports peeking values, which allows them to be
98/// re-used (precisely once, unlike with `Peekable` from `std::iter`).
99pub struct Parser<'a> {
100 inner: &'a [u8],
101 iter: Iter<'a, u8>,
102 index: usize,
103 acc: (usize, usize),
104 peeked: Option<&'a u8>,
105 depth: usize, // stores the current depth, for use in bounded-depth parsing
106 strict: bool,
107 state: ParsingState,
108}
109
110/// The parsing logic varies slightly based on whether it is a key or a value
111/// (determines how encoded brackets are parse in non-strict mode)
112/// This tracks the state.
113enum ParsingState {
114 Init,
115 Key,
116 Value,
117}
118
119impl<'a> Iterator for Parser<'a> {
120 type Item = &'a u8;
121 #[inline]
122 fn next(&mut self) -> Option<Self::Item> {
123 let preparse_brackets = match self.state {
124 ParsingState::Value => false,
125 _ => !self.strict,
126 };
127 if preparse_brackets {
128 // in non-strict mode, we will happily decode any bracket
129 match self.peeked.take() {
130 Some(v) => Some(v),
131 None => {
132 self.index += 1;
133 self.acc.1 += 1;
134 match self.iter.next() {
135 Some(v) if v == &b'%' && self.iter.len() >= 2 => {
136 match &self.iter.as_slice()[..2] {
137 b"5B" => {
138 // skip the next two characters
139 let _ = self.iter.next();
140 let _ = self.iter.next();
141 self.index += 2;
142 Some(&b'[')
143 }
144 b"5D" => {
145 // skip the next two characters
146 let _ = self.iter.next();
147 let _ = self.iter.next();
148 self.index += 2;
149 Some(&b']')
150 }
151 _ => Some(v),
152 }
153 }
154 Some(v) => Some(v),
155 None => None,
156 }
157 }
158 }
159 } else {
160 match self.peeked.take() {
161 Some(v) => Some(v),
162 None => {
163 self.index += 1;
164 self.acc.1 += 1;
165 self.iter.next()
166 }
167 }
168 }
169 }
170}
171
172impl<'a> Parser<'a> {
173 #[inline]
174 fn peek(&mut self) -> Option<<Self as Iterator>::Item> {
175 if self.peeked.is_some() {
176 self.peeked
177 } else if let Some(x) = self.next() {
178 self.peeked = Some(x);
179 Some(x)
180 } else {
181 None
182 }
183 }
184}
185
186/// Replace b'+' with b' '
187/// Copied from [`form_urlencoded`](https://github.com/servo/rust-url/blob/380be29859adb859e861c2d765897c22ec878e01/src/form_urlencoded.rs#L125).
188fn replace_plus(input: &[u8]) -> Cow<[u8]> {
189 match input.iter().position(|&b| b == b'+') {
190 None => Cow::Borrowed(input),
191 Some(first_position) => {
192 let mut replaced = input.to_owned();
193 replaced[first_position] = b' ';
194 for byte in &mut replaced[first_position + 1..] {
195 if *byte == b'+' {
196 *byte = b' ';
197 }
198 }
199
200 Cow::Owned(replaced)
201 }
202 }
203}
204
205impl<'a> Parser<'a> {
206 pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self {
207 Parser {
208 inner: encoded,
209 iter: encoded.iter(),
210 acc: (0, 0),
211 index: 0,
212 peeked: None,
213 depth,
214 strict,
215 state: ParsingState::Init,
216 }
217 }
218
219 /// Resets the accumulator range by setting `(start, end)` to `(end, end)`.
220 fn clear_acc(&mut self) {
221 self.acc = (self.index, self.index);
222 }
223
224 /// Extracts a string from the internal byte slice from the range tracked by
225 /// the parser.
226 /// Avoids allocations when neither percent encoded, nor `'+'` values are
227 /// present.
228 fn collect_str(&mut self) -> Result<Cow<'a, str>> {
229 let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]);
230 let ret: Result<Cow<'a, str>> =
231 match percent_encoding::percent_decode(&replaced).decode_utf8()? {
232 Cow::Borrowed(_) => {
233 match replaced {
234 Cow::Borrowed(_) => {
235 // In this case, neither method made replacements, so we
236 // reuse the original bytes
237 let res = str::from_utf8(&self.inner[self.acc.0..self.acc.1 - 1])?;
238 Ok(Cow::Borrowed(res))
239 }
240 Cow::Owned(owned) => {
241 let res = String::from_utf8(owned)?;
242 Ok(Cow::Owned(res))
243 }
244 }
245 }
246 Cow::Owned(owned) => Ok(Cow::Owned(owned)),
247 };
248 self.clear_acc();
249 ret.map_err(Error::from)
250 }
251
252 /// In some ways the main way to use a `Parser`, this runs the parsing step
253 /// and outputs a simple `Deserializer` over the parsed map.
254 pub(crate) fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> {
255 let map = BTreeMap::default();
256 let mut root = Level::Nested(map);
257
258 // Parses all top level nodes into the `root` map.
259 while self.parse(&mut root)? {}
260 let iter = match root {
261 Level::Nested(map) => map.into_iter(),
262 _ => BTreeMap::default().into_iter(),
263 };
264 Ok(QsDeserializer { iter, value: None })
265 }
266
267 /// This is the top level parsing function. It checks the first character to
268 /// decide the type of key (nested, sequence, etc.) and to call the
269 /// approprate parsing function.
270 ///
271 /// Returns `Ok(false)` when there is no more string to parse.
272 fn parse(&mut self, node: &mut Level<'a>) -> Result<bool> {
273 // First character determines parsing type
274 if self.depth == 0 {
275 // Hit the maximum depth level, so parse everything as a key
276 let key = self.parse_key(b'=', false)?;
277 self.parse_map_value(key, node)?;
278 return Ok(true);
279 }
280 match self.next() {
281 Some(x) => {
282 match *x {
283 b'[' => {
284 loop {
285 self.clear_acc();
286 // Only peek at the next value to determine the key type.
287 match tu!(self.peek()) {
288 // key is of the form "[..=", not really allowed.
289 b'[' => {
290 // If we're in strict mode, error, otherwise just ignore it.
291 if self.strict {
292 return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index));
293 } else {
294 let _ = self.next();
295 }
296 }
297 // key is simply "[]", so treat as a seq.
298 b']' => {
299 // throw away the bracket
300 let _ = self.next();
301 self.clear_acc();
302 self.parse_seq_value(node)?;
303 return Ok(true);
304 }
305 // First character is an integer, attempt to parse it as an integer key
306 b'0'..=b'9' => {
307 let key = self.parse_key(b']', true)?;
308 let key = key.parse().map_err(Error::from)?;
309 self.parse_ord_seq_value(key, node)?;
310 return Ok(true);
311 }
312 // Key is "[a..=" so parse up to the closing "]"
313 0x20..=0x2f | 0x3a..=0x5a | 0x5c | 0x5e..=0x7e => {
314 let key = self.parse_key(b']', true)?;
315 self.parse_map_value(key, node)?;
316 return Ok(true);
317 }
318 c => {
319 if self.strict {
320 return Err(super::Error::parse_err(
321 &format!(
322 "unexpected character: {}",
323 String::from_utf8_lossy(&[c])
324 ),
325 self.index,
326 ));
327 } else {
328 let _ = self.next();
329 }
330 }
331 }
332 }
333 }
334 // Skip empty byte sequences (e.g. leading `&`, trailing `&`, `&&`, ...)
335 b'&' => {
336 self.clear_acc();
337 Ok(true)
338 }
339 // This means the key should be a root key
340 // of the form "abc" or "abc[..=]"
341 // We do actually allow integer keys here since they cannot
342 // be confused with sequences
343 _ => {
344 let key = { self.parse_key(b'[', false)? };
345 // Root keys are _always_ map values
346 self.parse_map_value(key, node)?;
347 Ok(true)
348 }
349 }
350 }
351 // Ran out of characters to parse
352 None => Ok(false),
353 }
354 }
355
356 /// The iterator is currently pointing at a key, so parse up until the
357 /// `end_on` value. This will either be `'['` when the key is the root key,
358 /// or `']'` when the key is a nested key. In the former case, `'='` will
359 /// also finish the key parsing.
360 ///
361 /// The `consume` flag determines whether the end character should be
362 /// returned to the buffer to be peeked. This is important when
363 /// parsing keys like `abc[def][ghi]` since the `'['` character is
364 /// needed to for the next iteration of `parse`.
365 fn parse_key(&mut self, end_on: u8, consume: bool) -> Result<Cow<'a, str>> {
366 self.state = ParsingState::Key;
367 loop {
368 if let Some(x) = self.next() {
369 match *x {
370 c if c == end_on => {
371 // Add this character back to the buffer for peek.
372 if !consume {
373 self.peeked = Some(x);
374 }
375 return self.collect_str();
376 }
377 b'=' => {
378 // Allow the '=' byte only when parsing keys within []
379 if end_on != b']' {
380 // Otherwise, we have reached the end of the key
381 // Add this character back to the buffer for peek.
382 self.peeked = Some(x);
383 return self.collect_str();
384 }
385
386 // otherwise do nothing, so '=' is accumulated
387 }
388 b'&' => {
389 // important to keep the `&` character so we know the
390 // key-value is of the form `key&..=` (i.e. no value)
391 self.peeked = Some(&b'&');
392 return self.collect_str();
393 }
394 _ => {
395 // for any other character
396 // do nothing, keep adding to key
397 }
398 }
399 } else {
400 // no more string to parse
401 return self.collect_str();
402 }
403 }
404 }
405
406 /// The `(key,value)` pair is determined to be corresponding to a map entry,
407 /// so parse it as such. The first part of the `key` has been parsed.
408 fn parse_map_value(&mut self, key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> {
409 self.state = ParsingState::Key;
410 let res = loop {
411 if let Some(x) = self.peek() {
412 match *x {
413 b'=' => {
414 // Key is finished, parse up until the '&' as the value
415 self.clear_acc();
416 self.state = ParsingState::Value;
417 for _ in self.take_while(|b| *b != &b'&') {}
418 let value: Cow<'a, str> = self.collect_str()?;
419 node.insert_map_value(key, value);
420 break Ok(());
421 }
422 b'&' => {
423 // No value
424 node.insert_map_value(key, Cow::Borrowed(""));
425 break Ok(());
426 }
427 b'[' => {
428 // The key continues to another level of nested.
429 // Add a new unitialised level for this node and continue.
430 if let Level::Uninitialised = *node {
431 *node = Level::Nested(BTreeMap::default());
432 }
433 if let Level::Nested(ref mut map) = *node {
434 // By parsing we drop down another level
435 self.depth -= 1;
436 // Either take the existing entry, or add a new
437 // unitialised level
438 // Use this new node to keep parsing
439 let _ = self.parse(map.entry(key).or_insert(Level::Uninitialised))?;
440 break Ok(());
441 } else {
442 // We expected to parse into a map here.
443 break Err(super::Error::parse_err(
444 &format!(
445 "tried to insert a \
446 new key into {:?}",
447 node
448 ),
449 self.index,
450 ));
451 }
452 }
453 c => {
454 // Anything else is unexpected since we just finished
455 // parsing a key.
456 if self.strict {
457 break Err(super::Error::parse_err(
458 format!(
459 "Unexpected character: '{}' found when parsing",
460 String::from_utf8_lossy(&[c])
461 ),
462 self.index,
463 ));
464 } else {
465 let _ = self.next();
466 }
467 }
468 }
469 } else {
470 // The string has ended, so the value is empty.
471 node.insert_map_value(key, Cow::Borrowed(""));
472 break Ok(());
473 }
474 };
475 // We have finished parsing this level, so go back up a level.
476 self.depth += 1;
477 res
478 }
479
480 /// The `(key,value)` pair is determined to be corresponding to an
481 /// ordered sequence.
482 /// Basically the same as the above, but we insert into `OrderedSeq`
483 /// Can potentially be merged?
484 fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> {
485 self.state = ParsingState::Key;
486 let res = loop {
487 if let Some(x) = self.peek() {
488 match *x {
489 b'=' => {
490 // Key is finished, parse up until the '&' as the value
491 self.clear_acc();
492 self.state = ParsingState::Value;
493 for _ in self.take_while(|b| *b != &b'&') {}
494 let value = self.collect_str()?;
495 // Reached the end of the key string
496 node.insert_ord_seq_value(key, value);
497 break Ok(());
498 }
499 b'&' => {
500 // No value
501 node.insert_ord_seq_value(key, Cow::Borrowed(""));
502 break Ok(());
503 }
504 b'[' => {
505 // The key continues to another level of nested.
506 // Add a new unitialised level for this node and continue.
507 if let Level::Uninitialised = *node {
508 *node = Level::OrderedSeq(BTreeMap::default());
509 }
510 if let Level::OrderedSeq(ref mut map) = *node {
511 // By parsing we drop down another level
512 self.depth -= 1;
513 let _ = self.parse(
514 // Either take the existing entry, or add a new
515 // unitialised level
516 // Use this new node to keep parsing
517 map.entry(key).or_insert(Level::Uninitialised),
518 )?;
519 break Ok(());
520 } else {
521 // We expected to parse into a seq here.
522 break Err(super::Error::parse_err(
523 &format!(
524 "tried to insert a \
525 new key into {:?}",
526 node
527 ),
528 self.index,
529 ));
530 }
531 }
532 c => {
533 // Anything else is unexpected since we just finished
534 // parsing a key.
535 if self.strict {
536 break Err(super::Error::parse_err(
537 format!("Unexpected character: {:?} found when parsing", c),
538 self.index,
539 ));
540 } else {
541 let _ = self.next();
542 }
543 }
544 }
545 } else {
546 // The string has ended, so the value is empty.
547 node.insert_ord_seq_value(key, Cow::Borrowed(""));
548 break Ok(());
549 }
550 };
551 // We have finished parsing this level, so go back up a level.
552 self.depth += 1;
553 res
554 }
555
556 /// The `(key,value)` pair is determined to be corresponding to an
557 /// unordered sequence.
558 /// This must be the final level of nesting, so assume we have a value
559 fn parse_seq_value(&mut self, node: &mut Level<'a>) -> Result<()> {
560 self.state = ParsingState::Key;
561 let res = match self.peek() {
562 Some(x) => {
563 match *x {
564 b'=' => {
565 // Key is finished, parse up until the '&' as the value
566 self.clear_acc();
567 self.state = ParsingState::Value;
568 for _ in self.take_while(|b| *b != &b'&') {}
569 let value = self.collect_str()?;
570 node.insert_seq_value(value);
571 Ok(())
572 }
573 b'&' => {
574 // key value is empty
575 node.insert_seq_value(Cow::Borrowed(""));
576 Ok(())
577 }
578 _ => Err(super::Error::parse_err(
579 "non-indexed sequence of \
580 structs not supported",
581 self.index,
582 )),
583 }
584 }
585 None => {
586 // The string has ended, so the value is empty.
587 node.insert_seq_value(Cow::Borrowed(""));
588 Ok(())
589 }
590 };
591 // We have finished parsing this level, so go back up a level.
592 self.depth += 1;
593 res
594 }
595}