serde_qs/de/
parse.rs

1use super::*;
2
3use serde::de;
4
5use std::borrow::Cow;
6use std::iter::Iterator;
7use std::slice::Iter;
8use std::str;
9
10macro_rules! tu {
11    ($x:expr) => {
12        match $x {
13            Some(x) => *x,
14            None => return Err(de::Error::custom("query string ended before expected")),
15        }
16    };
17}
18
19impl<'a> Level<'a> {
20    /// If this `Level` value is indeed a map, then attempt to insert
21    /// `value` for key `key`.
22    /// Returns error if `self` is not a map, or already has an entry for that
23    /// key.
24    fn insert_map_value(&mut self, key: Cow<'a, str>, value: Cow<'a, str>) {
25        if let Level::Nested(ref mut map) = *self {
26            match map.entry(key) {
27                Entry::Occupied(mut o) => {
28                    // Throw away old result; map is now invalid anyway.
29                    let _ = o.insert(Level::Invalid("Multiple values for one key"));
30                }
31                Entry::Vacant(vm) => {
32                    // Map is empty, result is None
33                    let _ = vm.insert(Level::Flat(value));
34                }
35            }
36        } else if let Level::Uninitialised = *self {
37            let mut map = BTreeMap::default();
38            let _ = map.insert(key, Level::Flat(value));
39            *self = Level::Nested(map);
40        } else {
41            *self = Level::Invalid(
42                "Attempted to insert map value into \
43                 non-map structure",
44            );
45        }
46    }
47
48    /// If this `Level` value is indeed a seq, then push a new value
49    fn insert_ord_seq_value(&mut self, key: usize, value: Cow<'a, str>) {
50        if let Level::OrderedSeq(ref mut map) = *self {
51            match map.entry(key) {
52                Entry::Occupied(mut o) => {
53                    // Throw away old result; map is now invalid anyway.
54                    let _ = o.insert(Level::Invalid("Multiple values for one key"));
55                }
56                Entry::Vacant(vm) => {
57                    // Map is empty, result is None
58                    let _ = vm.insert(Level::Flat(value));
59                }
60            }
61        } else if let Level::Uninitialised = *self {
62            // To reach here, self is either an OrderedSeq or nothing.
63            let mut map = BTreeMap::default();
64            let _ = map.insert(key, Level::Flat(value));
65            *self = Level::OrderedSeq(map);
66        } else {
67            *self = Level::Invalid(
68                "Attempted to insert seq value into \
69                 non-seq structure",
70            );
71        }
72    }
73
74    /// If this `Level` value is indeed a seq, then attempt to insert
75    /// `value` for key `key`.
76    /// Returns error if `self` is not a seq, or already has an entry for that
77    /// key.
78    fn insert_seq_value(&mut self, value: Cow<'a, str>) {
79        // Reached the end of the key string
80        if let Level::Sequence(ref mut seq) = *self {
81            seq.push(Level::Flat(value));
82        } else if let Level::Uninitialised = *self {
83            let seq = vec![Level::Flat(value)];
84            *self = Level::Sequence(seq);
85        } else {
86            *self = Level::Invalid(
87                "Attempted to insert seq value into \
88                 non-seq structure",
89            );
90        }
91    }
92}
93
94/// The `Parser` struct is a stateful querystring parser.
95/// It iterates over a slice of bytes, with a range to track the current
96/// start/end points of a value.
97/// The parser additionally supports peeking values, which allows them to be
98/// re-used (precisely once, unlike with `Peekable` from `std::iter`).
99pub struct Parser<'a> {
100    inner: &'a [u8],
101    iter: Iter<'a, u8>,
102    index: usize,
103    acc: (usize, usize),
104    peeked: Option<&'a u8>,
105    depth: usize, // stores the current depth, for use in bounded-depth parsing
106    strict: bool,
107    state: ParsingState,
108}
109
110/// The parsing logic varies slightly based on whether it is a key or a value
111/// (determines how encoded brackets are parse in non-strict mode)
112/// This tracks the state.
113enum ParsingState {
114    Init,
115    Key,
116    Value,
117}
118
119impl<'a> Iterator for Parser<'a> {
120    type Item = &'a u8;
121    #[inline]
122    fn next(&mut self) -> Option<Self::Item> {
123        let preparse_brackets = match self.state {
124            ParsingState::Value => false,
125            _ => !self.strict,
126        };
127        if preparse_brackets {
128            // in non-strict mode, we will happily decode any bracket
129            match self.peeked.take() {
130                Some(v) => Some(v),
131                None => {
132                    self.index += 1;
133                    self.acc.1 += 1;
134                    match self.iter.next() {
135                        Some(v) if v == &b'%' && self.iter.len() >= 2 => {
136                            match &self.iter.as_slice()[..2] {
137                                b"5B" => {
138                                    // skip the next two characters
139                                    let _ = self.iter.next();
140                                    let _ = self.iter.next();
141                                    self.index += 2;
142                                    Some(&b'[')
143                                }
144                                b"5D" => {
145                                    // skip the next two characters
146                                    let _ = self.iter.next();
147                                    let _ = self.iter.next();
148                                    self.index += 2;
149                                    Some(&b']')
150                                }
151                                _ => Some(v),
152                            }
153                        }
154                        Some(v) => Some(v),
155                        None => None,
156                    }
157                }
158            }
159        } else {
160            match self.peeked.take() {
161                Some(v) => Some(v),
162                None => {
163                    self.index += 1;
164                    self.acc.1 += 1;
165                    self.iter.next()
166                }
167            }
168        }
169    }
170}
171
172impl<'a> Parser<'a> {
173    #[inline]
174    fn peek(&mut self) -> Option<<Self as Iterator>::Item> {
175        if self.peeked.is_some() {
176            self.peeked
177        } else if let Some(x) = self.next() {
178            self.peeked = Some(x);
179            Some(x)
180        } else {
181            None
182        }
183    }
184}
185
186/// Replace b'+' with b' '
187/// Copied from [`form_urlencoded`](https://github.com/servo/rust-url/blob/380be29859adb859e861c2d765897c22ec878e01/src/form_urlencoded.rs#L125).
188fn replace_plus(input: &[u8]) -> Cow<[u8]> {
189    match input.iter().position(|&b| b == b'+') {
190        None => Cow::Borrowed(input),
191        Some(first_position) => {
192            let mut replaced = input.to_owned();
193            replaced[first_position] = b' ';
194            for byte in &mut replaced[first_position + 1..] {
195                if *byte == b'+' {
196                    *byte = b' ';
197                }
198            }
199
200            Cow::Owned(replaced)
201        }
202    }
203}
204
205impl<'a> Parser<'a> {
206    pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self {
207        Parser {
208            inner: encoded,
209            iter: encoded.iter(),
210            acc: (0, 0),
211            index: 0,
212            peeked: None,
213            depth,
214            strict,
215            state: ParsingState::Init,
216        }
217    }
218
219    /// Resets the accumulator range by setting `(start, end)` to `(end, end)`.
220    fn clear_acc(&mut self) {
221        self.acc = (self.index, self.index);
222    }
223
224    /// Extracts a string from the internal byte slice from the range tracked by
225    /// the parser.
226    /// Avoids allocations when neither percent encoded, nor `'+'` values are
227    /// present.
228    fn collect_str(&mut self) -> Result<Cow<'a, str>> {
229        let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]);
230        let ret: Result<Cow<'a, str>> =
231            match percent_encoding::percent_decode(&replaced).decode_utf8()? {
232                Cow::Borrowed(_) => {
233                    match replaced {
234                        Cow::Borrowed(_) => {
235                            // In this case, neither method made replacements, so we
236                            // reuse the original bytes
237                            let res = str::from_utf8(&self.inner[self.acc.0..self.acc.1 - 1])?;
238                            Ok(Cow::Borrowed(res))
239                        }
240                        Cow::Owned(owned) => {
241                            let res = String::from_utf8(owned)?;
242                            Ok(Cow::Owned(res))
243                        }
244                    }
245                }
246                Cow::Owned(owned) => Ok(Cow::Owned(owned)),
247            };
248        self.clear_acc();
249        ret.map_err(Error::from)
250    }
251
252    /// In some ways the main way to use a `Parser`, this runs the parsing step
253    /// and outputs a simple `Deserializer` over the parsed map.
254    pub(crate) fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> {
255        let map = BTreeMap::default();
256        let mut root = Level::Nested(map);
257
258        // Parses all top level nodes into the `root` map.
259        while self.parse(&mut root)? {}
260        let iter = match root {
261            Level::Nested(map) => map.into_iter(),
262            _ => BTreeMap::default().into_iter(),
263        };
264        Ok(QsDeserializer { iter, value: None })
265    }
266
267    /// This is the top level parsing function. It checks the first character to
268    /// decide the type of key (nested, sequence, etc.) and to call the
269    /// approprate parsing function.
270    ///
271    /// Returns `Ok(false)` when there is no more string to parse.
272    fn parse(&mut self, node: &mut Level<'a>) -> Result<bool> {
273        // First character determines parsing type
274        if self.depth == 0 {
275            // Hit the maximum depth level, so parse everything as a key
276            let key = self.parse_key(b'=', false)?;
277            self.parse_map_value(key, node)?;
278            return Ok(true);
279        }
280        match self.next() {
281            Some(x) => {
282                match *x {
283                    b'[' => {
284                        loop {
285                            self.clear_acc();
286                            // Only peek at the next value to determine the key type.
287                            match tu!(self.peek()) {
288                                // key is of the form "[..=", not really allowed.
289                                b'[' => {
290                                    // If we're in strict mode, error, otherwise just ignore it.
291                                    if self.strict {
292                                        return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index));
293                                    } else {
294                                        let _ = self.next();
295                                    }
296                                }
297                                // key is simply "[]", so treat as a seq.
298                                b']' => {
299                                    // throw away the bracket
300                                    let _ = self.next();
301                                    self.clear_acc();
302                                    self.parse_seq_value(node)?;
303                                    return Ok(true);
304                                }
305                                // First character is an integer, attempt to parse it as an integer key
306                                b'0'..=b'9' => {
307                                    let key = self.parse_key(b']', true)?;
308                                    let key = key.parse().map_err(Error::from)?;
309                                    self.parse_ord_seq_value(key, node)?;
310                                    return Ok(true);
311                                }
312                                // Key is "[a..=" so parse up to the closing "]"
313                                0x20..=0x2f | 0x3a..=0x5a | 0x5c | 0x5e..=0x7e => {
314                                    let key = self.parse_key(b']', true)?;
315                                    self.parse_map_value(key, node)?;
316                                    return Ok(true);
317                                }
318                                c => {
319                                    if self.strict {
320                                        return Err(super::Error::parse_err(
321                                            &format!(
322                                                "unexpected character: {}",
323                                                String::from_utf8_lossy(&[c])
324                                            ),
325                                            self.index,
326                                        ));
327                                    } else {
328                                        let _ = self.next();
329                                    }
330                                }
331                            }
332                        }
333                    }
334                    // Skip empty byte sequences (e.g. leading `&`, trailing `&`, `&&`, ...)
335                    b'&' => {
336                        self.clear_acc();
337                        Ok(true)
338                    }
339                    // This means the key should be a root key
340                    // of the form "abc" or "abc[..=]"
341                    // We do actually allow integer keys here since they cannot
342                    // be confused with sequences
343                    _ => {
344                        let key = { self.parse_key(b'[', false)? };
345                        // Root keys are _always_ map values
346                        self.parse_map_value(key, node)?;
347                        Ok(true)
348                    }
349                }
350            }
351            // Ran out of characters to parse
352            None => Ok(false),
353        }
354    }
355
356    /// The iterator is currently pointing at a key, so parse up until the
357    /// `end_on` value. This will either be `'['` when the key is the root key,
358    /// or `']'` when the key is a nested key. In the former case, `'='` will
359    /// also finish the key parsing.
360    ///
361    /// The `consume` flag determines whether the end character should be
362    /// returned to the buffer to be peeked. This is important when
363    /// parsing keys like `abc[def][ghi]` since the `'['` character is
364    /// needed to for the next iteration of `parse`.
365    fn parse_key(&mut self, end_on: u8, consume: bool) -> Result<Cow<'a, str>> {
366        self.state = ParsingState::Key;
367        loop {
368            if let Some(x) = self.next() {
369                match *x {
370                    c if c == end_on => {
371                        // Add this character back to the buffer for peek.
372                        if !consume {
373                            self.peeked = Some(x);
374                        }
375                        return self.collect_str();
376                    }
377                    b'=' => {
378                        // Allow the '=' byte only when parsing keys within []
379                        if end_on != b']' {
380                            // Otherwise, we have reached the end of the key
381                            // Add this character back to the buffer for peek.
382                            self.peeked = Some(x);
383                            return self.collect_str();
384                        }
385
386                        // otherwise do nothing, so '=' is accumulated
387                    }
388                    b'&' => {
389                        // important to keep the `&` character so we know the
390                        // key-value is of the form `key&..=` (i.e. no value)
391                        self.peeked = Some(&b'&');
392                        return self.collect_str();
393                    }
394                    _ => {
395                        // for any other character
396                        // do nothing, keep adding to key
397                    }
398                }
399            } else {
400                // no more string to parse
401                return self.collect_str();
402            }
403        }
404    }
405
406    /// The `(key,value)` pair is determined to be corresponding to a map entry,
407    /// so parse it as such. The first part of the `key` has been parsed.
408    fn parse_map_value(&mut self, key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> {
409        self.state = ParsingState::Key;
410        let res = loop {
411            if let Some(x) = self.peek() {
412                match *x {
413                    b'=' => {
414                        // Key is finished, parse up until the '&' as the value
415                        self.clear_acc();
416                        self.state = ParsingState::Value;
417                        for _ in self.take_while(|b| *b != &b'&') {}
418                        let value: Cow<'a, str> = self.collect_str()?;
419                        node.insert_map_value(key, value);
420                        break Ok(());
421                    }
422                    b'&' => {
423                        // No value
424                        node.insert_map_value(key, Cow::Borrowed(""));
425                        break Ok(());
426                    }
427                    b'[' => {
428                        // The key continues to another level of nested.
429                        // Add a new unitialised level for this node and continue.
430                        if let Level::Uninitialised = *node {
431                            *node = Level::Nested(BTreeMap::default());
432                        }
433                        if let Level::Nested(ref mut map) = *node {
434                            // By parsing we drop down another level
435                            self.depth -= 1;
436                            // Either take the existing entry, or add a new
437                            // unitialised level
438                            // Use this new node to keep parsing
439                            let _ = self.parse(map.entry(key).or_insert(Level::Uninitialised))?;
440                            break Ok(());
441                        } else {
442                            // We expected to parse into a map here.
443                            break Err(super::Error::parse_err(
444                                &format!(
445                                    "tried to insert a \
446                                     new key into {:?}",
447                                    node
448                                ),
449                                self.index,
450                            ));
451                        }
452                    }
453                    c => {
454                        // Anything else is unexpected since we just finished
455                        // parsing a key.
456                        if self.strict {
457                            break Err(super::Error::parse_err(
458                                format!(
459                                    "Unexpected character: '{}' found when parsing",
460                                    String::from_utf8_lossy(&[c])
461                                ),
462                                self.index,
463                            ));
464                        } else {
465                            let _ = self.next();
466                        }
467                    }
468                }
469            } else {
470                // The string has ended, so the value is empty.
471                node.insert_map_value(key, Cow::Borrowed(""));
472                break Ok(());
473            }
474        };
475        // We have finished parsing this level, so go back up a level.
476        self.depth += 1;
477        res
478    }
479
480    /// The `(key,value)` pair is determined to be corresponding to an
481    /// ordered sequence.
482    /// Basically the same as the above, but we insert into `OrderedSeq`
483    /// Can potentially be merged?
484    fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> {
485        self.state = ParsingState::Key;
486        let res = loop {
487            if let Some(x) = self.peek() {
488                match *x {
489                    b'=' => {
490                        // Key is finished, parse up until the '&' as the value
491                        self.clear_acc();
492                        self.state = ParsingState::Value;
493                        for _ in self.take_while(|b| *b != &b'&') {}
494                        let value = self.collect_str()?;
495                        // Reached the end of the key string
496                        node.insert_ord_seq_value(key, value);
497                        break Ok(());
498                    }
499                    b'&' => {
500                        // No value
501                        node.insert_ord_seq_value(key, Cow::Borrowed(""));
502                        break Ok(());
503                    }
504                    b'[' => {
505                        // The key continues to another level of nested.
506                        // Add a new unitialised level for this node and continue.
507                        if let Level::Uninitialised = *node {
508                            *node = Level::OrderedSeq(BTreeMap::default());
509                        }
510                        if let Level::OrderedSeq(ref mut map) = *node {
511                            // By parsing we drop down another level
512                            self.depth -= 1;
513                            let _ = self.parse(
514                                // Either take the existing entry, or add a new
515                                // unitialised level
516                                // Use this new node to keep parsing
517                                map.entry(key).or_insert(Level::Uninitialised),
518                            )?;
519                            break Ok(());
520                        } else {
521                            // We expected to parse into a seq here.
522                            break Err(super::Error::parse_err(
523                                &format!(
524                                    "tried to insert a \
525                                     new key into {:?}",
526                                    node
527                                ),
528                                self.index,
529                            ));
530                        }
531                    }
532                    c => {
533                        // Anything else is unexpected since we just finished
534                        // parsing a key.
535                        if self.strict {
536                            break Err(super::Error::parse_err(
537                                format!("Unexpected character: {:?} found when parsing", c),
538                                self.index,
539                            ));
540                        } else {
541                            let _ = self.next();
542                        }
543                    }
544                }
545            } else {
546                // The string has ended, so the value is empty.
547                node.insert_ord_seq_value(key, Cow::Borrowed(""));
548                break Ok(());
549            }
550        };
551        // We have finished parsing this level, so go back up a level.
552        self.depth += 1;
553        res
554    }
555
556    /// The `(key,value)` pair is determined to be corresponding to an
557    /// unordered sequence.
558    /// This must be the final level of nesting, so assume we have a value
559    fn parse_seq_value(&mut self, node: &mut Level<'a>) -> Result<()> {
560        self.state = ParsingState::Key;
561        let res = match self.peek() {
562            Some(x) => {
563                match *x {
564                    b'=' => {
565                        // Key is finished, parse up until the '&' as the value
566                        self.clear_acc();
567                        self.state = ParsingState::Value;
568                        for _ in self.take_while(|b| *b != &b'&') {}
569                        let value = self.collect_str()?;
570                        node.insert_seq_value(value);
571                        Ok(())
572                    }
573                    b'&' => {
574                        // key value is empty
575                        node.insert_seq_value(Cow::Borrowed(""));
576                        Ok(())
577                    }
578                    _ => Err(super::Error::parse_err(
579                        "non-indexed sequence of \
580                         structs not supported",
581                        self.index,
582                    )),
583                }
584            }
585            None => {
586                // The string has ended, so the value is empty.
587                node.insert_seq_value(Cow::Borrowed(""));
588                Ok(())
589            }
590        };
591        // We have finished parsing this level, so go back up a level.
592        self.depth += 1;
593        res
594    }
595}