urlencoding/
dec.rs

1use std::borrow::Cow;
2use std::string::FromUtf8Error;
3
/// Convert one ASCII hexadecimal digit to its numeric value.
///
/// Accepts `0-9`, `A-F` and `a-f`; any other byte yields `None`.
#[inline]
pub(crate) fn from_hex_digit(digit: u8) -> Option<u8> {
    // Pick the amount to subtract so that each range lands on its value:
    // '0' maps to 0, while 'A' and 'a' both map to 10.
    let offset = match digit {
        b'0'..=b'9' => b'0',
        b'A'..=b'F' => b'A' - 10,
        b'a'..=b'f' => b'a' - 10,
        _ => return None,
    };
    Some(digit - offset)
}
13
14/// Decode percent-encoded string assuming UTF-8 encoding.
15///
16/// If you need a `String`, call `.into_owned()` (not `.to_owned()`).
17///
18/// Unencoded `+` is preserved literally, and _not_ changed to a space.
19pub fn decode(data: &str) -> Result<Cow<str>, FromUtf8Error> {
20    match decode_binary(data.as_bytes()) {
21        Cow::Borrowed(_) => Ok(Cow::Borrowed(data)),
22        Cow::Owned(s) => Ok(Cow::Owned(String::from_utf8(s)?)),
23    }
24}
25
26/// Decode percent-encoded string as binary data, in any encoding.
27///
28/// Unencoded `+` is preserved literally, and _not_ changed to a space.
29pub fn decode_binary(data: &[u8]) -> Cow<[u8]> {
30    let offset = data.iter().take_while(|&&c| c != b'%').count();
31    if offset >= data.len() {
32        return Cow::Borrowed(data)
33    }
34
35    let mut decoded: Vec<u8> = Vec::with_capacity(data.len());
36    let mut out = NeverRealloc(&mut decoded);
37
38    let (ascii, mut data) = data.split_at(offset);
39    out.extend_from_slice(ascii);
40
41    loop {
42        let mut parts = data.splitn(2, |&c| c == b'%');
43        // first the decoded non-% part
44        let non_escaped_part = parts.next().unwrap();
45        let rest = parts.next();
46        if rest.is_none() && out.0.is_empty() {
47            // if empty there were no '%' in the string
48            return data.into();
49        }
50        out.extend_from_slice(non_escaped_part);
51
52        // then decode one %xx
53        match rest {
54            Some(rest) => match rest.get(0..2) {
55                Some(&[first, second]) => match from_hex_digit(first) {
56                    Some(first_val) => match from_hex_digit(second) {
57                        Some(second_val) => {
58                            out.push((first_val << 4) | second_val);
59                            data = &rest[2..];
60                        },
61                        None => {
62                            out.extend_from_slice(&[b'%', first]);
63                            data = &rest[1..];
64                        },
65                    },
66                    None => {
67                        out.push(b'%');
68                        data = rest;
69                    },
70                },
71                _ => {
72                    // too short
73                    out.push(b'%');
74                    out.extend_from_slice(rest);
75                    break;
76                },
77            },
78            None => break,
79        }
80    }
81    Cow::Owned(decoded)
82}
83
84
/// Write-only view of a pre-sized `Vec` that never grows its allocation.
///
/// Writes that would not fit in the current capacity are silently skipped,
/// so callers must reserve enough capacity up front.
struct NeverRealloc<'a, T>(pub &'a mut Vec<T>);

impl<T> NeverRealloc<'_, T> {
    /// Append `val` unless the vector is already filled to capacity.
    #[inline]
    pub fn push(&mut self, val: T) {
        let buf = &mut *self.0;
        // this check only exists to remove redundant reallocation code
        // (the capacity is always sufficient)
        if buf.len() == buf.capacity() {
            return;
        }
        buf.push(val);
    }

    /// Append all of `val`, or nothing at all if it does not fit in the
    /// remaining capacity.
    #[inline]
    pub fn extend_from_slice(&mut self, val: &[T]) where T: Clone {
        let buf = &mut *self.0;
        let spare = buf.capacity() - buf.len();
        if val.len() > spare {
            return;
        }
        buf.extend_from_slice(val);
    }
}
103
/// `decode` borrows when nothing is escaped and allocates otherwise.
#[test]
fn dec_borrows() {
    // No escapes: the input must come back borrowed, not copied.
    let untouched = decode("hello");
    assert!(matches!(untouched, Ok(Cow::Borrowed("hello"))));

    // An escape at the end forces an owned result.
    let trailing = decode("hello%20");
    assert!(matches!(trailing, Ok(Cow::Owned(s)) if s == "hello "));

    // An escape at the very start forces an owned result as well.
    let leading = decode("%20hello");
    assert!(matches!(leading, Ok(Cow::Owned(s)) if s == " hello"));
}