// jsonptr/token.rs

1use core::str::Split;
2
3use crate::index::{Index, ParseIndexError};
4use alloc::{
5    borrow::Cow,
6    fmt,
7    string::{String, ToString},
8    vec::Vec,
9};
10
/// Escape sequence written in place of a literal `~` when encoding a token.
const ENCODED_TILDE: &[u8] = b"~0";
/// Escape sequence written in place of a literal `/` when encoding a token.
const ENCODED_SLASH: &[u8] = b"~1";

/// Byte which starts an escape sequence in an encoded token.
const ENC_PREFIX: u8 = b'~';
/// Byte which, directly after `ENC_PREFIX`, decodes to a literal `~`.
const TILDE_ENC: u8 = b'0';
/// Byte which, directly after `ENC_PREFIX`, decodes to a literal `/`.
const SLASH_ENC: u8 = b'1';

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                    Token                                     ║
║                                   ¯¯¯¯¯¯¯                                    ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// A `Token` is a segment of a JSON [`Pointer`](crate::Pointer), preceded by `'/'` (`%x2F`).
///
/// `Token`s can represent a key in a JSON object or an index in an array.
///
/// - Indexes should not contain leading zeros.
/// - When dealing with arrays or path expansion for assignment, `"-"`
///   represents the next, non-existent index in a JSON array.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'a> {
    /// The token text in its RFC 6901 *encoded* form (`~` as `~0`, `/` as
    /// `~1`); either borrowed from the caller or owned by the token.
    inner: Cow<'a, str>,
}

40impl<'a> Token<'a> {
41    /// Constructs a `Token` from an RFC 6901 encoded string.
42    ///
43    /// This is like [`Self::from_encoded`], except that no validation is
44    /// performed on the input string.
45    pub(crate) fn from_encoded_unchecked(inner: impl Into<Cow<'a, str>>) -> Self {
46        Self {
47            inner: inner.into(),
48        }
49    }
50
51    /// Constructs a `Token` from an RFC 6901 encoded string.
52    ///
53    /// To be valid, the string must not contain any `/` characters, and any `~`
54    /// characters must be followed by either `0` or `1`.
55    ///
56    /// This function does not allocate.
57    ///
58    /// # Examples
59    ///
60    /// ```
61    /// # use jsonptr::Token;
62    /// assert_eq!(Token::from_encoded("~1foo~1~0bar").unwrap().decoded(), "/foo/~bar");
63    /// let err = Token::from_encoded("foo/oops~bar").unwrap_err();
64    /// assert_eq!(err.offset, 3);
65    /// ```
66    ///
67    /// ## Errors
68    /// Returns `InvalidEncodingError` if the input string is not a valid RFC
69    /// 6901 (`~` must be followed by `0` or `1`)
70    pub fn from_encoded(s: &'a str) -> Result<Self, InvalidEncodingError> {
71        let mut escaped = false;
72        for (offset, b) in s.bytes().enumerate() {
73            match b {
74                b'/' => return Err(InvalidEncodingError { offset }),
75                ENC_PREFIX => {
76                    escaped = true;
77                }
78                TILDE_ENC | SLASH_ENC if escaped => {
79                    escaped = false;
80                }
81                _ => {
82                    if escaped {
83                        return Err(InvalidEncodingError { offset });
84                    }
85                }
86            }
87        }
88        if escaped {
89            return Err(InvalidEncodingError { offset: s.len() });
90        }
91        Ok(Self { inner: s.into() })
92    }
93
94    /// Constructs a `Token` from an arbitrary string.
95    ///
96    /// If the string contains a `/` or a `~`, then it will be assumed not
97    /// encoded, in which case this function will encode it, allocating a new
98    /// string.
99    ///
100    /// If the string is already encoded per RFC 6901, use
101    /// [`Self::from_encoded`] instead, otherwise it will end up double-encoded.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// # use jsonptr::Token;
107    /// assert_eq!(Token::new("/foo/~bar").encoded(), "~1foo~1~0bar");
108    /// ```
109    pub fn new(s: impl Into<Cow<'a, str>>) -> Self {
110        let s = s.into();
111
112        if let Some(i) = s.bytes().position(|b| b == b'/' || b == b'~') {
113            let input = s.as_bytes();
114            // we could take advantage of [`Cow::into_owned`] here, but it would
115            // mean copying over the entire string, only to overwrite a portion
116            // of it... so instead we explicitly allocate a new buffer and copy
117            // only the prefix until the first encoded character
118            // NOTE: the output is at least as large as the input + 1, so we
119            // allocate that much capacity ahead of time
120            let mut bytes = Vec::with_capacity(input.len() + 1);
121            bytes.extend_from_slice(&input[..i]);
122            for &b in &input[i..] {
123                match b {
124                    b'/' => {
125                        bytes.extend_from_slice(ENCODED_SLASH);
126                    }
127                    b'~' => {
128                        bytes.extend_from_slice(ENCODED_TILDE);
129                    }
130                    other => {
131                        bytes.push(other);
132                    }
133                }
134            }
135            Self {
136                // SAFETY: we started from a valid UTF-8 sequence of bytes,
137                // and only replaced some ASCII characters with other two ASCII
138                // characters, so the output is guaranteed valid UTF-8.
139                inner: Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
140            }
141        } else {
142            Self { inner: s }
143        }
144    }
145
146    /// Converts into an owned copy of this token.
147    ///
148    /// If the token is not already owned, this will clone the referenced string
149    /// slice.
150    pub fn into_owned(self) -> Token<'static> {
151        Token {
152            inner: Cow::Owned(self.inner.into_owned()),
153        }
154    }
155
156    /// Extracts an owned copy of this token.
157    ///
158    /// If the token is not already owned, this will clone the referenced string
159    /// slice.
160    ///
161    /// This method is like [`Self::into_owned`], except it doesn't take
162    /// ownership of the original `Token`.
163    pub fn to_owned(&self) -> Token<'static> {
164        Token {
165            inner: Cow::Owned(self.inner.clone().into_owned()),
166        }
167    }
168
169    /// Returns the encoded string representation of the `Token`.
170    ///
171    /// # Examples
172    ///
173    /// ```
174    /// # use jsonptr::Token;
175    /// assert_eq!(Token::new("~bar").encoded(), "~0bar");
176    /// ```
177    pub fn encoded(&self) -> &str {
178        &self.inner
179    }
180
181    /// Returns the decoded string representation of the `Token`.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// # use jsonptr::Token;
187    /// assert_eq!(Token::new("~bar").decoded(), "~bar");
188    /// ```
189    pub fn decoded(&self) -> Cow<'_, str> {
190        if let Some(i) = self.inner.bytes().position(|b| b == ENC_PREFIX) {
191            let input = self.inner.as_bytes();
192            // we could take advantage of [`Cow::into_owned`] here, but it would
193            // mean copying over the entire string, only to overwrite a portion
194            // of it... so instead we explicitly allocate a new buffer and copy
195            // only the prefix until the first encoded character
196            // NOTE: the output is at least as large as the input + 1, so we
197            // allocate that much capacity ahead of time
198            let mut bytes = Vec::with_capacity(input.len() + 1);
199            bytes.extend_from_slice(&input[..i]);
200            // we start from the first escaped character
201            let mut escaped = true;
202            for &b in &input[i + 1..] {
203                match b {
204                    ENC_PREFIX => {
205                        escaped = true;
206                    }
207                    TILDE_ENC if escaped => {
208                        bytes.push(b'~');
209                        escaped = false;
210                    }
211                    SLASH_ENC if escaped => {
212                        bytes.push(b'/');
213                        escaped = false;
214                    }
215                    other => {
216                        bytes.push(other);
217                    }
218                }
219            }
220            // SAFETY: we start from a valid String, and only write valid UTF-8
221            // byte sequences into it.
222            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
223        } else {
224            // if there are no encoded characters, we don't need to allocate!
225            self.inner.clone()
226        }
227    }
228
229    /// Attempts to parse the given `Token` as an array index.
230    ///
231    /// Per [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901#section-4),
232    /// the acceptable values are non-negative integers and the `-` character,
233    /// which stands for the next, non-existent member after the last array
234    /// element.
235    ///
236    /// ## Examples
237    ///
238    /// ```
239    /// # use jsonptr::{index::Index, Token};
240    /// assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
241    /// assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
242    /// assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
243    /// assert!(Token::new("a").to_index().is_err());
244    /// assert!(Token::new("-1").to_index().is_err());
245    /// ```
246    /// ## Errors
247    /// Returns [`ParseIndexError`] if the token is not a valid array index.
248    pub fn to_index(&self) -> Result<Index, ParseIndexError> {
249        self.try_into()
250    }
251}
252
253macro_rules! impl_from_num {
254    ($($ty:ty),*) => {
255        $(
256            impl From<$ty> for Token<'static> {
257                fn from(v: $ty) -> Self {
258                    Token::from_encoded_unchecked(v.to_string())
259                }
260            }
261        )*
262    };
263}
264impl_from_num!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
265
266impl<'a> From<&'a str> for Token<'a> {
267    fn from(value: &'a str) -> Self {
268        Token::new(value)
269    }
270}
271
272impl<'a> From<&'a String> for Token<'a> {
273    fn from(value: &'a String) -> Self {
274        Token::new(value)
275    }
276}
277
278impl From<String> for Token<'static> {
279    fn from(value: String) -> Self {
280        Token::new(value)
281    }
282}
283
284impl<'a> From<&Token<'a>> for Token<'a> {
285    fn from(value: &Token<'a>) -> Self {
286        value.clone()
287    }
288}
289
290impl alloc::fmt::Display for Token<'_> {
291    fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
292        write!(f, "{}", self.decoded())
293    }
294}
295
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                    Tokens                                    ║
║                                   ¯¯¯¯¯¯¯¯                                   ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// An iterator over the [`Token`]s of a [`Pointer`](crate::Pointer).
#[derive(Debug)]
pub struct Tokens<'a> {
    /// Splitting iterator which yields the raw text of each token in turn.
    inner: Split<'a, char>,
}

312impl<'a> Iterator for Tokens<'a> {
313    type Item = Token<'a>;
314    fn next(&mut self) -> Option<Self::Item> {
315        self.inner.next().map(Token::from_encoded_unchecked)
316    }
317}
318impl<'t> Tokens<'t> {
319    pub(crate) fn new(inner: Split<'t, char>) -> Self {
320        Self { inner }
321    }
322}
323
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                             InvalidEncodingError                             ║
║                            ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                            ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// A token within a json pointer contained invalid encoding (a `/`, or a `~`
/// not followed by `0` or `1`).
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidEncodingError {
    /// Byte offset, from the start of the `Token`, of the first offending
    /// byte: the `/`, or the byte found where `0` or `1` was expected after a
    /// `~`. Equals the token's length when the token ends with a dangling `~`.
    pub offset: usize,
}

343impl fmt::Display for InvalidEncodingError {
344    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345        write!(
346            f,
347            "json pointer is malformed due to invalid encoding ('~' not followed by '0' or '1')"
348        )
349    }
350}
351
// The impl goes through `std`, so it is only compiled when the `std` feature
// is enabled.
#[cfg(feature = "std")]
impl std::error::Error for InvalidEncodingError {}

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                    Tests                                     ║
║                                   ¯¯¯¯¯¯¯                                    ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

#[cfg(test)]
mod tests {
    use crate::{assign::AssignError, index::OutOfBoundsError, Pointer};

    use super::*;
    use quickcheck_macros::quickcheck;

    // `From` conversions encode strings and render integers as decimal text.
    #[test]
    fn from() {
        assert_eq!(Token::from("/").encoded(), "~1");
        assert_eq!(Token::from("~/").encoded(), "~0~1");
        assert_eq!(Token::from(34u32).encoded(), "34");
        assert_eq!(Token::from(34u64).encoded(), "34");
        assert_eq!(Token::from(String::from("foo")).encoded(), "foo");
        assert_eq!(Token::from(&Token::new("foo")).encoded(), "foo");
    }

    // Only `-` and non-negative integers are accepted as array indexes.
    #[test]
    fn to_index() {
        assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
        assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
        assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
        assert!(Token::new("a").to_index().is_err());
        assert!(Token::new("-1").to_index().is_err());
    }

    // `new` treats its input as unencoded, so `~1` becomes `~01`.
    #[test]
    fn new() {
        assert_eq!(Token::new("~1").encoded(), "~01");
        assert_eq!(Token::new("a/b").encoded(), "a~1b");
    }

    #[test]
    fn assign_error_display() {
        let err = AssignError::FailedToParseIndex {
            offset: 3,
            source: ParseIndexError::InvalidInteger("a".parse::<usize>().unwrap_err()),
        };
        assert_eq!(
            err.to_string(),
            "assignment failed due to an invalid index at offset 3"
        );

        let err = AssignError::OutOfBounds {
            offset: 3,
            source: OutOfBoundsError {
                index: 3,
                length: 2,
            },
        };

        assert_eq!(
            err.to_string(),
            "assignment failed due to index at offset 3 being out of bounds"
        );
    }

    #[test]
    #[cfg(feature = "std")]
    fn assign_error_source() {
        use std::error::Error;
        let err = AssignError::FailedToParseIndex {
            offset: 3,
            source: ParseIndexError::InvalidInteger("a".parse::<usize>().unwrap_err()),
        };
        assert!(err.source().is_some());
        assert!(err.source().unwrap().is::<ParseIndexError>());

        let err = AssignError::OutOfBounds {
            offset: 3,
            source: OutOfBoundsError {
                index: 3,
                length: 2,
            },
        };

        assert!(err.source().unwrap().is::<OutOfBoundsError>());
    }

    // `from_encoded` keeps valid input untouched and rejects `/` and bad escapes.
    #[test]
    fn from_encoded() {
        assert_eq!(Token::from_encoded("~1").unwrap().encoded(), "~1");
        assert_eq!(Token::from_encoded("~0~1").unwrap().encoded(), "~0~1");
        let t = Token::from_encoded("a~1b").unwrap();
        assert_eq!(t.decoded(), "a/b");
        assert!(Token::from_encoded("a/b").is_err());
        assert!(Token::from_encoded("a~a").is_err());
    }

    #[test]
    fn into_owned() {
        let token = Token::from_encoded("foo~0").unwrap().into_owned();
        assert_eq!(token.encoded(), "foo~0");
    }

    // Whatever `new` produces must be accepted unchanged by `from_encoded`.
    #[quickcheck]
    fn encode_decode(s: String) -> bool {
        let token = Token::new(s);
        let decoded = Token::from_encoded(token.encoded()).unwrap();
        token == decoded
    }

    #[test]
    fn invalid_encoding_error_display() {
        assert_eq!(
            Token::from_encoded("~").unwrap_err().to_string(),
            "json pointer is malformed due to invalid encoding ('~' not followed by '0' or '1')"
        );
    }

    #[test]
    fn tokens() {
        let pointer = Pointer::from_static("/a/b/c");
        let tokens: Vec<Token> = pointer.tokens().collect();
        assert_eq!(
            tokens,
            vec![
                Token::from_encoded_unchecked("a"),
                Token::from_encoded_unchecked("b"),
                Token::from_encoded_unchecked("c")
            ]
        );
    }
}