jsonptr/
token.rs

1use core::str::Split;
2
3use crate::index::{Index, ParseIndexError};
4use alloc::{
5    borrow::Cow,
6    fmt,
7    string::{String, ToString},
8    vec::Vec,
9};
10
/// Byte that introduces an escape sequence in an RFC 6901 encoded token.
const ENC_PREFIX: u8 = b'~';
/// Byte that, when it follows [`ENC_PREFIX`], stands for a literal `~`.
const TILDE_ENC: u8 = b'0';
/// Byte that, when it follows [`ENC_PREFIX`], stands for a literal `/`.
const SLASH_ENC: u8 = b'1';

/// Two-byte escape sequence written in place of a literal `~` (`~0`).
const ENCODED_TILDE: &[u8] = &[ENC_PREFIX, TILDE_ENC];
/// Two-byte escape sequence written in place of a literal `/` (`~1`).
const ENCODED_SLASH: &[u8] = &[ENC_PREFIX, SLASH_ENC];
17
18/*
19░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
20╔══════════════════════════════════════════════════════════════════════════════╗
21║                                                                              ║
22║                                    Token                                     ║
23║                                   ¯¯¯¯¯¯¯                                    ║
24╚══════════════════════════════════════════════════════════════════════════════╝
25░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
26*/
27
/// A `Token` is a segment of a JSON [`Pointer`](crate::Pointer), preceded by
/// `'/'` (`%x2F`).
///
/// `Token`s can represent a key in a JSON object or an index in an array.
///
/// - Indexes should not contain leading zeros.
/// - When dealing with arrays or path expansion for assignment, `"-"`
///   represents the next, non-existent index in a JSON array.
///
/// The token is stored in its RFC 6901 *encoded* form; comparison, ordering
/// and hashing therefore operate on the encoded string.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'a> {
    /// The RFC 6901 encoded representation of the token.
    inner: Cow<'a, str>,
}
39
40impl<'a> Token<'a> {
41    /// Constructs a `Token` from an RFC 6901 encoded string.
42    ///
43    /// This is like [`Self::from_encoded`], except that no validation is
44    /// performed on the input string.
45    ///
46    /// ## Safety
47    /// Input string must be RFC 6901 encoded.
48    pub(crate) unsafe fn from_encoded_unchecked(inner: impl Into<Cow<'a, str>>) -> Self {
49        Self {
50            inner: inner.into(),
51        }
52    }
53
54    /// Constructs a `Token` from an RFC 6901 encoded string.
55    ///
56    /// To be valid, the string must not contain any `/` characters, and any `~`
57    /// characters must be followed by either `0` or `1`.
58    ///
59    /// This function does not allocate.
60    ///
61    /// # Examples
62    ///
63    /// ```
64    /// # use jsonptr::Token;
65    /// assert_eq!(Token::from_encoded("~1foo~1~0bar").unwrap().decoded(), "/foo/~bar");
66    /// let err = Token::from_encoded("foo/oops~bar").unwrap_err();
67    /// assert_eq!(err.offset, 3);
68    /// ```
69    ///
70    /// ## Errors
71    /// Returns `InvalidEncodingError` if the input string is not a valid RFC
72    /// 6901 (`~` must be followed by `0` or `1`)
73    pub fn from_encoded(s: &'a str) -> Result<Self, EncodingError> {
74        let mut escaped = false;
75        for (offset, b) in s.bytes().enumerate() {
76            match b {
77                b'/' => {
78                    return Err(EncodingError {
79                        offset,
80                        source: InvalidEncoding::Slash,
81                    })
82                }
83                ENC_PREFIX => {
84                    escaped = true;
85                }
86                TILDE_ENC | SLASH_ENC if escaped => {
87                    escaped = false;
88                }
89                _ => {
90                    if escaped {
91                        return Err(EncodingError {
92                            offset,
93                            source: InvalidEncoding::Tilde,
94                        });
95                    }
96                }
97            }
98        }
99        if escaped {
100            return Err(EncodingError {
101                offset: s.len(),
102                source: InvalidEncoding::Slash,
103            });
104        }
105        Ok(Self { inner: s.into() })
106    }
107
108    /// Constructs a `Token` from an arbitrary string.
109    ///
110    /// If the string contains a `/` or a `~`, then it will be assumed not
111    /// encoded, in which case this function will encode it, allocating a new
112    /// string.
113    ///
114    /// If the string is already encoded per RFC 6901, use
115    /// [`Self::from_encoded`] instead, otherwise it will end up double-encoded.
116    ///
117    /// # Examples
118    ///
119    /// ```
120    /// # use jsonptr::Token;
121    /// assert_eq!(Token::new("/foo/~bar").encoded(), "~1foo~1~0bar");
122    /// ```
123    pub fn new(s: impl Into<Cow<'a, str>>) -> Self {
124        let s = s.into();
125
126        if let Some(i) = s.bytes().position(|b| b == b'/' || b == b'~') {
127            let input = s.as_bytes();
128            // we could take advantage of [`Cow::into_owned`] here, but it would
129            // mean copying over the entire string, only to overwrite a portion
130            // of it... so instead we explicitly allocate a new buffer and copy
131            // only the prefix until the first encoded character
132            // NOTE: the output is at least as large as the input + 1, so we
133            // allocate that much capacity ahead of time
134            let mut bytes = Vec::with_capacity(input.len() + 1);
135            bytes.extend_from_slice(&input[..i]);
136            for &b in &input[i..] {
137                match b {
138                    b'/' => {
139                        bytes.extend_from_slice(ENCODED_SLASH);
140                    }
141                    b'~' => {
142                        bytes.extend_from_slice(ENCODED_TILDE);
143                    }
144                    other => {
145                        bytes.push(other);
146                    }
147                }
148            }
149            Self {
150                // SAFETY: we started from a valid UTF-8 sequence of bytes,
151                // and only replaced some ASCII characters with other two ASCII
152                // characters, so the output is guaranteed valid UTF-8.
153                inner: Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
154            }
155        } else {
156            Self { inner: s }
157        }
158    }
159
160    /// Converts into an owned copy of this token.
161    ///
162    /// If the token is not already owned, this will clone the referenced string
163    /// slice.
164    pub fn into_owned(self) -> Token<'static> {
165        Token {
166            inner: Cow::Owned(self.inner.into_owned()),
167        }
168    }
169
170    /// Extracts an owned copy of this token.
171    ///
172    /// If the token is not already owned, this will clone the referenced string
173    /// slice.
174    ///
175    /// This method is like [`Self::into_owned`], except it doesn't take
176    /// ownership of the original `Token`.
177    pub fn to_owned(&self) -> Token<'static> {
178        Token {
179            inner: Cow::Owned(self.inner.clone().into_owned()),
180        }
181    }
182
183    /// Returns the encoded string representation of the `Token`.
184    ///
185    /// # Examples
186    ///
187    /// ```
188    /// # use jsonptr::Token;
189    /// assert_eq!(Token::new("~bar").encoded(), "~0bar");
190    /// ```
191    pub fn encoded(&self) -> &str {
192        &self.inner
193    }
194
195    /// Returns the decoded string representation of the `Token`.
196    ///
197    /// # Examples
198    ///
199    /// ```
200    /// # use jsonptr::Token;
201    /// assert_eq!(Token::new("~bar").decoded(), "~bar");
202    /// ```
203    pub fn decoded(&self) -> Cow<'_, str> {
204        if let Some(i) = self.inner.bytes().position(|b| b == ENC_PREFIX) {
205            let input = self.inner.as_bytes();
206            // we could take advantage of [`Cow::into_owned`] here, but it would
207            // mean copying over the entire string, only to overwrite a portion
208            // of it... so instead we explicitly allocate a new buffer and copy
209            // only the prefix until the first encoded character
210            // NOTE: the output is at least as large as the input + 1, so we
211            // allocate that much capacity ahead of time
212            let mut bytes = Vec::with_capacity(input.len() + 1);
213            bytes.extend_from_slice(&input[..i]);
214            // we start from the first escaped character
215            let mut escaped = true;
216            for &b in &input[i + 1..] {
217                match b {
218                    ENC_PREFIX => {
219                        escaped = true;
220                    }
221                    TILDE_ENC if escaped => {
222                        bytes.push(b'~');
223                        escaped = false;
224                    }
225                    SLASH_ENC if escaped => {
226                        bytes.push(b'/');
227                        escaped = false;
228                    }
229                    other => {
230                        bytes.push(other);
231                    }
232                }
233            }
234            // SAFETY: we start from a valid String, and only write valid UTF-8
235            // byte sequences into it.
236            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
237        } else {
238            // if there are no encoded characters, we don't need to allocate!
239            self.inner.clone()
240        }
241    }
242
243    /// Attempts to parse the given `Token` as an array index.
244    ///
245    /// Per [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901#section-4),
246    /// the acceptable values are non-negative integers and the `-` character,
247    /// which stands for the next, non-existent member after the last array
248    /// element.
249    ///
250    /// ## Examples
251    ///
252    /// ```
253    /// # use jsonptr::{index::Index, Token};
254    /// assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
255    /// assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
256    /// assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
257    /// assert!(Token::new("a").to_index().is_err());
258    /// assert!(Token::new("-1").to_index().is_err());
259    /// ```
260    /// ## Errors
261    /// Returns [`ParseIndexError`] if the token is not a valid array index.
262    pub fn to_index(&self) -> Result<Index, ParseIndexError> {
263        self.try_into()
264    }
265
266    /// Returns if the `Token` is `-`, which stands for the next array index.
267    ///
268    /// See also [`Self::to_index`].
269    pub fn is_next(&self) -> bool {
270        matches!(self.to_index(), Ok(Index::Next))
271    }
272}
273
274macro_rules! impl_from_num {
275    ($($ty:ty),*) => {
276        $(
277            impl From<$ty> for Token<'static> {
278                fn from(v: $ty) -> Self {
279                    // SAFETY: only used for integer types, which are always valid
280                    unsafe { Token::from_encoded_unchecked(v.to_string()) }
281                }
282            }
283        )*
284    };
285}
286impl_from_num!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
287
288impl<'a> From<&'a str> for Token<'a> {
289    fn from(value: &'a str) -> Self {
290        Token::new(value)
291    }
292}
293
294impl<'a> From<&'a String> for Token<'a> {
295    fn from(value: &'a String) -> Self {
296        Token::new(value)
297    }
298}
299
300impl From<String> for Token<'static> {
301    fn from(value: String) -> Self {
302        Token::new(value)
303    }
304}
305
306impl<'a> From<&Token<'a>> for Token<'a> {
307    fn from(value: &Token<'a>) -> Self {
308        value.clone()
309    }
310}
311
312impl alloc::fmt::Display for Token<'_> {
313    fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
314        write!(f, "{}", self.decoded())
315    }
316}
317
318/*
319░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
320╔══════════════════════════════════════════════════════════════════════════════╗
321║                                                                              ║
322║                                    Tokens                                    ║
323║                                   ¯¯¯¯¯¯¯¯                                   ║
324╚══════════════════════════════════════════════════════════════════════════════╝
325░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
326*/
327
328/// An iterator over the [`Token`]s of a [`Pointer`](crate::Pointer).
329#[derive(Debug)]
330pub struct Tokens<'a> {
331    inner: Split<'a, char>,
332}
333
334impl<'a> Iterator for Tokens<'a> {
335    type Item = Token<'a>;
336    fn next(&mut self) -> Option<Self::Item> {
337        self.inner
338            .next()
339            // SAFETY: source pointer is encoded
340            .map(|s| unsafe { Token::from_encoded_unchecked(s) })
341    }
342}
343impl<'t> Tokens<'t> {
344    pub(crate) fn new(inner: Split<'t, char>) -> Self {
345        Self { inner }
346    }
347}
348
349/*
350░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
351╔══════════════════════════════════════════════════════════════════════════════╗
352║                                                                              ║
353║                             InvalidEncodingError                             ║
354║                            ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                            ║
355╚══════════════════════════════════════════════════════════════════════════════╝
356░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
357*/
358
359#[deprecated(since = "0.7.0", note = "renamed to `EncodingError`")]
360/// Deprecated alias for [`EncodingError`].
361pub type InvalidEncodingError = EncodingError;
362
363/*
364░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
365╔══════════════════════════════════════════════════════════════════════════════╗
366║                                                                              ║
367║                                EncodingError                                 ║
368║                               ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                                ║
369╚══════════════════════════════════════════════════════════════════════════════╝
370░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
371*/
372
373/// A token within a json pointer contained invalid encoding (`~` not followed
374/// by `0` or `1`).
375///
376#[derive(Debug, PartialEq, Eq)]
377pub struct EncodingError {
378    /// offset of the erroneous `~` from within the `Token`
379    pub offset: usize,
380    /// the specific encoding error
381    pub source: InvalidEncoding,
382}
383
/// Exposes the contained [`InvalidEncoding`] as the error's source.
#[cfg(feature = "std")]
impl std::error::Error for EncodingError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = &self.source;
        Some(cause)
    }
}
390
391impl fmt::Display for EncodingError {
392    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
393        write!(
394            f,
395            "token contains invalid encoding at offset {}",
396            self.offset
397        )
398    }
399}
400
401/*
402░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
403╔══════════════════════════════════════════════════════════════════════════════╗
404║                                                                              ║
405║                               InvalidEncoding                                ║
406║                              ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                               ║
407╚══════════════════════════════════════════════════════════════════════════════╝
408░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
409*/
410
/// The specific way in which a token's encoding was invalid.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum InvalidEncoding {
    /// `~` not followed by `0` or `1`
    Tilde,
    /// non-encoded `/` found in token
    Slash,
}

/// Formats a short, human-readable description of the variant.
impl fmt::Display for InvalidEncoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match self {
            Self::Tilde => "tilde (~) not followed by 0 or 1",
            Self::Slash => "slash (/) found in token",
        };
        f.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for InvalidEncoding {}
430
431/*
432░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
433╔══════════════════════════════════════════════════════════════════════════════╗
434║                                                                              ║
435║                                    Tests                                     ║
436║                                   ¯¯¯¯¯¯¯                                    ║
437╚══════════════════════════════════════════════════════════════════════════════╝
438░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
439*/
440
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Pointer;
    use quickcheck_macros::quickcheck;

    /// The `From` conversions encode strings and render integers verbatim.
    #[test]
    fn from() {
        for (raw, expected) in [("/", "~1"), ("~/", "~0~1")] {
            assert_eq!(Token::from(raw).encoded(), expected);
        }
        assert_eq!(Token::from(34u32).encoded(), "34");
        assert_eq!(Token::from(34u64).encoded(), "34");
        assert_eq!(Token::from(String::from("foo")).encoded(), "foo");
        let source = Token::new("foo");
        assert_eq!(Token::from(&source).encoded(), "foo");
    }

    /// `to_index` accepts `-` and non-negative integers, rejecting the rest.
    #[test]
    fn to_index() {
        let accepted = [("-", Index::Next), ("0", Index::Num(0)), ("2", Index::Num(2))];
        for (raw, expected) in accepted {
            assert_eq!(Token::new(raw).to_index(), Ok(expected));
        }
        for raw in ["a", "-1"] {
            assert!(Token::new(raw).to_index().is_err());
        }
    }

    /// `new` escapes `~` and `/` found in its input.
    #[test]
    fn new() {
        for (raw, expected) in [("~1", "~01"), ("a/b", "a~1b")] {
            assert_eq!(Token::new(raw).encoded(), expected);
        }
    }

    /// `from_encoded` keeps valid input untouched and rejects invalid input.
    #[test]
    fn from_encoded() {
        for valid in ["~1", "~0~1"] {
            assert_eq!(Token::from_encoded(valid).unwrap().encoded(), valid);
        }
        let token = Token::from_encoded("a~1b").unwrap();
        assert_eq!(token.decoded(), "a/b");
        for invalid in ["a/b", "a~a"] {
            assert!(Token::from_encoded(invalid).is_err());
        }
    }

    /// `into_owned` preserves the encoded representation.
    #[test]
    fn into_owned() {
        let owned = Token::from_encoded("foo~0").unwrap().into_owned();
        assert_eq!(owned.encoded(), "foo~0");
    }

    /// Re-parsing the encoded form of any token yields an equal token.
    #[quickcheck]
    fn encode_decode(s: String) -> bool {
        let token = Token::new(s);
        let reparsed = Token::from_encoded(token.encoded()).unwrap();
        token == reparsed
    }

    /// Iterating a pointer's tokens yields one token per segment.
    #[test]
    fn tokens() {
        let pointer = Pointer::from_static("/a/b/c");
        let actual: Vec<Token> = pointer.tokens().collect();
        let expected: Vec<Token> = ["a", "b", "c"]
            .into_iter()
            // SAFETY: plain ASCII letters need no RFC 6901 escaping
            .map(|segment| unsafe { Token::from_encoded_unchecked(segment) })
            .collect();
        assert_eq!(actual, expected);
    }

    /// Only the `-` token is reported as the "next" index.
    #[test]
    fn is_next() {
        assert!(Token::new("-").is_next());
        for raw in ["0", "a", ""] {
            assert!(!Token::new(raw).is_next());
        }
    }
}