jsonptr/token.rs
1use core::str::Split;
2
3use crate::index::{Index, ParseIndexError};
4use alloc::{
5 borrow::Cow,
6 fmt,
7 string::{String, ToString},
8 vec::Vec,
9};
10
/// Escape sequence written by [`Token::new`] in place of a literal `~`.
const ENCODED_TILDE: &[u8] = b"~0";
/// Escape sequence written by [`Token::new`] in place of a literal `/`.
const ENCODED_SLASH: &[u8] = b"~1";

/// Byte that introduces an escape sequence inside an encoded token.
const ENC_PREFIX: u8 = b'~';
/// Byte which, directly after [`ENC_PREFIX`], decodes to `~`.
const TILDE_ENC: u8 = b'0';
/// Byte which, directly after [`ENC_PREFIX`], decodes to `/`.
const SLASH_ENC: u8 = b'1';
17
18/*
19░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
20╔══════════════════════════════════════════════════════════════════════════════╗
21║ ║
22║ Token ║
23║ ¯¯¯¯¯¯¯ ║
24╚══════════════════════════════════════════════════════════════════════════════╝
25░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
26*/
27
/// A `Token` is a segment of a JSON [`Pointer`](crate::Pointer), preceded by `'/'` (`%x2F`).
///
/// `Token`s can represent a key in a JSON object or an index in an array.
///
/// - Indexes should not contain leading zeros.
/// - When dealing with arrays or path expansion for assignment, `"-"` represents
///   the next, non-existent index in a JSON array.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'a> {
    /// The token in its RFC 6901 encoded form, either borrowed or owned.
    inner: Cow<'a, str>,
}
39
40impl<'a> Token<'a> {
41 /// Constructs a `Token` from an RFC 6901 encoded string.
42 ///
43 /// This is like [`Self::from_encoded`], except that no validation is
44 /// performed on the input string.
45 ///
46 /// ## Safety
47 /// Input string must be RFC 6901 encoded.
48 pub(crate) unsafe fn from_encoded_unchecked(inner: impl Into<Cow<'a, str>>) -> Self {
49 Self {
50 inner: inner.into(),
51 }
52 }
53
54 /// Constructs a `Token` from an RFC 6901 encoded string.
55 ///
56 /// To be valid, the string must not contain any `/` characters, and any `~`
57 /// characters must be followed by either `0` or `1`.
58 ///
59 /// This function does not allocate.
60 ///
61 /// # Examples
62 ///
63 /// ```
64 /// # use jsonptr::Token;
65 /// assert_eq!(Token::from_encoded("~1foo~1~0bar").unwrap().decoded(), "/foo/~bar");
66 /// let err = Token::from_encoded("foo/oops~bar").unwrap_err();
67 /// assert_eq!(err.offset, 3);
68 /// ```
69 ///
70 /// ## Errors
71 /// Returns `InvalidEncodingError` if the input string is not a valid RFC
72 /// 6901 (`~` must be followed by `0` or `1`)
73 pub fn from_encoded(s: &'a str) -> Result<Self, EncodingError> {
74 let mut escaped = false;
75 for (offset, b) in s.bytes().enumerate() {
76 match b {
77 b'/' => {
78 return Err(EncodingError {
79 offset,
80 source: InvalidEncoding::Slash,
81 })
82 }
83 ENC_PREFIX => {
84 escaped = true;
85 }
86 TILDE_ENC | SLASH_ENC if escaped => {
87 escaped = false;
88 }
89 _ => {
90 if escaped {
91 return Err(EncodingError {
92 offset,
93 source: InvalidEncoding::Tilde,
94 });
95 }
96 }
97 }
98 }
99 if escaped {
100 return Err(EncodingError {
101 offset: s.len(),
102 source: InvalidEncoding::Slash,
103 });
104 }
105 Ok(Self { inner: s.into() })
106 }
107
108 /// Constructs a `Token` from an arbitrary string.
109 ///
110 /// If the string contains a `/` or a `~`, then it will be assumed not
111 /// encoded, in which case this function will encode it, allocating a new
112 /// string.
113 ///
114 /// If the string is already encoded per RFC 6901, use
115 /// [`Self::from_encoded`] instead, otherwise it will end up double-encoded.
116 ///
117 /// # Examples
118 ///
119 /// ```
120 /// # use jsonptr::Token;
121 /// assert_eq!(Token::new("/foo/~bar").encoded(), "~1foo~1~0bar");
122 /// ```
123 pub fn new(s: impl Into<Cow<'a, str>>) -> Self {
124 let s = s.into();
125
126 if let Some(i) = s.bytes().position(|b| b == b'/' || b == b'~') {
127 let input = s.as_bytes();
128 // we could take advantage of [`Cow::into_owned`] here, but it would
129 // mean copying over the entire string, only to overwrite a portion
130 // of it... so instead we explicitly allocate a new buffer and copy
131 // only the prefix until the first encoded character
132 // NOTE: the output is at least as large as the input + 1, so we
133 // allocate that much capacity ahead of time
134 let mut bytes = Vec::with_capacity(input.len() + 1);
135 bytes.extend_from_slice(&input[..i]);
136 for &b in &input[i..] {
137 match b {
138 b'/' => {
139 bytes.extend_from_slice(ENCODED_SLASH);
140 }
141 b'~' => {
142 bytes.extend_from_slice(ENCODED_TILDE);
143 }
144 other => {
145 bytes.push(other);
146 }
147 }
148 }
149 Self {
150 // SAFETY: we started from a valid UTF-8 sequence of bytes,
151 // and only replaced some ASCII characters with other two ASCII
152 // characters, so the output is guaranteed valid UTF-8.
153 inner: Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
154 }
155 } else {
156 Self { inner: s }
157 }
158 }
159
160 /// Converts into an owned copy of this token.
161 ///
162 /// If the token is not already owned, this will clone the referenced string
163 /// slice.
164 pub fn into_owned(self) -> Token<'static> {
165 Token {
166 inner: Cow::Owned(self.inner.into_owned()),
167 }
168 }
169
170 /// Extracts an owned copy of this token.
171 ///
172 /// If the token is not already owned, this will clone the referenced string
173 /// slice.
174 ///
175 /// This method is like [`Self::into_owned`], except it doesn't take
176 /// ownership of the original `Token`.
177 pub fn to_owned(&self) -> Token<'static> {
178 Token {
179 inner: Cow::Owned(self.inner.clone().into_owned()),
180 }
181 }
182
183 /// Returns the encoded string representation of the `Token`.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// # use jsonptr::Token;
189 /// assert_eq!(Token::new("~bar").encoded(), "~0bar");
190 /// ```
191 pub fn encoded(&self) -> &str {
192 &self.inner
193 }
194
195 /// Returns the decoded string representation of the `Token`.
196 ///
197 /// # Examples
198 ///
199 /// ```
200 /// # use jsonptr::Token;
201 /// assert_eq!(Token::new("~bar").decoded(), "~bar");
202 /// ```
203 pub fn decoded(&self) -> Cow<'_, str> {
204 if let Some(i) = self.inner.bytes().position(|b| b == ENC_PREFIX) {
205 let input = self.inner.as_bytes();
206 // we could take advantage of [`Cow::into_owned`] here, but it would
207 // mean copying over the entire string, only to overwrite a portion
208 // of it... so instead we explicitly allocate a new buffer and copy
209 // only the prefix until the first encoded character
210 // NOTE: the output is at least as large as the input + 1, so we
211 // allocate that much capacity ahead of time
212 let mut bytes = Vec::with_capacity(input.len() + 1);
213 bytes.extend_from_slice(&input[..i]);
214 // we start from the first escaped character
215 let mut escaped = true;
216 for &b in &input[i + 1..] {
217 match b {
218 ENC_PREFIX => {
219 escaped = true;
220 }
221 TILDE_ENC if escaped => {
222 bytes.push(b'~');
223 escaped = false;
224 }
225 SLASH_ENC if escaped => {
226 bytes.push(b'/');
227 escaped = false;
228 }
229 other => {
230 bytes.push(other);
231 }
232 }
233 }
234 // SAFETY: we start from a valid String, and only write valid UTF-8
235 // byte sequences into it.
236 Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
237 } else {
238 // if there are no encoded characters, we don't need to allocate!
239 self.inner.clone()
240 }
241 }
242
243 /// Attempts to parse the given `Token` as an array index.
244 ///
245 /// Per [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901#section-4),
246 /// the acceptable values are non-negative integers and the `-` character,
247 /// which stands for the next, non-existent member after the last array
248 /// element.
249 ///
250 /// ## Examples
251 ///
252 /// ```
253 /// # use jsonptr::{index::Index, Token};
254 /// assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
255 /// assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
256 /// assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
257 /// assert!(Token::new("a").to_index().is_err());
258 /// assert!(Token::new("-1").to_index().is_err());
259 /// ```
260 /// ## Errors
261 /// Returns [`ParseIndexError`] if the token is not a valid array index.
262 pub fn to_index(&self) -> Result<Index, ParseIndexError> {
263 self.try_into()
264 }
265
266 /// Returns if the `Token` is `-`, which stands for the next array index.
267 ///
268 /// See also [`Self::to_index`].
269 pub fn is_next(&self) -> bool {
270 matches!(self.to_index(), Ok(Index::Next))
271 }
272}
273
274macro_rules! impl_from_num {
275 ($($ty:ty),*) => {
276 $(
277 impl From<$ty> for Token<'static> {
278 fn from(v: $ty) -> Self {
279 // SAFETY: only used for integer types, which are always valid
280 unsafe { Token::from_encoded_unchecked(v.to_string()) }
281 }
282 }
283 )*
284 };
285}
286impl_from_num!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
287
288impl<'a> From<&'a str> for Token<'a> {
289 fn from(value: &'a str) -> Self {
290 Token::new(value)
291 }
292}
293
294impl<'a> From<&'a String> for Token<'a> {
295 fn from(value: &'a String) -> Self {
296 Token::new(value)
297 }
298}
299
300impl From<String> for Token<'static> {
301 fn from(value: String) -> Self {
302 Token::new(value)
303 }
304}
305
306impl<'a> From<&Token<'a>> for Token<'a> {
307 fn from(value: &Token<'a>) -> Self {
308 value.clone()
309 }
310}
311
312impl alloc::fmt::Display for Token<'_> {
313 fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
314 write!(f, "{}", self.decoded())
315 }
316}
317
318/*
319░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
320╔══════════════════════════════════════════════════════════════════════════════╗
321║ ║
322║ Tokens ║
323║ ¯¯¯¯¯¯¯¯ ║
324╚══════════════════════════════════════════════════════════════════════════════╝
325░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
326*/
327
328/// An iterator over the [`Token`]s of a [`Pointer`](crate::Pointer).
329#[derive(Debug)]
330pub struct Tokens<'a> {
331 inner: Split<'a, char>,
332}
333
334impl<'a> Iterator for Tokens<'a> {
335 type Item = Token<'a>;
336 fn next(&mut self) -> Option<Self::Item> {
337 self.inner
338 .next()
339 // SAFETY: source pointer is encoded
340 .map(|s| unsafe { Token::from_encoded_unchecked(s) })
341 }
342}
343impl<'t> Tokens<'t> {
344 pub(crate) fn new(inner: Split<'t, char>) -> Self {
345 Self { inner }
346 }
347}
348
349/*
350░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
351╔══════════════════════════════════════════════════════════════════════════════╗
352║ ║
353║ InvalidEncodingError ║
354║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
355╚══════════════════════════════════════════════════════════════════════════════╝
356░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
357*/
358
359#[deprecated(since = "0.7.0", note = "renamed to `EncodingError`")]
360/// Deprecated alias for [`EncodingError`].
361pub type InvalidEncodingError = EncodingError;
362
363/*
364░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
365╔══════════════════════════════════════════════════════════════════════════════╗
366║ ║
367║ EncodingError ║
368║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
369╚══════════════════════════════════════════════════════════════════════════════╝
370░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
371*/
372
373/// A token within a json pointer contained invalid encoding (`~` not followed
374/// by `0` or `1`).
375///
376#[derive(Debug, PartialEq, Eq)]
377pub struct EncodingError {
378 /// offset of the erroneous `~` from within the `Token`
379 pub offset: usize,
380 /// the specific encoding error
381 pub source: InvalidEncoding,
382}
383
384#[cfg(feature = "std")]
385impl std::error::Error for EncodingError {
386 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
387 Some(&self.source)
388 }
389}
390
391impl fmt::Display for EncodingError {
392 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
393 write!(
394 f,
395 "token contains invalid encoding at offset {}",
396 self.offset
397 )
398 }
399}
400
401/*
402░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
403╔══════════════════════════════════════════════════════════════════════════════╗
404║ ║
405║ InvalidEncoding ║
406║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
407╚══════════════════════════════════════════════════════════════════════════════╝
408░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
409*/
410
/// The specific way in which a token's encoding is invalid.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum InvalidEncoding {
    /// `~` not followed by `0` or `1`
    Tilde,
    /// non-encoded `/` found in token
    Slash,
}

impl fmt::Display for InvalidEncoding {
    /// Writes a short, fixed description of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match self {
            InvalidEncoding::Tilde => "tilde (~) not followed by 0 or 1",
            InvalidEncoding::Slash => "slash (/) found in token",
        };
        f.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for InvalidEncoding {}
430
431/*
432░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
433╔══════════════════════════════════════════════════════════════════════════════╗
434║ ║
435║ Tests ║
436║ ¯¯¯¯¯¯¯ ║
437╚══════════════════════════════════════════════════════════════════════════════╝
438░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
439*/
440
#[cfg(test)]
mod tests {
    use crate::Pointer;

    use super::*;
    use quickcheck_macros::quickcheck;

    // `From` conversions: strings are encoded, integers are rendered as
    // decimal digits, and `&Token` is cloned.
    #[test]
    fn from() {
        assert_eq!(Token::from("/").encoded(), "~1");
        assert_eq!(Token::from("~/").encoded(), "~0~1");
        assert_eq!(Token::from(34u32).encoded(), "34");
        assert_eq!(Token::from(34u64).encoded(), "34");
        assert_eq!(Token::from(String::from("foo")).encoded(), "foo");
        assert_eq!(Token::from(&Token::new("foo")).encoded(), "foo");
    }

    // `-` maps to `Index::Next`, plain digits to `Index::Num`, anything else
    // is an error.
    #[test]
    fn to_index() {
        assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
        assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
        assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
        assert!(Token::new("a").to_index().is_err());
        assert!(Token::new("-1").to_index().is_err());
    }

    // `Token::new` treats its input as unencoded, so an input that already
    // looks encoded (`~1`) gets its `~` escaped again.
    #[test]
    fn new() {
        assert_eq!(Token::new("~1").encoded(), "~01");
        assert_eq!(Token::new("a/b").encoded(), "a~1b");
    }

    // `Token::from_encoded` keeps valid input as-is and rejects a raw `/` or
    // a `~` that is not followed by `0` or `1`.
    #[test]
    fn from_encoded() {
        assert_eq!(Token::from_encoded("~1").unwrap().encoded(), "~1");
        assert_eq!(Token::from_encoded("~0~1").unwrap().encoded(), "~0~1");
        let t = Token::from_encoded("a~1b").unwrap();
        assert_eq!(t.decoded(), "a/b");
        assert!(Token::from_encoded("a/b").is_err());
        assert!(Token::from_encoded("a~a").is_err());
    }

    // converting to an owned token preserves the encoded string
    #[test]
    fn into_owned() {
        let token = Token::from_encoded("foo~0").unwrap().into_owned();
        assert_eq!(token.encoded(), "foo~0");
    }

    // property: whatever `Token::new` produces must be accepted by
    // `Token::from_encoded` and compare equal to the original token
    #[quickcheck]
    fn encode_decode(s: String) -> bool {
        let token = Token::new(s);
        let decoded = Token::from_encoded(token.encoded()).unwrap();
        token == decoded
    }

    // iterating a pointer yields one token per segment, in order
    #[test]
    fn tokens() {
        let pointer = Pointer::from_static("/a/b/c");
        let tokens: Vec<Token> = pointer.tokens().collect();
        assert_eq!(tokens, unsafe {
            vec![
                Token::from_encoded_unchecked("a"),
                Token::from_encoded_unchecked("b"),
                Token::from_encoded_unchecked("c"),
            ]
        });
    }

    // only the exact token `-` counts as "next"
    #[test]
    fn is_next() {
        let token = Token::new("-");
        assert!(token.is_next());
        let token = Token::new("0");
        assert!(!token.is_next());
        let token = Token::new("a");
        assert!(!token.is_next());
        let token = Token::new("");
        assert!(!token.is_next());
    }
}
520}