jsonptr/token.rs
1use core::str::Split;
2
3use crate::index::{Index, ParseIndexError};
4use alloc::{
5 borrow::Cow,
6 fmt,
7 string::{String, ToString},
8 vec::Vec,
9};
10
// Two-byte escape sequences written out when encoding a token:
// a literal `~` is emitted as `~0` and a literal `/` as `~1`.
const ENCODED_TILDE: &[u8] = b"~0";
const ENCODED_SLASH: &[u8] = b"~1";

// Individual bytes of an escape sequence, matched while validating and
// decoding: the `~` that introduces an escape, followed by `0` (which decodes
// to `~`) or `1` (which decodes to `/`).
const ENC_PREFIX: u8 = b'~';
const TILDE_ENC: u8 = b'0';
const SLASH_ENC: u8 = b'1';
17
18/*
19░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
20╔══════════════════════════════════════════════════════════════════════════════╗
21║ ║
22║ Token ║
23║ ¯¯¯¯¯¯¯ ║
24╚══════════════════════════════════════════════════════════════════════════════╝
25░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
26*/
27
/// A `Token` is a segment of a JSON [`Pointer`](crate::Pointer), preceded by `'/'` (`%x2F`).
///
/// `Token`s can represent a key in a JSON object or an index in an array.
///
/// - Indexes should not contain leading zeros.
/// - When dealing with arrays or path expansion for assignment, `"-"` represents
///   the next, non-existent index in a JSON array.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'a> {
    // The token in its RFC 6901 *encoded* form (`~` stored as `~0`, `/` as
    // `~1`); borrowed when the input could be used as-is, owned otherwise.
    inner: Cow<'a, str>,
}
39
40impl<'a> Token<'a> {
41 /// Constructs a `Token` from an RFC 6901 encoded string.
42 ///
43 /// This is like [`Self::from_encoded`], except that no validation is
44 /// performed on the input string.
45 pub(crate) fn from_encoded_unchecked(inner: impl Into<Cow<'a, str>>) -> Self {
46 Self {
47 inner: inner.into(),
48 }
49 }
50
51 /// Constructs a `Token` from an RFC 6901 encoded string.
52 ///
53 /// To be valid, the string must not contain any `/` characters, and any `~`
54 /// characters must be followed by either `0` or `1`.
55 ///
56 /// This function does not allocate.
57 ///
58 /// # Examples
59 ///
60 /// ```
61 /// # use jsonptr::Token;
62 /// assert_eq!(Token::from_encoded("~1foo~1~0bar").unwrap().decoded(), "/foo/~bar");
63 /// let err = Token::from_encoded("foo/oops~bar").unwrap_err();
64 /// assert_eq!(err.offset, 3);
65 /// ```
66 ///
67 /// ## Errors
68 /// Returns `InvalidEncodingError` if the input string is not a valid RFC
69 /// 6901 (`~` must be followed by `0` or `1`)
70 pub fn from_encoded(s: &'a str) -> Result<Self, InvalidEncodingError> {
71 let mut escaped = false;
72 for (offset, b) in s.bytes().enumerate() {
73 match b {
74 b'/' => return Err(InvalidEncodingError { offset }),
75 ENC_PREFIX => {
76 escaped = true;
77 }
78 TILDE_ENC | SLASH_ENC if escaped => {
79 escaped = false;
80 }
81 _ => {
82 if escaped {
83 return Err(InvalidEncodingError { offset });
84 }
85 }
86 }
87 }
88 if escaped {
89 return Err(InvalidEncodingError { offset: s.len() });
90 }
91 Ok(Self { inner: s.into() })
92 }
93
94 /// Constructs a `Token` from an arbitrary string.
95 ///
96 /// If the string contains a `/` or a `~`, then it will be assumed not
97 /// encoded, in which case this function will encode it, allocating a new
98 /// string.
99 ///
100 /// If the string is already encoded per RFC 6901, use
101 /// [`Self::from_encoded`] instead, otherwise it will end up double-encoded.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// # use jsonptr::Token;
107 /// assert_eq!(Token::new("/foo/~bar").encoded(), "~1foo~1~0bar");
108 /// ```
109 pub fn new(s: impl Into<Cow<'a, str>>) -> Self {
110 let s = s.into();
111
112 if let Some(i) = s.bytes().position(|b| b == b'/' || b == b'~') {
113 let input = s.as_bytes();
114 // we could take advantage of [`Cow::into_owned`] here, but it would
115 // mean copying over the entire string, only to overwrite a portion
116 // of it... so instead we explicitly allocate a new buffer and copy
117 // only the prefix until the first encoded character
118 // NOTE: the output is at least as large as the input + 1, so we
119 // allocate that much capacity ahead of time
120 let mut bytes = Vec::with_capacity(input.len() + 1);
121 bytes.extend_from_slice(&input[..i]);
122 for &b in &input[i..] {
123 match b {
124 b'/' => {
125 bytes.extend_from_slice(ENCODED_SLASH);
126 }
127 b'~' => {
128 bytes.extend_from_slice(ENCODED_TILDE);
129 }
130 other => {
131 bytes.push(other);
132 }
133 }
134 }
135 Self {
136 // SAFETY: we started from a valid UTF-8 sequence of bytes,
137 // and only replaced some ASCII characters with other two ASCII
138 // characters, so the output is guaranteed valid UTF-8.
139 inner: Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
140 }
141 } else {
142 Self { inner: s }
143 }
144 }
145
146 /// Converts into an owned copy of this token.
147 ///
148 /// If the token is not already owned, this will clone the referenced string
149 /// slice.
150 pub fn into_owned(self) -> Token<'static> {
151 Token {
152 inner: Cow::Owned(self.inner.into_owned()),
153 }
154 }
155
156 /// Extracts an owned copy of this token.
157 ///
158 /// If the token is not already owned, this will clone the referenced string
159 /// slice.
160 ///
161 /// This method is like [`Self::into_owned`], except it doesn't take
162 /// ownership of the original `Token`.
163 pub fn to_owned(&self) -> Token<'static> {
164 Token {
165 inner: Cow::Owned(self.inner.clone().into_owned()),
166 }
167 }
168
169 /// Returns the encoded string representation of the `Token`.
170 ///
171 /// # Examples
172 ///
173 /// ```
174 /// # use jsonptr::Token;
175 /// assert_eq!(Token::new("~bar").encoded(), "~0bar");
176 /// ```
177 pub fn encoded(&self) -> &str {
178 &self.inner
179 }
180
181 /// Returns the decoded string representation of the `Token`.
182 ///
183 /// # Examples
184 ///
185 /// ```
186 /// # use jsonptr::Token;
187 /// assert_eq!(Token::new("~bar").decoded(), "~bar");
188 /// ```
189 pub fn decoded(&self) -> Cow<'_, str> {
190 if let Some(i) = self.inner.bytes().position(|b| b == ENC_PREFIX) {
191 let input = self.inner.as_bytes();
192 // we could take advantage of [`Cow::into_owned`] here, but it would
193 // mean copying over the entire string, only to overwrite a portion
194 // of it... so instead we explicitly allocate a new buffer and copy
195 // only the prefix until the first encoded character
196 // NOTE: the output is at least as large as the input + 1, so we
197 // allocate that much capacity ahead of time
198 let mut bytes = Vec::with_capacity(input.len() + 1);
199 bytes.extend_from_slice(&input[..i]);
200 // we start from the first escaped character
201 let mut escaped = true;
202 for &b in &input[i + 1..] {
203 match b {
204 ENC_PREFIX => {
205 escaped = true;
206 }
207 TILDE_ENC if escaped => {
208 bytes.push(b'~');
209 escaped = false;
210 }
211 SLASH_ENC if escaped => {
212 bytes.push(b'/');
213 escaped = false;
214 }
215 other => {
216 bytes.push(other);
217 }
218 }
219 }
220 // SAFETY: we start from a valid String, and only write valid UTF-8
221 // byte sequences into it.
222 Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
223 } else {
224 // if there are no encoded characters, we don't need to allocate!
225 self.inner.clone()
226 }
227 }
228
229 /// Attempts to parse the given `Token` as an array index.
230 ///
231 /// Per [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901#section-4),
232 /// the acceptable values are non-negative integers and the `-` character,
233 /// which stands for the next, non-existent member after the last array
234 /// element.
235 ///
236 /// ## Examples
237 ///
238 /// ```
239 /// # use jsonptr::{index::Index, Token};
240 /// assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
241 /// assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
242 /// assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
243 /// assert!(Token::new("a").to_index().is_err());
244 /// assert!(Token::new("-1").to_index().is_err());
245 /// ```
246 /// ## Errors
247 /// Returns [`ParseIndexError`] if the token is not a valid array index.
248 pub fn to_index(&self) -> Result<Index, ParseIndexError> {
249 self.try_into()
250 }
251}
252
253macro_rules! impl_from_num {
254 ($($ty:ty),*) => {
255 $(
256 impl From<$ty> for Token<'static> {
257 fn from(v: $ty) -> Self {
258 Token::from_encoded_unchecked(v.to_string())
259 }
260 }
261 )*
262 };
263}
264impl_from_num!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
265
266impl<'a> From<&'a str> for Token<'a> {
267 fn from(value: &'a str) -> Self {
268 Token::new(value)
269 }
270}
271
272impl<'a> From<&'a String> for Token<'a> {
273 fn from(value: &'a String) -> Self {
274 Token::new(value)
275 }
276}
277
278impl From<String> for Token<'static> {
279 fn from(value: String) -> Self {
280 Token::new(value)
281 }
282}
283
284impl<'a> From<&Token<'a>> for Token<'a> {
285 fn from(value: &Token<'a>) -> Self {
286 value.clone()
287 }
288}
289
290impl alloc::fmt::Display for Token<'_> {
291 fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
292 write!(f, "{}", self.decoded())
293 }
294}
295
296/*
297░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
298╔══════════════════════════════════════════════════════════════════════════════╗
299║ ║
300║ Tokens ║
301║ ¯¯¯¯¯¯¯¯ ║
302╚══════════════════════════════════════════════════════════════════════════════╝
303░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
304*/
305
306/// An iterator over the [`Token`]s of a [`Pointer`](crate::Pointer).
307#[derive(Debug)]
308pub struct Tokens<'a> {
309 inner: Split<'a, char>,
310}
311
312impl<'a> Iterator for Tokens<'a> {
313 type Item = Token<'a>;
314 fn next(&mut self) -> Option<Self::Item> {
315 self.inner.next().map(Token::from_encoded_unchecked)
316 }
317}
318impl<'t> Tokens<'t> {
319 pub(crate) fn new(inner: Split<'t, char>) -> Self {
320 Self { inner }
321 }
322}
323
324/*
325░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
326╔══════════════════════════════════════════════════════════════════════════════╗
327║ ║
328║ InvalidEncodingError ║
329║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
330╚══════════════════════════════════════════════════════════════════════════════╝
331░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
332*/
333
/// A token within a json pointer contained invalid encoding (`~` not followed
/// by `0` or `1`).
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidEncodingError {
    /// Byte offset within the `Token` at which the problem was detected.
    ///
    /// As reported by [`Token::from_encoded`], this is the position of a `/`,
    /// or of the byte following a `~` that is neither `0` nor `1`, or the
    /// length of the token when it ends with a `~`.
    pub offset: usize,
}

impl fmt::Display for InvalidEncodingError {
    /// Writes a fixed description of the error; the offset is not included.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(
            "json pointer is malformed due to invalid encoding ('~' not followed by '0' or '1')",
        )
    }
}

#[cfg(feature = "std")]
impl std::error::Error for InvalidEncodingError {}
354
355/*
356░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
357╔══════════════════════════════════════════════════════════════════════════════╗
358║ ║
359║ Tests ║
360║ ¯¯¯¯¯¯¯ ║
361╚══════════════════════════════════════════════════════════════════════════════╝
362░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
363*/
364
#[cfg(test)]
mod tests {
    use crate::{assign::AssignError, index::OutOfBoundsError, Pointer};

    use super::*;
    use quickcheck_macros::quickcheck;

    // `From` conversions: strings are encoded, integers are rendered in
    // decimal, and a `&Token` yields an equal token.
    #[test]
    fn from() {
        assert_eq!(Token::from("/").encoded(), "~1");
        assert_eq!(Token::from("~/").encoded(), "~0~1");
        assert_eq!(Token::from(34u32).encoded(), "34");
        assert_eq!(Token::from(34u64).encoded(), "34");
        assert_eq!(Token::from(String::from("foo")).encoded(), "foo");
        assert_eq!(Token::from(&Token::new("foo")).encoded(), "foo");
    }

    // `to_index` accepts `-` and non-negative integers, and rejects anything
    // else.
    #[test]
    fn to_index() {
        assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
        assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
        assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
        assert!(Token::new("a").to_index().is_err());
        assert!(Token::new("-1").to_index().is_err());
    }

    // `new` treats its input as unencoded, so an input that already looks
    // encoded (`~1`) is escaped again (`~01`).
    #[test]
    fn new() {
        assert_eq!(Token::new("~1").encoded(), "~01");
        assert_eq!(Token::new("a/b").encoded(), "a~1b");
    }

    // Pins the `Display` text of both `AssignError` variants built here.
    #[test]
    fn assign_error_display() {
        let err = AssignError::FailedToParseIndex {
            offset: 3,
            source: ParseIndexError::InvalidInteger("a".parse::<usize>().unwrap_err()),
        };
        assert_eq!(
            err.to_string(),
            "assignment failed due to an invalid index at offset 3"
        );

        let err = AssignError::OutOfBounds {
            offset: 3,
            source: OutOfBoundsError {
                index: 3,
                length: 2,
            },
        };

        assert_eq!(
            err.to_string(),
            "assignment failed due to index at offset 3 being out of bounds"
        );
    }

    // With `std` enabled, each `AssignError` variant exposes its wrapped error
    // through `Error::source`.
    #[test]
    #[cfg(feature = "std")]
    fn assign_error_source() {
        use std::error::Error;
        let err = AssignError::FailedToParseIndex {
            offset: 3,
            source: ParseIndexError::InvalidInteger("a".parse::<usize>().unwrap_err()),
        };
        assert!(err.source().is_some());
        assert!(err.source().unwrap().is::<ParseIndexError>());

        let err = AssignError::OutOfBounds {
            offset: 3,
            source: OutOfBoundsError {
                index: 3,
                length: 2,
            },
        };

        assert!(err.source().unwrap().is::<OutOfBoundsError>());
    }

    // `from_encoded` keeps valid input verbatim and rejects a raw `/` as well
    // as a `~` followed by something other than `0` or `1`.
    #[test]
    fn from_encoded() {
        assert_eq!(Token::from_encoded("~1").unwrap().encoded(), "~1");
        assert_eq!(Token::from_encoded("~0~1").unwrap().encoded(), "~0~1");
        let t = Token::from_encoded("a~1b").unwrap();
        assert_eq!(t.decoded(), "a/b");
        assert!(Token::from_encoded("a/b").is_err());
        assert!(Token::from_encoded("a~a").is_err());
    }

    // `into_owned` preserves the encoded contents.
    #[test]
    fn into_owned() {
        let token = Token::from_encoded("foo~0").unwrap().into_owned();
        assert_eq!(token.encoded(), "foo~0");
    }

    // Property: whatever `Token::new` produces for an arbitrary string is
    // accepted by `from_encoded` and compares equal to the original token.
    #[quickcheck]
    fn encode_decode(s: String) -> bool {
        let token = Token::new(s);
        let decoded = Token::from_encoded(token.encoded()).unwrap();
        token == decoded
    }

    // Pins the exact `Display` message of `InvalidEncodingError`.
    #[test]
    fn invalid_encoding_error_display() {
        assert_eq!(
            Token::from_encoded("~").unwrap_err().to_string(),
            "json pointer is malformed due to invalid encoding ('~' not followed by '0' or '1')"
        );
    }

    // Iterating a pointer's tokens yields one token per segment, in order.
    #[test]
    fn tokens() {
        let pointer = Pointer::from_static("/a/b/c");
        let tokens: Vec<Token> = pointer.tokens().collect();
        assert_eq!(
            tokens,
            vec![
                Token::from_encoded_unchecked("a"),
                Token::from_encoded_unchecked("b"),
                Token::from_encoded_unchecked("c")
            ]
        );
    }
}
488}