bytes_utils/
string.rs

1//! [String]-like wrappers around [Bytes] and [BytesMut].
2//!
3//! The [Bytes] and [BytesMut] provide a buffer of bytes with ability to create owned slices into
4//! the same shared memory allocation. This allows cheap manipulation of data.
5//!
6//! Strings are mostly just byte buffers with extra APIs to manipulate them. The standard [String]
7//! type is built as a wrapper around [Vec]. We build similar wrappers around the [Bytes] and
8//! [BytesMut], gaining the ability to create owned shared slices for textual data as well.
9//!
10//! Users are expected to use the [Str] and [StrMut] types. Note that these are type aliases around
//! the [StrInner] type. The latter is a means to implement both in one go and contains all the
12//! documentation, but is not meant to be used directly.
13//!
14//! # Splitting
15//!
16//! The [prim@str] type from standard library (which the types here dereference to) allows for
17//! slicing and splitting in many convenient ways. They, however, return borrowed string slices
18//! (`&str`), which might pose some problems.
19//!
20//! The [Str], and to certain extent, the [StrMut] type additionally allow cheap splitting and
21//! slicing that produce owned [Str] and [StrMut] respectively. They are slightly more expensive
//! than the ones returning `&str`, but only by the cost of incrementing internal reference
23//! counts. They do not clone the actual string data, like `.to_owned()` on the standard library
24//! methods would. These methods are available in addition to the standard ones.
25//!
26//! There are three ways how this can be done:
27//!
28//! * By dedicated methods, like [lines_bytes][StrInner::lines_bytes] (in general, the name of the
29//!   standard method suffixed with `_bytes`).
30//! * By using the [BytesIter] iterator manually.
31//! * By using the standard-library methods, producing `&str` and translating it back to [Str] with
32//!   [slice][StrInner::slice] or [StrInner::slice_ref].
33//!
34//! # Examples
35//!
36//! ```rust
37//! # use std::convert::TryFrom;
38//! # use bytes::Bytes;
39//! # use bytes_utils::{Str, StrMut};
40//! let mut builder = StrMut::new();
41//! builder += "Hello";
42//! builder.push(' ');
43//! builder.push_str("World");
44//! assert_eq!("Hello World", builder);
45//!
46//! let s1 = builder.split_built().freeze();
47//! // This is a cheap copy, in the form of incrementing a reference count.
48//! let s2 = s1.clone();
49//! assert_eq!("Hello World", s1);
50//! assert_eq!("Hello World", s2);
51//! // Slicing is cheap as well, even though the returned things are Str and therefore owned too.
52//! assert_eq!("ello", s1.slice(1..5));
53//! // We have taken the data out of the builder, but the rest of its capacity can be used for
54//! // further things.
55//! assert_eq!("", builder);
56//!
57//! // Creating from strings and similar works
58//! let a = Str::from("Hello");
59//! assert_eq!("Hello", a);
60//!
61//! let e = Str::new();
62//! assert_eq!("", e);
63//!
64//! // And from Bytes too.
65//! let b = Str::try_from(Bytes::from_static(b"World")).expect("Must be utf8");
66//! assert_eq!("World", b);
67//! // Invalid utf8 is refused.
68//! Str::try_from(Bytes::from_static(&[0, 0, 255])).unwrap_err();
69//! ```
70
71use std::borrow::{Borrow, BorrowMut, Cow};
72use std::cmp::Ordering;
73use std::convert::{Infallible, TryFrom};
74use std::error::Error;
75use std::fmt::{Debug, Display, Formatter, Result as FmtResult, Write};
76use std::hash::{Hash, Hasher};
77use std::iter::{self, FromIterator};
78use std::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut};
79use std::str::{self, FromStr};
80
81use bytes::{Bytes, BytesMut};
82use either::Either;
83
/// Error when creating [Str] or [StrMut] from invalid UTF8 data.
///
/// Besides the description of the problem, it carries the rejected byte buffer, so the caller can
/// get it back with [into_inner][Utf8Error::into_inner].
#[derive(Copy, Clone, Debug)]
pub struct Utf8Error<S> {
    // The validation error as reported by the standard library.
    e: str::Utf8Error,
    // The storage that failed the validation.
    inner: S,
}
90
91impl<S> Utf8Error<S> {
92    /// Returns the byte buffer back to the caller.
93    pub fn into_inner(self) -> S {
94        self.inner
95    }
96
97    /// The inner description of why the data is invalid UTF8.
98    pub fn utf8_error(&self) -> str::Utf8Error {
99        self.e
100    }
101}
102
103impl<S> Display for Utf8Error<S> {
104    fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
105        Display::fmt(&self.e, fmt)
106    }
107}
108
// No `source` is provided here; the description of the problem is available through `Display`,
// which forwards to the inner `str::Utf8Error`.
impl<S: Debug> Error for Utf8Error<S> {}
110
/// Direction of iteration.
///
/// It decides from which end of the string the [BytesIter] takes the chunks and how the indices
/// returned by its extraction closure are interpreted.
///
/// See [BytesIter].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Direction {
    /// Move forward (in the normal direction) in the string.
    Forward,

    /// Move backwards in the string.
    Backward,
}
122
/// Manual splitting iterator.
///
/// The methods on [Str] and [StrMut] that iterate use this internally. But it can also be used
/// manually to generate other iterators that split the original into parts.
#[derive(Clone, Debug)]
pub struct BytesIter<S, F> {
    // The part of the input that was not split yet. `None` once the last chunk was yielded.
    bytes: Option<S>,
    // The closure locating the next chunk/separator border in the rest.
    extract: F,
    // From which end of the rest the chunks are taken.
    direction: Direction,
}
133
134impl<S, F> BytesIter<S, F>
135where
136    S: Storage,
137    F: FnMut(&str) -> Option<(usize, usize)>,
138{
139    /// A constructor of the iterator.
140    ///
141    /// The `direction` specifies in what order chunks should be yielded.
142    ///
143    /// The `ext` closure is always called with the rest of not yet split string. It shall return
144    /// the byte indices of the chunk and separator border. In case of forward iteration, it is the
145    /// end of them and the separator needs to end further to the string (or at the same position).
146    /// In the backwards direction, it is in reverse ‒ they specify their starts and the separator
147    /// is before the chunk.
148    ///
149    /// # Panics
150    ///
151    /// If the indices don't point at a character boundary, the iteration will panic. It'll also
152    /// panic if the returned indices are reversed or if they are out of bounds.
153    pub fn new(s: StrInner<S>, direction: Direction, ext: F) -> Self {
154        Self {
155            bytes: Some(s.0),
156            extract: ext,
157            direction,
158        }
159    }
160}
161
impl<S, F> Iterator for BytesIter<S, F>
where
    S: Storage,
    F: FnMut(&str) -> Option<(usize, usize)>,
{
    type Item = StrInner<S>;

    // Takes the not yet split rest, asks the closure where the next border is and cuts the rest
    // accordingly. Once the closure returns `None`, the whole rest is yielded and nothing is
    // stored back, which terminates the iteration on the following call.
    fn next(&mut self) -> Option<StrInner<S>> {
        let storage = self.bytes.take()?;
        // Safety: we make sure it is valid UTF8 on the API boundary.
        let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) };
        // Cuts the storage into the part before `left` and the part from `right` on, dropping
        // the separator in between.
        fn split<S: Storage>(storage: S, left: usize, right: usize) -> (S, S) {
            // Safety: same storage as in the caller, which holds valid UTF8.
            let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) };
            // Sanity-check we are not slicing in the middle of utf8 code point. This would
            // panic if we do. It would also panic if we are out of range, which is also good.
            assert!(whole_str.is_char_boundary(left));
            assert!(whole_str.is_char_boundary(right));

            // Now that we are sure this is legal, we are going to slice the byte data for real.
            let (with_sep, end) = storage.split_at(right);
            let (start, _sep) = with_sep.split_at(left);
            (start, end)
        }
        match ((self.extract)(whole_str), self.direction) {
            (Some((chunk_end, sep_end)), Direction::Forward) => {
                assert!(chunk_end <= sep_end);
                let (start, end) = split(storage, chunk_end, sep_end);

                // The chunk is at the front, the part behind the separator is kept for later.
                self.bytes = Some(end);
                Some(StrInner(start))
            }
            (Some((chunk_start, sep_start)), Direction::Backward) => {
                assert!(sep_start <= chunk_start);
                let (start, end) = split(storage, sep_start, chunk_start);

                // The chunk is at the back, the part before the separator is kept for later.
                self.bytes = Some(start);
                Some(StrInner(end))
            }
            (None, _) => {
                // No separator found -> return the whole rest (and keep None in ourselves)
                Some(StrInner(storage))
            }
        }
    }
}
207
/// Locates the first run of separator characters, for use with the [BytesIter].
///
/// Returns the byte indices of the start and the end of the first maximal run of consecutive
/// characters for which `is_sep` holds, or `None` if there's no such character in `s`.
fn sep_find<F: Fn(char) -> bool>(s: &str, is_sep: F) -> Option<(usize, usize)> {
    let run_start = s.find(&is_sep)?;
    // Length of the run in bytes; it reaches the end of the string if nothing else follows.
    let run_len = s[run_start..]
        .find(|c: char| !is_sep(c))
        .unwrap_or(s.len() - run_start);
    Some((run_start, run_start + run_len))
}
217
/// Separator for an empty pattern, in the forward direction.
///
/// The empty separator sits right behind the first character of `s`, so both returned indices
/// are the byte length of that character. Returns `None` if `s` is empty or if `limit` is zero.
fn empty_sep(s: &str, limit: usize) -> Option<(usize, usize)> {
    if limit == 0 {
        return None;
    }
    let first = s.chars().next()?;
    let char_end = first.len_utf8();
    Some((char_end, char_end))
}
229
/// Separator for an empty pattern, in the backward direction.
///
/// The empty separator sits right before the last character of `s`, so both returned indices are
/// the byte index where that character starts. Returns `None` if `s` is empty or if `limit` is
/// zero.
fn rempty_sep(s: &str, limit: usize) -> Option<(usize, usize)> {
    if limit == 0 {
        return None;
    }
    s.char_indices()
        .next_back()
        .map(|(char_start, _)| (char_start, char_start))
}
234
/// The backing storage for [StrInner]
///
/// This is currently a technical detail of the crate, users are not expected to implement this
/// trait. Use [Str] or [StrMut] type aliases.
///
/// # Safety
///
/// The storage must act "sane". But what exactly it means is not yet analyzed and may change in
/// future versions. Don't implement the trait (at least not yet).
pub unsafe trait Storage: AsRef<[u8]> + Default + Sized {
    /// A type that can be used to build the storage incrementally.
    ///
    /// For mutable storages, it may be itself. For immutable one, there needs to be a mutable
    /// counterpart that can be converted to immutable later on.
    type Creator: Default + StorageMut;

    /// Converts the creator (mutable storage) to self.
    ///
    /// In case of mutable storages, this should be identity.
    fn from_creator(creator: Self::Creator) -> Self;

    /// Splits the storage at the given byte index and creates two non-overlapping instances.
    ///
    /// The first returned instance holds the bytes before `at`, the second one the bytes from
    /// `at` on.
    fn split_at(self, at: usize) -> (Self, Self);
}
259
260unsafe impl Storage for Bytes {
261    type Creator = BytesMut;
262    fn from_creator(creator: Self::Creator) -> Self {
263        creator.freeze()
264    }
265    fn split_at(mut self, at: usize) -> (Self, Self) {
266        let right = self.split_off(at);
267        (self, right)
268    }
269}
270
271unsafe impl Storage for BytesMut {
272    type Creator = BytesMut;
273    fn from_creator(creator: Self::Creator) -> Self {
274        creator
275    }
276    fn split_at(mut self, at: usize) -> (Self, Self) {
277        let right = self.split_off(at);
278        (self, right)
279    }
280}
281
/// Trait for extra functionality of a mutable storage.
///
/// This is in addition to what an immutable storage must satisfy.
///
/// # Safety
///
/// The storage must act "sane". But what exactly it means is not yet analyzed and may change in
/// future versions. Don't implement the trait (at least not yet).
pub unsafe trait StorageMut: Storage + AsMut<[u8]> {
    /// An immutable counter-part storage.
    ///
    /// It must use this type as its [Creator][Storage::Creator], so the mutable storage can be
    /// converted into it.
    type Immutable: Storage<Creator = Self>;

    /// Adds some more bytes to the end of the storage.
    fn push_slice(&mut self, s: &[u8]);
}
297
298unsafe impl StorageMut for BytesMut {
299    type Immutable = Bytes;
300    fn push_slice(&mut self, s: &[u8]) {
301        self.extend_from_slice(s)
302    }
303}
304
/// Implementation of the [Str] and [StrMut] types.
///
/// For technical reasons, both are implemented in one go as this type. For the same reason, most
/// of the documentation can be found here. Users are expected to use the [Str] and [StrMut]
/// instead.
// Invariant: the wrapped storage always holds valid UTF8. The safe constructors validate it and
// the `Deref` implementation relies on it.
#[derive(Copy, Clone, Default)]
pub struct StrInner<S>(S);
312
313impl<S: Storage> StrInner<S> {
314    /// Creates an empty instance.
315    pub fn new() -> Self {
316        Self::default()
317    }
318
319    /// Extracts the inner byte storage.
320    pub fn into_inner(self) -> S {
321        self.0
322    }
323
324    /// Access to the inner storage.
325    pub fn inner(&self) -> &S {
326        &self.0
327    }
328
329    /// Creates an instance from an existing byte storage.
330    ///
331    /// It may fail if the content is not valid UTF8.
332    ///
333    /// A [try_from][TryFrom::try_from] may be used instead.
334    pub fn from_inner(s: S) -> Result<Self, Utf8Error<S>> {
335        match str::from_utf8(s.as_ref()) {
336            Ok(_) => Ok(Self(s)),
337            Err(e) => Err(Utf8Error { e, inner: s }),
338        }
339    }
340
341    /// Same as [from_inner][StrInner::from_inner], but without the checks.
342    ///
343    /// # Safety
344    ///
345    /// The caller must ensure content is valid UTF8.
346    pub unsafe fn from_inner_unchecked(s: S) -> Self {
347        Self(s)
348    }
349
350    /// Splits the string into two at the given index.
351    ///
352    /// # Panics
353    ///
354    /// If the index is not at char boundary.
355    pub fn split_at_bytes(self, at: usize) -> (Self, Self) {
356        assert!(self.deref().is_char_boundary(at));
357        let (l, r) = self.0.split_at(at);
358        (Self(l), Self(r))
359    }
360
361    /// Splits into whitespace separated "words".
362    ///
363    /// This acts like [split_whitespace][str::split_whitespace], but yields owned instances. It
364    /// doesn't clone the content, it just increments some reference counts.
365    pub fn split_whitespace_bytes(self) -> impl Iterator<Item = Self> {
366        BytesIter::new(self, Direction::Forward, |s| {
367            sep_find(s, char::is_whitespace)
368        })
369        .filter(|s| !s.is_empty())
370    }
371
372    /// Splits into whitespace separated "words".
373    ///
374    /// This acts like [split_ascii_whitespace][str::split_ascii_whitespace], but yields owned
375    /// instances. This doesn't clone the content, it just increments some reference counts.
376    pub fn split_ascii_whitespace_bytes(self) -> impl Iterator<Item = Self> {
377        BytesIter::new(self, Direction::Forward, |s| {
378            sep_find(s, |c| c.is_ascii() && (c as u8).is_ascii_whitespace())
379        })
380        .filter(|s| !s.is_empty())
381    }
382
383    /// Splits into lines.
384    ///
385    /// This acts like [lines][str::lines], but yields owned instances. The content is not cloned,
386    /// this just increments some reference counts.
387    pub fn lines_bytes(self) -> impl Iterator<Item = Self> {
388        if self.is_empty() {
389            Either::Left(iter::empty())
390        } else {
391            let iter = BytesIter::new(self, Direction::Forward, |s| sep_find(s, |c| c == '\n'))
392                .map(|s| match s.chars().next() {
393                    Some('\r') => s.split_at_bytes(1).1,
394                    _ => s,
395                });
396            Either::Right(iter)
397        }
398    }
399
400    /// Splits with the provided separator.
401    ///
402    /// This acts somewhat like [split][str::split], but yields owned instances. Also, it accepts
403    /// only string patters (since the `Pattern` is not stable ☹). The content is not cloned, this
404    /// just increments some reference counts.
405    pub fn split_bytes<'s>(self, sep: &'s str) -> impl Iterator<Item = Self> + 's
406    where
407        S: 's,
408    {
409        if sep.is_empty() {
410            let bulk = BytesIter::new(self, Direction::Forward, |s| empty_sep(s, usize::MAX));
411            Either::Left(iter::once(Self::default()).chain(bulk))
412        } else {
413            let sep_find = move |s: &str| s.find(sep).map(|pos| (pos, pos + sep.len()));
414            Either::Right(BytesIter::new(self, Direction::Forward, sep_find))
415        }
416    }
417
    /// Splits according to the given pattern, yielding at most `n` chunks.
    ///
    /// This acts somewhat like [splitn][str::splitn], but yields owned instances. Also, it accepts
    /// only string patterns (since the `Pattern` is not stable ☹). The content is not cloned,
    /// this just increments some reference counts.
    ///
    /// The last yielded chunk contains the whole not yet split rest of the string.
    pub fn splitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator<Item = Self> + 's
    where
        S: 's,
    {
        // TODO: This seems to work, but is ugly. Any idea how to simplify?
        if sep.is_empty() {
            if n <= 1 {
                // Either nothing at all (n == 0) or the whole string as the only chunk.
                Either::Left(Either::Left(iter::once(self).take(n)))
            } else {
                // One chunk is used up by the leading empty string yielded below.
                n -= 1;
                let bulk = BytesIter::new(self, Direction::Forward, move |s| {
                    // Once the limit drops to 0, `empty_sep` returns `None` and the iterator
                    // yields the whole rest as the last chunk.
                    n -= 1;
                    empty_sep(s, n)
                });
                Either::Left(Either::Right(iter::once(Self::default()).chain(bulk)))
            }
        } else {
            // The closure gets its own copy of `n` to count down; the `take(n)` below still
            // sees the original value.
            let sep_find = move |s: &str| {
                n -= 1;
                if n == 0 {
                    // The last allowed chunk: stop searching and yield the whole rest.
                    None
                } else {
                    s.find(sep).map(|pos| (pos, pos + sep.len()))
                }
            };
            // The `take` makes sure the closure is never called with n == 0 (it would underflow).
            Either::Right(BytesIter::new(self, Direction::Forward, sep_find).take(n))
        }
    }
451
452    /// A reverse version of [split_bytes][Self::split_bytes].
453    pub fn rsplit_bytes<'s>(self, sep: &'s str) -> impl Iterator<Item = Self> + 's
454    where
455        S: 's,
456    {
457        if sep.is_empty() {
458            let bulk = BytesIter::new(self, Direction::Backward, |s| rempty_sep(s, usize::MAX));
459            Either::Left(iter::once(Self::default()).chain(bulk))
460        } else {
461            let sep_find = move |s: &str| s.rfind(sep).map(|pos| (pos + sep.len(), pos));
462            Either::Right(BytesIter::new(self, Direction::Backward, sep_find))
463        }
464    }
465
    /// A reverse version of [splitn_bytes][Self::splitn_bytes].
    ///
    /// At most `n` chunks are yielded, starting from the end of the string. The last yielded
    /// chunk contains the whole not yet split beginning of the string.
    pub fn rsplitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator<Item = Self> + 's
    where
        S: 's,
    {
        // TODO: This seems to work, but is ugly. Any idea how to simplify?
        if sep.is_empty() {
            if n <= 1 {
                // Either nothing at all (n == 0) or the whole string as the only chunk.
                Either::Left(Either::Left(iter::once(self).take(n)))
            } else {
                // One chunk is used up by the leading empty string yielded below.
                n -= 1;
                let bulk = BytesIter::new(self, Direction::Backward, move |s| {
                    // Once the limit drops to 0, `rempty_sep` returns `None` and the iterator
                    // yields the whole rest as the last chunk.
                    n -= 1;
                    rempty_sep(s, n)
                });
                Either::Left(Either::Right(iter::once(Self::default()).chain(bulk)))
            }
        } else {
            // The closure gets its own copy of `n` to count down; the `take(n)` below still
            // sees the original value.
            let sep_find = move |s: &str| {
                n -= 1;
                if n == 0 {
                    // The last allowed chunk: stop searching and yield the whole rest.
                    None
                } else {
                    // Backward direction: (chunk start, separator start).
                    s.rfind(sep).map(|pos| (pos + sep.len(), pos))
                }
            };
            // The `take` makes sure the closure is never called with n == 0 (it would underflow).
            Either::Right(BytesIter::new(self, Direction::Backward, sep_find).take(n))
        }
    }
495}
496
497impl<S: StorageMut> StrInner<S> {
498    /// Appends a string.
499    pub fn push_str(&mut self, s: &str) {
500        self.0.push_slice(s.as_bytes());
501    }
502
503    /// Appends one character.
504    pub fn push(&mut self, c: char) {
505        self.push_str(c.encode_utf8(&mut [0; 4]));
506    }
507
508    /// Provides mutable access to the inner buffer.
509    ///
510    /// # Safety
511    ///
512    /// The caller must ensure that the content stays valid UTF8.
513    pub unsafe fn inner_mut(&mut self) -> &mut S {
514        &mut self.0
515    }
516
517    /// Turns the mutable variant into an immutable one.
518    ///
519    /// The advantage is that it can then be shared (also by small parts).
520    pub fn freeze(self) -> StrInner<S::Immutable> {
521        StrInner(S::Immutable::from_creator(self.0))
522    }
523}
524
525impl<S: Storage> Deref for StrInner<S> {
526    type Target = str;
527
528    fn deref(&self) -> &str {
529        unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
530    }
531}
532
533impl<S: StorageMut> DerefMut for StrInner<S> {
534    fn deref_mut(&mut self) -> &mut str {
535        unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
536    }
537}
538
539impl<S, T> AsRef<T> for StrInner<S>
540where
541    S: Storage,
542    str: AsRef<T>,
543{
544    fn as_ref(&self) -> &T {
545        self.deref().as_ref()
546    }
547}
548
549impl<S: StorageMut> AsMut<str> for StrInner<S> {
550    fn as_mut(&mut self) -> &mut str {
551        self.deref_mut()
552    }
553}
554
555impl<S: Storage> Borrow<str> for StrInner<S> {
556    fn borrow(&self) -> &str {
557        self.deref()
558    }
559}
560
561impl<S: StorageMut> BorrowMut<str> for StrInner<S> {
562    fn borrow_mut(&mut self) -> &mut str {
563        self.deref_mut()
564    }
565}
566
567impl<S: Storage> Debug for StrInner<S> {
568    fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
569        Debug::fmt(self.deref(), fmt)
570    }
571}
572
573impl<S: Storage> Display for StrInner<S> {
574    fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
575        Display::fmt(self.deref(), fmt)
576    }
577}
578
579impl<S: Storage> Hash for StrInner<S> {
580    fn hash<H: Hasher>(&self, state: &mut H) {
581        self.deref().hash(state)
582    }
583}
584
585impl<S, I> Index<I> for StrInner<S>
586where
587    S: Storage,
588    str: Index<I>,
589{
590    type Output = <str as Index<I>>::Output;
591
592    fn index(&self, index: I) -> &Self::Output {
593        self.deref().index(index)
594    }
595}
596
597impl<S, I> IndexMut<I> for StrInner<S>
598where
599    S: StorageMut,
600    str: IndexMut<I>,
601{
602    fn index_mut(&mut self, index: I) -> &mut Self::Output {
603        self.deref_mut().index_mut(index)
604    }
605}
606
607impl<S: StorageMut> Add<&str> for StrInner<S> {
608    type Output = Self;
609
610    fn add(mut self, rhs: &str) -> Self::Output {
611        self.push_str(rhs);
612        self
613    }
614}
615
616impl<S: StorageMut> AddAssign<&str> for StrInner<S> {
617    fn add_assign(&mut self, rhs: &str) {
618        self.push_str(rhs);
619    }
620}
621
622impl<S: StorageMut> Extend<char> for StrInner<S> {
623    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
624        for c in iter {
625            self.push(c);
626        }
627    }
628}
629
630impl<'a, S: StorageMut> Extend<&'a char> for StrInner<S> {
631    fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
632        for c in iter {
633            self.push(*c);
634        }
635    }
636}
637
// Implements building of the string from the given string-like type: `Extend`, `FromIterator`
// and `From`. The type must provide `as_ref()` that results in `&str`.
macro_rules! e {
    ($ty: ty) => {
        impl<'a, S: StorageMut> Extend<$ty> for StrInner<S> {
            fn extend<T: IntoIterator<Item = $ty>>(&mut self, iter: T) {
                for i in iter {
                    self.push_str(i.as_ref());
                }
            }
        }

        impl<'a, S> FromIterator<$ty> for StrInner<S>
        where
            S: Storage,
        {
            fn from_iter<T: IntoIterator<Item = $ty>>(iter: T) -> Self {
                // Build it in the mutable counterpart first and convert once it is complete.
                let mut creator = StrInner(S::Creator::default());
                creator.extend(iter);
                StrInner(S::from_creator(creator.0))
            }
        }

        impl<'a, S> From<$ty> for StrInner<S>
        where
            S: Storage,
        {
            fn from(s: $ty) -> Self {
                // Reuses the FromIterator implementation above with a single item.
                iter::once(s).collect()
            }
        }
    };
}

e!(String);
e!(&'a String);
e!(Box<str>);
e!(&'a str);
e!(Cow<'a, str>);
675
// Implements the conversions between the given storage type and the string wrapping it:
// the validating `TryFrom` one way and the unwrapping `From` the other way.
macro_rules! t {
    ($ty: ty) => {
        impl TryFrom<$ty> for StrInner<$ty> {
            type Error = Utf8Error<$ty>;
            fn try_from(s: $ty) -> Result<Self, Utf8Error<$ty>> {
                Self::from_inner(s)
            }
        }

        impl From<StrInner<$ty>> for $ty {
            fn from(s: StrInner<$ty>) -> $ty {
                s.0
            }
        }
    };
}

t!(Bytes);
t!(BytesMut);
695
696impl From<StrMut> for Str {
697    fn from(s: StrMut) -> Self {
698        s.freeze()
699    }
700}
701
702impl<S: Storage> FromStr for StrInner<S> {
703    type Err = Infallible;
704
705    fn from_str(s: &str) -> Result<Self, Self::Err> {
706        Ok(s.into())
707    }
708}
709
710impl<S: Storage> PartialEq for StrInner<S> {
711    fn eq(&self, other: &Self) -> bool {
712        self.deref() == other.deref()
713    }
714}
715
// The equality is the one of `str` (see the `PartialEq` implementation), which is a full
// equivalence relation.
impl<S: Storage> Eq for StrInner<S> {}
717
718impl<S: Storage> PartialOrd for StrInner<S> {
719    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
720        Some(Ord::cmp(self, other))
721    }
722}
723
724impl<S: Storage> Ord for StrInner<S> {
725    fn cmp(&self, other: &Self) -> Ordering {
726        self.deref().cmp(other.deref())
727    }
728}
729
// Implements comparisons (equality and ordering) between the string and the given string-like
// type, in both directions. Both sides are dereferenced to `str` and compared as such, so the
// ordering is always `Some`.
macro_rules! c {
    ($ty: ty) => {
        impl<'a, S: Storage> PartialEq<$ty> for StrInner<S> {
            fn eq(&self, other: &$ty) -> bool {
                self.deref() == other.deref()
            }
        }

        impl<'a, S: Storage> PartialEq<StrInner<S>> for $ty {
            fn eq(&self, other: &StrInner<S>) -> bool {
                self.deref() == other.deref()
            }
        }

        impl<'a, S: Storage> PartialOrd<$ty> for StrInner<S> {
            fn partial_cmp(&self, other: &$ty) -> Option<Ordering> {
                Some(self.deref().cmp(other.deref()))
            }
        }

        impl<'a, S: Storage> PartialOrd<StrInner<S>> for $ty {
            fn partial_cmp(&self, other: &StrInner<S>) -> Option<Ordering> {
                Some(self.deref().cmp(other.deref()))
            }
        }
    };
}

c!(&'a str);
c!(&'a mut str);
c!(String);
c!(Box<str>);
c!(Cow<'a, str>);
763
764impl<S: StorageMut> Write for StrInner<S> {
765    fn write_str(&mut self, s: &str) -> FmtResult {
766        self.push_str(s);
767        Ok(())
768    }
769}
770
/// The [format] macro, but returning [Str].
///
/// The string is built by [format_bytes_mut] and frozen afterwards.
///
/// # Examples
///
/// ```
/// use bytes_utils::{format_bytes, Str};
/// let s: Str = format_bytes!("Hello {}", "world");
/// assert_eq!("Hello world", s);
/// ```
#[macro_export]
macro_rules! format_bytes {
    ($($arg: tt)*) => {
        $crate::format_bytes_mut!($($arg)*).freeze()
    }
}
786
/// The [format] macro, but returning [StrMut].
///
/// The formatted text is written into a fresh, initially empty [StrMut].
///
/// # Panics
///
/// If the formatting returns an error (the result of the `write!` is unwrapped).
///
/// # Examples
///
/// ```
/// use bytes_utils::{format_bytes_mut, StrMut};
/// let s: StrMut = format_bytes_mut!("Hello {}", "world");
/// assert_eq!("Hello world", s);
/// ```
#[macro_export]
macro_rules! format_bytes_mut {
    ($($arg: tt)*) => {{
        // The trait needs to be in scope for the `write!` below to work on the `StrMut`.
        use std::fmt::Write;
        let mut buf = $crate::StrMut::default();
        write!(buf, $($arg)*).unwrap();
        buf
    }}
}
805
806// TODO: Serde
807
/// An immutable variant of [Bytes]-backed string.
///
/// The methods and their documentation are on [StrInner], but users are mostly expected to use
/// this and the [StrMut] aliases.
///
/// Additionally, the [slice][Str::slice] and [slice_ref][Str::slice_ref] methods are available
/// only on this variant.
pub type Str = StrInner<Bytes>;
813
814impl Str {
815    /// Extracts a subslice of the string as an owned [Str].
816    ///
817    /// # Panics
818    ///
819    /// If the byte indices in the range are not on char boundaries.
820    pub fn slice<R>(&self, range: R) -> Str
821    where
822        str: Index<R, Output = str>,
823    {
824        self.slice_ref(&self[range])
825    }
826
827    /// Extracts owned representation of the slice passed.
828    ///
829    /// This method accepts a string sub-slice of `self`. It then extracts the slice but as the
830    /// [Str] type. This makes it easier to use "ordinary" string parsing/manipulation and then go
831    /// back to holding the [Bytes]-based representation.
832    ///
833    /// This is zero-copy, the common part will be shared by reference counting.
834    ///
835    /// # Panics
836    ///
837    /// If the provided slice is not a sub-slice of `self`. This is checked based on address of the
838    /// slice, not on the content.
839    ///
840    /// # Example
841    ///
842    /// ```rust
843    /// # use bytes_utils::Str;
844    /// let owned = Str::from("Hello World");
845    /// let borrowed_mid: &str = &owned[2..5];
846    ///
847    /// let mid: Str = owned.slice_ref(borrowed_mid);
848    /// assert_eq!("Hello World", owned);
849    /// assert_eq!("llo", mid);
850    /// ```
851    pub fn slice_ref(&self, subslice: &str) -> Self {
852        let sub = self.0.slice_ref(subslice.as_bytes());
853        Self(sub)
854    }
855}
856
/// A mutable variant of [BytesMut]-backed string.
///
/// Unlike [Str], this one allows modifications (mostly additions), but also doesn't allow
/// overlapping/shared chunks.
///
/// This is internally backed by the [StrInner] type, so the documentation of the methods are on
/// that. It can be turned into [Str] by [freeze][StrInner::freeze].
pub type StrMut = StrInner<BytesMut>;
865
866impl StrMut {
867    /// Splits and returns the part of already built string, but keeps the extra capacity.
868    pub fn split_built(&mut self) -> StrMut {
869        StrInner(self.0.split())
870    }
871}
872
#[cfg(test)]
mod tests {
    use std::panic;

    use itertools::Itertools;
    use proptest::prelude::*;

    use super::*;

    // The separator is found behind a multi-byte character; the indices must be in bytes.
    #[test]
    fn split_w_byte_index() {
        let v = Str::from("😈 ").split_whitespace_bytes().collect_vec();
        assert_eq!(1, v.len());
        assert_eq!("😈", v[0]);
    }

    // The whole string is the separator -> empty chunks on both sides.
    #[test]
    fn split_same() {
        let v = Str::from("a").split_bytes("a").collect_vec();
        assert_eq!(2, v.len());
        assert_eq!("", v[0]);
        assert_eq!("", v[1]);
    }

    // An empty pattern splits around every character, including both ends.
    #[test]
    fn split_empty_pat() {
        let v = Str::from("a").split_bytes("").collect_vec();
        assert_eq!(3, v.len());
        assert_eq!("", v[0]);
        assert_eq!("a", v[1]);
        assert_eq!("", v[2]);
    }

    // Slicing in the middle of a multi-byte character must panic.
    #[test]
    fn slice_checks_char_boundaries() {
        let v = Str::from("😈");
        assert_eq!(4, v.len());
        panic::catch_unwind(|| v.slice(1..)).unwrap_err();
    }

    #[test]
    fn split_at_bytes_mid() {
        let v = Str::from("hello");
        let (l, r) = v.split_at_bytes(2);
        assert_eq!("he", l);
        assert_eq!("llo", r);
    }

    #[test]
    fn split_at_bytes_begin() {
        let v = Str::from("hello");
        let (l, r) = v.split_at_bytes(0);
        assert_eq!("", l);
        assert_eq!("hello", r);
    }

    #[test]
    fn split_at_bytes_end() {
        let v = Str::from("hello");
        let (l, r) = v.split_at_bytes(5);
        assert_eq!("hello", l);
        assert_eq!("", r);
    }

    // Splitting in the middle of a multi-byte character must panic.
    #[test]
    fn split_at_bytes_panic() {
        let v = Str::from("😈");
        assert_eq!(4, v.len());
        panic::catch_unwind(|| v.split_at_bytes(2)).unwrap_err();
    }

    // The property tests below compare the owned splitting methods with their standard library
    // counterparts on generated inputs. The `zip_eq` panics if the lengths of the two iterators
    // differ, so the number of yielded chunks is checked too.
    proptest! {
        #[test]
        fn split_whitespace(s: String) {
            let bstring = Str::from(&s);

            let bw = bstring.split_whitespace_bytes();
            let sw = s.split_whitespace();

            for (b, s) in bw.zip_eq(sw) {
                prop_assert_eq!(b, s);
            }
        }

        #[test]
        fn split_ascii_whitespace(s: String) {
            let bstring = Str::from(&s);

            let bw = bstring.split_ascii_whitespace_bytes();
            let sw = s.split_ascii_whitespace();

            for (b, s) in bw.zip_eq(sw) {
                prop_assert_eq!(b, s);
            }
        }

        #[test]
        fn lines(s: String) {
            let bstring = Str::from(&s);

            let bl = bstring.lines_bytes();
            let sl = s.lines();

            for (b, s) in bl.zip_eq(sl) {
                prop_assert_eq!(b, s);
            }
        }

        #[test]
        fn split(s: String, pat: String) {
            let bstring = Str::from(&s);

            let bs = bstring.split_bytes(&pat);
            let ss = s.split(&pat);

            for (b, s) in bs.zip_eq(ss) {
                prop_assert_eq!(b, s);
            }
        }

        #[test]
        fn split_n(s: String, pat: String, n in 0..5usize) {
            let bstring = Str::from(&s);

            let bs = bstring.splitn_bytes(n, &pat);
            let ss = s.splitn(n, &pat);

            for (b, s) in bs.zip_eq(ss) {
                prop_assert_eq!(b, s);
            }
        }

        #[test]
        fn rsplit(s: String, pat: String) {
            let bstring = Str::from(&s);

            let bs = bstring.rsplit_bytes(&pat);
            let ss = s.rsplit(&pat);

            for (b, s) in bs.zip_eq(ss) {
                prop_assert_eq!(b, s);
            }
        }

        #[test]
        fn rsplit_n(s: String, pat: String, n in 0..5usize) {
            let bstring = Str::from(&s);

            let bs = bstring.rsplitn_bytes(n, &pat);
            let ss = s.rsplitn(n, &pat);

            for (b, s) in bs.zip_eq(ss) {
                prop_assert_eq!(b, s);
            }
        }
    }
}