1use std::borrow::{Borrow, BorrowMut, Cow};
72use std::cmp::Ordering;
73use std::convert::{Infallible, TryFrom};
74use std::error::Error;
75use std::fmt::{Debug, Display, Formatter, Result as FmtResult, Write};
76use std::hash::{Hash, Hasher};
77use std::iter::{self, FromIterator};
78use std::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut};
79use std::str::{self, FromStr};
80
81use bytes::{Bytes, BytesMut};
82use either::Either;
83
/// Error produced when a storage turns out not to contain valid UTF-8.
///
/// It pairs the [`str::Utf8Error`] describing the problem with the storage
/// that was rejected, so the caller can recover the input through
/// `into_inner`.
#[derive(Clone, Copy, Debug)]
pub struct Utf8Error<S> {
    /// Description of what is wrong with the bytes.
    e: str::Utf8Error,
    /// The storage that failed the validation.
    inner: S,
}
90
91impl<S> Utf8Error<S> {
92 pub fn into_inner(self) -> S {
94 self.inner
95 }
96
97 pub fn utf8_error(&self) -> str::Utf8Error {
99 self.e
100 }
101}
102
103impl<S> Display for Utf8Error<S> {
104 fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
105 Display::fmt(&self.e, fmt)
106 }
107}
108
109impl<S: Debug> Error for Utf8Error<S> {}
110
/// The end of the string a [`BytesIter`] consumes from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Direction {
    /// Chunks are cut off the front; iteration continues with what follows
    /// the separator.
    Forward,

    /// Chunks are cut off the back; iteration continues with what precedes
    /// the separator.
    Backward,
}
122
123#[derive(Clone, Debug)]
128pub struct BytesIter<S, F> {
129 bytes: Option<S>,
130 extract: F,
131 direction: Direction,
132}
133
134impl<S, F> BytesIter<S, F>
135where
136 S: Storage,
137 F: FnMut(&str) -> Option<(usize, usize)>,
138{
139 pub fn new(s: StrInner<S>, direction: Direction, ext: F) -> Self {
154 Self {
155 bytes: Some(s.0),
156 extract: ext,
157 direction,
158 }
159 }
160}
161
162impl<S, F> Iterator for BytesIter<S, F>
163where
164 S: Storage,
165 F: FnMut(&str) -> Option<(usize, usize)>,
166{
167 type Item = StrInner<S>;
168
169 fn next(&mut self) -> Option<StrInner<S>> {
170 let storage = self.bytes.take()?;
171 let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) };
173 fn split<S: Storage>(storage: S, left: usize, right: usize) -> (S, S) {
174 let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) };
175 assert!(whole_str.is_char_boundary(left));
178 assert!(whole_str.is_char_boundary(right));
179
180 let (with_sep, end) = storage.split_at(right);
182 let (start, _sep) = with_sep.split_at(left);
183 (start, end)
184 }
185 match ((self.extract)(whole_str), self.direction) {
186 (Some((chunk_end, sep_end)), Direction::Forward) => {
187 assert!(chunk_end <= sep_end);
188 let (start, end) = split(storage, chunk_end, sep_end);
189
190 self.bytes = Some(end);
191 Some(StrInner(start))
192 }
193 (Some((chunk_start, sep_start)), Direction::Backward) => {
194 assert!(sep_start <= chunk_start);
195 let (start, end) = split(storage, sep_start, chunk_start);
196
197 self.bytes = Some(start);
198 Some(StrInner(end))
199 }
200 (None, _) => {
201 Some(StrInner(storage))
203 }
204 }
205 }
206}
207
/// Finds the first run of separator characters in `s`.
///
/// Returns the byte offsets `(start, end)` of the first maximal run of
/// characters for which `is_sep` is true, or `None` if there is no such
/// character. The run extends to the end of `s` if only separators follow.
fn sep_find<F: Fn(char) -> bool>(s: &str, is_sep: F) -> Option<(usize, usize)> {
    let run_start = s.find(&is_sep)?;
    let tail = &s[run_start..];
    let run_len = match tail.find(|c| !is_sep(c)) {
        Some(offset) => offset,
        None => tail.len(),
    };
    Some((run_start, run_start + run_len))
}
217
/// Locates the zero-width "separator" right after the first character of `s`.
///
/// Returns `(end, end)` where `end` is the byte offset at which the first
/// character ends. Returns `None` if `s` is empty or `limit` is zero.
fn empty_sep(s: &str, limit: usize) -> Option<(usize, usize)> {
    if limit == 0 {
        return None;
    }
    let first = s.chars().next()?;
    let char_end = first.len_utf8();
    Some((char_end, char_end))
}
229
/// Locates the zero-width "separator" right before the last character of `s`.
///
/// Returns `(start, start)` where `start` is the byte offset at which the
/// last character begins. Returns `None` if `s` is empty or `limit` is zero.
fn rempty_sep(s: &str, limit: usize) -> Option<(usize, usize)> {
    if limit == 0 {
        return None;
    }
    let (char_start, _) = s.char_indices().next_back()?;
    Some((char_start, char_start))
}
234
235pub unsafe trait Storage: AsRef<[u8]> + Default + Sized {
245 type Creator: Default + StorageMut;
250
251 fn from_creator(creator: Self::Creator) -> Self;
255
256 fn split_at(self, at: usize) -> (Self, Self);
258}
259
260unsafe impl Storage for Bytes {
261 type Creator = BytesMut;
262 fn from_creator(creator: Self::Creator) -> Self {
263 creator.freeze()
264 }
265 fn split_at(mut self, at: usize) -> (Self, Self) {
266 let right = self.split_off(at);
267 (self, right)
268 }
269}
270
271unsafe impl Storage for BytesMut {
272 type Creator = BytesMut;
273 fn from_creator(creator: Self::Creator) -> Self {
274 creator
275 }
276 fn split_at(mut self, at: usize) -> (Self, Self) {
277 let right = self.split_off(at);
278 (self, right)
279 }
280}
281
282pub unsafe trait StorageMut: Storage + AsMut<[u8]> {
291 type Immutable: Storage<Creator = Self>;
293
294 fn push_slice(&mut self, s: &[u8]);
296}
297
298unsafe impl StorageMut for BytesMut {
299 type Immutable = Bytes;
300 fn push_slice(&mut self, s: &[u8]) {
301 self.extend_from_slice(s)
302 }
303}
304
/// A string stored in a byte storage `S`.
///
/// The wrapped bytes are treated as valid UTF-8 throughout this file: the
/// safe constructor validates them and the `Deref` implementation converts
/// them to `str` without checking again.
#[derive(Clone, Copy, Default)]
pub struct StrInner<S>(S);
312
313impl<S: Storage> StrInner<S> {
314 pub fn new() -> Self {
316 Self::default()
317 }
318
319 pub fn into_inner(self) -> S {
321 self.0
322 }
323
324 pub fn inner(&self) -> &S {
326 &self.0
327 }
328
329 pub fn from_inner(s: S) -> Result<Self, Utf8Error<S>> {
335 match str::from_utf8(s.as_ref()) {
336 Ok(_) => Ok(Self(s)),
337 Err(e) => Err(Utf8Error { e, inner: s }),
338 }
339 }
340
341 pub unsafe fn from_inner_unchecked(s: S) -> Self {
347 Self(s)
348 }
349
350 pub fn split_at_bytes(self, at: usize) -> (Self, Self) {
356 assert!(self.deref().is_char_boundary(at));
357 let (l, r) = self.0.split_at(at);
358 (Self(l), Self(r))
359 }
360
361 pub fn split_whitespace_bytes(self) -> impl Iterator<Item = Self> {
366 BytesIter::new(self, Direction::Forward, |s| {
367 sep_find(s, char::is_whitespace)
368 })
369 .filter(|s| !s.is_empty())
370 }
371
372 pub fn split_ascii_whitespace_bytes(self) -> impl Iterator<Item = Self> {
377 BytesIter::new(self, Direction::Forward, |s| {
378 sep_find(s, |c| c.is_ascii() && (c as u8).is_ascii_whitespace())
379 })
380 .filter(|s| !s.is_empty())
381 }
382
383 pub fn lines_bytes(self) -> impl Iterator<Item = Self> {
388 if self.is_empty() {
389 Either::Left(iter::empty())
390 } else {
391 let iter = BytesIter::new(self, Direction::Forward, |s| sep_find(s, |c| c == '\n'))
392 .map(|s| match s.chars().next() {
393 Some('\r') => s.split_at_bytes(1).1,
394 _ => s,
395 });
396 Either::Right(iter)
397 }
398 }
399
400 pub fn split_bytes<'s>(self, sep: &'s str) -> impl Iterator<Item = Self> + 's
406 where
407 S: 's,
408 {
409 if sep.is_empty() {
410 let bulk = BytesIter::new(self, Direction::Forward, |s| empty_sep(s, usize::MAX));
411 Either::Left(iter::once(Self::default()).chain(bulk))
412 } else {
413 let sep_find = move |s: &str| s.find(sep).map(|pos| (pos, pos + sep.len()));
414 Either::Right(BytesIter::new(self, Direction::Forward, sep_find))
415 }
416 }
417
418 pub fn splitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator<Item = Self> + 's
424 where
425 S: 's,
426 {
427 if sep.is_empty() {
429 if n <= 1 {
430 Either::Left(Either::Left(iter::once(self).take(n)))
431 } else {
432 n -= 1;
433 let bulk = BytesIter::new(self, Direction::Forward, move |s| {
434 n -= 1;
435 empty_sep(s, n)
436 });
437 Either::Left(Either::Right(iter::once(Self::default()).chain(bulk)))
438 }
439 } else {
440 let sep_find = move |s: &str| {
441 n -= 1;
442 if n == 0 {
443 None
444 } else {
445 s.find(sep).map(|pos| (pos, pos + sep.len()))
446 }
447 };
448 Either::Right(BytesIter::new(self, Direction::Forward, sep_find).take(n))
449 }
450 }
451
452 pub fn rsplit_bytes<'s>(self, sep: &'s str) -> impl Iterator<Item = Self> + 's
454 where
455 S: 's,
456 {
457 if sep.is_empty() {
458 let bulk = BytesIter::new(self, Direction::Backward, |s| rempty_sep(s, usize::MAX));
459 Either::Left(iter::once(Self::default()).chain(bulk))
460 } else {
461 let sep_find = move |s: &str| s.rfind(sep).map(|pos| (pos + sep.len(), pos));
462 Either::Right(BytesIter::new(self, Direction::Backward, sep_find))
463 }
464 }
465
466 pub fn rsplitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator<Item = Self> + 's
468 where
469 S: 's,
470 {
471 if sep.is_empty() {
473 if n <= 1 {
474 Either::Left(Either::Left(iter::once(self).take(n)))
475 } else {
476 n -= 1;
477 let bulk = BytesIter::new(self, Direction::Backward, move |s| {
478 n -= 1;
479 rempty_sep(s, n)
480 });
481 Either::Left(Either::Right(iter::once(Self::default()).chain(bulk)))
482 }
483 } else {
484 let sep_find = move |s: &str| {
485 n -= 1;
486 if n == 0 {
487 None
488 } else {
489 s.rfind(sep).map(|pos| (pos + sep.len(), pos))
490 }
491 };
492 Either::Right(BytesIter::new(self, Direction::Backward, sep_find).take(n))
493 }
494 }
495}
496
497impl<S: StorageMut> StrInner<S> {
498 pub fn push_str(&mut self, s: &str) {
500 self.0.push_slice(s.as_bytes());
501 }
502
503 pub fn push(&mut self, c: char) {
505 self.push_str(c.encode_utf8(&mut [0; 4]));
506 }
507
508 pub unsafe fn inner_mut(&mut self) -> &mut S {
514 &mut self.0
515 }
516
517 pub fn freeze(self) -> StrInner<S::Immutable> {
521 StrInner(S::Immutable::from_creator(self.0))
522 }
523}
524
525impl<S: Storage> Deref for StrInner<S> {
526 type Target = str;
527
528 fn deref(&self) -> &str {
529 unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
530 }
531}
532
533impl<S: StorageMut> DerefMut for StrInner<S> {
534 fn deref_mut(&mut self) -> &mut str {
535 unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
536 }
537}
538
539impl<S, T> AsRef<T> for StrInner<S>
540where
541 S: Storage,
542 str: AsRef<T>,
543{
544 fn as_ref(&self) -> &T {
545 self.deref().as_ref()
546 }
547}
548
549impl<S: StorageMut> AsMut<str> for StrInner<S> {
550 fn as_mut(&mut self) -> &mut str {
551 self.deref_mut()
552 }
553}
554
555impl<S: Storage> Borrow<str> for StrInner<S> {
556 fn borrow(&self) -> &str {
557 self.deref()
558 }
559}
560
561impl<S: StorageMut> BorrowMut<str> for StrInner<S> {
562 fn borrow_mut(&mut self) -> &mut str {
563 self.deref_mut()
564 }
565}
566
567impl<S: Storage> Debug for StrInner<S> {
568 fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
569 Debug::fmt(self.deref(), fmt)
570 }
571}
572
573impl<S: Storage> Display for StrInner<S> {
574 fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
575 Display::fmt(self.deref(), fmt)
576 }
577}
578
579impl<S: Storage> Hash for StrInner<S> {
580 fn hash<H: Hasher>(&self, state: &mut H) {
581 self.deref().hash(state)
582 }
583}
584
585impl<S, I> Index<I> for StrInner<S>
586where
587 S: Storage,
588 str: Index<I>,
589{
590 type Output = <str as Index<I>>::Output;
591
592 fn index(&self, index: I) -> &Self::Output {
593 self.deref().index(index)
594 }
595}
596
597impl<S, I> IndexMut<I> for StrInner<S>
598where
599 S: StorageMut,
600 str: IndexMut<I>,
601{
602 fn index_mut(&mut self, index: I) -> &mut Self::Output {
603 self.deref_mut().index_mut(index)
604 }
605}
606
607impl<S: StorageMut> Add<&str> for StrInner<S> {
608 type Output = Self;
609
610 fn add(mut self, rhs: &str) -> Self::Output {
611 self.push_str(rhs);
612 self
613 }
614}
615
616impl<S: StorageMut> AddAssign<&str> for StrInner<S> {
617 fn add_assign(&mut self, rhs: &str) {
618 self.push_str(rhs);
619 }
620}
621
622impl<S: StorageMut> Extend<char> for StrInner<S> {
623 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
624 for c in iter {
625 self.push(c);
626 }
627 }
628}
629
630impl<'a, S: StorageMut> Extend<&'a char> for StrInner<S> {
631 fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
632 for c in iter {
633 self.push(*c);
634 }
635 }
636}
637
638macro_rules! e {
639 ($ty: ty) => {
640 impl<'a, S: StorageMut> Extend<$ty> for StrInner<S> {
641 fn extend<T: IntoIterator<Item = $ty>>(&mut self, iter: T) {
642 for i in iter {
643 self.push_str(i.as_ref());
644 }
645 }
646 }
647
648 impl<'a, S> FromIterator<$ty> for StrInner<S>
649 where
650 S: Storage,
651 {
652 fn from_iter<T: IntoIterator<Item = $ty>>(iter: T) -> Self {
653 let mut creator = StrInner(S::Creator::default());
654 creator.extend(iter);
655 StrInner(S::from_creator(creator.0))
656 }
657 }
658
659 impl<'a, S> From<$ty> for StrInner<S>
660 where
661 S: Storage,
662 {
663 fn from(s: $ty) -> Self {
664 iter::once(s).collect()
665 }
666 }
667 };
668}
669
670e!(String);
671e!(&'a String);
672e!(Box<str>);
673e!(&'a str);
674e!(Cow<'a, str>);
675
676macro_rules! t {
677 ($ty: ty) => {
678 impl TryFrom<$ty> for StrInner<$ty> {
679 type Error = Utf8Error<$ty>;
680 fn try_from(s: $ty) -> Result<Self, Utf8Error<$ty>> {
681 Self::from_inner(s)
682 }
683 }
684
685 impl From<StrInner<$ty>> for $ty {
686 fn from(s: StrInner<$ty>) -> $ty {
687 s.0
688 }
689 }
690 };
691}
692
693t!(Bytes);
694t!(BytesMut);
695
696impl From<StrMut> for Str {
697 fn from(s: StrMut) -> Self {
698 s.freeze()
699 }
700}
701
702impl<S: Storage> FromStr for StrInner<S> {
703 type Err = Infallible;
704
705 fn from_str(s: &str) -> Result<Self, Self::Err> {
706 Ok(s.into())
707 }
708}
709
710impl<S: Storage> PartialEq for StrInner<S> {
711 fn eq(&self, other: &Self) -> bool {
712 self.deref() == other.deref()
713 }
714}
715
716impl<S: Storage> Eq for StrInner<S> {}
717
718impl<S: Storage> PartialOrd for StrInner<S> {
719 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
720 Some(Ord::cmp(self, other))
721 }
722}
723
724impl<S: Storage> Ord for StrInner<S> {
725 fn cmp(&self, other: &Self) -> Ordering {
726 self.deref().cmp(other.deref())
727 }
728}
729
730macro_rules! c {
731 ($ty: ty) => {
732 impl<'a, S: Storage> PartialEq<$ty> for StrInner<S> {
733 fn eq(&self, other: &$ty) -> bool {
734 self.deref() == other.deref()
735 }
736 }
737
738 impl<'a, S: Storage> PartialEq<StrInner<S>> for $ty {
739 fn eq(&self, other: &StrInner<S>) -> bool {
740 self.deref() == other.deref()
741 }
742 }
743
744 impl<'a, S: Storage> PartialOrd<$ty> for StrInner<S> {
745 fn partial_cmp(&self, other: &$ty) -> Option<Ordering> {
746 Some(self.deref().cmp(other.deref()))
747 }
748 }
749
750 impl<'a, S: Storage> PartialOrd<StrInner<S>> for $ty {
751 fn partial_cmp(&self, other: &StrInner<S>) -> Option<Ordering> {
752 Some(self.deref().cmp(other.deref()))
753 }
754 }
755 };
756}
757
758c!(&'a str);
759c!(&'a mut str);
760c!(String);
761c!(Box<str>);
762c!(Cow<'a, str>);
763
764impl<S: StorageMut> Write for StrInner<S> {
765 fn write_str(&mut self, s: &str) -> FmtResult {
766 self.push_str(s);
767 Ok(())
768 }
769}
770
/// Like `format!`, but produces the immutable string type of this crate.
///
/// The text is formatted with [`format_bytes_mut!`] and the result is frozen.
#[macro_export]
macro_rules! format_bytes {
    ($($arg: tt)*) => {
        $crate::format_bytes_mut!($($arg)*).freeze()
    };
}
786
/// Like `format!`, but produces the mutable string type of this crate.
///
/// Panics if one of the formatted values reports a formatting error.
#[macro_export]
macro_rules! format_bytes_mut {
    ($($arg: tt)*) => {{
        let mut buf = $crate::StrMut::default();
        // Same call `write!` expands to, spelled out with the trait path.
        ::std::fmt::Write::write_fmt(&mut buf, ::std::format_args!($($arg)*)).unwrap();
        buf
    }};
}
805
806pub type Str = StrInner<Bytes>;
813
814impl Str {
815 pub fn slice<R>(&self, range: R) -> Str
821 where
822 str: Index<R, Output = str>,
823 {
824 self.slice_ref(&self[range])
825 }
826
827 pub fn slice_ref(&self, subslice: &str) -> Self {
852 let sub = self.0.slice_ref(subslice.as_bytes());
853 Self(sub)
854 }
855}
856
857pub type StrMut = StrInner<BytesMut>;
865
866impl StrMut {
867 pub fn split_built(&mut self) -> StrMut {
869 StrInner(self.0.split())
870 }
871}
872
#[cfg(test)]
mod tests {
    use std::panic;

    use itertools::Itertools;
    use proptest::prelude::*;

    use super::*;

    /// Offsets are in bytes, not characters: the separator follows a
    /// four-byte character.
    #[test]
    fn split_w_byte_index() {
        let input = Str::from("😈 ");
        let pieces = input.split_whitespace_bytes().collect_vec();
        assert_eq!(1, pieces.len());
        assert_eq!("😈", pieces[0]);
    }

    /// A string equal to the separator splits into two empty pieces.
    #[test]
    fn split_same() {
        let input = Str::from("a");
        let pieces = input.split_bytes("a").collect_vec();
        assert_eq!(2, pieces.len());
        assert_eq!("", pieces[0]);
        assert_eq!("", pieces[1]);
    }

    /// The empty pattern matches around every character.
    #[test]
    fn split_empty_pat() {
        let input = Str::from("a");
        let pieces = input.split_bytes("").collect_vec();
        assert_eq!(3, pieces.len());
        assert_eq!("", pieces[0]);
        assert_eq!("a", pieces[1]);
        assert_eq!("", pieces[2]);
    }

    /// Slicing inside a multi-byte character must panic.
    #[test]
    fn slice_checks_char_boundaries() {
        let devil = Str::from("😈");
        assert_eq!(4, devil.len());
        panic::catch_unwind(|| devil.slice(1..)).unwrap_err();
    }

    #[test]
    fn split_at_bytes_mid() {
        let word = Str::from("hello");
        let (left, right) = word.split_at_bytes(2);
        assert_eq!("he", left);
        assert_eq!("llo", right);
    }

    #[test]
    fn split_at_bytes_begin() {
        let word = Str::from("hello");
        let (left, right) = word.split_at_bytes(0);
        assert_eq!("", left);
        assert_eq!("hello", right);
    }

    #[test]
    fn split_at_bytes_end() {
        let word = Str::from("hello");
        let (left, right) = word.split_at_bytes(5);
        assert_eq!("hello", left);
        assert_eq!("", right);
    }

    /// Splitting inside a multi-byte character must panic.
    #[test]
    fn split_at_bytes_panic() {
        let devil = Str::from("😈");
        assert_eq!(4, devil.len());
        panic::catch_unwind(|| devil.split_at_bytes(2)).unwrap_err();
    }

    // Each property compares an owned splitting method with its `str`
    // counterpart; `zip_eq` additionally panics if the number of pieces differs.
    proptest! {
        #[test]
        fn split_whitespace(s: String) {
            let owned = Str::from(&s);

            let ours = owned.split_whitespace_bytes();
            let theirs = s.split_whitespace();

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }

        #[test]
        fn split_ascii_whitespace(s: String) {
            let owned = Str::from(&s);

            let ours = owned.split_ascii_whitespace_bytes();
            let theirs = s.split_ascii_whitespace();

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }

        #[test]
        fn lines(s: String) {
            let owned = Str::from(&s);

            let ours = owned.lines_bytes();
            let theirs = s.lines();

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }

        #[test]
        fn split(s: String, pat: String) {
            let owned = Str::from(&s);

            let ours = owned.split_bytes(&pat);
            let theirs = s.split(&pat);

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }

        #[test]
        fn split_n(s: String, pat: String, n in 0..5usize) {
            let owned = Str::from(&s);

            let ours = owned.splitn_bytes(n, &pat);
            let theirs = s.splitn(n, &pat);

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }

        #[test]
        fn rsplit(s: String, pat: String) {
            let owned = Str::from(&s);

            let ours = owned.rsplit_bytes(&pat);
            let theirs = s.rsplit(&pat);

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }

        #[test]
        fn rsplit_n(s: String, pat: String, n in 0..5usize) {
            let owned = Str::from(&s);

            let ours = owned.rsplitn_bytes(n, &pat);
            let theirs = s.rsplitn(n, &pat);

            for (got, expected) in ours.zip_eq(theirs) {
                prop_assert_eq!(got, expected);
            }
        }
    }
}