octseq/
str.rs

//! Strings atop octet sequences.
//!
//! This module provides the type [`Str<Octets>`] that guarantees the same
//! invariants – namely that the content is a UTF-8 encoded string – as
//! the standard library’s [`str`] and [`String`] types but atop a generic
//! octet sequence.
7
8use core::{borrow, cmp, fmt, hash, ops, str};
9use core::convert::Infallible;
10use crate::builder::{
11    BuilderAppendError, EmptyBuilder, FreezeBuilder, FromBuilder,
12    OctetsBuilder, Truncate, infallible
13};
14use crate::octets::OctetsFrom;
15
16
17//------------ Str -----------------------------------------------------------
18
/// A fixed length UTF-8 encoded string atop an octet sequence.
///
/// The type wraps a single value of type `Octets` whose content is
/// expected to be a correctly encoded UTF-8 string.
///
/// The type is `#[repr(transparent)]`, i.e., it is guaranteed to have
/// exactly the same layout as the wrapped `Octets`. Code that turns a
/// reference to octets – such as the bytes of a `str` – into a reference
/// to a `Str` through a pointer cast depends on this guarantee; without
/// the attribute the layout of the struct would be unspecified.
#[derive(Clone, Default)]
#[repr(transparent)]
pub struct Str<Octets: ?Sized>(Octets);
22
23impl<Octets> Str<Octets> {
24    /// Converts a sequence of octets into a string.
25    pub fn from_utf8(octets: Octets) -> Result<Self, FromUtf8Error<Octets>>
26    where Octets: AsRef<[u8]> {
27        if let Err(error) = str::from_utf8(octets.as_ref()) {
28            Err(FromUtf8Error { octets, error })
29        }
30        else {
31            Ok(Self(octets))
32        }
33    }
34
35    /// Converts a sequence of octets into a string without checking.
36    ///
37    /// # Safety
38    ///
39    /// The caller must make sure that the contents of `octets` is a
40    /// correctly encoded UTF-8 string.
41    pub unsafe fn from_utf8_unchecked(octets: Octets) -> Self {
42        Self(octets)
43    }
44
45    /// Creates a value by copying the content of a [`str`].
46    pub fn try_copy_from_str(
47        s: &str
48    ) -> Result<Self, BuilderAppendError<Octets>>
49    where
50        Octets: FromBuilder,
51        <Octets as FromBuilder>::Builder: EmptyBuilder,
52    {
53        let mut res = <Octets as FromBuilder>::Builder::with_capacity(s.len());
54        res.append_slice(s.as_bytes())?;
55        Ok(unsafe { Self::from_utf8_unchecked(res.freeze()) })
56    }
57
58    /// Creates a value by copying the content of a [`str`].
59    ///
60    /// This function is identical to
61    /// [`try_copy_from_str`][Self::try_copy_from_str] for octets types
62    /// of unlimited capacity.
63    pub fn copy_from_str(s: &str) -> Self
64    where
65        Octets: FromBuilder,
66        <Octets as FromBuilder>::Builder: EmptyBuilder,
67        <<Octets as FromBuilder>::Builder as OctetsBuilder>::AppendError:
68            Into<Infallible>,
69    {
70        infallible(Self::try_copy_from_str(s))
71    }
72}
73
74impl Str<[u8]> {
75    /// Creates a string value from a UTF-8 slice.
76    pub fn from_utf8_slice(
77        slice: &[u8]
78    ) -> Result<&Self, FromUtf8Error<&[u8]>> {
79        match str::from_utf8(slice) {
80            Ok(s) => Ok(Self::from_str(s)),
81            Err(error) => Err(FromUtf8Error { octets: slice, error })
82        }
83    }
84
85    /// Creates a string value from a string slice.
86    #[allow(clippy::should_implement_trait)]
87    pub fn from_str(s: &str) -> &Self {
88        unsafe { &*(s as *const str as *const Self) }
89    }
90}
91
#[cfg(feature = "std")]
impl Str<std::vec::Vec<u8>> {
    /// Creates a string atop a vec from a standard library string.
    ///
    /// The string’s buffer is taken over via `String::into_bytes`; no
    /// validation is performed.
    pub fn from_string(s: std::string::String) -> Self {
        let octets = s.into_bytes();

        // SAFETY: The octets are the buffer of a `String` and thus valid
        // UTF-8.
        unsafe { Self::from_utf8_unchecked(octets) }
    }
}
98
99impl<Octets> Str<Octets> {
100    /// Converts the string into its raw octets.
101    pub fn into_octets(self) -> Octets {
102        self.0
103    }
104}
105
106impl<Octets: ?Sized> Str<Octets> {
107    /// Returns the string as a string slice.
108    pub fn as_str(&self) -> &str
109    where Octets: AsRef<[u8]> {
110        unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
111    }
112
113    /// Returns the string as a mutable string slice.
114    pub fn as_str_mut(&mut self) -> &mut str
115    where Octets: AsMut<[u8]> {
116        unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
117    }
118
119    /// Returns a reference to the underlying octets sequence.
120    pub fn as_octets(&self) -> &Octets {
121        &self.0
122    }
123
124    /// Returns a mutable reference to the underlying octets sequence.
125    ///
126    /// # Safety
127    ///
128    /// The caller must ensure that the content of the octets sequence is
129    /// valid UTF-8 before the borrow ends.
130    pub unsafe fn as_octets_mut(&mut self) -> &mut Octets {
131        &mut self.0
132    }
133
134    /// Returns the string’s octets as a slice.
135    pub fn as_slice(&self) -> &[u8]
136    where Octets: AsRef<[u8]> {
137        self.0.as_ref()
138    }
139
140    /// Returns a mutable slice of the string’s octets.
141    ///
142    /// # Safety
143    ///
144    /// The caller must ensure that the content of the slice is
145    /// valid UTF-8 before the borrow ends.
146    pub unsafe fn as_slice_mut(&mut self) -> &mut [u8]
147    where Octets: AsMut<[u8]> {
148        self.0.as_mut()
149    }
150
151    /// Returns the length of the string in octets.
152    pub fn len(&self) -> usize
153    where Octets: AsRef<[u8]> {
154        self.0.as_ref().len()
155    }
156
157    /// Returns whether the string is empty.
158    pub fn is_empty(&self) -> bool
159    where Octets: AsRef<[u8]> {
160        self.0.as_ref().is_empty()
161    }
162}
163
164
165//--- OctetsFrom
166
167impl<Octs, SrcOcts> OctetsFrom<Str<SrcOcts>> for Str<Octs>
168where
169    Octs: OctetsFrom<SrcOcts>
170{
171    type Error = Octs::Error;
172
173    fn try_octets_from(src: Str<SrcOcts>) -> Result<Self, Self::Error> {
174        Octs::try_octets_from(src.into_octets()).map(|octs| unsafe {
175            Self::from_utf8_unchecked(octs)
176        })
177    }
178}
179
180
181//--- Deref, DerefMut, AsRef, AsMut, Borrow, BorrowMut
182
183impl<Octets: AsRef<[u8]> + ?Sized> ops::Deref for Str<Octets> {
184    type Target = str;
185
186    fn deref(&self) -> &Self::Target {
187        self.as_str()
188    }
189}
190
191impl<Octets> ops::DerefMut for Str<Octets>
192where Octets: AsRef<[u8]> + AsMut<[u8]> + ?Sized {
193    fn deref_mut(&mut self) -> &mut Self::Target {
194        self.as_str_mut()
195    }
196}
197
198impl<Octets: AsRef<[u8]> + ?Sized> AsRef<str> for Str<Octets>{
199    fn as_ref(&self) -> &str {
200        self.as_str()
201    }
202}
203
204impl<Octets: AsRef<[u8]> + ?Sized> AsRef<[u8]> for Str<Octets>{
205    fn as_ref(&self) -> &[u8] {
206        self.as_slice()
207    }
208}
209
210impl<Octets: AsMut<[u8]> + ?Sized> AsMut<str> for Str<Octets> {
211    fn as_mut(&mut self) -> &mut str {
212        self.as_str_mut()
213    }
214}
215
216impl<Octets: AsRef<[u8]> + ?Sized> borrow::Borrow<str> for Str<Octets>{
217    fn borrow(&self) -> &str {
218        self.as_str()
219    }
220}
221
222impl<Octets> borrow::BorrowMut<str> for Str<Octets> 
223where Octets: AsRef<[u8]> +  AsMut<[u8]> + ?Sized {
224    fn borrow_mut(&mut self) -> &mut str {
225        self.as_str_mut()
226    }
227}
228
229//--- Debug and Display
230
231impl<Octets: AsRef<[u8]> + ?Sized> fmt::Debug for Str<Octets> {
232    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
233        fmt::Debug::fmt(self.as_str(), f)
234    }
235}
236
237impl<Octets: AsRef<[u8]> + ?Sized> fmt::Display for Str<Octets> {
238    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
239        fmt::Display::fmt(self.as_str(), f)
240    }
241}
242
243//--- PartialEq and Eq
244
245impl<Octets, Other> PartialEq<Other> for Str<Octets>
246where
247    Octets: AsRef<[u8]> + ?Sized,
248    Other: AsRef<str> + ?Sized,
249{
250    fn eq(&self, other: &Other) -> bool {
251        self.as_str().eq(other.as_ref())
252    }
253}
254
255impl<Octets: AsRef<[u8]> + ?Sized> Eq for Str<Octets> { }
256
257//--- Hash
258
259impl<Octets: AsRef<[u8]> + ?Sized> hash::Hash for Str<Octets> {
260    fn hash<H: hash::Hasher>(&self, state: &mut H) {
261        self.as_str().hash(state)
262    }
263}
264
265//--- PartialOrd and Ord
266
267impl<Octets, Other> PartialOrd<Other> for Str<Octets>
268where
269    Octets: AsRef<[u8]> + ?Sized,
270    Other: AsRef<str> + ?Sized,
271{
272    fn partial_cmp(&self, other: &Other) -> Option<cmp::Ordering> {
273        self.as_str().partial_cmp(other.as_ref())
274    }
275}
276
277impl<Octets: AsRef<[u8]> + ?Sized> Ord for Str<Octets> {
278    fn cmp(&self, other: &Self) -> cmp::Ordering {
279        self.as_str().cmp(other.as_str())
280    }
281}
282
283
284//------------ StrBuilder ----------------------------------------------------
285
/// A growable, UTF-8 encoded string atop an octets builder.
///
/// Despite its name, the type parameter `Octets` is the type of the
/// wrapped octets builder. The content of that builder is expected to
/// always be a correctly encoded UTF-8 string.
pub struct StrBuilder<Octets>(Octets);
288
289impl<Octets> StrBuilder<Octets> {
290    /// Creates a new, empty string builder.
291    pub fn new() -> Self
292    where Octets: EmptyBuilder {
293        StrBuilder(Octets::empty())
294    }
295
296    /// Creates a new, empty string builder with a given minimum capacity.
297    pub fn with_capacity(capacity: usize) -> Self
298    where Octets: EmptyBuilder {
299        StrBuilder(Octets::with_capacity(capacity))
300    }
301
302    /// Creates a new string builder from an octets builder.
303    ///
304    /// The function expects the contents of the octets builder to contain
305    /// a sequence of UTF-8 encoded characters.
306    pub fn from_utf8(octets: Octets) -> Result<Self, FromUtf8Error<Octets>>
307    where Octets: AsRef<[u8]> {
308        if let Err(error) = str::from_utf8(octets.as_ref()) {
309            Err(FromUtf8Error { octets, error })
310        }
311        else {
312            Ok(Self(octets))
313        }
314    }
315
316    /// Converts on octets builder into a string builder.
317    ///
318    /// If the octets builder contains invalid octets, they are replaced with
319    /// `U+FFFD REPLACEMENT CHARACTER`.
320    ///
321    /// If the content is UTF-8 encoded, it will remain unchanged. Otherwise,
322    /// a new builder is created and the passed builder is dropped.
323    pub fn try_from_utf8_lossy(
324        octets: Octets
325    ) -> Result<Self, Octets::AppendError>
326    where Octets: AsRef<[u8]> + OctetsBuilder + EmptyBuilder {
327        const REPLACEMENT_CHAR: &[u8] = &[239, 191, 189];
328
329        let mut err = match str::from_utf8(octets.as_ref()) {
330            Ok(_) => return Ok(Self(octets)),
331            Err(err) => err,
332        };
333        let mut octets = octets.as_ref();
334        let mut res = Octets::with_capacity(octets.len());
335        while !octets.is_empty() {
336            if err.valid_up_to() > 0 {
337                res.append_slice(&octets[..err.valid_up_to()])?;
338            }
339            res.append_slice(REPLACEMENT_CHAR)?;
340            octets = match err.error_len() {
341                Some(len) => &octets[err.valid_up_to() + len ..],
342                None => b""
343            };
344            err = match str::from_utf8(octets) {
345                Ok(_) => {
346                    res.append_slice(octets)?;
347                    break;
348                }
349                Err(err) => err,
350            };
351        }
352        Ok(Self(res))
353    }
354
355    pub fn from_utf8_lossy(octets: Octets) -> Self
356    where
357        Octets: AsRef<[u8]> + OctetsBuilder + EmptyBuilder,
358        Octets::AppendError: Into<Infallible>
359    {
360        infallible(Self::try_from_utf8_lossy(octets))
361    }
362
363    /// Converts an octets builder into a string builder without checking.
364    ///
365    /// For the safe versions, see [from_utf8][Self::from_utf8],
366    /// [try_from_utf8_lossy][Self::try_from_utf8_lossy] and
367    /// [from_utf8_lossy][Self::from_utf8_lossy].
368    ///
369    /// # Safety
370    ///
371    /// The caller must ensure that `octets` contains data that is a correctly
372    /// UTF-8 encoded string. It may be empty.
373    pub unsafe fn from_utf8_unchecked(octets: Octets) -> Self {
374        Self(octets)
375    }
376
377    /// Converts the string builder into the underlying octets builder.
378    pub fn into_octets_builder(self) -> Octets {
379        self.0
380    }
381
382    /// Converts the string builder into the final str.
383    pub fn freeze(self) -> Str<Octets::Octets>
384    where Octets: FreezeBuilder {
385        Str(self.0.freeze())
386    }
387
388    /// Returns a slice of the already assembled string.
389    pub fn as_str(&self) -> &str
390    where Octets: AsRef<[u8]> {
391        unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
392    }
393
394    /// Returns a mutable slice of the already assembled string.
395    pub fn as_str_mut(&mut self) -> &mut str
396    where Octets: AsMut<[u8]> {
397        unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
398    }
399
400    /// Returns the string’s octets as a slice.
401    pub fn as_slice(&self) -> &[u8]
402    where Octets: AsRef<[u8]> {
403        self.0.as_ref()
404    }
405
406    /// Returns the length of the string in octets.
407    pub fn len(&self) -> usize
408    where Octets: AsRef<[u8]> {
409        self.0.as_ref().len()
410    }
411
412    /// Returns whether the string is empty.
413    pub fn is_empty(&self) -> bool
414    where Octets: AsRef<[u8]> {
415        self.0.as_ref().is_empty()
416    }
417
418    /// Appends a given string slice onto the end of this builder.
419    pub fn try_push_str(
420        &mut self, s: &str,
421    ) -> Result<(), Octets::AppendError>
422    where Octets: OctetsBuilder {
423        self.0.append_slice(s.as_bytes())
424    }
425
426    /// Appends a given string slice onto the end of this builder.
427    pub fn push_str(
428        &mut self, s: &str,
429    ) 
430    where Octets: OctetsBuilder, Octets::AppendError: Into<Infallible>  {
431        infallible(self.try_push_str(s))
432    }
433
434    /// Appends the given character to the end of the builder.
435    pub fn try_push(
436        &mut self, ch: char
437    ) -> Result<(), Octets::AppendError>
438    where Octets: OctetsBuilder {
439        let mut buf = [0u8; 4];
440        self.0.append_slice(ch.encode_utf8(&mut buf).as_bytes())
441    }
442
443    /// Appends the given character to the end of the builder.
444    pub fn push(&mut self, ch: char)
445    where Octets: OctetsBuilder, Octets::AppendError: Into<Infallible> {
446        infallible(self.try_push(ch))
447    }
448
449    /// Truncates the builder, keeping the first `new_len` octets.
450    ///
451    /// # Panics
452    ///
453    /// The method panics if `new_len` does not lie on a `char` boundary.
454    pub fn truncate(&mut self, new_len: usize)
455    where Octets: AsRef<[u8]> + Truncate {
456        if new_len < self.len() {
457            assert!(self.as_str().is_char_boundary(new_len));
458            self.0.truncate(new_len)
459        }
460    }
461
462    /// Clears the builder into an empty builder.
463    pub fn clear(&mut self)
464    where Octets: AsRef<[u8]> + Truncate {
465        self.truncate(0)
466    }
467
468    /// Removes the last character from the builder and returns it.
469    ///
470    /// Returns `None` if the builder is empty.
471    pub fn pop(&mut self) -> Option<char>
472    where Octets: AsRef<[u8]> + Truncate {
473        let ch = self.as_str().chars().next_back()?;
474        self.truncate(self.len() - ch.len_utf8());
475        Some(ch)
476    }
477}
478
479
480//-- Default
481
482impl<Octets: EmptyBuilder> Default for StrBuilder<Octets> {
483    fn default() -> Self {
484        Self::new()
485    }
486}
487
488
489//--- Deref, DerefMut, AsRef, AsMut, Borrow, BorrowMut
490
491impl<Octets: AsRef<[u8]>> ops::Deref for StrBuilder<Octets> {
492    type Target = str;
493
494    fn deref(&self) -> &Self::Target {
495        self.as_str()
496    }
497}
498
499impl<Octets: AsRef<[u8]> + AsMut<[u8]>> ops::DerefMut for StrBuilder<Octets> {
500    fn deref_mut(&mut self) -> &mut Self::Target {
501        self.as_str_mut()
502    }
503}
504
505impl<Octets: AsRef<[u8]>> AsRef<str> for StrBuilder<Octets>{
506    fn as_ref(&self) -> &str {
507        self.as_str()
508    }
509}
510
511impl<Octets: AsRef<[u8]>> AsRef<[u8]> for StrBuilder<Octets>{
512    fn as_ref(&self) -> &[u8] {
513        self.as_slice()
514    }
515}
516
517impl<Octets: AsMut<[u8]>> AsMut<str> for StrBuilder<Octets> {
518    fn as_mut(&mut self) -> &mut str {
519        self.as_str_mut()
520    }
521}
522
523impl<Octets: AsRef<[u8]>> borrow::Borrow<str> for StrBuilder<Octets>{
524    fn borrow(&self) -> &str {
525        self.as_str()
526    }
527}
528
529impl<Octets> borrow::BorrowMut<str> for StrBuilder<Octets> 
530where Octets: AsRef<[u8]> +  AsMut<[u8]> {
531    fn borrow_mut(&mut self) -> &mut str {
532        self.as_str_mut()
533    }
534}
535
536//--- Debug and Display
537
538impl<Octets: AsRef<[u8]>> fmt::Debug for StrBuilder<Octets> {
539    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
540        fmt::Debug::fmt(self.as_str(), f)
541    }
542}
543
544impl<Octets: AsRef<[u8]>> fmt::Display for StrBuilder<Octets> {
545    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
546        fmt::Display::fmt(self.as_str(), f)
547    }
548}
549
550//--- PartialEq and Eq
551
552impl<Octets, Other> PartialEq<Other> for StrBuilder<Octets>
553where
554    Octets: AsRef<[u8]>,
555    Other: AsRef<str>,
556{
557    fn eq(&self, other: &Other) -> bool {
558        self.as_str().eq(other.as_ref())
559    }
560}
561
562impl<Octets: AsRef<[u8]>> Eq for StrBuilder<Octets> { }
563
564//--- Hash
565
566impl<Octets: AsRef<[u8]>> hash::Hash for StrBuilder<Octets> {
567    fn hash<H: hash::Hasher>(&self, state: &mut H) {
568        self.as_str().hash(state)
569    }
570}
571
572//--- PartialOrd and Ord
573
574impl<Octets, Other> PartialOrd<Other> for StrBuilder<Octets>
575where
576    Octets: AsRef<[u8]>,
577    Other: AsRef<str>,
578{
579    fn partial_cmp(&self, other: &Other) -> Option<cmp::Ordering> {
580        self.as_str().partial_cmp(other.as_ref())
581    }
582}
583
584impl<Octets: AsRef<[u8]>> Ord for StrBuilder<Octets> {
585    fn cmp(&self, other: &Self) -> cmp::Ordering {
586        self.as_str().cmp(other.as_str())
587    }
588}
589
590
591//============ Error Types ===================================================
592
593//------------ FromUtf8Error -------------------------------------------------
594
/// An error happened when converting octets into a string.
///
/// The error keeps both the octets that failed to convert and the
/// underlying UTF-8 error describing why the conversion failed.
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct FromUtf8Error<Octets> {
    /// The octets that failed to convert.
    octets: Octets,
    /// The reason why the octets are not a valid UTF-8 encoded string.
    error: str::Utf8Error,
}
601
602impl<Octets> FromUtf8Error<Octets> {
603    /// Returns an octets slice of the data that failed to convert.
604    pub fn as_slice(&self) -> &[u8]
605    where Octets: AsRef<[u8]> {
606        self.octets.as_ref()
607    }
608
609    /// Returns the octets sequence that failed to convert.
610    pub fn into_octets(self) -> Octets {
611        self.octets
612    }
613
614    /// Returns the reason for the conversion error.
615    pub fn utf8_error(&self) -> str::Utf8Error {
616        self.error
617    }
618}
619
620impl<Octets> fmt::Debug for FromUtf8Error<Octets> {
621    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
622        f.debug_struct("FromUtf8Error")
623            .field("error", &self.error)
624            .finish_non_exhaustive()
625    }
626}
627
628impl<Octets> fmt::Display for FromUtf8Error<Octets> {
629    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
630        fmt::Display::fmt(&self.error, f)
631    }
632}
633
// The error trait is only available with the standard library. Since both
// `Debug` and `Display` are implemented for any `Octets`, no bounds are
// needed here.
#[cfg(feature = "std")]
impl<Octets> std::error::Error for FromUtf8Error<Octets> {}
636
637
638//============ Testing =======================================================
639
#[cfg(test)]
mod test {
    use super::*;

    // Most of the test cases in this module have been taken over from the
    // test suite of the Rust standard library.

    /// The lossy conversion has to agree with that of `String`.
    #[test]
    #[cfg(feature = "std")]
    fn from_utf8_lossy() {
        fn check(src: impl AsRef<[u8]>) {
            let octets = src.as_ref();
            assert_eq!(
                StrBuilder::from_utf8_lossy(std::vec::Vec::from(octets)),
                std::string::String::from_utf8_lossy(octets)
            );
        }

        let cases: [&[u8]; 9] = [
            b"hello",
            "ศไทย中华Việt Nam".as_bytes(),
            b"Hello\xC2 There\xFF Goodbye",
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            b"\xF5foo\xF5\x80bar",
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
        ];
        for case in cases.iter() {
            check(*case);
        }
    }

    /// Appending string slices extends the content at the end.
    #[test]
    #[cfg(feature = "std")]
    fn push_str() {
        let mut builder = StrBuilder::<std::vec::Vec<u8>>::new();

        builder.push_str("");
        assert_eq!(&builder[0..], "");

        builder.push_str("abc");
        assert_eq!(&builder[0..], "abc");

        builder.push_str("ประเทศไทย中华Việt Nam");
        assert_eq!(&builder[0..], "abcประเทศไทย中华Việt Nam");
    }

    /// Appending characters works for every encoded length.
    #[test]
    #[cfg(feature = "std")]
    fn push() {
        let initial = std::vec::Vec::from("ประเทศไทย中".as_bytes());
        let mut builder = StrBuilder::from_utf8(initial).unwrap();

        // The trailing four characters are 1, 2, 3, and 4 octets long.
        for &ch in ['华', 'b', '¢', '€', '𤭢'].iter() {
            builder.push(ch);
        }
        assert_eq!(builder, "ประเทศไทย中华b¢€𤭢");
    }

    /// Popping returns the characters in reverse order.
    #[test]
    #[cfg(feature = "std")]
    fn pop() {
        let initial = std::vec::Vec::from("ประเทศไทย中华b¢€𤭢".as_bytes());
        let mut builder = StrBuilder::from_utf8(initial).unwrap();

        // The leading four characters are 4, 3, 2, and 1 octets long.
        for &expected in ['𤭢', '€', '¢', 'b', '华'].iter() {
            assert_eq!(builder.pop().unwrap(), expected);
        }
        assert_eq!(builder, "ประเทศไทย中");
    }
}
708