octseq/
str.rs

//! Strings atop octet sequences.
//!
//! This module provides the type `Str<Octets>` that guarantees the same
//! invariants – namely that the content is a UTF-8 encoded string – as
//! the standard library’s `str` and `String` types but atop a generic
//! octet sequence.
7
8use core::{borrow, cmp, fmt, hash, ops, str};
9use core::convert::Infallible;
10use crate::builder::{
11    EmptyBuilder, FreezeBuilder, OctetsBuilder, Truncate, infallible
12};
13
14
15//------------ Str -----------------------------------------------------------
16
/// A fixed length UTF-8 encoded string atop an octet sequence.
///
/// The wrapped octets are expected to always be valid UTF-8: the safe
/// constructors validate their input and the unsafe ones make the caller
/// responsible for it.
///
/// The type is `#[repr(transparent)]` so that it is guaranteed to have
/// exactly the layout of the octets it wraps. Reinterpreting a `&str` as a
/// `&Str<[u8]>` through a pointer cast depends on that guarantee; without
/// the attribute the layout of the wrapper would be unspecified.
#[derive(Clone, Default)]
#[repr(transparent)]
pub struct Str<Octets: ?Sized>(Octets);
20
21impl<Octets> Str<Octets> {
22    /// Converts a sequence of octets into a string.
23    pub fn from_utf8(octets: Octets) -> Result<Self, FromUtf8Error<Octets>>
24    where Octets: AsRef<[u8]> {
25        if let Err(error) = str::from_utf8(octets.as_ref()) {
26            Err(FromUtf8Error { octets, error })
27        }
28        else {
29            Ok(Self(octets))
30        }
31    }
32
33    /// Converts a sequence of octets into a string without checking.
34    ///
35    /// # Safety
36    ///
37    /// The caller must make sure that the contents of `octets` is a
38    /// correctly encoded UTF-8 string.
39    pub unsafe fn from_utf8_unchecked(octets: Octets) -> Self {
40        Self(octets)
41    }
42}
43
44impl Str<[u8]> {
45    /// Creates a string value from a UTF-8 slice.
46    pub fn from_utf8_slice(
47        slice: &[u8]
48    ) -> Result<&Self, FromUtf8Error<&[u8]>> {
49        match str::from_utf8(slice) {
50            Ok(s) => Ok(Self::from_str(s)),
51            Err(error) => Err(FromUtf8Error { octets: slice, error })
52        }
53    }
54
55    /// Creates a string value from a string slice.
56    #[allow(clippy::should_implement_trait)]
57    pub fn from_str(s: &str) -> &Self {
58        unsafe { &*(s as *const str as *const Self) }
59    }
60}
61
#[cfg(feature = "std")]
impl Str<std::vec::Vec<u8>> {
    /// Converts an owned standard library string into a string atop a vec.
    ///
    /// The bytes of the string are taken over as they are; no validation is
    /// performed.
    pub fn from_string(s: std::string::String) -> Self {
        // The bytes of a `String` are always valid UTF-8, so they can be
        // wrapped directly.
        Str(s.into_bytes())
    }
}
68
69impl<Octets> Str<Octets> {
70    /// Converts the string into its raw octets.
71    pub fn into_octets(self) -> Octets {
72        self.0
73    }
74}
75
76impl<Octets: ?Sized> Str<Octets> {
77    /// Returns the string as a string slice.
78    pub fn as_str(&self) -> &str
79    where Octets: AsRef<[u8]> {
80        unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
81    }
82
83    /// Returns the string as a mutable string slice.
84    pub fn as_str_mut(&mut self) -> &mut str
85    where Octets: AsMut<[u8]> {
86        unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
87    }
88
89    /// Returns a reference to the underlying octets sequence.
90    pub fn as_octets(&self) -> &Octets {
91        &self.0
92    }
93
94    /// Returns a mutable reference to the underlying octets sequence.
95    ///
96    /// # Safety
97    ///
98    /// The caller must ensure that the content of the octets sequence is
99    /// valid UTF-8 before the borrow ends.
100    pub unsafe fn as_octets_mut(&mut self) -> &mut Octets {
101        &mut self.0
102    }
103
104    /// Returns the string’s octets as a slice.
105    pub fn as_slice(&self) -> &[u8]
106    where Octets: AsRef<[u8]> {
107        self.0.as_ref()
108    }
109
110    /// Returns a mutable slice of the string’s octets.
111    ///
112    /// # Safety
113    ///
114    /// The caller must ensure that the content of the slice is
115    /// valid UTF-8 before the borrow ends.
116    pub unsafe fn as_slice_mut(&mut self) -> &mut [u8]
117    where Octets: AsMut<[u8]> {
118        self.0.as_mut()
119    }
120
121    /// Returns the length of the string in octets.
122    pub fn len(&self) -> usize
123    where Octets: AsRef<[u8]> {
124        self.0.as_ref().len()
125    }
126
127    /// Returns whether the string is empty.
128    pub fn is_empty(&self) -> bool
129    where Octets: AsRef<[u8]> {
130        self.0.as_ref().is_empty()
131    }
132}
133
134
135//--- Deref, DerefMut, AsRef, AsMut, Borrow, BorrowMut
136
137impl<Octets: AsRef<[u8]> + ?Sized> ops::Deref for Str<Octets> {
138    type Target = str;
139
140    fn deref(&self) -> &Self::Target {
141        self.as_str()
142    }
143}
144
145impl<Octets> ops::DerefMut for Str<Octets>
146where Octets: AsRef<[u8]> + AsMut<[u8]> + ?Sized {
147    fn deref_mut(&mut self) -> &mut Self::Target {
148        self.as_str_mut()
149    }
150}
151
152impl<Octets: AsRef<[u8]> + ?Sized> AsRef<str> for Str<Octets>{
153    fn as_ref(&self) -> &str {
154        self.as_str()
155    }
156}
157
158impl<Octets: AsRef<[u8]> + ?Sized> AsRef<[u8]> for Str<Octets>{
159    fn as_ref(&self) -> &[u8] {
160        self.as_slice()
161    }
162}
163
164impl<Octets: AsMut<[u8]> + ?Sized> AsMut<str> for Str<Octets> {
165    fn as_mut(&mut self) -> &mut str {
166        self.as_str_mut()
167    }
168}
169
170impl<Octets: AsRef<[u8]> + ?Sized> borrow::Borrow<str> for Str<Octets>{
171    fn borrow(&self) -> &str {
172        self.as_str()
173    }
174}
175
176impl<Octets: AsRef<[u8]> + ?Sized> borrow::Borrow<[u8]> for Str<Octets>{
177    fn borrow(&self) -> &[u8] {
178        self.as_slice()
179    }
180}
181
182impl<Octets> borrow::BorrowMut<str> for Str<Octets> 
183where Octets: AsRef<[u8]> +  AsMut<[u8]> + ?Sized {
184    fn borrow_mut(&mut self) -> &mut str {
185        self.as_str_mut()
186    }
187}
188
189//--- Debug and Display
190
191impl<Octets: AsRef<[u8]> + ?Sized> fmt::Debug for Str<Octets> {
192    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193        fmt::Debug::fmt(self.as_str(), f)
194    }
195}
196
197impl<Octets: AsRef<[u8]> + ?Sized> fmt::Display for Str<Octets> {
198    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199        fmt::Display::fmt(self.as_str(), f)
200    }
201}
202
203//--- PartialEq and Eq
204
205impl<Octets, Other> PartialEq<Other> for Str<Octets>
206where
207    Octets: AsRef<[u8]> + ?Sized,
208    Other: AsRef<str> + ?Sized,
209{
210    fn eq(&self, other: &Other) -> bool {
211        self.as_str().eq(other.as_ref())
212    }
213}
214
215impl<Octets: AsRef<[u8]> + ?Sized> Eq for Str<Octets> { }
216
217//--- Hash
218
219impl<Octets: AsRef<[u8]> + ?Sized> hash::Hash for Str<Octets> {
220    fn hash<H: hash::Hasher>(&self, state: &mut H) {
221        self.as_str().hash(state)
222    }
223}
224
225//--- PartialOrd and Ord
226
227impl<Octets, Other> PartialOrd<Other> for Str<Octets>
228where
229    Octets: AsRef<[u8]> + ?Sized,
230    Other: AsRef<str> + ?Sized,
231{
232    fn partial_cmp(&self, other: &Other) -> Option<cmp::Ordering> {
233        self.as_str().partial_cmp(other.as_ref())
234    }
235}
236
237impl<Octets: AsRef<[u8]> + ?Sized> Ord for Str<Octets> {
238    fn cmp(&self, other: &Self) -> cmp::Ordering {
239        self.as_str().cmp(other.as_str())
240    }
241}
242
243
244//------------ StrBuilder ----------------------------------------------------
245
/// A growable string kept UTF-8 encoded inside an octets builder.
///
/// The wrapped builder is expected to hold valid UTF-8 at all times.
pub struct StrBuilder<Octets>(Octets);
248
249impl<Octets> StrBuilder<Octets> {
250    /// Creates a new, empty string builder.
251    pub fn new() -> Self
252    where Octets: EmptyBuilder {
253        StrBuilder(Octets::empty())
254    }
255
256    /// Creates a new, empty string builder with a given minimum capacity.
257    pub fn with_capacity(capacity: usize) -> Self
258    where Octets: EmptyBuilder {
259        StrBuilder(Octets::with_capacity(capacity))
260    }
261
262    /// Creates a new string builder from an octets builder.
263    ///
264    /// The function expects the contents of the octets builder to contain
265    /// a sequence of UTF-8 encoded characters.
266    pub fn from_utf8(octets: Octets) -> Result<Self, FromUtf8Error<Octets>>
267    where Octets: AsRef<[u8]> {
268        if let Err(error) = str::from_utf8(octets.as_ref()) {
269            Err(FromUtf8Error { octets, error })
270        }
271        else {
272            Ok(Self(octets))
273        }
274    }
275
276    /// Converts on octets builder into a string builder.
277    ///
278    /// If the octets builder contains invalid octets, they are replaced with
279    /// `U+FFFD REPLACEMENT CHARACTER`.
280    ///
281    /// If the content is UTF-8 encoded, it will remain unchanged. Otherwise,
282    /// a new builder is created and the passed builder is dropped.
283    pub fn try_from_utf8_lossy(
284        octets: Octets
285    ) -> Result<Self, Octets::AppendError>
286    where Octets: AsRef<[u8]> + OctetsBuilder + EmptyBuilder {
287        const REPLACEMENT_CHAR: &[u8] = &[239, 191, 189];
288
289        let mut err = match str::from_utf8(octets.as_ref()) {
290            Ok(_) => return Ok(Self(octets)),
291            Err(err) => err,
292        };
293        let mut octets = octets.as_ref();
294        let mut res = Octets::with_capacity(octets.len());
295        while !octets.is_empty() {
296            if err.valid_up_to() > 0 {
297                res.append_slice(&octets[..err.valid_up_to()])?;
298            }
299            res.append_slice(REPLACEMENT_CHAR)?;
300            octets = match err.error_len() {
301                Some(len) => &octets[err.valid_up_to() + len ..],
302                None => b""
303            };
304            err = match str::from_utf8(octets) {
305                Ok(_) => {
306                    res.append_slice(octets)?;
307                    break;
308                }
309                Err(err) => err,
310            };
311        }
312        Ok(Self(res))
313    }
314
315    pub fn from_utf8_lossy(octets: Octets) -> Self
316    where
317        Octets: AsRef<[u8]> + OctetsBuilder + EmptyBuilder,
318        Octets::AppendError: Into<Infallible>
319    {
320        infallible(Self::try_from_utf8_lossy(octets))
321    }
322
323    /// Converts an octets builder into a string builder without checking.
324    ///
325    /// For the safe versions, see [from_utf8][Self::from_utf8],
326    /// [try_from_utf8_lossy][Self::try_from_utf8_lossy] and
327    /// [from_utf8_lossy][Self::from_utf8_lossy].
328    ///
329    /// # Safety
330    ///
331    /// The caller must ensure that `octets` contains data that is a correctly
332    /// UTF-8 encoded string. It may be empty.
333    pub unsafe fn from_utf8_unchecked(octets: Octets) -> Self {
334        Self(octets)
335    }
336
337    /// Converts the string builder into the underlying octets builder.
338    pub fn into_octets_builder(self) -> Octets {
339        self.0
340    }
341
342    /// Converts the string builder into the final str.
343    pub fn freeze(self) -> Str<Octets::Octets>
344    where Octets: FreezeBuilder {
345        Str(self.0.freeze())
346    }
347
348    /// Returns a slice of the already assembled string.
349    pub fn as_str(&self) -> &str
350    where Octets: AsRef<[u8]> {
351        unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
352    }
353
354    /// Returns a mutable slice of the already assembled string.
355    pub fn as_str_mut(&mut self) -> &mut str
356    where Octets: AsMut<[u8]> {
357        unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
358    }
359
360    /// Returns the string’s octets as a slice.
361    pub fn as_slice(&self) -> &[u8]
362    where Octets: AsRef<[u8]> {
363        self.0.as_ref()
364    }
365
366    /// Returns the length of the string in octets.
367    pub fn len(&self) -> usize
368    where Octets: AsRef<[u8]> {
369        self.0.as_ref().len()
370    }
371
372    /// Returns whether the string is empty.
373    pub fn is_empty(&self) -> bool
374    where Octets: AsRef<[u8]> {
375        self.0.as_ref().is_empty()
376    }
377
378    /// Appends a given string slice onto the end of this builder.
379    pub fn try_push_str(
380        &mut self, s: &str,
381    ) -> Result<(), Octets::AppendError>
382    where Octets: OctetsBuilder {
383        self.0.append_slice(s.as_bytes())
384    }
385
386    /// Appends a given string slice onto the end of this builder.
387    pub fn push_str(
388        &mut self, s: &str,
389    ) 
390    where Octets: OctetsBuilder, Octets::AppendError: Into<Infallible>  {
391        infallible(self.try_push_str(s))
392    }
393
394    /// Appends the given character to the end of the builder.
395    pub fn try_push(
396        &mut self, ch: char
397    ) -> Result<(), Octets::AppendError>
398    where Octets: OctetsBuilder {
399        let mut buf = [0u8; 4];
400        self.0.append_slice(ch.encode_utf8(&mut buf).as_bytes())
401    }
402
403    /// Appends the given character to the end of the builder.
404    pub fn push(&mut self, ch: char)
405    where Octets: OctetsBuilder, Octets::AppendError: Into<Infallible> {
406        infallible(self.try_push(ch))
407    }
408
409    /// Truncates the builder, keeping the first `new_len` octets.
410    ///
411    /// # Panics
412    ///
413    /// The method panics if `new_len` does not lie on a `char` boundary.
414    pub fn truncate(&mut self, new_len: usize)
415    where Octets: AsRef<[u8]> + Truncate {
416        if new_len < self.len() {
417            assert!(self.as_str().is_char_boundary(new_len));
418            self.0.truncate(new_len)
419        }
420    }
421
422    /// Clears the builder into an empty builder.
423    pub fn clear(&mut self)
424    where Octets: AsRef<[u8]> + Truncate {
425        self.truncate(0)
426    }
427
428    /// Removes the last character from the builder and returns it.
429    ///
430    /// Returns `None` if the builder is empty.
431    pub fn pop(&mut self) -> Option<char>
432    where Octets: AsRef<[u8]> + Truncate {
433        let ch = self.as_str().chars().rev().next()?;
434        self.truncate(self.len() - ch.len_utf8());
435        Some(ch)
436    }
437}
438
439
440//-- Default
441
442impl<Octets: EmptyBuilder> Default for StrBuilder<Octets> {
443    fn default() -> Self {
444        Self::new()
445    }
446}
447
448
449//--- Deref, DerefMut, AsRef, AsMut, Borrow, BorrowMut
450
451impl<Octets: AsRef<[u8]>> ops::Deref for StrBuilder<Octets> {
452    type Target = str;
453
454    fn deref(&self) -> &Self::Target {
455        self.as_str()
456    }
457}
458
459impl<Octets: AsRef<[u8]> + AsMut<[u8]>> ops::DerefMut for StrBuilder<Octets> {
460    fn deref_mut(&mut self) -> &mut Self::Target {
461        self.as_str_mut()
462    }
463}
464
465impl<Octets: AsRef<[u8]>> AsRef<str> for StrBuilder<Octets>{
466    fn as_ref(&self) -> &str {
467        self.as_str()
468    }
469}
470
471impl<Octets: AsRef<[u8]>> AsRef<[u8]> for StrBuilder<Octets>{
472    fn as_ref(&self) -> &[u8] {
473        self.as_slice()
474    }
475}
476
477impl<Octets: AsMut<[u8]>> AsMut<str> for StrBuilder<Octets> {
478    fn as_mut(&mut self) -> &mut str {
479        self.as_str_mut()
480    }
481}
482
483impl<Octets: AsRef<[u8]>> borrow::Borrow<str> for StrBuilder<Octets>{
484    fn borrow(&self) -> &str {
485        self.as_str()
486    }
487}
488
489impl<Octets: AsRef<[u8]>> borrow::Borrow<[u8]> for StrBuilder<Octets>{
490    fn borrow(&self) -> &[u8] {
491        self.as_slice()
492    }
493}
494
495impl<Octets> borrow::BorrowMut<str> for StrBuilder<Octets> 
496where Octets: AsRef<[u8]> +  AsMut<[u8]> {
497    fn borrow_mut(&mut self) -> &mut str {
498        self.as_str_mut()
499    }
500}
501
502//--- Debug and Display
503
504impl<Octets: AsRef<[u8]>> fmt::Debug for StrBuilder<Octets> {
505    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
506        fmt::Debug::fmt(self.as_str(), f)
507    }
508}
509
510impl<Octets: AsRef<[u8]>> fmt::Display for StrBuilder<Octets> {
511    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
512        fmt::Display::fmt(self.as_str(), f)
513    }
514}
515
516//--- PartialEq and Eq
517
518impl<Octets, Other> PartialEq<Other> for StrBuilder<Octets>
519where
520    Octets: AsRef<[u8]>,
521    Other: AsRef<str>,
522{
523    fn eq(&self, other: &Other) -> bool {
524        self.as_str().eq(other.as_ref())
525    }
526}
527
528impl<Octets: AsRef<[u8]>> Eq for StrBuilder<Octets> { }
529
530//--- Hash
531
532impl<Octets: AsRef<[u8]>> hash::Hash for StrBuilder<Octets> {
533    fn hash<H: hash::Hasher>(&self, state: &mut H) {
534        self.as_str().hash(state)
535    }
536}
537
538//--- PartialOrd and Ord
539
540impl<Octets, Other> PartialOrd<Other> for StrBuilder<Octets>
541where
542    Octets: AsRef<[u8]>,
543    Other: AsRef<str>,
544{
545    fn partial_cmp(&self, other: &Other) -> Option<cmp::Ordering> {
546        self.as_str().partial_cmp(other.as_ref())
547    }
548}
549
550impl<Octets: AsRef<[u8]>> Ord for StrBuilder<Octets> {
551    fn cmp(&self, other: &Self) -> cmp::Ordering {
552        self.as_str().cmp(other.as_str())
553    }
554}
555
556
557//============ Error Types ===================================================
558
559//------------ FromUtf8Error -------------------------------------------------
560
/// The error produced when octets turn out not to be a valid UTF-8 string.
///
/// It holds on to the octets that were rejected alongside the decoding
/// error.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct FromUtf8Error<Octets> {
    /// The octets that failed to convert.
    octets: Octets,

    /// The decoding error reported for `octets`.
    error: str::Utf8Error,
}
567
568impl<Octets> FromUtf8Error<Octets> {
569    /// Returns an octets slice of the data that failed to convert.
570    pub fn as_slice(&self) -> &[u8]
571    where Octets: AsRef<[u8]> {
572        self.octets.as_ref()
573    }
574
575    /// Returns the octets sequence that failed to convert.
576    pub fn into_octets(self) -> Octets {
577        self.octets
578    }
579
580    /// Returns the reason for the conversion error.
581    pub fn utf8_error(&self) -> str::Utf8Error {
582        self.error
583    }
584}
585
586impl<Octets> fmt::Debug for FromUtf8Error<Octets> {
587    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
588        f.debug_struct("FromUtf8Error")
589            .field("error", &self.error)
590            .finish_non_exhaustive()
591    }
592}
593
594impl<Octets> fmt::Display for FromUtf8Error<Octets> {
595    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
596        fmt::Display::fmt(&self.error, f)
597    }
598}
599
// With the standard library available, the type is a proper error type.
#[cfg(feature = "std")]
impl<Octets> std::error::Error for FromUtf8Error<Octets> { }
602
603
604//============ Testing =======================================================
605
#[cfg(test)]
mod test {
    use super::*;

    // The cases below are for the most part taken over from the tests of
    // the Rust standard library.

    #[test]
    #[cfg(feature = "std")]
    fn from_utf8_lossy() {
        // The lossy conversion has to agree with the one for `String`.
        fn check(src: impl AsRef<[u8]>) {
            let octets = src.as_ref();
            assert_eq!(
                StrBuilder::from_utf8_lossy(std::vec::Vec::from(octets)),
                std::string::String::from_utf8_lossy(octets)
            );
        }

        // A string slice as input.
        check("ศไทย中华Việt Nam");

        // Octets as input, valid and broken in various ways.
        let cases: [&[u8]; 8] = [
            b"hello",
            b"Hello\xC2 There\xFF Goodbye",
            b"Hello\xC0\x80 There\xE6\x83 Goodbye",
            b"\xF5foo\xF5\x80bar",
            b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
            b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
            b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
            b"\xED\xA0\x80foo\xED\xBF\xBFbar",
        ];
        for case in cases.iter().copied() {
            check(case);
        }
    }

    #[test]
    #[cfg(feature = "std")]
    fn push_str() {
        let mut s = StrBuilder::<std::vec::Vec<u8>>::new();

        // Each step appends a piece and states the content expected after.
        let steps = [
            ("", ""),
            ("abc", "abc"),
            ("ประเทศไทย中华Việt Nam", "abcประเทศไทย中华Việt Nam"),
        ];
        for (piece, expected) in steps.iter() {
            s.push_str(piece);
            assert_eq!(&s[0..], *expected);
        }
    }

    #[test]
    #[cfg(feature = "std")]
    fn push() {
        let mut data = StrBuilder::from_utf8(
            std::vec::Vec::from("ประเทศไทย中".as_bytes())
        ).unwrap();

        // Three bytes, then one, two, three, and four bytes.
        for ch in ['华', 'b', '¢', '€', '𤭢'].iter().copied() {
            data.push(ch);
        }
        assert_eq!(data, "ประเทศไทย中华b¢€𤭢");
    }

    #[test]
    #[cfg(feature = "std")]
    fn pop() {
        let mut data = StrBuilder::from_utf8(
            std::vec::Vec::from("ประเทศไทย中华b¢€𤭢".as_bytes())
        ).unwrap();

        // Four, three, two, and one bytes, then three bytes again.
        for expected in ['𤭢', '€', '¢', 'b', '华'].iter().copied() {
            assert_eq!(data.pop().unwrap(), expected);
        }
        assert_eq!(data, "ประเทศไทย中");
    }
}
674