unicase/
lib.rs

1#![cfg_attr(test, deny(missing_docs))]
2#![cfg_attr(test, deny(warnings))]
3#![doc(html_root_url = "https://docs.rs/unicase/2.6.0")]
4#![cfg_attr(feature = "nightly", feature(test))]
5#![cfg_attr(
6    all(
7        __unicase__core_and_alloc,
8        not(test),
9    ),
10    no_std,
11)]
12
13//! # UniCase
14//!
15//! UniCase provides a way of specifying strings that are case-insensitive.
16//!
17//! UniCase supports full [Unicode case
18//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
19//! utilize faster ASCII case comparisons, if both strings are ASCII.
20//!
21//! Using the `UniCase::new()` constructor will check the string to see if it
22//! is all ASCII. When a `UniCase` is compared against another, if both are
23//! ASCII, it will use the faster comparison.
24//!
//! There is also an `Ascii` type in this crate, which always uses the ASCII
//! case comparisons, for when the encoding is already known.
27//!
28//! ## Example
29//!
30//! ```rust
31//! use unicase::UniCase;
32//!
33//! let a = UniCase::new("Maße");
34//! let b = UniCase::new("MASSE");
35//! let c = UniCase::new("mase");
36//!
37//! assert_eq!(a, b);
38//! assert!(b != c);
39//! ```
40//!
41//! ## Ascii
42//!
43//! ```rust
44//! use unicase::Ascii;
45//!
46//! let a = Ascii::new("foobar");
47//! let b = Ascii::new("FoObAr");
48//!
49//! assert_eq!(a, b);
50//! ```
51
52#[cfg(feature = "nightly")]
53extern crate test;
54
55#[cfg(all(__unicase__core_and_alloc, not(test)))]
56extern crate alloc;
57#[cfg(all(__unicase__core_and_alloc, not(test)))]
58use alloc::string::String;
59
60#[cfg(not(all(__unicase__core_and_alloc, not(test))))]
61extern crate std as alloc;
62#[cfg(not(all(__unicase__core_and_alloc, not(test))))]
63extern crate std as core;
64
65use alloc::borrow::Cow;
66#[cfg(__unicase__iter_cmp)]
67use core::cmp::Ordering;
68use core::fmt;
69use core::hash::{Hash, Hasher};
70use core::ops::{Deref, DerefMut};
71use core::str::FromStr;
72
73use self::unicode::Unicode;
74
75mod ascii;
76mod unicode;
77
78/// Case Insensitive wrapper of strings.
79#[derive(Clone, Copy)]
80pub struct UniCase<S>(Encoding<S>);
81
/// A case-insensitive wrapper for strings that are compared using ASCII
/// rules only.
#[derive(Clone, Copy, Debug, Default)]
pub struct Ascii<S>(S);
85
86/// Compare two string-like types for case-less equality, using unicode folding.
87///
88/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
89///
90/// Note: This will perform a scan for ASCII characters before doing the
91/// the comparison. See `UniCase` for more information.
92#[inline]
93pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
94    UniCase::new(left) == UniCase::new(right)
95}
96
97/// Compare two string-like types for case-less equality, ignoring ASCII case.
98///
99/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
100#[inline]
101pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
102    Ascii(left) == Ascii(right)
103}
104
105#[derive(Clone, Copy, Debug)]
106enum Encoding<S> {
107    Ascii(Ascii<S>),
108    Unicode(Unicode<S>),
109}
110
// Borrows the value stored inside an `Encoding`, whichever variant holds it.
//
// `inner!(e)` evaluates to a shared reference to the wrapped value and
// `inner!(mut e)` to a mutable reference.
macro_rules! inner {
    // Listed first so that a leading `mut` token is matched literally before
    // the general expression arm is attempted.
    (mut $e:expr) => ({
        match $e {
            Encoding::Ascii(ref mut wrapped) => &mut wrapped.0,
            Encoding::Unicode(ref mut wrapped) => &mut wrapped.0,
        }
    });
    ($e:expr) => ({
        match $e {
            Encoding::Ascii(ref wrapped) => &wrapped.0,
            Encoding::Unicode(ref wrapped) => &wrapped.0,
        }
    });
}
126
127impl<S: AsRef<str> + Default> Default for UniCase<S> {
128    fn default() -> Self {
129        Self::new(Default::default())
130    }
131}
132
133impl<S: AsRef<str>> UniCase<S> {
134    /// Creates a new `UniCase`.
135    ///
136    /// Note: This scans the text to determine if it is all ASCII or not.
137    pub fn new(s: S) -> UniCase<S> {
138        #[cfg(not(__unicase__core_and_alloc))]
139        #[allow(deprecated, unused)]
140        use std::ascii::AsciiExt;
141
142        if s.as_ref().is_ascii() {
143            UniCase(Encoding::Ascii(Ascii(s)))
144        } else {
145            UniCase(Encoding::Unicode(Unicode(s)))
146        }
147    }
148}
149
150impl<S> UniCase<S> {
151    /// Creates a new `UniCase`, skipping the ASCII check.
152    #[cfg(__unicase__const_fns)]
153    pub const fn unicode(s: S) -> UniCase<S> {
154        UniCase(Encoding::Unicode(Unicode(s)))
155    }
156
157    /// Creates a new `UniCase`, skipping the ASCII check.
158    ///
159    /// For Rust versions >= 1.31, this is a `const fn`.
160    #[cfg(not(__unicase__const_fns))]
161    pub fn unicode(s: S) -> UniCase<S> {
162        UniCase(Encoding::Unicode(Unicode(s)))
163    }
164
165    /// Creates a new `UniCase` which performs only ASCII case folding.
166    #[cfg(__unicase__const_fns)]
167    pub const fn ascii(s: S) -> UniCase<S> {
168        UniCase(Encoding::Ascii(Ascii(s)))
169    }
170
171    /// Creates a new `UniCase` which performs only ASCII case folding.
172    ///
173    /// For Rust versions >= 1.31, this is a `const fn`.
174    #[cfg(not(__unicase__const_fns))]
175    pub fn ascii(s: S) -> UniCase<S> {
176        UniCase(Encoding::Ascii(Ascii(s)))
177    }
178
179    /// Return `true` if this instance will only perform ASCII case folding.
180    pub fn is_ascii(&self) -> bool {
181        match self.0 {
182            Encoding::Ascii(_) => true,
183            Encoding::Unicode(_) => false,
184        }
185    }
186
187    /// Unwraps the inner value held by this `UniCase`.
188    #[inline]
189    pub fn into_inner(self) -> S {
190        match self.0 {
191            Encoding::Ascii(s) => s.0,
192            Encoding::Unicode(s) => s.0,
193        }
194    }
195}
196
197impl<S> Deref for UniCase<S> {
198    type Target = S;
199    #[inline]
200    fn deref<'a>(&'a self) -> &'a S {
201        inner!(self.0)
202    }
203}
204
205impl<S> DerefMut for UniCase<S> {
206    #[inline]
207    fn deref_mut<'a>(&'a mut self) -> &'a mut S {
208        inner!(mut self.0)
209    }
210}
211
212impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
213    #[inline]
214    fn as_ref(&self) -> &str {
215        inner!(self.0).as_ref()
216    }
217
218}
219
220impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
221    #[inline]
222    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
223        fmt::Debug::fmt(inner!(self.0), fmt)
224    }
225}
226
227impl<S: fmt::Display> fmt::Display for UniCase<S> {
228    #[inline]
229    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
230        fmt::Display::fmt(inner!(self.0), fmt)
231    }
232}
233
234
235impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
236    #[inline]
237    fn eq(&self, other: &UniCase<S2>) -> bool {
238        match (&self.0, &other.0) {
239            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x == y,
240            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x == y,
241            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => &Unicode(x.as_ref()) == y,
242            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => x == &Unicode(y.as_ref()),
243        }
244    }
245}
246
247impl<S: AsRef<str>> Eq for UniCase<S> {}
248
249impl<S: AsRef<str>> Hash for UniCase<S> {
250    #[inline]
251    fn hash<H: Hasher>(&self, hasher: &mut H) {
252        match self.0 {
253            Encoding::Ascii(ref s) => s.hash(hasher),
254            Encoding::Unicode(ref s) => s.hash(hasher)
255        }
256    }
257}
258
259impl<S> From<Ascii<S>> for UniCase<S> {
260    fn from(ascii: Ascii<S>) -> Self {
261        UniCase(Encoding::Ascii(ascii))
262    }
263}
264
// Generates `impl From<$from> for UniCase<$to>`.
//
// The source value is converted by calling the method named by `$by` on it
// (`into` when no method is given) and the result is wrapped with
// `UniCase::unicode`.
macro_rules! from_impl {
    // Short form: defer to the full form with `into` as the conversion.
    ($from:ty => $to:ty) => (
        from_impl!($from => $to; into);
    );
    ($from:ty => $to:ty; $by:ident) => (
        impl<'a> From<$from> for UniCase<$to> {
            fn from(source: $from) -> Self {
                UniCase::unicode(source.$by())
            }
        }
    );
}
275
// Generates `impl Into<$to> for UniCase<$to>`, which unwraps the value by
// calling `into_inner`.
macro_rules! into_impl {
    ($to:ty) => {
        impl<'a> Into<$to> for UniCase<$to> {
            fn into(self) -> $to {
                UniCase::into_inner(self)
            }
        }
    };
}
285
286impl<S: AsRef<str>> From<S> for UniCase<S> {
287    fn from(s: S) -> Self {
288        UniCase::unicode(s)
289    }
290}
291
292from_impl!(&'a str => Cow<'a, str>);
293from_impl!(String => Cow<'a, str>);
294from_impl!(&'a str => String);
295from_impl!(Cow<'a, str> => String; into_owned);
296from_impl!(&'a String => &'a str; as_ref);
297
298into_impl!(&'a str);
299into_impl!(String);
300into_impl!(Cow<'a, str>);
301
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> PartialOrd for UniCase<T> {
    /// Always returns `Some`, deferring to the `Ord` impl.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
309
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> Ord for UniCase<T> {
    /// Orders two values, dispatching on the encoding of each side.
    ///
    /// Matching encodings are compared directly. When the encodings differ,
    /// both sides are viewed as `Unicode<&str>` and compared that way.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        match self.0 {
            Encoding::Ascii(ref lhs) => match other.0 {
                Encoding::Ascii(ref rhs) => lhs.cmp(rhs),
                Encoding::Unicode(ref rhs) => {
                    Unicode(lhs.as_ref()).cmp(&Unicode(rhs.0.as_ref()))
                }
            },
            Encoding::Unicode(ref lhs) => match other.0 {
                Encoding::Unicode(ref rhs) => lhs.cmp(rhs),
                Encoding::Ascii(ref rhs) => {
                    Unicode(lhs.0.as_ref()).cmp(&Unicode(rhs.as_ref()))
                }
            },
        }
    }
}
322
323
324
325impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
326    type Err = <S as FromStr>::Err;
327    fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
328        s.parse().map(UniCase::new)
329    }
330}
331
332#[cfg(test)]
333mod tests {
334    use super::UniCase;
335    use std::hash::{Hash, Hasher};
336    #[cfg(not(__unicase__default_hasher))]
337    use std::hash::SipHasher as DefaultHasher;
338    #[cfg(__unicase__default_hasher)]
339    use std::collections::hash_map::DefaultHasher;
340
341    fn hash<T: Hash>(t: &T) -> u64 {
342        let mut s = DefaultHasher::new();
343        t.hash(&mut s);
344        s.finish()
345    }
346
347    #[test]
348    fn test_copy_for_refs() {
349        fn foo<T>(_: UniCase<T>) {}
350
351        let a = UniCase::new("foobar");
352        foo(a);
353        foo(a);
354    }
355
356    #[test]
357    fn test_eq_ascii() {
358        let a = UniCase::new("foobar");
359        let b = UniCase::new("FOOBAR");
360        let c = UniCase::ascii("FoObAr");
361
362        assert_eq!(a, b);
363        assert_eq!(b, a);
364        assert_eq!(a, c);
365        assert_eq!(c, a);
366        assert_eq!(hash(&a), hash(&b));
367        assert_eq!(hash(&a), hash(&c));
368        assert!(a.is_ascii());
369        assert!(b.is_ascii());
370        assert!(c.is_ascii());
371    }
372
373
374    #[test]
375    fn test_eq_unicode() {
376        let a = UniCase::new("στιγμας");
377        let b = UniCase::new("στιγμασ");
378        assert_eq!(a, b);
379        assert_eq!(b, a);
380        assert_eq!(hash(&a), hash(&b));
381    }
382
383    #[test]
384    fn test_eq_unicode_left_is_substring() {
385        // https://github.com/seanmonstar/unicase/issues/38
386        let a = UniCase::unicode("foo");
387        let b = UniCase::unicode("foobar");
388
389        assert!(a != b);
390        assert!(b != a);
391    }
392
393    #[cfg(feature = "nightly")]
394    #[bench]
395    fn bench_unicase_ascii(b: &mut ::test::Bencher) {
396        b.bytes = b"foobar".len() as u64;
397        let x = UniCase::new("foobar");
398        let y = UniCase::new("FOOBAR");
399        b.iter(|| assert_eq!(x, y));
400    }
401
402    #[cfg(feature = "nightly")]
403    static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
404
405    #[cfg(feature = "nightly")]
406    #[inline(never)]
407    fn is_ascii(bytes: &[u8]) -> bool {
408        #[allow(unused, deprecated)]
409        use std::ascii::AsciiExt;
410        bytes.is_ascii()
411    }
412
413    #[cfg(feature = "nightly")]
414    #[bench]
415    fn bench_is_ascii(b: &mut ::test::Bencher) {
416        b.iter(|| assert!(is_ascii(SUBJECT)));
417    }
418
419    #[cfg(feature = "nightly")]
420    #[bench]
421    fn bench_is_utf8(b: &mut ::test::Bencher) {
422        b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
423    }
424
425    #[cfg(__unicase__iter_cmp)]
426    #[test]
427    fn test_case_cmp() {
428        assert!(UniCase::new("a") < UniCase::new("B"));
429
430        assert!(UniCase::new("A") < UniCase::new("b"));
431        assert!(UniCase::new("aa") > UniCase::new("a"));
432
433        assert!(UniCase::new("a") < UniCase::new("aa"));
434        assert!(UniCase::new("a") < UniCase::new("AA"));
435    }
436
437    #[test]
438    fn test_from_impls() {
439        let view: &'static str = "foobar";
440        let _: UniCase<&'static str> = view.into();
441        let _: UniCase<&str> = view.into();
442        let _: UniCase<String> = view.into();
443
444        let owned: String = view.to_owned();
445        let _: UniCase<&str> = (&owned).into();
446        let _: UniCase<String> = owned.into();
447    }
448
449    #[test]
450    fn test_into_impls() {
451        let view: UniCase<&'static str> = UniCase::new("foobar");
452        let _: &'static str = view.into();
453        let _: &str = view.into();
454
455        let owned: UniCase<String> = "foobar".into();
456        let _: String = owned.clone().into();
457        let _: &str = owned.as_ref();
458    }
459
460    #[cfg(__unicase__const_fns)]
461    #[test]
462    fn test_unicase_unicode_const() {
463        const _UNICASE: UniCase<&'static str> = UniCase::unicode("");
464    }
465}