unicase/unicode/
mod.rs

1#[cfg(__unicase__iter_cmp)]
2use core::cmp::Ordering;
3use core::hash::{Hash, Hasher};
4
5use self::map::lookup;
6mod map;
7
/// Thin wrapper around a string-like value `S`.
///
/// The trait impls in this module (`PartialEq`, `Hash`, and, when enabled,
/// `Ord`) do not look at the wrapped string directly; they run every `char`
/// through `map::lookup` first and compare / hash the resulting characters.
/// The inner value is public and is stored unmodified.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10
11impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12    #[inline]
13    fn eq(&self, other: &Unicode<S2>) -> bool {
14        let mut left = self.0.as_ref().chars().flat_map(lookup);
15        let mut right = other.0.as_ref().chars().flat_map(lookup);
16
17        // inline Iterator::eq since not added until Rust 1.5
18        loop {
19            let x = match left.next() {
20                None => return right.next().is_none(),
21                Some(val) => val,
22            };
23
24            let y = match right.next() {
25                None => return false,
26                Some(val) => val,
27            };
28
29            if x != y {
30                return false;
31            }
32        }
33    }
34}
35
36impl<S: AsRef<str>> Eq for Unicode<S> {}
37
/// Partial ordering that simply defers to the total order from `Ord`, so it
/// never returns `None`. Only compiled when `__unicase__iter_cmp` is set.
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
45
/// Total order obtained by lexicographically comparing the characters that
/// `lookup` produces for each side. Only compiled when `__unicase__iter_cmp`
/// is set, since it relies on `Iterator::cmp`.
#[cfg(__unicase__iter_cmp)]
impl<T> Ord for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        self.0
            .as_ref()
            .chars()
            .flat_map(lookup)
            .cmp(other.0.as_ref().chars().flat_map(lookup))
    }
}
55
56impl<S: AsRef<str>> Hash for Unicode<S> {
57    #[inline]
58    fn hash<H: Hasher>(&self, hasher: &mut H) {
59        let mut buf = [0; 4];
60        for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
61            let len = char_to_utf8(c, &mut buf);
62            hasher.write(&buf[..len])
63        }
64    }
65}
66
/// Encodes `c` as UTF-8 into the front of `dst` and returns the number of
/// bytes written (1 to 4). Bytes of `dst` beyond the returned length are left
/// untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Tag bits for continuation bytes and for the leading byte of 2-, 3- and
    // 4-byte sequences.
    const TAG_CONT: u8 = 0b1000_0000;
    const TAG_TWO_B: u8 = 0b1100_0000;
    const TAG_THREE_B: u8 = 0b1110_0000;
    const TAG_FOUR_B: u8 = 0b1111_0000;

    let scalar = c as u32;
    // Six payload bits starting at bit `shift`, tagged as a continuation byte.
    let cont = |shift: u32| ((scalar >> shift) & 0x3F) as u8 | TAG_CONT;

    if scalar < 0x80 {
        // ASCII: the byte is the code point itself.
        dst[0] = scalar as u8;
        return 1;
    }
    if scalar < 0x800 {
        dst[0] = ((scalar >> 6) & 0x1F) as u8 | TAG_TWO_B;
        dst[1] = cont(0);
        return 2;
    }
    if scalar < 0x1_0000 {
        dst[0] = ((scalar >> 12) & 0x0F) as u8 | TAG_THREE_B;
        dst[1] = cont(6);
        dst[2] = cont(0);
        return 3;
    }

    dst[0] = ((scalar >> 18) & 0x07) as u8 | TAG_FOUR_B;
    dst[1] = cont(12);
    dst[2] = cont(6);
    dst[3] = cont(0);
    4
}
95
// `Fold` lives in its own private module so that the enum itself can be
// declared `pub` (the privacy checker requires that) without being reachable
// from outside this file's module tree.
mod fold {
    /// A small by-value iterator holding between zero and three `char`s.
    ///
    /// Characters are yielded in the order they appear in the variant:
    /// `Three(a, b, c)` yields `a`, then `b`, then `c`. Each call to `next`
    /// shrinks the value to the next smaller variant until it reaches `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Returns the first remaining character and keeps the rest, or
        /// `None` once the value is `Zero`.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Yield the FIRST character and keep the remaining two in
                    // order. Previously this returned `three` and kept
                    // `(one, two)`, producing the sequence three, one, two,
                    // which is inconsistent with `Two` and breaks comparison
                    // against an already-expanded spelling of the same text.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of characters still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    /// Builds a one-character `Fold` from a 1-tuple.
    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    /// Builds a two-character `Fold` from a pair.
    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    /// Builds a three-character `Fold` from a triple.
    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
160
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => ({
            assert_eq!(Unicode($left), Unicode($right));
        });
    }

    // ASCII letters that differ only in case.
    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    // One-to-one mapping: final sigma vs. regular sigma.
    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    // Mappings where one character corresponds to several.
    #[test]
    fn test_full_case_folding() {
        // The left side starts with the single-character "fl" ligature
        // (U+FB02). It is written as an escape so the test keeps comparing
        // two different spellings even if the file is re-normalised.
        eq!("\u{fb02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}