unicode_normalization/
recompose.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11use crate::decompose::Decompositions;
12use core::{
13    fmt::{self, Write},
14    iter::FusedIterator,
15};
16use tinyvec::TinyVec;
17
/// Phase of the recomposition state machine that `Recompositions::next` runs.
///
/// The `usize` carried by `Purging` and `Finished` is the index of the next
/// character of `Recompositions::buffer` to hand out.
#[derive(Clone)]
enum RecompositionState {
    /// Characters are being pulled from the decomposition stream and composed
    /// onto the current composee where possible.
    Composing,
    /// Buffered characters are being emitted one per call, starting at the
    /// given index; once the buffer runs out it is cleared and the state
    /// returns to `Composing`.
    Purging(usize),
    /// The decomposition stream has ended: buffered characters are emitted
    /// from the given index, after which any remaining composee is taken.
    Finished(usize),
}
24
/// External iterator for a string recomposition's characters.
///
/// Wraps a `Decompositions` stream and re-composes the characters it yields.
/// Instances are created with `new_canonical` or `new_compatible`.
#[derive(Clone)]
pub struct Recompositions<I> {
    /// Decomposed character stream that feeds the composition step.
    iter: Decompositions<I>,
    /// Current phase of the state machine run by `next`.
    state: RecompositionState,
    /// Characters that could not be composed onto `composee`; they are emitted
    /// in insertion order during the `Purging` and `Finished` phases.
    buffer: TinyVec<[char; 4]>,
    /// Character that incoming characters are currently being composed onto,
    /// if any.
    composee: Option<char>,
    /// Canonical combining class of the character most recently pushed to
    /// `buffer`; reset to `None` when the state switches to `Purging`.
    last_ccc: Option<u8>,
}
34
35impl<I: Iterator<Item = char>> Recompositions<I> {
36    /// Create a new recomposition iterator for canonical compositions (NFC)
37    ///
38    /// Note that this iterator can also be obtained by directly calling [`.nfc()`](crate::UnicodeNormalization::nfc)
39    /// on the iterator.
40    #[inline]
41    pub fn new_canonical(iter: I) -> Self {
42        Recompositions {
43            iter: Decompositions::new_canonical(iter),
44            state: self::RecompositionState::Composing,
45            buffer: TinyVec::new(),
46            composee: None,
47            last_ccc: None,
48        }
49    }
50
51    /// Create a new recomposition iterator for compatability compositions (NFkC)
52    ///
53    /// Note that this iterator can also be obtained by directly calling [`.nfkc()`](crate::UnicodeNormalization::nfkc)
54    /// on the iterator.
55    #[inline]
56    pub fn new_compatible(iter: I) -> Self {
57        Recompositions {
58            iter: Decompositions::new_compatible(iter),
59            state: self::RecompositionState::Composing,
60            buffer: TinyVec::new(),
61            composee: None,
62            last_ccc: None,
63        }
64    }
65}
66
67impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
68    type Item = char;
69
70    #[inline]
71    fn next(&mut self) -> Option<char> {
72        use self::RecompositionState::*;
73
74        loop {
75            match self.state {
76                Composing => {
77                    for ch in self.iter.by_ref() {
78                        let ch_class = super::char::canonical_combining_class(ch);
79                        let k = match self.composee {
80                            None => {
81                                if ch_class != 0 {
82                                    return Some(ch);
83                                }
84                                self.composee = Some(ch);
85                                continue;
86                            }
87                            Some(k) => k,
88                        };
89                        match self.last_ccc {
90                            None => match super::char::compose(k, ch) {
91                                Some(r) => {
92                                    self.composee = Some(r);
93                                    continue;
94                                }
95                                None => {
96                                    if ch_class == 0 {
97                                        self.composee = Some(ch);
98                                        return Some(k);
99                                    }
100                                    self.buffer.push(ch);
101                                    self.last_ccc = Some(ch_class);
102                                }
103                            },
104                            Some(l_class) => {
105                                if l_class >= ch_class {
106                                    // `ch` is blocked from `composee`
107                                    if ch_class == 0 {
108                                        self.composee = Some(ch);
109                                        self.last_ccc = None;
110                                        self.state = Purging(0);
111                                        return Some(k);
112                                    }
113                                    self.buffer.push(ch);
114                                    self.last_ccc = Some(ch_class);
115                                    continue;
116                                }
117                                match super::char::compose(k, ch) {
118                                    Some(r) => {
119                                        self.composee = Some(r);
120                                        continue;
121                                    }
122                                    None => {
123                                        self.buffer.push(ch);
124                                        self.last_ccc = Some(ch_class);
125                                    }
126                                }
127                            }
128                        }
129                    }
130                    self.state = Finished(0);
131                    if self.composee.is_some() {
132                        return self.composee.take();
133                    }
134                }
135                Purging(next) => match self.buffer.get(next).cloned() {
136                    None => {
137                        self.buffer.clear();
138                        self.state = Composing;
139                    }
140                    s => {
141                        self.state = Purging(next + 1);
142                        return s;
143                    }
144                },
145                Finished(next) => match self.buffer.get(next).cloned() {
146                    None => {
147                        self.buffer.clear();
148                        return self.composee.take();
149                    }
150                    s => {
151                        self.state = Finished(next + 1);
152                        return s;
153                    }
154                },
155            }
156        }
157    }
158}
159
160impl<I: Iterator<Item = char> + FusedIterator> FusedIterator for Recompositions<I> {}
161
162impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
163    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
164        for c in self.clone() {
165            f.write_char(c)?;
166        }
167        Ok(())
168    }
169}