unicode_normalization/recompose.rs
1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11use crate::decompose::Decompositions;
12use core::{
13 fmt::{self, Write},
14 iter::FusedIterator,
15};
16use tinyvec::TinyVec;
17
/// Phase of the recomposition state machine driven by `Recompositions::next`.
#[derive(Clone)]
enum RecompositionState {
    /// Characters are being pulled from the decomposition iterator and
    /// combined with the current composee where possible.
    Composing,
    /// The buffered characters are being emitted one per call; the payload is
    /// the index of the next buffered character to return. Once the buffer is
    /// drained it is cleared and the state goes back to `Composing`.
    Purging(usize),
    /// The decomposition iterator has run out; the payload is the index of
    /// the next buffered character to return before iteration ends.
    Finished(usize),
}
24
/// External iterator over the characters of a string's recomposition.
#[derive(Clone)]
pub struct Recompositions<I> {
    /// Decomposed character stream that is being recomposed.
    iter: Decompositions<I>,
    /// Current phase of the state machine (see `RecompositionState`).
    state: RecompositionState,
    /// Characters that were read after the composee but did not combine with
    /// it; they are emitted after the composee itself.
    buffer: TinyVec<[char; 4]>,
    /// Character that following input is currently being composed onto,
    /// if any.
    composee: Option<char>,
    /// Canonical combining class of the character most recently pushed to
    /// `buffer`; reset to `None` when the buffer starts being purged.
    last_ccc: Option<u8>,
}
34
35impl<I: Iterator<Item = char>> Recompositions<I> {
36 /// Create a new recomposition iterator for canonical compositions (NFC)
37 ///
38 /// Note that this iterator can also be obtained by directly calling [`.nfc()`](crate::UnicodeNormalization::nfc)
39 /// on the iterator.
40 #[inline]
41 pub fn new_canonical(iter: I) -> Self {
42 Recompositions {
43 iter: Decompositions::new_canonical(iter),
44 state: self::RecompositionState::Composing,
45 buffer: TinyVec::new(),
46 composee: None,
47 last_ccc: None,
48 }
49 }
50
51 /// Create a new recomposition iterator for compatability compositions (NFkC)
52 ///
53 /// Note that this iterator can also be obtained by directly calling [`.nfkc()`](crate::UnicodeNormalization::nfkc)
54 /// on the iterator.
55 #[inline]
56 pub fn new_compatible(iter: I) -> Self {
57 Recompositions {
58 iter: Decompositions::new_compatible(iter),
59 state: self::RecompositionState::Composing,
60 buffer: TinyVec::new(),
61 composee: None,
62 last_ccc: None,
63 }
64 }
65}
66
67impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
68 type Item = char;
69
70 #[inline]
71 fn next(&mut self) -> Option<char> {
72 use self::RecompositionState::*;
73
74 loop {
75 match self.state {
76 Composing => {
77 for ch in self.iter.by_ref() {
78 let ch_class = super::char::canonical_combining_class(ch);
79 let k = match self.composee {
80 None => {
81 if ch_class != 0 {
82 return Some(ch);
83 }
84 self.composee = Some(ch);
85 continue;
86 }
87 Some(k) => k,
88 };
89 match self.last_ccc {
90 None => match super::char::compose(k, ch) {
91 Some(r) => {
92 self.composee = Some(r);
93 continue;
94 }
95 None => {
96 if ch_class == 0 {
97 self.composee = Some(ch);
98 return Some(k);
99 }
100 self.buffer.push(ch);
101 self.last_ccc = Some(ch_class);
102 }
103 },
104 Some(l_class) => {
105 if l_class >= ch_class {
106 // `ch` is blocked from `composee`
107 if ch_class == 0 {
108 self.composee = Some(ch);
109 self.last_ccc = None;
110 self.state = Purging(0);
111 return Some(k);
112 }
113 self.buffer.push(ch);
114 self.last_ccc = Some(ch_class);
115 continue;
116 }
117 match super::char::compose(k, ch) {
118 Some(r) => {
119 self.composee = Some(r);
120 continue;
121 }
122 None => {
123 self.buffer.push(ch);
124 self.last_ccc = Some(ch_class);
125 }
126 }
127 }
128 }
129 }
130 self.state = Finished(0);
131 if self.composee.is_some() {
132 return self.composee.take();
133 }
134 }
135 Purging(next) => match self.buffer.get(next).cloned() {
136 None => {
137 self.buffer.clear();
138 self.state = Composing;
139 }
140 s => {
141 self.state = Purging(next + 1);
142 return s;
143 }
144 },
145 Finished(next) => match self.buffer.get(next).cloned() {
146 None => {
147 self.buffer.clear();
148 return self.composee.take();
149 }
150 s => {
151 self.state = Finished(next + 1);
152 return s;
153 }
154 },
155 }
156 }
157 }
158}
159
160impl<I: Iterator<Item = char> + FusedIterator> FusedIterator for Recompositions<I> {}
161
162impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
163 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
164 for c in self.clone() {
165 f.write_char(c)?;
166 }
167 Ok(())
168 }
169}