unicode_normalization/
replace.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10use core::{
11    fmt::{self, Write},
12    iter::FusedIterator,
13};
14use tinyvec::ArrayVec;
15
/// Iterator adaptor that yields the replacement sequence of every character
/// produced by the wrapped iterator.
#[derive(Clone)]
pub struct Replacements<I> {
    /// Source of the characters that still have to be replaced.
    iter: I,
    /// Second character of the most recent replacement, waiting to be yielded.
    ///
    /// At this time, the longest replacement sequence has length 2, so a single
    /// slot is all the look-behind storage that is needed.
    buffer: Option<char>,
}

impl<I> Replacements<I>
where
    I: Iterator<Item = char>,
{
    /// Wrap `iter` so that [CJK Compatibility Ideograph] codepoints are replaced
    /// with normal forms using [Standardized Variation Sequences].
    ///
    /// The same iterator can also be obtained by calling
    /// [`.cjk_compat_variants()`] directly on the source iterator.
    ///
    /// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
    /// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
    /// [`.cjk_compat_variants()`]: crate::UnicodeNormalization::cjk_compat_variants
    #[inline]
    pub fn new_cjk_compat_variants(iter: I) -> Self {
        // Nothing is pending until the first character has been expanded.
        Self { iter, buffer: None }
    }
}
38
39impl<I: Iterator<Item = char>> Iterator for Replacements<I> {
40    type Item = char;
41
42    #[inline]
43    fn next(&mut self) -> Option<char> {
44        if let Some(c) = self.buffer.take() {
45            return Some(c);
46        }
47
48        match self.iter.next() {
49            Some(ch) => {
50                // At this time, the longest replacement sequence has length 2.
51                let mut buffer = ArrayVec::<[char; 2]>::new();
52                super::char::decompose_cjk_compat_variants(ch, |d| buffer.push(d));
53                self.buffer = buffer.get(1).copied();
54                Some(buffer[0])
55            }
56            None => None,
57        }
58    }
59
60    fn size_hint(&self) -> (usize, Option<usize>) {
61        let (lower, _) = self.iter.size_hint();
62        (lower, None)
63    }
64}
65
66impl<I: Iterator<Item = char> + FusedIterator> FusedIterator for Replacements<I> {}
67
68impl<I: Iterator<Item = char> + Clone> fmt::Display for Replacements<I> {
69    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
70        for c in self.clone() {
71            f.write_char(c)?;
72        }
73        Ok(())
74    }
75}