stringprep/
tables.rs

1//! Character Tables
2use unicode_bidi::{bidi_class, BidiClass};
3use std::cmp::Ordering;
4use std::str::Chars;
5
6use super::rfc3454;
7
8/// A.1 Unassigned code points in Unicode 3.2
9pub fn unassigned_code_point(c: char) -> bool {
10    rfc3454::A_1
11        .binary_search_by(|&(start, end)| if start > c {
12            Ordering::Greater
13        } else if end < c {
14            Ordering::Less
15        } else {
16            Ordering::Equal
17        })
18        .is_ok()
19}
20
/// B.1 Commonly mapped to nothing
///
/// Returns `true` when `c` is one of the code points in this table:
/// U+00AD, U+034F, U+1806, U+180B–U+180D, U+200B–U+200D, U+2060,
/// U+FE00–U+FE0F and U+FEFF.
pub fn commonly_mapped_to_nothing(c: char) -> bool {
    matches!(
        c,
        '\u{00AD}'
            | '\u{034F}'
            | '\u{1806}'
            | '\u{180B}'..='\u{180D}'
            | '\u{200B}'..='\u{200D}'
            | '\u{2060}'
            | '\u{FE00}'..='\u{FE0F}'
            | '\u{FEFF}'
    )
}
32
33/// B.2 Mapping for case-folding used with NFKC.
34pub fn case_fold_for_nfkc(c: char) -> CaseFoldForNfkc {
35    let inner = match rfc3454::B_2.binary_search_by_key(&c, |e| e.0) {
36        Ok(idx) => FoldInner::Chars(rfc3454::B_2[idx].1.chars()),
37        Err(_) => FoldInner::Char(Some(c)),
38    };
39    CaseFoldForNfkc(inner)
40}
41
/// Internal state of [`CaseFoldForNfkc`].
#[derive(Clone, Debug)]
enum FoldInner {
    /// Remaining characters of a static replacement string.
    Chars(Chars<'static>),
    /// A single character to yield once; becomes `None` after it is taken.
    Char(Option<char>),
}

/// The iterator returned by `case_fold_for_nfkc`.
///
/// Yields either the characters of a replacement string or a single
/// pass-through character, then `None` on every later call.
#[derive(Clone, Debug)]
pub struct CaseFoldForNfkc(FoldInner);

impl Iterator for CaseFoldForNfkc {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        match &mut self.0 {
            FoldInner::Chars(it) => it.next(),
            // `take` leaves `None` behind, so the character is yielded once.
            FoldInner::Char(ch) => ch.take(),
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        match &self.0 {
            FoldInner::Chars(it) => it.size_hint(),
            FoldInner::Char(ch) => {
                let remaining = if ch.is_some() { 1 } else { 0 };
                (remaining, Some(remaining))
            }
        }
    }
}
60
/// C.1.1 ASCII space characters
///
/// Returns `true` only for U+0020 (SPACE), the single entry in this table.
pub fn ascii_space_character(c: char) -> bool {
    matches!(c, '\u{0020}')
}
65
/// C.1.2 Non-ASCII space characters
///
/// Returns `true` when `c` is U+00A0, U+1680, anything in U+2000–U+200B,
/// U+202F, U+205F or U+3000.
pub fn non_ascii_space_character(c: char) -> bool {
    matches!(
        c,
        '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}
75
/// C.2.1 ASCII control characters
///
/// Returns `true` when `c` is in U+0000–U+001F or is U+007F (DELETE).
pub fn ascii_control_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright from the 2021 edition onwards.
    matches!(c, '\u{0000}'..='\u{001F}' | '\u{007F}')
}
84
/// C.2.2 Non-ASCII control characters
///
/// Returns `true` when `c` is one of: U+0080–U+009F, U+06DD, U+070F, U+180E,
/// U+200C, U+200D, U+2028, U+2029, U+2060–U+2063, U+206A–U+206F, U+FEFF,
/// U+FFF9–U+FFFC, U+1D173–U+1D17A.
pub fn non_ascii_control_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright from the 2021 edition onwards.
    matches!(
        c,
        '\u{0080}'..='\u{009F}'
            | '\u{06DD}'
            | '\u{070F}'
            | '\u{180E}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2028}'..='\u{2029}'
            | '\u{2060}'..='\u{2063}'
            | '\u{206A}'..='\u{206F}'
            | '\u{FEFF}'
            | '\u{FFF9}'..='\u{FFFC}'
            | '\u{1D173}'..='\u{1D17A}'
    )
}
107
/// C.3 Private use
///
/// Returns `true` when `c` is in U+E000–U+F8FF, U+F0000–U+FFFFD or
/// U+100000–U+10FFFD.
pub fn private_use(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright from the 2021 edition onwards.
    matches!(
        c,
        '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'
    )
}
117
/// C.4 Non-character code points
///
/// Returns `true` when `c` is in U+FDD0–U+FDEF, or is one of the last two
/// code points of any plane (U+FFFE/U+FFFF, U+1FFFE/U+1FFFF, …,
/// U+10FFFE/U+10FFFF).
pub fn non_character_code_point(c: char) -> bool {
    // The per-plane entries all have their low 16 bits equal to 0xFFFE or
    // 0xFFFF, i.e. bits 1..=15 set. A `char` never exceeds U+10FFFF, so this
    // single mask test covers exactly planes 0 through 16.
    let ends_plane = (c as u32) & 0xFFFE == 0xFFFE;
    ends_plane || matches!(c, '\u{FDD0}'..='\u{FDEF}')
}
142
/// C.5 Surrogate codes
///
/// The table covers U+D800–U+DFFF. A Rust `char` cannot hold a surrogate
/// (such character literals are rejected by the compiler), so this always
/// returns `false`.
pub fn surrogate_code(c: char) -> bool {
    // Expressed on the numeric value because surrogate `char` patterns are
    // not allowed; no valid `char` can fall inside this range.
    matches!(c as u32, 0xD800..=0xDFFF)
}
151
/// C.6 Inappropriate for plain text
///
/// Returns `true` when `c` is in U+FFF9–U+FFFD.
pub fn inappropriate_for_plain_text(c: char) -> bool {
    matches!(c, '\u{FFF9}'..='\u{FFFD}')
}
159
/// C.7 Inappropriate for canonical representation
///
/// Returns `true` when `c` is in U+2FF0–U+2FFB.
pub fn inappropriate_for_canonical_representation(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright from the 2021 edition onwards.
    matches!(c, '\u{2FF0}'..='\u{2FFB}')
}
167
/// C.8 Change display properties or are deprecated
///
/// Returns `true` when `c` is in U+0340–U+0341, U+200E–U+200F,
/// U+202A–U+202E or U+206A–U+206F.
pub fn change_display_properties_or_deprecated(c: char) -> bool {
    matches!(
        c,
        '\u{0340}'..='\u{0341}'
            | '\u{200E}'..='\u{200F}'
            | '\u{202A}'..='\u{202E}'
            | '\u{206A}'..='\u{206F}'
    )
}
177
/// C.9 Tagging characters
///
/// Returns `true` when `c` is U+E0001 or lies in U+E0020–U+E007F.
pub fn tagging_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range pattern, which is rejected
    // outright from the 2021 edition onwards.
    matches!(c, '\u{E0001}' | '\u{E0020}'..='\u{E007F}')
}
186
187/// D.1 Characters with bidirectional property "R" or "AL"
188pub fn bidi_r_or_al(c: char) -> bool {
189    match bidi_class(c) {
190        BidiClass::R | BidiClass::AL => true,
191        _ => false,
192    }
193}
194
195/// D.2 Characters with bidirectional property "L"
196pub fn bidi_l(c: char) -> bool {
197    match bidi_class(c) {
198        BidiClass::L => true,
199        _ => false,
200    }
201}