//! Character tables used by stringprep (`stringprep/tables.rs`).
1use unicode_bidi::{bidi_class, BidiClass};
3use std::cmp::Ordering;
4use std::str::Chars;
5
6use super::rfc3454;
7
8pub fn unassigned_code_point(c: char) -> bool {
10 rfc3454::A_1
11 .binary_search_by(|&(start, end)| if start > c {
12 Ordering::Greater
13 } else if end < c {
14 Ordering::Less
15 } else {
16 Ordering::Equal
17 })
18 .is_ok()
19}
20
/// Reports whether `c` is one of the code points RFC 3454 lists in table B.1
/// ("commonly mapped to nothing").
///
/// Runs of consecutive code points are written as inclusive ranges; the set is
/// U+00AD, U+034F, U+1806, U+180B..=U+180D, U+200B..=U+200D, U+2060,
/// U+FE00..=U+FE0F and U+FEFF.
pub fn commonly_mapped_to_nothing(c: char) -> bool {
    matches!(
        c,
        '\u{00AD}'
            | '\u{034F}'
            | '\u{1806}'
            | '\u{180B}'..='\u{180D}'
            | '\u{200B}'..='\u{200D}'
            | '\u{2060}'
            | '\u{FE00}'..='\u{FE0F}'
            | '\u{FEFF}'
    )
}
32
33pub fn case_fold_for_nfkc(c: char) -> CaseFoldForNfkc {
35 let inner = match rfc3454::B_2.binary_search_by_key(&c, |e| e.0) {
36 Ok(idx) => FoldInner::Chars(rfc3454::B_2[idx].1.chars()),
37 Err(_) => FoldInner::Char(Some(c)),
38 };
39 CaseFoldForNfkc(inner)
40}
41
/// Internal state of [`CaseFoldForNfkc`].
enum FoldInner {
    /// Walks the characters of a `'static` replacement string.
    Chars(Chars<'static>),
    /// Holds a single character until it is handed out; `None` afterwards.
    Char(Option<char>),
}

/// Iterator of `char`s produced by `case_fold_for_nfkc`.
pub struct CaseFoldForNfkc(FoldInner);

impl Iterator for CaseFoldForNfkc {
    type Item = char;

    /// Yields the next character of the replacement string, or the single
    /// stored character on the first call and `None` on every later call.
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.0 {
            FoldInner::Chars(rest) => rest.next(),
            FoldInner::Char(pending) => pending.take(),
        }
    }
}
60
/// Reports whether `c` is the ASCII space, U+0020 (the only entry of
/// RFC 3454 table C.1.1).
pub fn ascii_space_character(c: char) -> bool {
    matches!(c, '\u{0020}')
}
65
/// Reports whether `c` is one of the non-ASCII space characters RFC 3454
/// lists in table C.1.2.
///
/// The set is U+00A0, U+1680, U+2000..=U+200B, U+202F, U+205F and U+3000.
pub fn non_ascii_space_character(c: char) -> bool {
    matches!(
        c,
        '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}
75
/// Reports whether `c` is an ASCII control character as listed in RFC 3454
/// table C.2.1: U+0000..=U+001F or U+007F (DELETE).
///
/// Uses the `..=` inclusive range pattern; the older `...` pattern spelling is
/// deprecated and rejected outright from the 2021 edition onwards.
pub fn ascii_control_character(c: char) -> bool {
    matches!(c, '\u{0000}'..='\u{001F}' | '\u{007F}')
}
84
/// Reports whether `c` is one of the non-ASCII control characters RFC 3454
/// lists in table C.2.2.
///
/// The alternatives below are kept in the same order and granularity as the
/// original list so they can be audited against the specification line by line.
/// Ranges use the `..=` inclusive pattern; the older `...` spelling is
/// deprecated and rejected outright from the 2021 edition onwards.
pub fn non_ascii_control_character(c: char) -> bool {
    matches!(
        c,
        '\u{0080}'..='\u{009F}'
            | '\u{06DD}'
            | '\u{070F}'
            | '\u{180E}'
            | '\u{200C}'
            | '\u{200D}'
            | '\u{2028}'
            | '\u{2029}'
            | '\u{2060}'
            | '\u{2061}'
            | '\u{2062}'
            | '\u{2063}'
            | '\u{206A}'..='\u{206F}'
            | '\u{FEFF}'
            | '\u{FFF9}'..='\u{FFFC}'
            | '\u{1D173}'..='\u{1D17A}'
    )
}
107
/// Reports whether `c` lies in one of the private-use ranges RFC 3454 lists
/// in table C.3: U+E000..=U+F8FF, U+F0000..=U+FFFFD or U+100000..=U+10FFFD.
///
/// Ranges use the `..=` inclusive pattern; the older `...` spelling is
/// deprecated and rejected outright from the 2021 edition onwards.
pub fn private_use(c: char) -> bool {
    matches!(
        c,
        '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'
    )
}
117
/// Reports whether `c` is a non-character code point as listed in RFC 3454
/// table C.4: U+FDD0..=U+FDEF, plus the last two code points (`xFFFE` and
/// `xFFFF`) of each of the 17 planes.
///
/// The per-plane alternatives are spelled out so the list can be checked
/// against the specification entry by entry. Ranges use the `..=` inclusive
/// pattern; the older `...` spelling is deprecated and rejected outright from
/// the 2021 edition onwards.
pub fn non_character_code_point(c: char) -> bool {
    matches!(
        c,
        '\u{FDD0}'..='\u{FDEF}'
            | '\u{FFFE}'..='\u{FFFF}'
            | '\u{1FFFE}'..='\u{1FFFF}'
            | '\u{2FFFE}'..='\u{2FFFF}'
            | '\u{3FFFE}'..='\u{3FFFF}'
            | '\u{4FFFE}'..='\u{4FFFF}'
            | '\u{5FFFE}'..='\u{5FFFF}'
            | '\u{6FFFE}'..='\u{6FFFF}'
            | '\u{7FFFE}'..='\u{7FFFF}'
            | '\u{8FFFE}'..='\u{8FFFF}'
            | '\u{9FFFE}'..='\u{9FFFF}'
            | '\u{AFFFE}'..='\u{AFFFF}'
            | '\u{BFFFE}'..='\u{BFFFF}'
            | '\u{CFFFE}'..='\u{CFFFF}'
            | '\u{DFFFE}'..='\u{DFFFF}'
            | '\u{EFFFE}'..='\u{EFFFF}'
            | '\u{FFFFE}'..='\u{FFFFF}'
            | '\u{10FFFE}'..='\u{10FFFF}'
    )
}
142
/// Reports whether `c` is a surrogate code point (RFC 3454 table C.5,
/// U+D800..=U+DFFF).
///
/// Always returns `false`: a Rust `char` is a Unicode scalar value, so a
/// surrogate can never be passed in. The function is kept so callers can treat
/// every prohibition table uniformly.
pub fn surrogate_code(c: char) -> bool {
    // Nothing to inspect; the binding is consumed only to keep the signature's
    // parameter name without an unused-variable warning.
    let _ = c;
    false
}
151
/// Reports whether `c` is one of the code points RFC 3454 lists in table C.6
/// as inappropriate for plain text: U+FFF9 through U+FFFD inclusive.
pub fn inappropriate_for_plain_text(c: char) -> bool {
    matches!(c, '\u{FFF9}'..='\u{FFFD}')
}
159
/// Reports whether `c` is one of the code points RFC 3454 lists in table C.7
/// as inappropriate for canonical representation: U+2FF0..=U+2FFB.
///
/// Uses the `..=` inclusive range pattern; the older `...` pattern spelling is
/// deprecated and rejected outright from the 2021 edition onwards.
pub fn inappropriate_for_canonical_representation(c: char) -> bool {
    matches!(c, '\u{2FF0}'..='\u{2FFB}')
}
167
/// Reports whether `c` is one of the code points RFC 3454 lists in table C.8
/// (characters that change display properties or are deprecated).
///
/// The set is U+0340, U+0341, U+200E, U+200F, U+202A..=U+202E and
/// U+206A..=U+206F.
pub fn change_display_properties_or_deprecated(c: char) -> bool {
    matches!(
        c,
        '\u{0340}'
            | '\u{0341}'
            | '\u{200E}'
            | '\u{200F}'
            | '\u{202A}'..='\u{202E}'
            | '\u{206A}'..='\u{206F}'
    )
}
177
/// Reports whether `c` is a tagging character as listed in RFC 3454 table C.9:
/// U+E0001 or U+E0020..=U+E007F.
///
/// Uses the `..=` inclusive range pattern; the older `...` pattern spelling is
/// deprecated and rejected outright from the 2021 edition onwards.
pub fn tagging_character(c: char) -> bool {
    matches!(c, '\u{E0001}' | '\u{E0020}'..='\u{E007F}')
}
186
187pub fn bidi_r_or_al(c: char) -> bool {
189 match bidi_class(c) {
190 BidiClass::R | BidiClass::AL => true,
191 _ => false,
192 }
193}
194
195pub fn bidi_l(c: char) -> bool {
197 match bidi_class(c) {
198 BidiClass::L => true,
199 _ => false,
200 }
201}