idna_mapping/
lib.rs
1#![no_std]
23
24use self::Mapping::*;
25
26include!("uts46_mapping_table.rs");
27
/// Locates one replacement string inside `STRING_TABLE`.
///
/// The starting byte offset is 16 bits wide but stored as two separate `u8`
/// halves; `decode_slice` recombines them as `(hi << 8) | lo`.
#[derive(Debug)]
struct StringTableSlice {
    /// Low 8 bits of the starting byte offset into `STRING_TABLE`.
    byte_start_lo: u8,
    /// High 8 bits of the starting byte offset into `STRING_TABLE`.
    byte_start_hi: u8,
    /// Length of the slice in bytes.
    byte_len: u8,
}
36
37fn decode_slice(slice: &StringTableSlice) -> &'static str {
38 let lo = slice.byte_start_lo as usize;
39 let hi = slice.byte_start_hi as usize;
40 let start = (hi << 8) | lo;
41 let len = slice.byte_len as usize;
42 &STRING_TABLE[start..(start + len)]
43}
44
/// Result of looking a code point up in the included mapping tables
/// (see `find_char`); `Mapper` decides what to emit based on the variant.
#[repr(u8)]
#[derive(Debug)]
enum Mapping {
    /// `Mapper` passes the code point through unchanged.
    Valid,
    /// `Mapper` drops the code point, or emits U+FFFD instead when its
    /// `ignored_as_errors` flag is set.
    Ignored,
    /// `Mapper` emits the characters of the string the slice designates
    /// in place of the code point.
    Mapped(StringTableSlice),
    /// `Mapper` emits U+FFFD in place of the code point.
    Disallowed,
}
53
54fn find_char(codepoint: char) -> &'static Mapping {
55 let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) {
56 Ok(idx) => idx,
57 Err(idx) => idx - 1,
58 };
59
60 const SINGLE_MARKER: u16 = 1 << 15;
61
62 let (base, x) = TABLE[idx];
63 let single = (x & SINGLE_MARKER) != 0;
64 let offset = !SINGLE_MARKER & x;
65
66 if single {
67 &MAPPING_TABLE[offset as usize]
68 } else {
69 &MAPPING_TABLE[(offset + (codepoint as u16 - base as u16)) as usize]
70 }
71}
72
/// Iterator adaptor that applies the table-driven character mapping to a
/// stream of `char`s.
///
/// Construct it with `Mapper::new`. The `Iterator<Item = char>` requirement
/// on `I` is stated on the `impl` blocks that need it rather than on the type
/// itself, so the type can be named without repeating the bound.
#[derive(Debug, Clone)]
pub struct Mapper<I> {
    /// Source of the characters still to be mapped.
    chars: I,
    /// Remaining characters of a replacement string that is currently being
    /// emitted; `None` when no replacement is pending.
    slice: Option<core::str::Chars<'static>>,
    /// When `true`, characters classified as ignored are reported as U+FFFD
    /// instead of being dropped.
    ignored_as_errors: bool,
}
81
82impl<I> Mapper<I>
83where
84 I: Iterator<Item = char>,
85{
86 pub fn new(delegate: I, ignored_as_errors: bool) -> Self {
87 Mapper {
88 chars: delegate,
89 slice: None,
90 ignored_as_errors,
91 }
92 }
93}
94
95impl<I> Iterator for Mapper<I>
96where
97 I: Iterator<Item = char>,
98{
99 type Item = char;
100
101 fn next(&mut self) -> Option<Self::Item> {
102 loop {
103 if let Some(s) = &mut self.slice {
104 match s.next() {
105 Some(c) => return Some(c),
106 None => {
107 self.slice = None;
108 }
109 }
110 }
111
112 let codepoint = self.chars.next()?;
113 if let '.' | '-' | 'a'..='z' | '0'..='9' = codepoint {
114 return Some(codepoint);
115 }
116
117 return Some(match *find_char(codepoint) {
118 Mapping::Valid => codepoint,
119 Mapping::Ignored => {
120 if self.ignored_as_errors {
121 '\u{FFFD}'
122 } else {
123 continue;
124 }
125 }
126 Mapping::Mapped(ref slice) => {
127 self.slice = Some(decode_slice(slice).chars());
128 continue;
129 }
130 Mapping::Disallowed => '\u{FFFD}',
131 });
132 }
133 }
134}
135
136const fn joining_type_to_mask(jt: unicode_joining_type::JoiningType) -> u32 {
141 1u32 << (jt as u32)
142}
143
144pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
146 joining_type_to_mask(unicode_joining_type::JoiningType::LeftJoining)
147 | joining_type_to_mask(unicode_joining_type::JoiningType::DualJoining),
148);
149
150pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
152 joining_type_to_mask(unicode_joining_type::JoiningType::RightJoining)
153 | joining_type_to_mask(unicode_joining_type::JoiningType::DualJoining),
154);
155
/// Newtype around `unicode_joining_type::JoiningType`, with the same
/// in-memory representation (`#[repr(transparent)]`).
///
/// Values are obtained from the `joining_type` function; the wrapped value is
/// not exposed directly.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct JoiningType(unicode_joining_type::JoiningType);
160
161impl JoiningType {
162 #[inline(always)]
164 pub fn to_mask(self) -> JoiningTypeMask {
165 JoiningTypeMask(joining_type_to_mask(self.0))
166 }
167
168 #[inline(always)]
170 pub fn is_transparent(self) -> bool {
171 self.0 == unicode_joining_type::JoiningType::Transparent
172 }
173}
174
/// Bit set of joining types, stored in a `u32` with the same in-memory
/// representation as that integer (`#[repr(transparent)]`).
///
/// Each joining type occupies the bit produced by `joining_type_to_mask`.
/// Derives `Debug` so values can appear in diagnostics and assertion
/// messages, and `PartialEq`/`Eq` so masks can be compared directly.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct JoiningTypeMask(u32);
180
181impl JoiningTypeMask {
182 #[inline(always)]
184 pub fn intersects(self, other: JoiningTypeMask) -> bool {
185 self.0 & other.0 != 0
186 }
187}
188
189#[inline(always)]
191pub fn joining_type(c: char) -> JoiningType {
192 JoiningType(unicode_joining_type::get_joining_type(c))
193}
194
#[cfg(test)]
mod tests {
    use super::{find_char, Mapping};
    use assert_matches::assert_matches;

    /// `Mapper` returns `-`, `.`, ASCII digits and lowercase ASCII letters
    /// without consulting the table; check that the table agrees and maps
    /// each of them to `Valid`.
    #[test]
    fn mapping_fast_path() {
        let fast_path = "-.".chars().chain('0'..='9').chain('a'..='z');
        for c in fast_path {
            assert_matches!(find_char(c), &Mapping::Valid);
        }
    }
}