idna_adapter/
lib.rs

1// Copyright The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! This crate abstracts over a Unicode back end for the [`idna`][1]
10//! crate.
11//!
12//! To work around the lack of [`global-features`][2] in Cargo, this
13//! crate allows the top level `Cargo.lock` to choose an alternative
14//! Unicode back end for the `idna` crate by pinning a version of this
15//! crate.
16//!
17//! See the [README of the latest version][3] for more details.
18//!
19//! [1]: https://docs.rs/crate/idna/latest
20//! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618
21//! [3]: https://docs.rs/crate/idna_adapter/latest
22
23#![no_std]
24
25use unicode_normalization::UnicodeNormalization;
26
27/// Mask for checking for both left and dual joining.
28pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask =
29    JoiningTypeMask(idna_mapping::LEFT_OR_DUAL_JOINING_MASK);
30
31/// Mask for checking for both left and dual joining.
32pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask =
33    JoiningTypeMask(idna_mapping::RIGHT_OR_DUAL_JOINING_MASK);
34
35/// Turns a bidi class into a mask for comparing with multiple classes at once.
36const fn bidi_class_to_mask(bc: unicode_bidi::BidiClass) -> u32 {
37    1u32 << (bc as u32)
38}
39
40/// Mask for checking if the domain is a bidi domain.
41pub const RTL_MASK: BidiClassMask = BidiClassMask(
42    bidi_class_to_mask(unicode_bidi::BidiClass::R)
43        | bidi_class_to_mask(unicode_bidi::BidiClass::AL)
44        | bidi_class_to_mask(unicode_bidi::BidiClass::AN),
45);
46
47/// Mask for allowable bidi classes in the first character of a label
48/// (either LTR or RTL) in a bidi domain.
49pub const FIRST_BC_MASK: BidiClassMask = BidiClassMask(
50    bidi_class_to_mask(unicode_bidi::BidiClass::L)
51        | bidi_class_to_mask(unicode_bidi::BidiClass::R)
52        | bidi_class_to_mask(unicode_bidi::BidiClass::AL),
53);
54
55// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
56// character in an LTR label in a bidi domain.
57pub const LAST_LTR_MASK: BidiClassMask = BidiClassMask(
58    bidi_class_to_mask(unicode_bidi::BidiClass::L)
59        | bidi_class_to_mask(unicode_bidi::BidiClass::EN),
60);
61
62// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
63// character in an RTL label in a bidi domain.
64pub const LAST_RTL_MASK: BidiClassMask = BidiClassMask(
65    bidi_class_to_mask(unicode_bidi::BidiClass::R)
66        | bidi_class_to_mask(unicode_bidi::BidiClass::AL)
67        | bidi_class_to_mask(unicode_bidi::BidiClass::EN)
68        | bidi_class_to_mask(unicode_bidi::BidiClass::AN),
69);
70
71// Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain.
72pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask(
73    bidi_class_to_mask(unicode_bidi::BidiClass::L)
74        | bidi_class_to_mask(unicode_bidi::BidiClass::EN)
75        | bidi_class_to_mask(unicode_bidi::BidiClass::ES)
76        | bidi_class_to_mask(unicode_bidi::BidiClass::CS)
77        | bidi_class_to_mask(unicode_bidi::BidiClass::ET)
78        | bidi_class_to_mask(unicode_bidi::BidiClass::ON)
79        | bidi_class_to_mask(unicode_bidi::BidiClass::BN)
80        | bidi_class_to_mask(unicode_bidi::BidiClass::NSM),
81);
82
83// Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain.
84pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask(
85    bidi_class_to_mask(unicode_bidi::BidiClass::R)
86        | bidi_class_to_mask(unicode_bidi::BidiClass::AL)
87        | bidi_class_to_mask(unicode_bidi::BidiClass::AN)
88        | bidi_class_to_mask(unicode_bidi::BidiClass::EN)
89        | bidi_class_to_mask(unicode_bidi::BidiClass::ES)
90        | bidi_class_to_mask(unicode_bidi::BidiClass::CS)
91        | bidi_class_to_mask(unicode_bidi::BidiClass::ET)
92        | bidi_class_to_mask(unicode_bidi::BidiClass::ON)
93        | bidi_class_to_mask(unicode_bidi::BidiClass::BN)
94        | bidi_class_to_mask(unicode_bidi::BidiClass::NSM),
95);
96
97/// Value for the Joining_Type Unicode property.
98#[repr(transparent)]
99#[derive(Clone, Copy)]
100pub struct JoiningType(idna_mapping::JoiningType);
101
102impl JoiningType {
103    /// Returns the corresponding `JoiningTypeMask`.
104    #[inline(always)]
105    pub fn to_mask(self) -> JoiningTypeMask {
106        JoiningTypeMask(self.0.to_mask())
107    }
108
109    // `true` iff this value is the Transparent value.
110    #[inline(always)]
111    pub fn is_transparent(self) -> bool {
112        self.0.is_transparent()
113    }
114}
115
116/// A mask representing potentially multiple `JoiningType`
117/// values.
118#[repr(transparent)]
119#[derive(Clone, Copy)]
120pub struct JoiningTypeMask(idna_mapping::JoiningTypeMask);
121
122impl JoiningTypeMask {
123    /// `true` iff both masks have at `JoiningType` in common.
124    #[inline(always)]
125    pub fn intersects(self, other: JoiningTypeMask) -> bool {
126        self.0.intersects(other.0)
127    }
128}
129
130/// Value for the Bidi_Class Unicode property.
131#[repr(transparent)]
132#[derive(Clone, Copy)]
133pub struct BidiClass(unicode_bidi::BidiClass);
134
135impl BidiClass {
136    /// Returns the corresponding `BidiClassMask`.
137    #[inline(always)]
138    pub fn to_mask(self) -> BidiClassMask {
139        BidiClassMask(bidi_class_to_mask(self.0))
140    }
141
142    /// `true` iff this value is Left_To_Right
143    #[inline(always)]
144    pub fn is_ltr(self) -> bool {
145        self.0 == unicode_bidi::BidiClass::L
146    }
147
148    /// `true` iff this value is Nonspacing_Mark
149    #[inline(always)]
150    pub fn is_nonspacing_mark(self) -> bool {
151        self.0 == unicode_bidi::BidiClass::NSM
152    }
153
154    /// `true` iff this value is European_Number
155    #[inline(always)]
156    pub fn is_european_number(self) -> bool {
157        self.0 == unicode_bidi::BidiClass::EN
158    }
159
160    /// `true` iff this value is Arabic_Number
161    #[inline(always)]
162    pub fn is_arabic_number(self) -> bool {
163        self.0 == unicode_bidi::BidiClass::AN
164    }
165}
166
/// A mask representing potentially multiple `BidiClass`
/// values.
///
/// Each set bit of the wrapped `u32` stands for one bidi class.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct BidiClassMask(u32);

impl BidiClassMask {
    /// `true` iff both masks have at least one `BidiClass` in common.
    #[inline(always)]
    pub fn intersects(self, other: BidiClassMask) -> bool {
        // Any shared bit means at least one class is present in both masks.
        (self.0 & other.0) != 0
    }
}
180
/// An adapter between a Unicode back end and the `idna` crate.
#[non_exhaustive]
pub struct Adapter {}

/// `Default` is only available when the `compiled_data` feature is enabled,
/// because it delegates to the equally feature-gated `Adapter::new`.
#[cfg(feature = "compiled_data")]
impl Default for Adapter {
    fn default() -> Self {
        Self::new()
    }
}
191
192impl Adapter {
193    /// Constructor using data compiled into the binary.
194    #[cfg(feature = "compiled_data")]
195    #[inline(always)]
196    pub const fn new() -> Self {
197        Self {}
198    }
199
200    /// `true` iff the Canonical_Combining_Class of `c` is Virama.
201    #[inline(always)]
202    pub fn is_virama(&self, c: char) -> bool {
203        unicode_normalization::char::canonical_combining_class(c) == 9
204    }
205
206    /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark,
207    /// Spacing_Mark, or Enclosing_Mark.
208    #[inline(always)]
209    pub fn is_mark(&self, c: char) -> bool {
210        unicode_normalization::char::is_combining_mark(c)
211    }
212
213    /// Returns the Bidi_Class of `c`.
214    #[inline(always)]
215    pub fn bidi_class(&self, c: char) -> BidiClass {
216        BidiClass(unicode_bidi::bidi_class(c))
217    }
218
219    /// Returns the Joining_Type of `c`.
220    #[inline(always)]
221    pub fn joining_type(&self, c: char) -> JoiningType {
222        JoiningType(idna_mapping::joining_type(c))
223    }
224
225    /// See the [method of the same name in `icu_normalizer`][1] for the
226    /// exact semantics.
227    ///
228    /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize
229    #[inline(always)]
230    pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>(
231        &'delegate self,
232        iter: I,
233    ) -> impl Iterator<Item = char> + 'delegate {
234        idna_mapping::Mapper::new(iter, false).nfc()
235    }
236
237    /// See the [method of the same name in `icu_normalizer`][1] for the
238    /// exact semantics.
239    ///
240    /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate
241    #[inline(always)]
242    pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>(
243        &'delegate self,
244        iter: I,
245    ) -> impl Iterator<Item = char> + 'delegate {
246        idna_mapping::Mapper::new(iter, true).nfc()
247    }
248}