psl_types/
lib.rs

//! Common types for the public suffix implementation crates
//!
//! The types in this crate assume that the input is valid
//! UTF-8 encoded domain names. If input is potentially invalid,
//! use a higher level crate like the `addr` crate.
//!
//! Some implementations may also assume that the domain name is
//! in lowercase and/or may only support looking up unicode
//! domain names.
10
11#![no_std]
12#![forbid(unsafe_code)]
13
14use core::cmp::Ordering;
15use core::hash::{Hash, Hasher};
16
17/// A list of all public suffixes
18pub trait List {
19    /// Finds the suffix information of the given input labels
20    ///
21    /// *NB:* `labels` must be in reverse order
22    fn find<'a, T>(&self, labels: T) -> Info
23    where
24        T: Iterator<Item = &'a [u8]>;
25
26    /// Get the public suffix of the domain
27    #[inline]
28    fn suffix<'a>(&self, name: &'a [u8]) -> Option<Suffix<'a>> {
29        let mut labels = name.rsplit(|x| *x == b'.');
30        let fqdn = if name.ends_with(b".") {
31            labels.next();
32            true
33        } else {
34            false
35        };
36        let Info { mut len, typ } = self.find(labels);
37        if fqdn {
38            len += 1;
39        }
40        if len == 0 {
41            return None;
42        }
43        let offset = name.len() - len;
44        let bytes = name.get(offset..)?;
45        Some(Suffix { bytes, fqdn, typ })
46    }
47
48    /// Get the registrable domain
49    #[inline]
50    fn domain<'a>(&self, name: &'a [u8]) -> Option<Domain<'a>> {
51        let suffix = self.suffix(name)?;
52        let name_len = name.len();
53        let suffix_len = suffix.bytes.len();
54        if name_len < suffix_len + 2 {
55            return None;
56        }
57        let offset = name_len - (1 + suffix_len);
58        let subdomain = name.get(..offset)?;
59        let root_label = subdomain.rsplitn(2, |x| *x == b'.').next()?;
60        let registrable_len = root_label.len() + 1 + suffix_len;
61        let offset = name_len - registrable_len;
62        let bytes = name.get(offset..)?;
63        Some(Domain { bytes, suffix })
64    }
65}
66
67impl<L: List> List for &'_ L {
68    #[inline]
69    fn find<'a, T>(&self, labels: T) -> Info
70    where
71        T: Iterator<Item = &'a [u8]>,
72    {
73        (*self).find(labels)
74    }
75}
76
/// Type of suffix
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
    /// The suffix is classified as `ICANN`.
    Icann,
    /// The suffix is classified as `private`.
    Private,
}
83
84/// Information about the suffix
85#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
86pub struct Info {
87    pub len: usize,
88    pub typ: Option<Type>,
89}
90
91/// The suffix of a domain name
92#[derive(Copy, Clone, Eq, Debug)]
93pub struct Suffix<'a> {
94    bytes: &'a [u8],
95    fqdn: bool,
96    typ: Option<Type>,
97}
98
99impl<'a> Suffix<'a> {
100    /// Builds a new suffix
101    #[inline]
102    #[must_use]
103    #[doc(hidden)]
104    pub fn new(bytes: &[u8], typ: Option<Type>) -> Suffix<'_> {
105        Suffix {
106            bytes,
107            typ,
108            fqdn: bytes.ends_with(b"."),
109        }
110    }
111
112    /// The suffix as bytes
113    #[inline]
114    #[must_use]
115    pub const fn as_bytes(&self) -> &'a [u8] {
116        self.bytes
117    }
118
119    /// Whether or not the suffix is fully qualified (i.e. it ends with a `.`)
120    #[inline]
121    #[must_use]
122    pub const fn is_fqdn(&self) -> bool {
123        self.fqdn
124    }
125
126    /// Whether this is an `ICANN`, `private` or unknown suffix
127    #[inline]
128    #[must_use]
129    pub const fn typ(&self) -> Option<Type> {
130        self.typ
131    }
132
133    /// Returns the suffix with a trailing `.` removed
134    #[inline]
135    #[must_use]
136    pub fn trim(mut self) -> Self {
137        if self.fqdn {
138            self.bytes = &self.bytes[..self.bytes.len() - 1];
139            self.fqdn = false;
140        }
141        self
142    }
143
144    /// Whether or not this is a known suffix (i.e. it is explicitly in the public suffix list)
145    // Could be const but Isahc needs support for Rust v1.41
146    #[inline]
147    #[must_use]
148    pub fn is_known(&self) -> bool {
149        self.typ.is_some()
150    }
151}
152
153impl PartialEq for Suffix<'_> {
154    #[inline]
155    fn eq(&self, other: &Self) -> bool {
156        self.trim().bytes == strip_dot(other.bytes)
157    }
158}
159
160impl PartialEq<&[u8]> for Suffix<'_> {
161    #[inline]
162    fn eq(&self, other: &&[u8]) -> bool {
163        self.trim().bytes == strip_dot(other)
164    }
165}
166
167impl PartialEq<&str> for Suffix<'_> {
168    #[inline]
169    fn eq(&self, other: &&str) -> bool {
170        self.trim().bytes == strip_dot(other.as_bytes())
171    }
172}
173
174impl Ord for Suffix<'_> {
175    #[inline]
176    fn cmp(&self, other: &Self) -> Ordering {
177        self.trim().bytes.cmp(strip_dot(other.bytes))
178    }
179}
180
181impl PartialOrd for Suffix<'_> {
182    #[inline]
183    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
184        Some(self.trim().bytes.cmp(strip_dot(other.bytes)))
185    }
186}
187
188impl Hash for Suffix<'_> {
189    #[inline]
190    fn hash<H: Hasher>(&self, state: &mut H) {
191        self.trim().bytes.hash(state);
192    }
193}
194
195/// A registrable domain name
196#[derive(Copy, Clone, Eq, Debug)]
197pub struct Domain<'a> {
198    bytes: &'a [u8],
199    suffix: Suffix<'a>,
200}
201
202impl<'a> Domain<'a> {
203    /// Builds a root domain
204    #[inline]
205    #[must_use]
206    #[doc(hidden)]
207    pub const fn new(bytes: &'a [u8], suffix: Suffix<'a>) -> Domain<'a> {
208        Domain { bytes, suffix }
209    }
210
211    /// The domain name as bytes
212    #[inline]
213    #[must_use]
214    pub const fn as_bytes(&self) -> &'a [u8] {
215        self.bytes
216    }
217
218    /// The public suffix of this domain name
219    #[inline]
220    #[must_use]
221    pub const fn suffix(&self) -> Suffix<'_> {
222        self.suffix
223    }
224
225    /// Returns the domain with a trailing `.` removed
226    #[inline]
227    #[must_use]
228    pub fn trim(mut self) -> Self {
229        if self.suffix.fqdn {
230            self.bytes = &self.bytes[..self.bytes.len() - 1];
231            self.suffix = self.suffix.trim();
232        }
233        self
234    }
235}
236
237impl PartialEq for Domain<'_> {
238    #[inline]
239    fn eq(&self, other: &Self) -> bool {
240        self.trim().bytes == strip_dot(other.bytes)
241    }
242}
243
244impl PartialEq<&[u8]> for Domain<'_> {
245    #[inline]
246    fn eq(&self, other: &&[u8]) -> bool {
247        self.trim().bytes == strip_dot(other)
248    }
249}
250
251impl PartialEq<&str> for Domain<'_> {
252    #[inline]
253    fn eq(&self, other: &&str) -> bool {
254        self.trim().bytes == strip_dot(other.as_bytes())
255    }
256}
257
258impl Ord for Domain<'_> {
259    #[inline]
260    fn cmp(&self, other: &Self) -> Ordering {
261        self.trim().bytes.cmp(strip_dot(other.bytes))
262    }
263}
264
265impl PartialOrd for Domain<'_> {
266    #[inline]
267    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
268        Some(self.trim().bytes.cmp(strip_dot(other.bytes)))
269    }
270}
271
272impl Hash for Domain<'_> {
273    #[inline]
274    fn hash<H: Hasher>(&self, state: &mut H) {
275        self.trim().bytes.hash(state);
276    }
277}
278
/// Returns `bytes` without its final byte when that byte is a `.`,
/// and `bytes` unchanged otherwise (including when it is empty).
#[inline]
fn strip_dot(bytes: &[u8]) -> &[u8] {
    match bytes.split_last() {
        Some((&b'.', head)) => head,
        _ => bytes,
    }
}
287
#[cfg(test)]
mod test {
    use super::{Info, List as Psl};

    /// Minimal list for the tests: the rightmost label is always reported as
    /// the suffix, and its type is never known.
    struct List;

    impl Psl for List {
        fn find<'a, T>(&self, mut labels: T) -> Info
        where
            T: Iterator<Item = &'a [u8]>,
        {
            // Labels arrive in reverse order, so the first one is the rightmost.
            let len = match labels.next() {
                Some(label) => label.len(),
                None => 0,
            };
            Info { len, typ: None }
        }
    }

    // Extra labels in front of the registrable domain are dropped.
    #[test]
    fn www_example_com() {
        let found = List.domain(b"www.example.com").expect("domain name");
        assert_eq!(found, "example.com");
        assert_eq!(found.suffix(), "com");
    }

    // A name that is already a registrable domain comes back whole.
    #[test]
    fn example_com() {
        let found = List.domain(b"example.com").expect("domain name");
        assert_eq!(found, "example.com");
        assert_eq!(found.suffix(), "com");
    }

    // Fully qualified input compared against fully qualified strings.
    #[test]
    fn example_com_() {
        let found = List.domain(b"example.com.").expect("domain name");
        assert_eq!(found, "example.com.");
        assert_eq!(found.suffix(), "com.");
    }

    // Fully qualified input compared against strings without the trailing dot.
    #[test]
    fn fqdn_comparisons() {
        let found = List.domain(b"example.com.").expect("domain name");
        assert_eq!(found, "example.com");
        assert_eq!(found.suffix(), "com");
    }

    // Input without a trailing dot compared against fully qualified strings.
    #[test]
    fn non_fqdn_comparisons() {
        let found = List.domain(b"example.com").expect("domain name");
        assert_eq!(found, "example.com.");
        assert_eq!(found.suffix(), "com.");
    }

    // The trailing dot is ignored when comparing two lookups with each other.
    #[test]
    fn self_comparisons() {
        let with_dot = List.domain(b"example.com.").expect("domain name");
        let without_dot = List.domain(b"example.com").expect("domain name");
        assert_eq!(with_dot, without_dot);
        assert_eq!(with_dot.suffix(), without_dot.suffix());
    }

    // Ordering must agree with equality for ordered collections to find entries.
    #[test]
    fn btreemap_comparisons() {
        extern crate alloc;
        use alloc::collections::BTreeSet;

        let mut domains = BTreeSet::new();
        let mut suffixes = BTreeSet::new();

        let with_dot = List.domain(b"example.com.").expect("domain name");
        domains.insert(with_dot);
        suffixes.insert(with_dot.suffix());

        let without_dot = List.domain(b"example.com").expect("domain name");
        assert!(domains.contains(&without_dot));
        assert!(suffixes.contains(&without_dot.suffix()));
    }

    // Hashing must agree with equality for hashed collections to find entries.
    #[test]
    fn hashmap_comparisons() {
        extern crate std;
        use std::collections::HashSet;

        let mut domains = HashSet::new();
        let mut suffixes = HashSet::new();

        let with_dot = List.domain(b"example.com.").expect("domain name");
        domains.insert(with_dot);
        suffixes.insert(with_dot.suffix());

        let without_dot = List.domain(b"example.com").expect("domain name");
        assert!(domains.contains(&without_dot));
        assert!(suffixes.contains(&without_dot.suffix()));
    }

    // A bare suffix has no registrable domain but is still a suffix.
    #[test]
    fn com() {
        let no_domain = List.domain(b"com");
        assert_eq!(no_domain, None);

        let found = List.suffix(b"com").expect("public suffix");
        assert_eq!(found, "com");
    }

    // The root name `.` has a suffix but no registrable domain.
    #[test]
    fn root() {
        let no_domain = List.domain(b".");
        assert_eq!(no_domain, None);

        let found = List.suffix(b".").expect("public suffix");
        assert_eq!(found, ".");
    }

    // Empty input has neither a domain nor a suffix.
    #[test]
    fn empty_string() {
        let no_domain = List.domain(b"");
        assert_eq!(no_domain, None);

        let no_suffix = List.suffix(b"");
        assert_eq!(no_suffix, None);
    }

    // Compile-time check only: the helpers below are never called. They build
    // only if `as_bytes` borrows from the input rather than from the local value.
    #[test]
    #[allow(dead_code)]
    fn accessors_borrow_correctly() {
        fn return_suffix(domain: &str) -> &[u8] {
            let found = List.suffix(domain.as_bytes()).unwrap();
            found.as_bytes()
        }

        fn return_domain(name: &str) -> &[u8] {
            let found = List.domain(name.as_bytes()).unwrap();
            found.as_bytes()
        }
    }
}