unicode_bidi/char_data/
mod.rs

1// Copyright 2015 The Servo Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! Accessor for `Bidi_Class` property from Unicode Character Database (UCD)
11
12mod tables;
13
14pub use self::tables::{BidiClass, UNICODE_VERSION};
15#[cfg(feature = "hardcoded-data")]
16use core::char;
17#[cfg(feature = "hardcoded-data")]
18use core::cmp::Ordering::{Equal, Greater, Less};
19
20#[cfg(feature = "hardcoded-data")]
21use self::tables::bidi_class_table;
22use crate::data_source::BidiMatchedOpeningBracket;
23use crate::BidiClass::*;
24#[cfg(feature = "hardcoded-data")]
25use crate::BidiDataSource;
/// Bidi character data compiled directly into the unicode-bidi crate.
///
/// Available when the `hardcoded-data` Cargo feature (part of the default
/// feature set) is enabled. The type is a field-less unit struct; its
/// [`BidiDataSource`] implementation answers every query from the built-in
/// `bidi_class_table`.
#[cfg(feature = "hardcoded-data")]
pub struct HardcodedBidiData;

#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Returns the `Bidi_Class` recorded for `c` in the built-in table.
    fn bidi_class(&self, c: char) -> BidiClass {
        // `self` carries no state: the lookup only depends on the static table.
        let table = bidi_class_table;
        bsearch_range_value_table(c, table)
    }
}
38
/// Look up the `BidiClass` of a single char in the hardcoded table.
///
/// This is the free-function counterpart of
/// `HardcodedBidiData::bidi_class`; both consult the same `bidi_class_table`.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
    let table = bidi_class_table;
    bsearch_range_value_table(c, table)
}
44
45/// If this character is a bracket according to BidiBrackets.txt,
46/// return the corresponding *normalized* *opening bracket* of the pair,
47/// and whether or not it itself is an opening bracket.
48pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket> {
49    for pair in self::tables::bidi_pairs_table {
50        if pair.0 == c || pair.1 == c {
51            let skeleton = pair.2.unwrap_or(pair.0);
52            return Some(BidiMatchedOpeningBracket {
53                opening: skeleton,
54                is_open: pair.0 == c,
55            });
56        }
57    }
58    None
59}
60
61pub fn is_rtl(bidi_class: BidiClass) -> bool {
62    matches!(bidi_class, RLE | RLO | RLI)
63}
64
/// Binary-searches the range table `r` for the entry whose inclusive range
/// `lo..=hi` contains `c`, returning the class stored with that entry.
///
/// Code points that fall inside no range resolve to `L`.
///
/// NOTE(review): binary search presumes `r` is sorted by code point with
/// non-overlapping ranges; confirm against the table generator.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    let hit = r.binary_search_by(|&(first, last, _)| {
        if last < c {
            // Entry lies entirely below `c`: keep looking to the right.
            Less
        } else if c < first {
            // Entry lies entirely above `c`: keep looking to the left.
            Greater
        } else {
            Equal
        }
    });

    match hit {
        Ok(pos) => r[pos].2,
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}
85
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    /// Asserts that `bidi_class` returns the expected class for every
    /// `(code point, class)` pair, naming the offending code point on failure.
    fn assert_classes(cases: &[(char, BidiClass)]) {
        for &(ch, expected) in cases {
            assert_eq!(
                bidi_class(ch),
                expected,
                "unexpected Bidi_Class for U+{:04X}",
                u32::from(ch)
            );
        }
    }

    #[test]
    fn test_ascii() {
        assert_classes(&[
            ('\u{0000}', BN),
            ('\u{0040}', ON),
            ('\u{0041}', L),
            ('\u{0062}', L),
            ('\u{007F}', BN),
        ]);
    }

    #[test]
    fn test_bmp() {
        // Hebrew
        assert_classes(&[
            ('\u{0590}', R),
            ('\u{05D0}', R),
            ('\u{05D1}', R),
            ('\u{05FF}', R),
        ]);

        // Arabic
        assert_classes(&[
            ('\u{0600}', AN),
            ('\u{0627}', AL),
            ('\u{07BF}', AL),
        ]);

        // Default R + Arabic Extras
        assert_classes(&[
            ('\u{07C0}', R),
            ('\u{085F}', R),
            ('\u{0860}', AL),
            ('\u{0870}', AL),
            ('\u{089F}', NSM),
            ('\u{08A0}', AL),
            ('\u{08FF}', NSM),
        ]);

        // Default ET
        assert_classes(&[
            ('\u{20A0}', ET),
            ('\u{20CF}', ET),
        ]);

        // Arabic Presentation Forms
        assert_classes(&[
            ('\u{FB1D}', R),
            ('\u{FB4F}', R),
            ('\u{FB50}', AL),
            ('\u{FDCF}', ON),
            ('\u{FDF0}', AL),
            ('\u{FDFF}', ON),
            ('\u{FE70}', AL),
            ('\u{FEFE}', AL),
            ('\u{FEFF}', BN),
        ]);

        // noncharacters
        assert_classes(&[
            ('\u{FDD0}', L),
            ('\u{FDD1}', L),
            ('\u{FDEE}', L),
            ('\u{FDEF}', L),
            ('\u{FFFE}', L),
            ('\u{FFFF}', L),
        ]);
    }

    #[test]
    fn test_smp() {
        // Default AL + R
        assert_classes(&[
            ('\u{10800}', R),
            ('\u{10FFF}', R),
            ('\u{1E800}', R),
            ('\u{1EDFF}', R),
            ('\u{1EE00}', AL),
            ('\u{1EEFF}', AL),
            ('\u{1EF00}', R),
            ('\u{1EFFF}', R),
        ]);
    }

    #[test]
    fn test_unassigned_planes() {
        // Code points in unassigned planes are expected to report `L`.
        assert_classes(&[
            ('\u{30000}', L),
            ('\u{40000}', L),
            ('\u{50000}', L),
            ('\u{60000}', L),
            ('\u{70000}', L),
            ('\u{80000}', L),
            ('\u{90000}', L),
            ('\u{a0000}', L),
        ]);
    }
}
170}