unicode_bidi/explicit.rs
1// Copyright 2015 The Servo Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! 3.3.2 Explicit Levels and Directions
11//!
12//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14#[cfg(feature = "smallvec")]
15use smallvec::{smallvec, SmallVec};
16
17use super::char_data::{
18 is_rtl,
19 BidiClass::{self, *},
20};
21use super::level::Level;
22use super::prepare::removed_by_x9;
23use super::LevelRunVec;
24use super::TextSource;
25
26/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify
27/// level runs (BD7) for use when determining Isolating Run Sequences (X10).
28///
29/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
30/// for each char in `text`.
31///
32/// `runs` returns the list of level runs (BD7) of the text.
33#[cfg_attr(feature = "flame_it", flamer::flame)]
34pub fn compute<'a, T: TextSource<'a> + ?Sized>(
35 text: &'a T,
36 para_level: Level,
37 original_classes: &[BidiClass],
38 levels: &mut [Level],
39 processing_classes: &mut [BidiClass],
40 runs: &mut LevelRunVec,
41) {
42 assert_eq!(text.len(), original_classes.len());
43
44 // <http://www.unicode.org/reports/tr9/#X1>
45 #[cfg(feature = "smallvec")]
46 let mut stack: SmallVec<[Status; 8]> = smallvec![Status {
47 level: para_level,
48 status: OverrideStatus::Neutral,
49 }];
50 #[cfg(not(feature = "smallvec"))]
51 let mut stack = vec![Status {
52 level: para_level,
53 status: OverrideStatus::Neutral,
54 }];
55
56 let mut overflow_isolate_count = 0u32;
57 let mut overflow_embedding_count = 0u32;
58 let mut valid_isolate_count = 0u32;
59
60 let mut current_run_level = Level::ltr();
61 let mut current_run_start = 0;
62
63 for (i, len) in text.indices_lengths() {
64 let last = stack.last().unwrap();
65
66 match original_classes[i] {
67 // Rules X2-X5c
68 RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
69 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
70 levels[i] = last.level;
71
72 // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
73 let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
74 if is_isolate {
75 // Redundant due to "Retaining explicit formatting characters" step.
76 // levels[i] = last.level;
77 match last.status {
78 OverrideStatus::RTL => processing_classes[i] = R,
79 OverrideStatus::LTR => processing_classes[i] = L,
80 _ => {}
81 }
82 }
83
84 let new_level = if is_rtl(original_classes[i]) {
85 last.level.new_explicit_next_rtl()
86 } else {
87 last.level.new_explicit_next_ltr()
88 };
89
90 if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
91 {
92 let new_level = new_level.unwrap();
93
94 stack.push(Status {
95 level: new_level,
96 status: match original_classes[i] {
97 RLO => OverrideStatus::RTL,
98 LRO => OverrideStatus::LTR,
99 RLI | LRI | FSI => OverrideStatus::Isolate,
100 _ => OverrideStatus::Neutral,
101 },
102 });
103
104 if is_isolate {
105 valid_isolate_count += 1;
106 } else {
107 // The spec doesn't explicitly mention this step, but it is necessary.
108 // See the reference implementations for comparison.
109 levels[i] = new_level;
110 }
111 } else if is_isolate {
112 overflow_isolate_count += 1;
113 } else if overflow_isolate_count == 0 {
114 overflow_embedding_count += 1;
115 }
116
117 if !is_isolate {
118 // X9 +
119 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
120 // (PDF handled below)
121 processing_classes[i] = BN;
122 }
123 }
124
125 // <http://www.unicode.org/reports/tr9/#X6a>
126 PDI => {
127 if overflow_isolate_count > 0 {
128 overflow_isolate_count -= 1;
129 } else if valid_isolate_count > 0 {
130 overflow_embedding_count = 0;
131
132 while !matches!(
133 stack.pop(),
134 None | Some(Status {
135 status: OverrideStatus::Isolate,
136 ..
137 })
138 ) {}
139
140 valid_isolate_count -= 1;
141 }
142
143 let last = stack.last().unwrap();
144 levels[i] = last.level;
145
146 match last.status {
147 OverrideStatus::RTL => processing_classes[i] = R,
148 OverrideStatus::LTR => processing_classes[i] = L,
149 _ => {}
150 }
151 }
152
153 // <http://www.unicode.org/reports/tr9/#X7>
154 PDF => {
155 if overflow_isolate_count > 0 {
156 // do nothing
157 } else if overflow_embedding_count > 0 {
158 overflow_embedding_count -= 1;
159 } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 {
160 stack.pop();
161 }
162
163 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
164 levels[i] = stack.last().unwrap().level;
165 // X9 part of retaining explicit formatting characters.
166 processing_classes[i] = BN;
167 }
168
169 // Nothing.
170 // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
171 B => {}
172
173 // <http://www.unicode.org/reports/tr9/#X6>
174 _ => {
175 levels[i] = last.level;
176
177 // This condition is not in the spec, but I am pretty sure that is a spec bug.
178 // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
179 if original_classes[i] != BN {
180 match last.status {
181 OverrideStatus::RTL => processing_classes[i] = R,
182 OverrideStatus::LTR => processing_classes[i] = L,
183 _ => {}
184 }
185 }
186 }
187 }
188
189 // Handle multi-byte characters.
190 for j in 1..len {
191 levels[i + j] = levels[i];
192 processing_classes[i + j] = processing_classes[i];
193 }
194
195 // Identify level runs to be passed to prepare::isolating_run_sequences().
196 if i == 0 {
197 // Initialize for the first (or only) run.
198 current_run_level = levels[i];
199 } else {
200 // Check if we need to start a new level run.
201 // <https://www.unicode.org/reports/tr9/#BD7>
202 if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
203 // End the last run and start a new one.
204 runs.push(current_run_start..i);
205 current_run_level = levels[i];
206 current_run_start = i;
207 }
208 }
209 }
210
211 // Append the trailing level run, if non-empty.
212 if levels.len() > current_run_start {
213 runs.push(current_run_start..levels.len());
214 }
215}
216
217/// Entries in the directional status stack:
218struct Status {
219 level: Level,
220 status: OverrideStatus,
221}
222
/// Directional override status stored in each directional status stack entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OverrideStatus {
    /// No override in effect: used for the paragraph-level entry and for entries pushed
    /// by an embedding initiator (RLE / LRE).
    Neutral,
    /// Pushed by RLO: covered chars get the processing class `R`.
    RTL,
    /// Pushed by LRO: covered chars get the processing class `L`.
    LTR,
    /// Pushed by an isolate initiator (RLI / LRI / FSI). A PDF never pops such an entry;
    /// a matching PDI pops down to and including it.
    Isolate,
}