unicode_bidi/
explicit.rs

// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! 3.3.2 Explicit Levels and Directions
//!
//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14#[cfg(feature = "smallvec")]
15use smallvec::{smallvec, SmallVec};
16
17use super::char_data::{
18    is_rtl,
19    BidiClass::{self, *},
20};
21use super::level::Level;
22use super::prepare::removed_by_x9;
23use super::LevelRunVec;
24use super::TextSource;
25
26/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify
27/// level runs (BD7) for use when determining Isolating Run Sequences (X10).
28///
29/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
30/// for each char in `text`.
31///
32/// `runs` returns the list of level runs (BD7) of the text.
33#[cfg_attr(feature = "flame_it", flamer::flame)]
34pub fn compute<'a, T: TextSource<'a> + ?Sized>(
35    text: &'a T,
36    para_level: Level,
37    original_classes: &[BidiClass],
38    levels: &mut [Level],
39    processing_classes: &mut [BidiClass],
40    runs: &mut LevelRunVec,
41) {
42    assert_eq!(text.len(), original_classes.len());
43
44    // <http://www.unicode.org/reports/tr9/#X1>
45    #[cfg(feature = "smallvec")]
46    let mut stack: SmallVec<[Status; 8]> = smallvec![Status {
47        level: para_level,
48        status: OverrideStatus::Neutral,
49    }];
50    #[cfg(not(feature = "smallvec"))]
51    let mut stack = vec![Status {
52        level: para_level,
53        status: OverrideStatus::Neutral,
54    }];
55
56    let mut overflow_isolate_count = 0u32;
57    let mut overflow_embedding_count = 0u32;
58    let mut valid_isolate_count = 0u32;
59
60    let mut current_run_level = Level::ltr();
61    let mut current_run_start = 0;
62
63    for (i, len) in text.indices_lengths() {
64        let last = stack.last().unwrap();
65
66        match original_classes[i] {
67            // Rules X2-X5c
68            RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
69                // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
70                levels[i] = last.level;
71
72                // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
73                let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
74                if is_isolate {
75                    // Redundant due to "Retaining explicit formatting characters" step.
76                    // levels[i] = last.level;
77                    match last.status {
78                        OverrideStatus::RTL => processing_classes[i] = R,
79                        OverrideStatus::LTR => processing_classes[i] = L,
80                        _ => {}
81                    }
82                }
83
84                let new_level = if is_rtl(original_classes[i]) {
85                    last.level.new_explicit_next_rtl()
86                } else {
87                    last.level.new_explicit_next_ltr()
88                };
89
90                if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
91                {
92                    let new_level = new_level.unwrap();
93
94                    stack.push(Status {
95                        level: new_level,
96                        status: match original_classes[i] {
97                            RLO => OverrideStatus::RTL,
98                            LRO => OverrideStatus::LTR,
99                            RLI | LRI | FSI => OverrideStatus::Isolate,
100                            _ => OverrideStatus::Neutral,
101                        },
102                    });
103
104                    if is_isolate {
105                        valid_isolate_count += 1;
106                    } else {
107                        // The spec doesn't explicitly mention this step, but it is necessary.
108                        // See the reference implementations for comparison.
109                        levels[i] = new_level;
110                    }
111                } else if is_isolate {
112                    overflow_isolate_count += 1;
113                } else if overflow_isolate_count == 0 {
114                    overflow_embedding_count += 1;
115                }
116
117                if !is_isolate {
118                    // X9 +
119                    // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
120                    // (PDF handled below)
121                    processing_classes[i] = BN;
122                }
123            }
124
125            // <http://www.unicode.org/reports/tr9/#X6a>
126            PDI => {
127                if overflow_isolate_count > 0 {
128                    overflow_isolate_count -= 1;
129                } else if valid_isolate_count > 0 {
130                    overflow_embedding_count = 0;
131
132                    while !matches!(
133                        stack.pop(),
134                        None | Some(Status {
135                            status: OverrideStatus::Isolate,
136                            ..
137                        })
138                    ) {}
139
140                    valid_isolate_count -= 1;
141                }
142
143                let last = stack.last().unwrap();
144                levels[i] = last.level;
145
146                match last.status {
147                    OverrideStatus::RTL => processing_classes[i] = R,
148                    OverrideStatus::LTR => processing_classes[i] = L,
149                    _ => {}
150                }
151            }
152
153            // <http://www.unicode.org/reports/tr9/#X7>
154            PDF => {
155                if overflow_isolate_count > 0 {
156                    // do nothing
157                } else if overflow_embedding_count > 0 {
158                    overflow_embedding_count -= 1;
159                } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 {
160                    stack.pop();
161                }
162
163                // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
164                levels[i] = stack.last().unwrap().level;
165                // X9 part of retaining explicit formatting characters.
166                processing_classes[i] = BN;
167            }
168
169            // Nothing.
170            // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
171            B => {}
172
173            // <http://www.unicode.org/reports/tr9/#X6>
174            _ => {
175                levels[i] = last.level;
176
177                // This condition is not in the spec, but I am pretty sure that is a spec bug.
178                // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
179                if original_classes[i] != BN {
180                    match last.status {
181                        OverrideStatus::RTL => processing_classes[i] = R,
182                        OverrideStatus::LTR => processing_classes[i] = L,
183                        _ => {}
184                    }
185                }
186            }
187        }
188
189        // Handle multi-byte characters.
190        for j in 1..len {
191            levels[i + j] = levels[i];
192            processing_classes[i + j] = processing_classes[i];
193        }
194
195        // Identify level runs to be passed to prepare::isolating_run_sequences().
196        if i == 0 {
197            // Initialize for the first (or only) run.
198            current_run_level = levels[i];
199        } else {
200            // Check if we need to start a new level run.
201            // <https://www.unicode.org/reports/tr9/#BD7>
202            if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
203                // End the last run and start a new one.
204                runs.push(current_run_start..i);
205                current_run_level = levels[i];
206                current_run_start = i;
207            }
208        }
209    }
210
211    // Append the trailing level run, if non-empty.
212    if levels.len() > current_run_start {
213        runs.push(current_run_start..levels.len());
214    }
215}
216
217/// Entries in the directional status stack:
218struct Status {
219    level: Level,
220    status: OverrideStatus,
221}
222
/// Kind of a directional status stack entry.
///
/// Besides `PartialEq`, the enum derives `Eq`, `Clone`, `Copy` and `Debug`: it has no
/// fields, so values are trivially copyable, equality is total, and entries can be
/// printed while debugging.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OverrideStatus {
    /// No override is active (paragraph level, or an `LRE`/`RLE` embedding).
    Neutral,
    /// Pushed for `RLO`: affected chars are processed as `R`.
    RTL,
    /// Pushed for `LRO`: affected chars are processed as `L`.
    LTR,
    /// Pushed for an isolate initiator (`RLI`, `LRI` or `FSI`).
    Isolate,
}