unicode_bidi/
utf16.rs

1// Copyright 2023 The Mozilla Foundation. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use super::TextSource;
11
12use alloc::borrow::Cow;
13use alloc::vec::Vec;
14use core::char;
15use core::ops::Range;
16
17use crate::{
18    compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
19    reorder_visual, visual_runs_for_line,
20};
21use crate::{
22    BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags,
23};
24
25#[cfg(feature = "hardcoded-data")]
26use crate::HardcodedBidiData;
27
/// Initial bidi information of the text (UTF-16 version).
///
/// Contains the text paragraphs and `BidiClass` of its characters.
/// The `original_classes` vector is indexed by UTF-16 code unit offsets into `text`.
#[derive(PartialEq, Debug)]
pub struct InitialInfo<'text> {
    /// The text, as a slice of UTF-16 code units.
    pub text: &'text [u16],

    /// The BidiClass of the character at each code unit in the text.
    /// If a character is multiple code units, its class will appear multiple times in the vector.
    pub original_classes: Vec<BidiClass>,

    /// The boundaries and level of each paragraph within the text.
    pub paragraphs: Vec<ParagraphInfo>,
}
43
44impl<'text> InitialInfo<'text> {
45    /// Find the paragraphs and BidiClasses in a string of text.
46    ///
47    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
48    ///
49    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
50    /// character is found before the matching PDI.  If no strong character is found, the class will
51    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
52    ///
53    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
54    #[cfg_attr(feature = "flame_it", flamer::flame)]
55    #[cfg(feature = "hardcoded-data")]
56    pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
57        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
58    }
59
60    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
61    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
62    /// instead (enabled with tbe default `hardcoded-data` Cargo feature)
63    ///
64    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
65    ///
66    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
67    /// character is found before the matching PDI.  If no strong character is found, the class will
68    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
69    #[cfg_attr(feature = "flame_it", flamer::flame)]
70    pub fn new_with_data_source<'a, D: BidiDataSource>(
71        data_source: &D,
72        text: &'a [u16],
73        default_para_level: Option<Level>,
74    ) -> InitialInfo<'a> {
75        InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
76    }
77}
78
/// Extended version of InitialInfo (not public API).
///
/// Pairs the public [`InitialInfo`] with per-paragraph flags that are only needed
/// internally while computing the full bidi information.
#[derive(PartialEq, Debug)]
struct InitialInfoExt<'text> {
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
    base: InitialInfo<'text>,

    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
    /// control codes present).
    flags: Vec<ParagraphInfoFlags>,
}
90
91impl<'text> InitialInfoExt<'text> {
92    /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
93    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
94    /// instead (enabled with tbe default `hardcoded-data` Cargo feature)
95    ///
96    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
97    ///
98    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
99    /// character is found before the matching PDI.  If no strong character is found, the class will
100    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
101    #[cfg_attr(feature = "flame_it", flamer::flame)]
102    pub fn new_with_data_source<'a, D: BidiDataSource>(
103        data_source: &D,
104        text: &'a [u16],
105        default_para_level: Option<Level>,
106    ) -> InitialInfoExt<'a> {
107        let mut paragraphs = Vec::<ParagraphInfo>::new();
108        let mut flags = Vec::<ParagraphInfoFlags>::new();
109        let (original_classes, _, _, _) = compute_initial_info(
110            data_source,
111            text,
112            default_para_level,
113            Some((&mut paragraphs, &mut flags)),
114        );
115
116        InitialInfoExt {
117            base: InitialInfo {
118                text,
119                original_classes,
120                paragraphs,
121            },
122            flags,
123        }
124    }
125}
126
/// Bidi information of the text (UTF-16 version).
///
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
/// character is multiple code units wide, then its class and level will appear multiple times in these
/// vectors.
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
#[derive(Debug, PartialEq)]
pub struct BidiInfo<'text> {
    /// The text, as a slice of UTF-16 code units.
    pub text: &'text [u16],

    /// The BidiClass of the character at each code unit in the text.
    pub original_classes: Vec<BidiClass>,

    /// The directional embedding level of each code unit in the text.
    pub levels: Vec<Level>,

    /// The boundaries and paragraph embedding level of each paragraph within the text.
    ///
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
    /// Or just don't include the first paragraph, which always starts at 0?
    pub paragraphs: Vec<ParagraphInfo>,
}
150
151impl<'text> BidiInfo<'text> {
152    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
153    ///
154    ///
155    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
156    ///
157    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
158    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
159    ///
160    /// TODO: Support auto-RTL base direction
161    #[cfg_attr(feature = "flame_it", flamer::flame)]
162    #[cfg(feature = "hardcoded-data")]
163    #[inline]
164    pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
165        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
166    }
167
168    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
169    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
170    /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
171    ///
172    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
173    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
174    ///
175    /// TODO: Support auto-RTL base direction
176    #[cfg_attr(feature = "flame_it", flamer::flame)]
177    pub fn new_with_data_source<'a, D: BidiDataSource>(
178        data_source: &D,
179        text: &'a [u16],
180        default_para_level: Option<Level>,
181    ) -> BidiInfo<'a> {
182        let InitialInfoExt { base, flags, .. } =
183            InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
184
185        let mut levels = Vec::<Level>::with_capacity(text.len());
186        let mut processing_classes = base.original_classes.clone();
187
188        for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
189            let text = &text[para.range.clone()];
190            let original_classes = &base.original_classes[para.range.clone()];
191
192            compute_bidi_info_for_para(
193                data_source,
194                para,
195                flags.is_pure_ltr,
196                flags.has_isolate_controls,
197                text,
198                original_classes,
199                &mut processing_classes,
200                &mut levels,
201            );
202        }
203
204        BidiInfo {
205            text,
206            original_classes: base.original_classes,
207            paragraphs: base.paragraphs,
208            levels,
209        }
210    }
211
212    /// Produce the levels for this paragraph as needed for reordering, one level per *byte*
213    /// in the paragraph. The returned vector includes bytes that are not included
214    /// in the `line`, but will not adjust them.
215    ///
216    /// This runs [Rule L1], you can run
217    /// [Rule L2] by calling [`Self::reorder_visual()`].
218    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
219    /// to avoid non-byte indices.
220    ///
221    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
222    ///
223    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
224    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
225    #[cfg_attr(feature = "flame_it", flamer::flame)]
226    pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
227        assert!(line.start <= self.levels.len());
228        assert!(line.end <= self.levels.len());
229
230        let mut levels = self.levels.clone();
231        let line_classes = &self.original_classes[line.clone()];
232        let line_levels = &mut levels[line.clone()];
233        let line_str: &[u16] = &self.text[line.clone()];
234
235        reorder_levels(line_classes, line_levels, line_str, para.level);
236
237        levels
238    }
239
240    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
241    /// in the paragraph. The returned vector includes characters that are not included
242    /// in the `line`, but will not adjust them.
243    ///
244    /// This runs [Rule L1], you can run
245    /// [Rule L2] by calling [`Self::reorder_visual()`].
246    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
247    /// to avoid non-byte indices.
248    ///
249    /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
250    ///
251    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
252    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
253    #[cfg_attr(feature = "flame_it", flamer::flame)]
254    pub fn reordered_levels_per_char(
255        &self,
256        para: &ParagraphInfo,
257        line: Range<usize>,
258    ) -> Vec<Level> {
259        let levels = self.reordered_levels(para, line);
260        self.text.char_indices().map(|(i, _)| levels[i]).collect()
261    }
262
263    /// Re-order a line based on resolved levels and return the line in display order.
264    ///
265    /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
266    ///
267    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
268    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
269    #[cfg_attr(feature = "flame_it", flamer::flame)]
270    pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
271        if !level::has_rtl(&self.levels[line.clone()]) {
272            return self.text[line].into();
273        }
274        let (levels, runs) = self.visual_runs(para, line.clone());
275        reorder_line(self.text, line, levels, runs)
276    }
277
278    /// Reorders pre-calculated levels of a sequence of characters.
279    ///
280    /// NOTE: This is a convenience method that does not use a `Paragraph`  object. It is
281    /// intended to be used when an application has determined the levels of the objects (character sequences)
282    /// and just needs to have them reordered.
283    ///
284    /// the index map will result in `indexMap[visualIndex]==logicalIndex`.
285    ///
286    /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
287    /// information about the actual text.
288    ///
289    /// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
290    /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
291    /// is for a single code point.
292    ///
293    ///
294    ///   # # Example
295    /// ```
296    /// use unicode_bidi::BidiInfo;
297    /// use unicode_bidi::Level;
298    ///
299    /// let l0 = Level::from(0);
300    /// let l1 = Level::from(1);
301    /// let l2 = Level::from(2);
302    ///
303    /// let levels = vec![l0, l0, l0, l0];
304    /// let index_map = BidiInfo::reorder_visual(&levels);
305    /// assert_eq!(levels.len(), index_map.len());
306    /// assert_eq!(index_map, [0, 1, 2, 3]);
307    ///
308    /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
309    /// let index_map = BidiInfo::reorder_visual(&levels);
310    /// assert_eq!(levels.len(), index_map.len());
311    /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
312    /// ```
313    #[cfg_attr(feature = "flame_it", flamer::flame)]
314    #[inline]
315    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
316        reorder_visual(levels)
317    }
318
319    /// Find the level runs within a line and return them in visual order.
320    ///
321    /// `line` is a range of bytes indices within `levels`.
322    ///
323    /// The first return value is a vector of levels used by the reordering algorithm,
324    /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
325    /// the result of [Rule L2], showing the visual order that each level run (a run of text with the
326    /// same level) should be displayed. Within each run, the display order can be checked
327    /// against the Level vector.
328    ///
329    /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
330    /// as that should be handled by the engine using this API.
331    ///
332    /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
333    /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
334    /// of producing a level map, since one may wish to deal with the fact that this is operating on
335    /// byte rather than character indices.
336    ///
337    /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
338    ///
339    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
340    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
341    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
342    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
343    #[cfg_attr(feature = "flame_it", flamer::flame)]
344    #[inline]
345    pub fn visual_runs(
346        &self,
347        para: &ParagraphInfo,
348        line: Range<usize>,
349    ) -> (Vec<Level>, Vec<LevelRun>) {
350        let levels = self.reordered_levels(para, line.clone());
351        visual_runs_for_line(levels, &line)
352    }
353
354    /// If processed text has any computed RTL levels
355    ///
356    /// This information is usually used to skip re-ordering of text when no RTL level is present
357    #[inline]
358    pub fn has_rtl(&self) -> bool {
359        level::has_rtl(&self.levels)
360    }
361}
362
/// Bidi information of text treated as a single paragraph.
///
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
/// character is multiple code units wide, then its class and level will appear multiple times in these
/// vectors.
#[derive(Debug, PartialEq)]
pub struct ParagraphBidiInfo<'text> {
    /// The text, as a slice of UTF-16 code units.
    pub text: &'text [u16],

    /// The BidiClass of the character at each code unit in the text.
    pub original_classes: Vec<BidiClass>,

    /// The directional embedding level of each code unit in the text.
    pub levels: Vec<Level>,

    /// The paragraph embedding level.
    pub paragraph_level: Level,

    /// Whether the paragraph is purely LTR.
    pub is_pure_ltr: bool,
}
385
386impl<'text> ParagraphBidiInfo<'text> {
387    /// Determine the bidi embedding level.
388    ///
389    ///
390    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
391    ///
392    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
393    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
394    ///
395    /// TODO: Support auto-RTL base direction
396    #[cfg_attr(feature = "flame_it", flamer::flame)]
397    #[cfg(feature = "hardcoded-data")]
398    #[inline]
399    pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
400        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
401    }
402
403    /// Determine the bidi embedding level, with a custom [`BidiDataSource`]
404    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
405    /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
406    ///
407    /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
408    /// and should be kept in sync with it.
409    #[cfg_attr(feature = "flame_it", flamer::flame)]
410    pub fn new_with_data_source<'a, D: BidiDataSource>(
411        data_source: &D,
412        text: &'a [u16],
413        default_para_level: Option<Level>,
414    ) -> ParagraphBidiInfo<'a> {
415        // Here we could create a ParagraphInitialInfo struct to parallel the one
416        // used by BidiInfo, but there doesn't seem any compelling reason for it.
417        let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
418            compute_initial_info(data_source, text, default_para_level, None);
419
420        let mut levels = Vec::<Level>::with_capacity(text.len());
421        let mut processing_classes = original_classes.clone();
422
423        let para_info = ParagraphInfo {
424            range: Range {
425                start: 0,
426                end: text.len(),
427            },
428            level: paragraph_level,
429        };
430
431        compute_bidi_info_for_para(
432            data_source,
433            &para_info,
434            is_pure_ltr,
435            has_isolate_controls,
436            text,
437            &original_classes,
438            &mut processing_classes,
439            &mut levels,
440        );
441
442        ParagraphBidiInfo {
443            text,
444            original_classes,
445            levels,
446            paragraph_level,
447            is_pure_ltr,
448        }
449    }
450
451    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
452    /// in the paragraph. The returned vector includes code units that are not included
453    /// in the `line`, but will not adjust them.
454    ///
455    /// See BidiInfo::reordered_levels for details.
456    ///
457    /// (This should be kept in sync with BidiInfo::reordered_levels.)
458    #[cfg_attr(feature = "flame_it", flamer::flame)]
459    pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
460        assert!(line.start <= self.levels.len());
461        assert!(line.end <= self.levels.len());
462
463        let mut levels = self.levels.clone();
464        let line_classes = &self.original_classes[line.clone()];
465        let line_levels = &mut levels[line.clone()];
466
467        reorder_levels(
468            line_classes,
469            line_levels,
470            self.text.subrange(line),
471            self.paragraph_level,
472        );
473
474        levels
475    }
476
477    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
478    /// in the paragraph. The returned vector includes characters that are not included
479    /// in the `line`, but will not adjust them.
480    ///
481    /// See BidiInfo::reordered_levels_per_char for details.
482    ///
483    /// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
484    #[cfg_attr(feature = "flame_it", flamer::flame)]
485    pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
486        let levels = self.reordered_levels(line);
487        self.text.char_indices().map(|(i, _)| levels[i]).collect()
488    }
489
490    /// Re-order a line based on resolved levels and return the line in display order.
491    ///
492    /// See BidiInfo::reorder_line for details.
493    ///
494    /// (This should be kept in sync with BidiInfo::reorder_line.)
495    #[cfg_attr(feature = "flame_it", flamer::flame)]
496    pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
497        if !level::has_rtl(&self.levels[line.clone()]) {
498            return self.text[line].into();
499        }
500        let (levels, runs) = self.visual_runs(line.clone());
501        reorder_line(self.text, line, levels, runs)
502    }
503
504    /// Reorders pre-calculated levels of a sequence of characters.
505    ///
506    /// See BidiInfo::reorder_visual for details.
507    #[cfg_attr(feature = "flame_it", flamer::flame)]
508    #[inline]
509    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
510        reorder_visual(levels)
511    }
512
513    /// Find the level runs within a line and return them in visual order.
514    ///
515    /// `line` is a range of code-unit indices within `levels`.
516    ///
517    /// See `BidiInfo::visual_runs` for details.
518    ///
519    /// (This should be kept in sync with BidiInfo::visual_runs.)
520    #[cfg_attr(feature = "flame_it", flamer::flame)]
521    #[inline]
522    pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
523        let levels = self.reordered_levels(line.clone());
524        visual_runs_for_line(levels, &line)
525    }
526
527    /// If processed text has any computed RTL levels
528    ///
529    /// This information is usually used to skip re-ordering of text when no RTL level is present
530    #[inline]
531    pub fn has_rtl(&self) -> bool {
532        !self.is_pure_ltr
533    }
534
535    /// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
536    #[inline]
537    pub fn direction(&self) -> Direction {
538        para_direction(&self.levels)
539    }
540}
541
542/// Return a line of the text in display order based on resolved levels.
543///
544/// `text`   the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
545/// `line`   a range of byte indices within `text` corresponding to one line
546/// `levels` array of `Level` values, with `line`'s levels reordered into visual order
547/// `runs`   array of `LevelRun`s in visual order
548///
549/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
550/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
551///
552/// Returns: the reordered text of the line.
553///
554/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
555///
556/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
557/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
558fn reorder_line(
559    text: &[u16],
560    line: Range<usize>,
561    levels: Vec<Level>,
562    runs: Vec<LevelRun>,
563) -> Cow<'_, [u16]> {
564    // If all isolating run sequences are LTR, no reordering is needed
565    if runs.iter().all(|run| levels[run.start].is_ltr()) {
566        return text[line].into();
567    }
568
569    let mut result = Vec::<u16>::with_capacity(line.len());
570    for run in runs {
571        if levels[run.start].is_rtl() {
572            let mut buf = [0; 2];
573            for c in text[run].chars().rev() {
574                result.extend(c.encode_utf16(&mut buf).iter());
575            }
576        } else {
577            result.extend(text[run].iter());
578        }
579    }
580    result.into()
581}
582
/// Contains a reference of `BidiInfo` and one of its `paragraphs`.
/// And it supports all operation in the `Paragraph` that needs also its
/// `BidiInfo` such as `direction`.
#[derive(Debug)]
pub struct Paragraph<'a, 'text> {
    /// The bidi information of the whole text.
    pub info: &'a BidiInfo<'text>,
    /// The paragraph of interest; its `range` is used to index into `info.levels`.
    pub para: &'a ParagraphInfo,
}
591
592impl<'a, 'text> Paragraph<'a, 'text> {
593    #[inline]
594    pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
595        Paragraph { info, para }
596    }
597
598    /// Returns if the paragraph is Left direction, right direction or mixed.
599    #[inline]
600    pub fn direction(&self) -> Direction {
601        para_direction(&self.info.levels[self.para.range.clone()])
602    }
603
604    /// Returns the `Level` of a certain character in the paragraph.
605    #[inline]
606    pub fn level_at(&self, pos: usize) -> Level {
607        let actual_position = self.para.range.start + pos;
608        self.info.levels[actual_position]
609    }
610}
611
// Implementation of TextSource for UTF-16 text in a [u16] array.
// Note that there could be unpaired surrogates present!

// Convenience functions to check whether a UTF16 code unit is a surrogate.

/// Returns `true` if `code` is a high (leading) surrogate, i.e. in `0xD800..=0xDBFF`.
#[inline]
fn is_high_surrogate(code: u16) -> bool {
    (0xD800..=0xDBFF).contains(&code)
}
/// Returns `true` if `code` is a low (trailing) surrogate, i.e. in `0xDC00..=0xDFFF`.
#[inline]
fn is_low_surrogate(code: u16) -> bool {
    (0xDC00..=0xDFFF).contains(&code)
}
624
625impl<'text> TextSource<'text> for [u16] {
626    type CharIter = Utf16CharIter<'text>;
627    type CharIndexIter = Utf16CharIndexIter<'text>;
628    type IndexLenIter = Utf16IndexLenIter<'text>;
629
630    #[inline]
631    fn len(&self) -> usize {
632        (self as &[u16]).len()
633    }
634    fn char_at(&self, index: usize) -> Option<(char, usize)> {
635        if index >= self.len() {
636            return None;
637        }
638        // Get the indicated code unit and try simply converting it to a char;
639        // this will fail if it is half of a surrogate pair.
640        let c = self[index];
641        if let Some(ch) = char::from_u32(c.into()) {
642            return Some((ch, 1));
643        }
644        // If it's a low surrogate, and was immediately preceded by a high surrogate,
645        // then we're in the middle of a (valid) character, and should return None.
646        if is_low_surrogate(c) && index > 0 && is_high_surrogate(self[index - 1]) {
647            return None;
648        }
649        // Otherwise, try to decode, returning REPLACEMENT_CHARACTER for errors.
650        if let Some(ch) = char::decode_utf16(self[index..].iter().cloned()).next() {
651            if let Ok(ch) = ch {
652                // This must be a surrogate pair, otherwise char::from_u32() above should
653                // have succeeded!
654                debug_assert!(ch.len_utf16() == 2, "BMP should have already been handled");
655                return Some((ch, ch.len_utf16()));
656            }
657        } else {
658            debug_assert!(
659                false,
660                "Why did decode_utf16 return None when we're not at the end?"
661            );
662            return None;
663        }
664        // Failed to decode UTF-16: we must have encountered an unpaired surrogate.
665        // Return REPLACEMENT_CHARACTER (not None), to continue processing the following text
666        // and keep indexing correct.
667        Some((char::REPLACEMENT_CHARACTER, 1))
668    }
669    #[inline]
670    fn subrange(&self, range: Range<usize>) -> &Self {
671        &(self as &[u16])[range]
672    }
673    #[inline]
674    fn chars(&'text self) -> Self::CharIter {
675        Utf16CharIter::new(self)
676    }
677    #[inline]
678    fn char_indices(&'text self) -> Self::CharIndexIter {
679        Utf16CharIndexIter::new(self)
680    }
681    #[inline]
682    fn indices_lengths(&'text self) -> Self::IndexLenIter {
683        Utf16IndexLenIter::new(self)
684    }
685    #[inline]
686    fn char_len(ch: char) -> usize {
687        ch.len_utf16()
688    }
689}
690
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char_len) tuple.
#[derive(Debug)]
pub struct Utf16IndexLenIter<'text> {
    /// The text being iterated, as UTF-16 code units.
    text: &'text [u16],
    /// Code unit offset of the next character to report.
    cur_pos: usize,
}

impl<'text> Utf16IndexLenIter<'text> {
    /// Creates an iterator positioned at the start of `text`.
    #[inline]
    pub fn new(text: &'text [u16]) -> Self {
        Self { text, cur_pos: 0 }
    }
}
704
705impl Iterator for Utf16IndexLenIter<'_> {
706    type Item = (usize, usize);
707
708    #[inline]
709    fn next(&mut self) -> Option<Self::Item> {
710        if let Some((_, char_len)) = self.text.char_at(self.cur_pos) {
711            let result = (self.cur_pos, char_len);
712            self.cur_pos += char_len;
713            return Some(result);
714        }
715        None
716    }
717}
718
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char) tuple.
#[derive(Debug)]
pub struct Utf16CharIndexIter<'text> {
    /// The text being iterated, as UTF-16 code units.
    text: &'text [u16],
    /// Code unit offset of the next character to report.
    cur_pos: usize,
}

impl<'text> Utf16CharIndexIter<'text> {
    /// Creates an iterator positioned at the start of `text`.
    pub fn new(text: &'text [u16]) -> Self {
        Self { text, cur_pos: 0 }
    }
}
731
732impl Iterator for Utf16CharIndexIter<'_> {
733    type Item = (usize, char);
734
735    fn next(&mut self) -> Option<Self::Item> {
736        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
737            let result = (self.cur_pos, ch);
738            self.cur_pos += char_len;
739            return Some(result);
740        }
741        None
742    }
743}
744
/// Iterator over UTF-16 text in a [u16] slice, returning Unicode chars.
/// (Unlike the other iterators above, this also supports reverse iteration.)
#[derive(Debug)]
pub struct Utf16CharIter<'text> {
    /// The text being iterated, as UTF-16 code units.
    text: &'text [u16],
    /// Code unit offset of the next character to yield from the front.
    cur_pos: usize,
    /// Code unit offset just past the next character to yield from the back.
    end_pos: usize,
}

impl<'text> Utf16CharIter<'text> {
    /// Creates an iterator spanning all of `text`.
    pub fn new(text: &'text [u16]) -> Self {
        let end_pos = text.len();
        Self {
            text,
            cur_pos: 0,
            end_pos,
        }
    }
}
763
764impl Iterator for Utf16CharIter<'_> {
765    type Item = char;
766
767    fn next(&mut self) -> Option<Self::Item> {
768        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
769            self.cur_pos += char_len;
770            return Some(ch);
771        }
772        None
773    }
774}
775
776impl DoubleEndedIterator for Utf16CharIter<'_> {
777    fn next_back(&mut self) -> Option<Self::Item> {
778        if self.end_pos <= self.cur_pos {
779            return None;
780        }
781        self.end_pos -= 1;
782        if let Some(ch) = char::from_u32(self.text[self.end_pos] as u32) {
783            return Some(ch);
784        }
785        if self.end_pos > self.cur_pos {
786            if let Some((ch, char_len)) = self.text.char_at(self.end_pos - 1) {
787                if char_len == 2 {
788                    self.end_pos -= 1;
789                    return Some(ch);
790                }
791            }
792        }
793        Some(char::REPLACEMENT_CHARACTER)
794    }
795}