1#![no_std]
69#[cfg(feature = "std")]
71extern crate std;
72#[macro_use]
73extern crate alloc;
74#[cfg(feature = "smallvec")]
75extern crate smallvec;
76
77pub mod data_source;
78pub mod deprecated;
79pub mod format_chars;
80pub mod level;
81pub mod utf16;
82
83mod char_data;
84mod explicit;
85mod implicit;
86mod prepare;
87
88pub use crate::char_data::{BidiClass, UNICODE_VERSION};
89pub use crate::data_source::BidiDataSource;
90pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
91pub use crate::prepare::{LevelRun, LevelRunVec};
92
93#[cfg(feature = "hardcoded-data")]
94pub use crate::char_data::{bidi_class, HardcodedBidiData};
95
96use alloc::borrow::Cow;
97use alloc::string::String;
98use alloc::vec::Vec;
99use core::char;
100use core::cmp;
101use core::iter::repeat;
102use core::ops::Range;
103use core::str::CharIndices;
104#[cfg(feature = "smallvec")]
105use smallvec::SmallVec;
106
107use crate::format_chars as chars;
108use crate::BidiClass::*;
109
/// Text representation that the bidi routines in this file can process.
///
/// The trait is sealed through `private::Sealed`; in this file that marker is
/// implemented for `str` and `[u16]` only. Indices and lengths are counted in
/// the units of the underlying buffer (bytes for the `str` implementation).
pub trait TextSource<'text>: private::Sealed {
    /// Iterator type returned by `chars`.
    type CharIter: Iterator<Item = char>;
    /// Iterator type returned by `char_indices`.
    type CharIndexIter: Iterator<Item = (usize, char)>;
    /// Iterator type returned by `indices_lengths`.
    type IndexLenIter: Iterator<Item = (usize, usize)>;

    /// Length of the text, in the buffer's own units.
    #[doc(hidden)]
    fn len(&self) -> usize;

    /// Character starting at `index` together with its length, or `None` when
    /// no character can be read at that position.
    #[doc(hidden)]
    fn char_at(&self, index: usize) -> Option<(char, usize)>;

    /// Sub-slice of the text covering `range`.
    #[doc(hidden)]
    fn subrange(&self, range: Range<usize>) -> &Self;

    /// Iterates over the characters of the text.
    #[doc(hidden)]
    fn chars(&'text self) -> Self::CharIter;

    /// Iterates over `(index, char)` pairs.
    #[doc(hidden)]
    fn char_indices(&'text self) -> Self::CharIndexIter;

    /// Iterates over `(index, length)` pairs, one per character.
    #[doc(hidden)]
    fn indices_lengths(&'text self) -> Self::IndexLenIter;

    /// Number of units `ch` occupies in this text representation.
    #[doc(hidden)]
    fn char_len(ch: char) -> usize;
}
155
/// Holds the marker trait that seals `TextSource`: the module is private, so
/// code outside this crate cannot name `Sealed` and therefore cannot implement
/// `TextSource` for its own types.
mod private {
    /// Marker supertrait of `TextSource`.
    pub trait Sealed {}

    // UTF-8 text.
    impl Sealed for str {}
    // Slices of 16-bit code units (see the `utf16` module).
    impl Sealed for [u16] {}
}
163
/// Direction of a piece of text.
///
/// The enum is a plain tag without payload, so it is cheap to copy and can be
/// compared for full equality.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Direction {
    /// Left-to-right.
    Ltr,
    /// Right-to-left.
    Rtl,
    /// Both directions are present; the base-direction helpers in this file
    /// also return this value when no strong character was found.
    Mixed,
}
170
/// Location and resolved base level of one paragraph inside a larger text.
#[derive(Clone, Debug, PartialEq)]
pub struct ParagraphInfo {
    /// Span of the paragraph within the text, in the text's own index units.
    /// When the text is split into paragraphs, the paragraph separator is
    /// included at the end of the range it terminates.
    pub range: Range<usize>,

    /// Paragraph embedding level: either the caller-supplied default or the
    /// level derived from the first strong character outside any isolate,
    /// falling back to `LTR_LEVEL` when neither is available.
    pub level: Level,
}
184
185impl ParagraphInfo {
186 pub fn len(&self) -> usize {
188 self.range.end - self.range.start
189 }
190}
191
/// Result of the first analysis pass over a UTF-8 text: character classes and
/// paragraph boundaries, without resolved embedding levels.
#[derive(PartialEq, Debug)]
pub struct InitialInfo<'text> {
    /// The text that was analysed.
    pub text: &'text str,

    /// Bidi class of every byte of `text`; a multi-byte character contributes
    /// its class once per byte, so this vector has `text.len()` entries.
    pub original_classes: Vec<BidiClass>,

    /// Boundaries and base level of each paragraph found in `text`.
    pub paragraphs: Vec<ParagraphInfo>,
}
207
208impl<'text> InitialInfo<'text> {
209 #[cfg_attr(feature = "flame_it", flamer::flame)]
219 #[cfg(feature = "hardcoded-data")]
220 pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo<'_> {
221 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
222 }
223
224 #[cfg_attr(feature = "flame_it", flamer::flame)]
234 pub fn new_with_data_source<'a, D: BidiDataSource>(
235 data_source: &D,
236 text: &'a str,
237 default_para_level: Option<Level>,
238 ) -> InitialInfo<'a> {
239 InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
240 }
241}
242
/// `InitialInfo` plus per-paragraph flags that are only needed internally
/// while computing embedding levels.
#[derive(PartialEq, Debug)]
struct InitialInfoExt<'text> {
    /// The publicly visible part of the analysis.
    base: InitialInfo<'text>,

    /// One entry per paragraph, stored in the same order as
    /// `base.paragraphs` (both vectors are pushed to together).
    flags: Vec<ParagraphInfoFlags>,
}
254
/// Facts about a single paragraph gathered during the initial pass.
#[derive(PartialEq, Debug)]
struct ParagraphInfoFlags {
    /// `true` while the paragraph has shown no R/AL/AN character, no explicit
    /// embedding or override control and no isolate initiator.
    is_pure_ltr: bool,
    /// `true` once an isolate initiator (RLI, LRI or FSI) has been seen in
    /// the paragraph.
    has_isolate_controls: bool,
}
260
261impl<'text> InitialInfoExt<'text> {
262 #[cfg_attr(feature = "flame_it", flamer::flame)]
272 pub fn new_with_data_source<'a, D: BidiDataSource>(
273 data_source: &D,
274 text: &'a str,
275 default_para_level: Option<Level>,
276 ) -> InitialInfoExt<'a> {
277 let mut paragraphs = Vec::<ParagraphInfo>::new();
278 let mut flags = Vec::<ParagraphInfoFlags>::new();
279 let (original_classes, _, _, _) = compute_initial_info(
280 data_source,
281 text,
282 default_para_level,
283 Some((&mut paragraphs, &mut flags)),
284 );
285
286 InitialInfoExt {
287 base: InitialInfo {
288 text,
289 original_classes,
290 paragraphs,
291 },
292 flags,
293 }
294 }
295}
296
297fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
305 data_source: &D,
306 text: &'a T,
307 default_para_level: Option<Level>,
308 mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
309) -> (Vec<BidiClass>, Level, bool, bool) {
310 let mut original_classes = Vec::with_capacity(text.len());
311
312 #[cfg(feature = "smallvec")]
314 let mut isolate_stack = SmallVec::<[usize; 8]>::new();
315 #[cfg(not(feature = "smallvec"))]
316 let mut isolate_stack = Vec::new();
317
318 debug_assert!(
319 if let Some((ref paragraphs, ref flags)) = split_paragraphs {
320 paragraphs.is_empty() && flags.is_empty()
321 } else {
322 true
323 }
324 );
325
326 let mut para_start = 0;
327 let mut para_level = default_para_level;
328
329 let mut is_pure_ltr = true;
332 let mut has_isolate_controls = false;
334
335 #[cfg(feature = "flame_it")]
336 flame::start("compute_initial_info(): iter text.char_indices()");
337
338 for (i, c) in text.char_indices() {
339 let class = data_source.bidi_class(c);
340
341 #[cfg(feature = "flame_it")]
342 flame::start("original_classes.extend()");
343
344 let len = T::char_len(c);
345 original_classes.extend(repeat(class).take(len));
346
347 #[cfg(feature = "flame_it")]
348 flame::end("original_classes.extend()");
349
350 match class {
351 B => {
352 if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
353 let para_end = i + len;
356 paragraphs.push(ParagraphInfo {
357 range: para_start..para_end,
358 level: para_level.unwrap_or(LTR_LEVEL),
360 });
361 flags.push(ParagraphInfoFlags {
362 is_pure_ltr,
363 has_isolate_controls,
364 });
365 para_start = para_end;
367 para_level = default_para_level;
371 is_pure_ltr = true;
372 has_isolate_controls = false;
373 isolate_stack.clear();
374 }
375 }
376
377 L | R | AL => {
378 if class != L {
379 is_pure_ltr = false;
380 }
381 match isolate_stack.last() {
382 Some(&start) => {
383 if original_classes[start] == FSI {
384 for j in 0..T::char_len(chars::FSI) {
387 original_classes[start + j] = if class == L { LRI } else { RLI };
388 }
389 }
390 }
391
392 None => {
393 if para_level.is_none() {
394 para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL });
398 }
399 }
400 }
401 }
402
403 AN | LRE | RLE | LRO | RLO => {
404 is_pure_ltr = false;
405 }
406
407 RLI | LRI | FSI => {
408 is_pure_ltr = false;
409 has_isolate_controls = true;
410 isolate_stack.push(i);
411 }
412
413 PDI => {
414 isolate_stack.pop();
415 }
416
417 _ => {}
418 }
419 }
420
421 if let Some((paragraphs, flags)) = split_paragraphs {
422 if para_start < text.len() {
423 paragraphs.push(ParagraphInfo {
424 range: para_start..text.len(),
425 level: para_level.unwrap_or(LTR_LEVEL),
426 });
427 flags.push(ParagraphInfoFlags {
428 is_pure_ltr,
429 has_isolate_controls,
430 });
431 }
432 debug_assert_eq!(paragraphs.len(), flags.len());
433 }
434 debug_assert_eq!(original_classes.len(), text.len());
435
436 #[cfg(feature = "flame_it")]
437 flame::end("compute_initial_info(): iter text.char_indices()");
438
439 (
440 original_classes,
441 para_level.unwrap_or(LTR_LEVEL),
442 is_pure_ltr,
443 has_isolate_controls,
444 )
445}
446
/// Bidi analysis of a UTF-8 text that may contain several paragraphs.
#[derive(Debug, PartialEq)]
pub struct BidiInfo<'text> {
    /// The text that was analysed.
    pub text: &'text str,

    /// Bidi class of every byte of `text` (a character's class is repeated
    /// for each byte it occupies).
    pub original_classes: Vec<BidiClass>,

    /// Resolved embedding level of every byte of `text`.
    pub levels: Vec<Level>,

    /// Boundaries and base level of each paragraph found in `text`.
    pub paragraphs: Vec<ParagraphInfo>,
}
470
471impl<'text> BidiInfo<'text> {
472 #[cfg_attr(feature = "flame_it", flamer::flame)]
482 #[cfg(feature = "hardcoded-data")]
483 #[inline]
484 pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo<'_> {
485 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
486 }
487
488 #[cfg_attr(feature = "flame_it", flamer::flame)]
497 pub fn new_with_data_source<'a, D: BidiDataSource>(
498 data_source: &D,
499 text: &'a str,
500 default_para_level: Option<Level>,
501 ) -> BidiInfo<'a> {
502 let InitialInfoExt { base, flags, .. } =
503 InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
504
505 let mut levels = Vec::<Level>::with_capacity(text.len());
506 let mut processing_classes = base.original_classes.clone();
507
508 for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
509 let text = &text[para.range.clone()];
510 let original_classes = &base.original_classes[para.range.clone()];
511
512 compute_bidi_info_for_para(
513 data_source,
514 para,
515 flags.is_pure_ltr,
516 flags.has_isolate_controls,
517 text,
518 original_classes,
519 &mut processing_classes,
520 &mut levels,
521 );
522 }
523
524 BidiInfo {
525 text,
526 original_classes: base.original_classes,
527 paragraphs: base.paragraphs,
528 levels,
529 }
530 }
531
532 #[cfg_attr(feature = "flame_it", flamer::flame)]
546 pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
547 assert!(line.start <= self.levels.len());
548 assert!(line.end <= self.levels.len());
549
550 let mut levels = self.levels.clone();
551 let line_classes = &self.original_classes[line.clone()];
552 let line_levels = &mut levels[line.clone()];
553
554 reorder_levels(
555 line_classes,
556 line_levels,
557 self.text.subrange(line),
558 para.level,
559 );
560
561 levels
562 }
563
564 #[cfg_attr(feature = "flame_it", flamer::flame)]
578 pub fn reordered_levels_per_char(
579 &self,
580 para: &ParagraphInfo,
581 line: Range<usize>,
582 ) -> Vec<Level> {
583 let levels = self.reordered_levels(para, line);
584 self.text.char_indices().map(|(i, _)| levels[i]).collect()
585 }
586
587 #[cfg_attr(feature = "flame_it", flamer::flame)]
594 pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
595 if !level::has_rtl(&self.levels[line.clone()]) {
596 return self.text[line].into();
597 }
598 let (levels, runs) = self.visual_runs(para, line.clone());
599 reorder_line(self.text, line, levels, runs)
600 }
601
602 #[cfg_attr(feature = "flame_it", flamer::flame)]
638 #[inline]
639 pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
640 reorder_visual(levels)
641 }
642
643 #[cfg_attr(feature = "flame_it", flamer::flame)]
668 #[inline]
669 pub fn visual_runs(
670 &self,
671 para: &ParagraphInfo,
672 line: Range<usize>,
673 ) -> (Vec<Level>, Vec<LevelRun>) {
674 let levels = self.reordered_levels(para, line.clone());
675 visual_runs_for_line(levels, &line)
676 }
677
678 #[inline]
682 pub fn has_rtl(&self) -> bool {
683 level::has_rtl(&self.levels)
684 }
685}
686
/// Bidi analysis of a UTF-8 text that is treated as one single paragraph,
/// regardless of any paragraph separators it contains.
#[derive(Debug, PartialEq)]
pub struct ParagraphBidiInfo<'text> {
    /// The text that was analysed.
    pub text: &'text str,

    /// Bidi class of every byte of `text` (a character's class is repeated
    /// for each byte it occupies).
    pub original_classes: Vec<BidiClass>,

    /// Resolved embedding level of every byte of `text`.
    pub levels: Vec<Level>,

    /// Base embedding level of the paragraph.
    pub paragraph_level: Level,

    /// `true` when the initial pass found no R/AL/AN character, no explicit
    /// embedding or override control and no isolate initiator in the text.
    pub is_pure_ltr: bool,
}
709
710impl<'text> ParagraphBidiInfo<'text> {
711 #[cfg_attr(feature = "flame_it", flamer::flame)]
721 #[cfg(feature = "hardcoded-data")]
722 #[inline]
723 pub fn new(text: &str, default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
724 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
725 }
726
727 #[cfg_attr(feature = "flame_it", flamer::flame)]
734 pub fn new_with_data_source<'a, D: BidiDataSource>(
735 data_source: &D,
736 text: &'a str,
737 default_para_level: Option<Level>,
738 ) -> ParagraphBidiInfo<'a> {
739 let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
742 compute_initial_info(data_source, text, default_para_level, None);
743
744 let mut levels = Vec::<Level>::with_capacity(text.len());
745 let mut processing_classes = original_classes.clone();
746
747 let para_info = ParagraphInfo {
748 range: Range {
749 start: 0,
750 end: text.len(),
751 },
752 level: paragraph_level,
753 };
754
755 compute_bidi_info_for_para(
756 data_source,
757 ¶_info,
758 is_pure_ltr,
759 has_isolate_controls,
760 text,
761 &original_classes,
762 &mut processing_classes,
763 &mut levels,
764 );
765
766 ParagraphBidiInfo {
767 text,
768 original_classes,
769 levels,
770 paragraph_level,
771 is_pure_ltr,
772 }
773 }
774
775 #[cfg_attr(feature = "flame_it", flamer::flame)]
783 pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
784 assert!(line.start <= self.levels.len());
785 assert!(line.end <= self.levels.len());
786
787 let mut levels = self.levels.clone();
788 let line_classes = &self.original_classes[line.clone()];
789 let line_levels = &mut levels[line.clone()];
790
791 reorder_levels(
792 line_classes,
793 line_levels,
794 self.text.subrange(line),
795 self.paragraph_level,
796 );
797
798 levels
799 }
800
801 #[cfg_attr(feature = "flame_it", flamer::flame)]
809 pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
810 let levels = self.reordered_levels(line);
811 self.text.char_indices().map(|(i, _)| levels[i]).collect()
812 }
813
814 #[cfg_attr(feature = "flame_it", flamer::flame)]
820 pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, str> {
821 if !level::has_rtl(&self.levels[line.clone()]) {
822 return self.text[line].into();
823 }
824
825 let (levels, runs) = self.visual_runs(line.clone());
826
827 reorder_line(self.text, line, levels, runs)
828 }
829
830 #[cfg_attr(feature = "flame_it", flamer::flame)]
834 #[inline]
835 pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
836 reorder_visual(levels)
837 }
838
839 #[cfg_attr(feature = "flame_it", flamer::flame)]
847 #[inline]
848 pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
849 let levels = self.reordered_levels(line.clone());
850 visual_runs_for_line(levels, &line)
851 }
852
853 #[inline]
857 pub fn has_rtl(&self) -> bool {
858 !self.is_pure_ltr
859 }
860
861 #[inline]
863 pub fn direction(&self) -> Direction {
864 para_direction(&self.levels)
865 }
866}
867
868fn reorder_line(
885 text: &str,
886 line: Range<usize>,
887 levels: Vec<Level>,
888 runs: Vec<LevelRun>,
889) -> Cow<'_, str> {
890 if runs.iter().all(|run| levels[run.start].is_ltr()) {
892 return text[line].into();
893 }
894
895 let mut result = String::with_capacity(line.len());
896 for run in runs {
897 if levels[run.start].is_rtl() {
898 result.extend(text[run].chars().rev());
899 } else {
900 result.push_str(&text[run]);
901 }
902 }
903 result.into()
904}
905
906fn visual_runs_for_line(levels: Vec<Level>, line: &Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
931 let mut runs = Vec::new();
933 let mut start = line.start;
934 let mut run_level = levels[start];
935 let mut min_level = run_level;
936 let mut max_level = run_level;
937
938 for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) {
939 if new_level != run_level {
940 runs.push(start..i);
942 start = i;
943 run_level = new_level;
944 min_level = cmp::min(run_level, min_level);
945 max_level = cmp::max(run_level, max_level);
946 }
947 }
948 runs.push(start..line.end);
949
950 let run_count = runs.len();
951
952 min_level = min_level.new_lowest_ge_rtl().expect("Level error");
957 while max_level >= min_level {
960 let mut seq_start = 0;
962 while seq_start < run_count {
963 if levels[runs[seq_start].start] < max_level {
964 seq_start += 1;
965 continue;
966 }
967
968 let mut seq_end = seq_start + 1;
970 while seq_end < run_count {
971 if levels[runs[seq_end].start] < max_level {
972 break;
973 }
974 seq_end += 1;
975 }
976 runs[seq_start..seq_end].reverse();
978
979 seq_start = seq_end;
980 }
981 max_level
982 .lower(1)
983 .expect("Lowering embedding level below zero");
984 }
985 (levels, runs)
986}
987
988fn reorder_visual(levels: &[Level]) -> Vec<usize> {
1003 fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range<usize> {
1006 if levels.is_empty() || start_index >= levels.len() {
1007 return start_index..start_index;
1008 }
1009 while let Some(l) = levels.get(start_index) {
1010 if *l >= max {
1011 break;
1012 }
1013 start_index += 1;
1014 }
1015
1016 if levels.get(start_index).is_none() {
1017 return start_index..start_index;
1020 }
1021
1022 let mut end_index = start_index + 1;
1023 while let Some(l) = levels.get(end_index) {
1024 if *l < max {
1025 return start_index..end_index;
1026 }
1027 end_index += 1;
1028 }
1029
1030 start_index..end_index
1031 }
1032
1033 if levels.is_empty() {
1037 return vec![];
1038 }
1039
1040 let (mut min, mut max) = levels
1042 .iter()
1043 .fold((levels[0], levels[0]), |(min, max), &l| {
1044 (cmp::min(min, l), cmp::max(max, l))
1045 });
1046
1047 let mut result: Vec<usize> = (0..levels.len()).collect();
1049
1050 if min == max && min.is_ltr() {
1051 return result;
1053 }
1054
1055 min = min.new_lowest_ge_rtl().expect("Level error");
1058
1059 while min <= max {
1066 let mut range = 0..0;
1067 loop {
1068 range = next_range(levels, range.end, max);
1069 result[range.clone()].reverse();
1070
1071 if range.end >= levels.len() {
1072 break;
1073 }
1074 }
1075
1076 max.lower(1).expect("Level error");
1077 }
1078
1079 result
1080}
1081
1082fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1085 data_source: &D,
1086 para: &ParagraphInfo,
1087 is_pure_ltr: bool,
1088 has_isolate_controls: bool,
1089 text: &'a T,
1090 original_classes: &[BidiClass],
1091 processing_classes: &mut [BidiClass],
1092 levels: &mut Vec<Level>,
1093) {
1094 let new_len = levels.len() + para.range.len();
1095 levels.resize(new_len, para.level);
1096 if para.level == LTR_LEVEL && is_pure_ltr {
1097 return;
1098 }
1099
1100 let processing_classes = &mut processing_classes[para.range.clone()];
1101 let levels = &mut levels[para.range.clone()];
1102 let mut level_runs = LevelRunVec::new();
1103
1104 explicit::compute(
1105 text,
1106 para.level,
1107 original_classes,
1108 levels,
1109 processing_classes,
1110 &mut level_runs,
1111 );
1112
1113 let mut sequences = prepare::IsolatingRunSequenceVec::new();
1114 prepare::isolating_run_sequences(
1115 para.level,
1116 original_classes,
1117 levels,
1118 level_runs,
1119 has_isolate_controls,
1120 &mut sequences,
1121 );
1122 for sequence in &sequences {
1123 implicit::resolve_weak(text, sequence, processing_classes);
1124 implicit::resolve_neutral(
1125 text,
1126 data_source,
1127 sequence,
1128 levels,
1129 original_classes,
1130 processing_classes,
1131 );
1132 }
1133
1134 implicit::resolve_levels(processing_classes, levels);
1135
1136 assign_levels_to_removed_chars(para.level, original_classes, levels);
1137}
1138
1139fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
1147 line_classes: &[BidiClass],
1148 line_levels: &mut [Level],
1149 line_text: &'a T,
1150 para_level: Level,
1151) {
1152 let mut reset_from: Option<usize> = Some(0);
1155 let mut reset_to: Option<usize> = None;
1156 let mut prev_level = para_level;
1157 for ((i, c), (_, length)) in line_text.char_indices().zip(line_text.indices_lengths()) {
1158 match line_classes[i] {
1159 B | S => {
1161 assert_eq!(reset_to, None);
1162 reset_to = Some(i + T::char_len(c));
1163 if reset_from.is_none() {
1164 reset_from = Some(i);
1165 }
1166 }
1167 WS | FSI | LRI | RLI | PDI => {
1169 if reset_from.is_none() {
1170 reset_from = Some(i);
1171 }
1172 }
1173 RLE | LRE | RLO | LRO | PDF | BN => {
1176 if reset_from.is_none() {
1177 reset_from = Some(i);
1178 }
1179 for level in &mut line_levels[i..i + length] {
1181 *level = prev_level;
1182 }
1183 }
1184 _ => {
1185 reset_from = None;
1186 }
1187 }
1188 if let (Some(from), Some(to)) = (reset_from, reset_to) {
1189 for level in &mut line_levels[from..to] {
1190 *level = para_level;
1191 }
1192 reset_from = None;
1193 reset_to = None;
1194 }
1195 prev_level = line_levels[i];
1196 }
1197 if let Some(from) = reset_from {
1198 for level in &mut line_levels[from..] {
1199 *level = para_level;
1200 }
1201 }
1202}
1203
/// Convenience view that pairs a `BidiInfo` with one of its paragraphs.
#[derive(Debug)]
pub struct Paragraph<'a, 'text> {
    /// Analysis of the whole text.
    pub info: &'a BidiInfo<'text>,
    /// The paragraph of interest; its range is used to index into
    /// `info.levels`.
    pub para: &'a ParagraphInfo,
}
1212
1213impl<'a, 'text> Paragraph<'a, 'text> {
1214 #[inline]
1215 pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
1216 Paragraph { info, para }
1217 }
1218
1219 #[inline]
1221 pub fn direction(&self) -> Direction {
1222 para_direction(&self.info.levels[self.para.range.clone()])
1223 }
1224
1225 #[inline]
1227 pub fn level_at(&self, pos: usize) -> Level {
1228 let actual_position = self.para.range.start + pos;
1229 self.info.levels[actual_position]
1230 }
1231}
1232
1233#[cfg_attr(feature = "flame_it", flamer::flame)]
1235fn para_direction(levels: &[Level]) -> Direction {
1236 let mut ltr = false;
1237 let mut rtl = false;
1238 for level in levels {
1239 if level.is_ltr() {
1240 ltr = true;
1241 if rtl {
1242 return Direction::Mixed;
1243 }
1244 }
1245
1246 if level.is_rtl() {
1247 rtl = true;
1248 if ltr {
1249 return Direction::Mixed;
1250 }
1251 }
1252 }
1253
1254 if ltr {
1255 return Direction::Ltr;
1256 }
1257
1258 Direction::Rtl
1259}
1260
1261#[cfg_attr(feature = "flame_it", flamer::flame)]
1266fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
1267 for i in 0..levels.len() {
1268 if prepare::removed_by_x9(classes[i]) {
1269 levels[i] = if i > 0 { levels[i - 1] } else { para_level };
1270 }
1271 }
1272}
1273
/// Determines the base direction of the first paragraph of `text` using the
/// crate's built-in character data.
///
/// The result is the direction of the first strong character (L, R or AL)
/// that is not inside an isolate. Scanning stops at the first paragraph
/// separator; `Direction::Mixed` is returned when no such character was
/// found.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    get_base_direction_with_data_source(&HardcodedBidiData, text)
}
1295
/// Determines the base direction of `text` using the crate's built-in
/// character data, looking through the entire text.
///
/// Unlike `get_base_direction`, scanning continues past paragraph separators
/// until a strong character (L, R or AL) outside any isolate is found.
/// `Direction::Mixed` is returned when there is none.
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn get_base_direction_full<'a, T: TextSource<'a> + ?Sized>(text: &'a T) -> Direction {
    get_base_direction_full_with_data_source(&HardcodedBidiData, text)
}
1308
/// Determines the base direction of the first paragraph of `text`, looking up
/// character classes through `data_source`.
///
/// The result is the direction of the first strong character (L, R or AL)
/// that is not inside an isolate. Scanning stops at the first paragraph
/// separator; `Direction::Mixed` is returned when no such character was
/// found.
#[inline]
pub fn get_base_direction_with_data_source<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
    data_source: &D,
    text: &'a T,
) -> Direction {
    get_base_direction_impl(data_source, text, false)
}
1316
/// Determines the base direction of `text`, looking up character classes
/// through `data_source` and scanning the entire text.
///
/// Scanning continues past paragraph separators until a strong character
/// (L, R or AL) outside any isolate is found. `Direction::Mixed` is returned
/// when there is none.
#[inline]
pub fn get_base_direction_full_with_data_source<
    'a,
    D: BidiDataSource,
    T: TextSource<'a> + ?Sized,
>(
    data_source: &D,
    text: &'a T,
) -> Direction {
    get_base_direction_impl(data_source, text, true)
}
1328
1329fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
1330 data_source: &D,
1331 text: &'a T,
1332 use_full_text: bool,
1333) -> Direction {
1334 let mut isolate_level = 0;
1335 for c in text.chars() {
1336 match data_source.bidi_class(c) {
1337 LRI | RLI | FSI => isolate_level += 1,
1338 PDI if isolate_level > 0 => isolate_level -= 1,
1339 L if isolate_level == 0 => return Direction::Ltr,
1340 R | AL if isolate_level == 0 => return Direction::Rtl,
1341 B if !use_full_text => break,
1342 B if use_full_text => isolate_level = 0,
1343 _ => (),
1344 }
1345 }
1346 Direction::Mixed
1350}
1351
1352impl<'text> TextSource<'text> for str {
1354 type CharIter = core::str::Chars<'text>;
1355 type CharIndexIter = core::str::CharIndices<'text>;
1356 type IndexLenIter = Utf8IndexLenIter<'text>;
1357
1358 #[inline]
1359 fn len(&self) -> usize {
1360 (self as &str).len()
1361 }
1362 #[inline]
1363 fn char_at(&self, index: usize) -> Option<(char, usize)> {
1364 if let Some(slice) = self.get(index..) {
1365 if let Some(ch) = slice.chars().next() {
1366 return Some((ch, ch.len_utf8()));
1367 }
1368 }
1369 None
1370 }
1371 #[inline]
1372 fn subrange(&self, range: Range<usize>) -> &Self {
1373 &(self as &str)[range]
1374 }
1375 #[inline]
1376 fn chars(&'text self) -> Self::CharIter {
1377 (self as &str).chars()
1378 }
1379 #[inline]
1380 fn char_indices(&'text self) -> Self::CharIndexIter {
1381 (self as &str).char_indices()
1382 }
1383 #[inline]
1384 fn indices_lengths(&'text self) -> Self::IndexLenIter {
1385 Utf8IndexLenIter::new(self)
1386 }
1387 #[inline]
1388 fn char_len(ch: char) -> usize {
1389 ch.len_utf8()
1390 }
1391}
1392
/// Iterator over the characters of a UTF-8 string that yields, for each
/// character, its byte offset and its encoded length in bytes.
#[derive(Debug)]
pub struct Utf8IndexLenIter<'text> {
    /// Underlying `(offset, char)` iterator.
    iter: CharIndices<'text>,
}

impl<'text> Utf8IndexLenIter<'text> {
    /// Creates an iterator positioned at the start of `text`.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        let iter = text.char_indices();
        Self { iter }
    }
}

impl Iterator for Utf8IndexLenIter<'_> {
    type Item = (usize, usize);

    /// Advances to the next character and returns `(byte offset, byte length)`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|(pos, ch)| (pos, ch.len_utf8()))
    }
}
1419
/// Test helper: encodes `s` as a vector of UTF-16 code units.
#[cfg(test)]
fn to_utf16(s: &str) -> Vec<u16> {
    let mut units = Vec::new();
    units.extend(s.encode_utf16());
    units
}
1424
1425#[cfg(test)]
1426#[cfg(feature = "hardcoded-data")]
1427mod tests {
1428 use super::*;
1429
1430 use utf16::{
1431 BidiInfo as BidiInfoU16, InitialInfo as InitialInfoU16, Paragraph as ParagraphU16,
1432 ParagraphBidiInfo as ParagraphBidiInfoU16,
1433 };
1434
    /// Checks `char_at` on a slice of 16-bit units that mixes a valid
    /// surrogate pair with unpaired surrogates.
    #[test]
    fn test_utf16_text_source() {
        let text: &[u16] =
            &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
        assert_eq!(text.char_at(0), Some(('A', 1)));
        // A valid surrogate pair is one character spanning two units.
        assert_eq!(text.char_at(1), Some(('\u{10401}', 2)));
        // Index 2 is the second half of that pair: no character starts here.
        assert_eq!(text.char_at(2), None);
        assert_eq!(text.char_at(3), Some((' ', 1)));
        // Unpaired surrogates are reported as the replacement character.
        assert_eq!(text.char_at(4), Some((char::REPLACEMENT_CHARACTER, 1)));
        assert_eq!(text.char_at(5), Some((' ', 1)));
        assert_eq!(text.char_at(6), Some((char::REPLACEMENT_CHARACTER, 1)));
        assert_eq!(text.char_at(7), Some((' ', 1)));
        // Two surrogates in the wrong order: each one is replaced separately.
        assert_eq!(text.char_at(8), Some((char::REPLACEMENT_CHARACTER, 1)));
        assert_eq!(text.char_at(9), Some((char::REPLACEMENT_CHARACTER, 1)));
        // One past the end.
        assert_eq!(text.char_at(10), None);
    }
1451
    /// Checks `len` and `chars` on the same mixed valid/invalid UTF-16 input:
    /// ten code units decode to nine characters.
    #[test]
    fn test_utf16_char_iter() {
        let text: &[u16] =
            &[0x41, 0xD801, 0xDC01, 0x20, 0xD800, 0x20, 0xDFFF, 0x20, 0xDC00, 0xD800];
        assert_eq!(text.len(), 10);
        assert_eq!(text.chars().count(), 9);
        let mut chars = text.chars();
        assert_eq!(chars.next(), Some('A'));
        // The surrogate pair is decoded into a single character.
        assert_eq!(chars.next(), Some('\u{10401}'));
        assert_eq!(chars.next(), Some(' '));
        // Each unpaired surrogate becomes U+FFFD.
        assert_eq!(chars.next(), Some('\u{FFFD}'));
        assert_eq!(chars.next(), Some(' '));
        assert_eq!(chars.next(), Some('\u{FFFD}'));
        assert_eq!(chars.next(), Some(' '));
        assert_eq!(chars.next(), Some('\u{FFFD}'));
        assert_eq!(chars.next(), Some('\u{FFFD}'));
        assert_eq!(chars.next(), None);
    }
1470
    /// Checks the initial analysis (classes and paragraph split) for both the
    /// UTF-8 and the UTF-16 entry points.
    ///
    /// Each table row is
    /// `(text, UTF-8 classes, UTF-8 paragraphs, UTF-16 classes, UTF-16 paragraphs)`.
    /// Classes are listed once per code unit, so the UTF-8 columns are longer
    /// whenever the text contains multi-byte characters.
    #[test]
    fn test_initial_text_info() {
        let tests = vec![
            (
                // text
                "a1",
                // expected classes per UTF-8 byte
                vec![L, EN],
                // expected paragraphs for UTF-8
                vec![ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                }],
                // expected classes per UTF-16 code unit
                vec![L, EN],
                // expected paragraphs for UTF-16
                vec![ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                }],
            ),
            (
                // Two-byte characters in UTF-8, single units in UTF-16.
                "\u{0639} \u{05D0}",
                vec![AL, AL, WS, R, R],
                vec![ParagraphInfo {
                    range: 0..5,
                    level: RTL_LEVEL,
                }],
                vec![AL, WS, R],
                vec![ParagraphInfo {
                    range: 0..3,
                    level: RTL_LEVEL,
                }],
            ),
            (
                // Four-byte characters in UTF-8, surrogate pairs in UTF-16.
                "\u{10A00}\u{12000}\u{1E900}",
                vec![R, R, R, R, L, L, L, L, R, R, R, R],
                vec![ParagraphInfo {
                    range: 0..12,
                    level: RTL_LEVEL,
                }],
                vec![R, R, L, L, R, R],
                vec![ParagraphInfo {
                    range: 0..6,
                    level: RTL_LEVEL,
                }],
            ),
            (
                // The paragraph separator stays with the first paragraph.
                "a\u{2029}b",
                vec![L, B, B, B, L],
                vec![
                    ParagraphInfo {
                        range: 0..4,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 4..5,
                        level: LTR_LEVEL,
                    },
                ],
                vec![L, B, L],
                vec![
                    ParagraphInfo {
                        range: 0..2,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 2..3,
                        level: LTR_LEVEL,
                    },
                ],
            ),
            (
                // The leading U+2068 is expected to be reported as RLI, and
                // the paragraph level comes from the `a` after U+2069.
                "\u{2068}א\u{2069}a",
                vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
                vec![ParagraphInfo {
                    range: 0..9,
                    level: LTR_LEVEL,
                }],
                vec![RLI, R, PDI, L],
                vec![ParagraphInfo {
                    range: 0..4,
                    level: LTR_LEVEL,
                }],
            ),
        ];

        for t in tests {
            // UTF-8 entry point.
            assert_eq!(
                InitialInfo::new(t.0, None),
                InitialInfo {
                    text: t.0,
                    original_classes: t.1,
                    paragraphs: t.2,
                }
            );
            // UTF-16 entry point on the same text.
            let text = &to_utf16(t.0);
            assert_eq!(
                InitialInfoU16::new(text, None),
                InitialInfoU16 {
                    text,
                    original_classes: t.3,
                    paragraphs: t.4,
                }
            );
        }
    }
1580
    /// Checks the full analysis (levels, classes, paragraphs) for both the
    /// UTF-8 and the UTF-16 entry points, and cross-checks the
    /// single-paragraph API on inputs with at most one paragraph.
    ///
    /// Each table row is
    /// `(text, default paragraph level, UTF-8 levels, UTF-8 classes,
    /// UTF-8 paragraphs, UTF-16 levels, UTF-16 classes, UTF-16 paragraphs)`.
    /// Levels and classes are listed once per code unit.
    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_process_text() {
        let tests = vec![
            (
                // text
                "",
                // default paragraph level
                Some(RTL_LEVEL),
                // expected levels per UTF-8 byte
                Level::vec(&[]),
                // expected classes per UTF-8 byte
                vec![],
                // expected paragraphs for UTF-8
                vec![],
                // expected levels per UTF-16 code unit
                Level::vec(&[]),
                // expected classes per UTF-16 code unit
                vec![],
                // expected paragraphs for UTF-16
                vec![],
            ),
            (
                // text
                "abc123",
                // default paragraph level
                Some(LTR_LEVEL),
                // expected levels per UTF-8 byte
                Level::vec(&[0, 0, 0, 0, 0, 0]),
                // expected classes per UTF-8 byte
                vec![L, L, L, EN, EN, EN],
                // expected paragraphs for UTF-8
                vec![ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                }],
                // expected levels per UTF-16 code unit
                Level::vec(&[0, 0, 0, 0, 0, 0]),
                // expected classes per UTF-16 code unit
                vec![L, L, L, EN, EN, EN],
                // expected paragraphs for UTF-16
                vec![ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                }],
            ),
            (
                "abc \u{05D0}\u{05D1}\u{05D2}",
                Some(LTR_LEVEL),
                Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R, R, R, R],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: LTR_LEVEL,
                }],
                Level::vec(&[0, 0, 0, 0, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: LTR_LEVEL,
                }],
            ),
            (
                // Same text as above with a forced RTL paragraph level.
                "abc \u{05D0}\u{05D1}\u{05D2}",
                Some(RTL_LEVEL),
                Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R, R, R, R],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: RTL_LEVEL,
                }],
                Level::vec(&[2, 2, 2, 1, 1, 1, 1]),
                vec![L, L, L, WS, R, R, R],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: RTL_LEVEL,
                }],
            ),
            (
                "\u{05D0}\u{05D1}\u{05D2} abc",
                Some(LTR_LEVEL),
                Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
                vec![R, R, R, R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: LTR_LEVEL,
                }],
                Level::vec(&[1, 1, 1, 0, 0, 0, 0]),
                vec![R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: LTR_LEVEL,
                }],
            ),
            (
                // Same text as above with the paragraph level auto-detected.
                "\u{05D0}\u{05D1}\u{05D2} abc",
                None,
                Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
                vec![R, R, R, R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..10,
                    level: RTL_LEVEL,
                }],
                Level::vec(&[1, 1, 1, 1, 2, 2, 2]),
                vec![R, R, R, WS, L, L, L],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: RTL_LEVEL,
                }],
            ),
            (
                "\u{063A}2\u{0638} \u{05D0}2\u{05D2}",
                Some(LTR_LEVEL),
                Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
                vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
                vec![ParagraphInfo {
                    range: 0..11,
                    level: LTR_LEVEL,
                }],
                Level::vec(&[1, 2, 1, 1, 1, 2, 1]),
                vec![AL, EN, AL, WS, R, EN, R],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: LTR_LEVEL,
                }],
            ),
            (
                // Two paragraphs with different auto-detected levels.
                "a א.\nג",
                None,
                Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
                vec![L, WS, R, R, CS, B, R, R],
                vec![
                    ParagraphInfo {
                        range: 0..6,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 6..8,
                        level: RTL_LEVEL,
                    },
                ],
                Level::vec(&[0, 0, 1, 0, 0, 1]),
                vec![L, WS, R, CS, B, R],
                vec![
                    ParagraphInfo {
                        range: 0..5,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 5..6,
                        level: RTL_LEVEL,
                    },
                ],
            ),
            (
                "\u{060B}\u{20CF}\u{06F9}",
                None,
                Level::vec(&[1, 1, 1, 1, 1, 2, 2]),
                vec![AL, AL, ET, ET, ET, EN, EN],
                vec![ParagraphInfo {
                    range: 0..7,
                    level: RTL_LEVEL,
                }],
                Level::vec(&[1, 1, 2]),
                vec![AL, ET, EN],
                vec![ParagraphInfo {
                    range: 0..3,
                    level: RTL_LEVEL,
                }],
            ),
        ];

        for t in tests {
            // Multi-paragraph API on UTF-8 input.
            assert_eq!(
                BidiInfo::new(t.0, t.1),
                BidiInfo {
                    text: t.0,
                    levels: t.2.clone(),
                    original_classes: t.3.clone(),
                    paragraphs: t.4.clone(),
                }
            );
            // Rows without any paragraph: the single-paragraph API is
            // expected to report an RTL paragraph level and a pure-LTR flag.
            if t.4.len() == 0 {
                assert_eq!(
                    ParagraphBidiInfo::new(t.0, t.1),
                    ParagraphBidiInfo {
                        text: t.0,
                        original_classes: t.3.clone(),
                        levels: t.2.clone(),
                        paragraph_level: RTL_LEVEL,
                        is_pure_ltr: true,
                    }
                )
            }
            // Rows with exactly one paragraph: the single-paragraph API must
            // agree with the multi-paragraph result.
            if t.4.len() == 1 {
                assert_eq!(
                    ParagraphBidiInfo::new(t.0, t.1),
                    ParagraphBidiInfo {
                        text: t.0,
                        original_classes: t.3,
                        levels: t.2.clone(),
                        paragraph_level: t.4[0].level,
                        is_pure_ltr: !level::has_rtl(&t.2),
                    }
                )
            }
            // The same checks for the UTF-16 entry points.
            let text = &to_utf16(t.0);
            assert_eq!(
                BidiInfoU16::new(text, t.1),
                BidiInfoU16 {
                    text,
                    levels: t.5.clone(),
                    original_classes: t.6.clone(),
                    paragraphs: t.7.clone(),
                }
            );
            if t.7.len() == 1 {
                assert_eq!(
                    ParagraphBidiInfoU16::new(text, t.1),
                    ParagraphBidiInfoU16 {
                        text: text,
                        original_classes: t.6.clone(),
                        levels: t.5.clone(),
                        paragraph_level: t.7[0].level,
                        is_pure_ltr: !level::has_rtl(&t.5),
                    }
                )
            }
        }
    }
1814
/// Checks `ParagraphBidiInfo` / `ParagraphBidiInfoU16` on inputs that embed a `\n`
/// (class `B`) or a `\t` (class `S`) in the middle of the text.
///
/// The expected levels of each `\n` case are the same as those of the matching `\t` case.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_paragraph_bidi_info() {
    // Case layout:
    //   (text, default level,
    //    UTF-8 classes, UTF-8 levels,
    //    UTF-16 classes, UTF-16 levels,
    //    paragraph level, is_pure_ltr)
    let cases = vec![
        (
            "a א.\nג",
            None,
            vec![L, WS, R, R, CS, B, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            vec![L, WS, R, CS, B, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\nb.",
            None,
            vec![R, R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            vec![R, WS, L, CS, B, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            RTL_LEVEL,
            false,
        ),
        (
            "a א.\tג",
            None,
            vec![L, WS, R, R, CS, S, R, R],
            Level::vec(&[0, 0, 1, 1, 1, 1, 1, 1]),
            vec![L, WS, R, CS, S, R],
            Level::vec(&[0, 0, 1, 1, 1, 1]),
            LTR_LEVEL,
            false,
        ),
        (
            "\u{5d1} a.\tb.",
            None,
            vec![R, R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 1, 2, 2, 2, 2, 1]),
            vec![R, WS, L, CS, S, L, CS],
            Level::vec(&[1, 1, 2, 2, 2, 2, 1]),
            RTL_LEVEL,
            false,
        ),
    ];

    for (
        input,
        default_level,
        classes_utf8,
        levels_utf8,
        classes_utf16,
        levels_utf16,
        paragraph_level,
        is_pure_ltr,
    ) in cases
    {
        // UTF-8 flavour.
        let expected_utf8 = ParagraphBidiInfo {
            text: input,
            original_classes: classes_utf8,
            levels: levels_utf8,
            paragraph_level,
            is_pure_ltr,
        };
        assert_eq!(ParagraphBidiInfo::new(input, default_level), expected_utf8);

        // UTF-16 flavour of the same text.
        let wide = to_utf16(input);
        let expected_utf16 = ParagraphBidiInfoU16 {
            text: &wide,
            original_classes: classes_utf16,
            levels: levels_utf16,
            paragraph_level,
            is_pure_ltr,
        };
        assert_eq!(
            ParagraphBidiInfoU16::new(&wide, default_level),
            expected_utf16
        );
    }
}
1900
/// Checks `has_rtl()` for both `BidiInfo` and `BidiInfoU16` over a table of
/// `(text, default paragraph level, expected result)` cases.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_bidi_info_has_rtl() {
    let cases = vec![
        // Inputs made only of ASCII digits/letters: expected `false` whatever the default level.
        ("123", None, false),
        ("123", Some(LTR_LEVEL), false),
        ("123", Some(RTL_LEVEL), false),
        ("abc", None, false),
        ("abc", Some(LTR_LEVEL), false),
        ("abc", Some(RTL_LEVEL), false),
        ("abc 123", None, false),
        ("abc\n123", None, false),
        // Inputs containing characters from the U+05xx block: expected `true`.
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(LTR_LEVEL), true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}", Some(RTL_LEVEL), true),
        ("abc \u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("abc\n\u{05D0}\u{05D1}\u{05BC}\u{05D2}", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} abc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\nabc", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2} 123", None, true),
        ("\u{05D0}\u{05D1}\u{05BC}\u{05D2}\n123", None, true),
    ];

    for (input, default_level, expect_rtl) in cases {
        let info_utf8 = BidiInfo::new(input, default_level);
        assert_eq!(info_utf8.has_rtl(), expect_rtl);

        let wide = to_utf16(input);
        let info_utf16 = BidiInfoU16::new(&wide, default_level);
        assert_eq!(info_utf16.has_rtl(), expect_rtl);
    }
}
1931
/// Test helper: runs `BidiInfo::reorder_line` over the full range of every paragraph
/// of `text` and returns the results in paragraph order.
#[cfg(feature = "hardcoded-data")]
fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> {
    let info = BidiInfo::new(text, None);
    let mut reordered = Vec::new();
    for para in &info.paragraphs {
        let whole_paragraph = para.range.clone();
        reordered.push(info.reorder_line(para, whole_paragraph));
    }
    reordered
}
1941
/// Test helper: runs `BidiInfoU16::reorder_line` over the full range of every paragraph
/// of the UTF-16 `text` and returns the results in paragraph order.
#[cfg(feature = "hardcoded-data")]
fn reorder_paras_u16(text: &[u16]) -> Vec<Cow<'_, [u16]>> {
    let info = BidiInfoU16::new(text, None);
    let mut reordered = Vec::new();
    for para in &info.paragraphs {
        let whole_paragraph = para.range.clone();
        reordered.push(info.reorder_line(para, whole_paragraph));
    }
    reordered
}
1951
/// Table-driven check of per-paragraph line reordering.
///
/// Each case is `(input, expected reordered paragraphs)`. The same expectation is
/// checked against the UTF-8 helper and, after conversion with `to_utf16`, against
/// the UTF-16 helper.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reorder_line() {
    let cases = vec![
        ("abc\ndef\nghi", vec!["abc\n", "def\n", "ghi"]),
        ("ab1\nde2\ngh3", vec!["ab1\n", "de2\n", "gh3"]),
        ("abc\nابج", vec!["abc\n", "جبا"]),
        (
            "\u{0627}\u{0628}\u{062C}\nabc",
            vec!["\n\u{062C}\u{0628}\u{0627}", "abc"],
        ),
        ("1.-2", vec!["1.-2"]),
        ("1-.2", vec!["1-.2"]),
        ("abc אבג", vec!["abc גבא"]),
        ("123 \u{05D0}\u{05D1}\u{05D2}", vec!["גבא 123"]),
        ("abc\u{202A}def", vec!["abc\u{202A}def"]),
        (
            "abc\u{202A}def\u{202C}ghi",
            vec!["abc\u{202A}def\u{202C}ghi"],
        ),
        (
            "abc\u{2066}def\u{2069}ghi",
            vec!["abc\u{2066}def\u{2069}ghi"],
        ),
        ("\u{202B}abc אבג\u{202C}", vec!["\u{202b}גבא abc\u{202c}"]),
        ("\u{05D0}בג? אבג", vec!["גבא ?גבא"]),
        ("A אבג?", vec!["A גבא?"]),
        ("A אבג?\u{200F}", vec!["A \u{200F}?גבא"]),
        ("\u{05D0}בג abc", vec!["abc גבא"]),
        ("abc\u{2067}.-\u{2069}ghi", vec!["abc\u{2067}-.\u{2069}ghi"]),
        (
            "Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"],
        ),
        ("\u{05D0}(ב)ג.", vec![".ג)ב(א"]),
        ("\u{05D0}ב(גד[&ef].)gh", vec!["gh).]ef&[דג(בא"]),
    ];

    for (input, expected) in cases {
        // UTF-8 path.
        assert_eq!(reorder_paras(input), expected);

        // UTF-16 path: convert both the input and every expected paragraph.
        let expected_wide: Vec<_> = expected.iter().map(|line| to_utf16(line)).collect();
        let input_wide = to_utf16(input);
        assert_eq!(reorder_paras_u16(&input_wide), expected_wide);
    }
}
2007
2008 fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
2009 let bidi_info = BidiInfo::new(text, None);
2010 bidi_info
2011 .paragraphs
2012 .iter()
2013 .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
2014 .collect()
2015 }
2016
2017 fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
2018 let bidi_info = BidiInfo::new(text, None);
2019 bidi_info
2020 .paragraphs
2021 .iter()
2022 .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
2023 .collect()
2024 }
2025
2026 fn reordered_levels_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
2027 let bidi_info = BidiInfoU16::new(text, None);
2028 bidi_info
2029 .paragraphs
2030 .iter()
2031 .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
2032 .collect()
2033 }
2034
2035 fn reordered_levels_per_char_for_paras_u16(text: &[u16]) -> Vec<Vec<Level>> {
2036 let bidi_info = BidiInfoU16::new(text, None);
2037 bidi_info
2038 .paragraphs
2039 .iter()
2040 .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
2041 .collect()
2042 }
2043
/// Requests visual runs for a sub-range of the first paragraph and checks that every
/// returned run can be used to slice the original string.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels_range() {
    let s = "\u{202a}A\u{202c}\u{202a}A\u{202c}";
    let range = 4..11;
    // Precondition: the requested line range itself is a valid slice of `s`.
    assert!(s.get(range.clone()).is_some());

    let bidi = BidiInfo::new(s, None);
    let first_para = &bidi.paragraphs[0];
    let (_, runs) = bidi.visual_runs(first_para, range);

    // Indexing a `str` with a range that is out of bounds or not on a char
    // boundary panics, so this fails the test if any run is unusable.
    runs.into_iter().for_each(|run| {
        let _ = &s[run];
    });
}
2060
/// Checks `reordered_levels` and `reordered_levels_per_char` for UTF-8 and UTF-16 text.
///
/// Table cases are `(text, UTF-8 levels, per-char levels, UTF-16 levels)`; the per-char
/// expectation is shared by the UTF-8 and UTF-16 runs.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels() {
    // Expectations written as numeric levels.
    let numeric_cases = vec![
        (
            "\u{2067}\u{2069}",
            vec![Level::vec(&[0, 0, 0, 0, 0, 0])],
            vec![Level::vec(&[0, 0])],
            vec![Level::vec(&[0, 0])],
        ),
        (
            "\u{060B}\u{20CF}\u{06F9}",
            vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])],
            vec![Level::vec(&[1, 1, 2])],
            vec![Level::vec(&[1, 1, 2])],
        ),
    ];

    for (input, levels_utf8, levels_per_char, levels_utf16) in numeric_cases {
        assert_eq!(reordered_levels_for_paras(input), levels_utf8);
        assert_eq!(reordered_levels_per_char_for_paras(input), levels_per_char);
        let wide = &to_utf16(input);
        assert_eq!(reordered_levels_for_paras_u16(wide), levels_utf16);
        assert_eq!(reordered_levels_per_char_for_paras_u16(wide), levels_per_char);
    }

    // Expectations written as string slices compared against `Level` values.
    let string_cases = vec![
        (
            "\u{0605}\u{2067}\u{202C}\u{0590}",
            vec![&["2", "2", "0", "0", "0", "x", "x", "x", "1", "1"]],
            vec![&["2", "0", "x", "1"]],
            vec![&["2", "0", "x", "1"]],
        ),
    ];

    for (input, levels_utf8, levels_per_char, levels_utf16) in string_cases {
        assert_eq!(reordered_levels_for_paras(input), levels_utf8);
        assert_eq!(reordered_levels_per_char_for_paras(input), levels_per_char);
        let wide = &to_utf16(input);
        assert_eq!(reordered_levels_for_paras_u16(wide), levels_utf16);
        assert_eq!(reordered_levels_per_char_for_paras_u16(wide), levels_per_char);
    }

    // Line range that covers only part of the paragraph (UTF-8 indices).
    let text = "aa טֶ";
    let info_utf8 = BidiInfo::new(text, None);
    let partial_utf8 = info_utf8.reordered_levels(&info_utf8.paragraphs[0], 3..7);
    assert_eq!(partial_utf8, Level::vec(&[0, 0, 0, 1, 1, 1, 1]));

    // Same text, partial line range in UTF-16 indices.
    let wide = &to_utf16(text);
    let info_utf16 = BidiInfoU16::new(wide, None);
    let partial_utf16 = info_utf16.reordered_levels(&info_utf16.paragraphs[0], 1..4);
    assert_eq!(partial_utf16, Level::vec(&[0, 0, 0, 1, 1]));
}
2121
/// Checks `ParagraphInfo::len()` for one-paragraph and two-paragraph inputs, in both
/// UTF-8 and UTF-16.
#[test]
fn test_paragraph_info_len() {
    let first = "hello world";
    let second = "How are you";
    let joined = format!("{}\n{}", first, second);

    // UTF-8: one paragraph spanning the whole text.
    let info = BidiInfo::new(first, None);
    assert_eq!(info.paragraphs.len(), 1);
    assert_eq!(info.paragraphs[0].len(), first.len());

    // UTF-8: two paragraphs; the first one's length includes the `\n`.
    let info = BidiInfo::new(&joined, None);
    assert_eq!(info.paragraphs.len(), 2);
    assert_eq!(info.paragraphs[0].len(), first.len() + 1);
    assert_eq!(info.paragraphs[1].len(), second.len());

    let first_wide = to_utf16(first);
    let second_wide = to_utf16(second);
    let joined_wide = to_utf16(&joined);

    // UTF-16: one paragraph spanning the whole text.
    let info = BidiInfoU16::new(&first_wide, None);
    assert_eq!(info.paragraphs.len(), 1);
    assert_eq!(info.paragraphs[0].len(), first_wide.len());

    // UTF-16: two paragraphs; the first one's length includes the `\n`.
    let info = BidiInfoU16::new(&joined_wide, None);
    assert_eq!(info.paragraphs.len(), 2);
    assert_eq!(info.paragraphs[0].len(), first_wide.len() + 1);
    assert_eq!(info.paragraphs[1].len(), second_wide.len());
}
2153
/// Builds a three-paragraph text (Latin-only, Arabic-only, Latin followed by Arabic)
/// and checks that `direction()` reports `Ltr`, `Rtl` and `Mixed` respectively, for
/// both `Paragraph` and `ParagraphU16`.
#[test]
fn test_direction() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
    // Expected direction of paragraph 0, 1 and 2.
    let expected = [Direction::Ltr, Direction::Rtl, Direction::Mixed];

    // UTF-8.
    let info = BidiInfo::new(&all_paragraphs, None);
    assert_eq!(info.paragraphs.len(), 3);
    for (para, want) in info.paragraphs.iter().zip(expected.iter()) {
        assert_eq!(Paragraph::new(&info, para).direction(), *want);
    }

    // UTF-16.
    let wide = to_utf16(&all_paragraphs);
    let info = BidiInfoU16::new(&wide, None);
    assert_eq!(info.paragraphs.len(), 3);
    for (para, want) in info.paragraphs.iter().zip(expected.iter()) {
        assert_eq!(ParagraphU16::new(&info, para).direction(), *want);
    }
}
2178
/// Edge cases: empty text produces no paragraphs, and a text consisting only of `\n`
/// produces one paragraph whose direction depends on the requested default level.
#[test]
fn test_edge_cases_direction() {
    // Empty input: no paragraphs, for UTF-8 and UTF-16 alike.
    let empty = "";
    let info = BidiInfo::new(empty, Some(RTL_LEVEL));
    assert_eq!(info.paragraphs.len(), 0);

    let empty_wide = to_utf16(empty);
    let info = BidiInfoU16::new(&empty_wide, Some(RTL_LEVEL));
    assert_eq!(info.paragraphs.len(), 0);

    // (text, default paragraph level, expected direction)
    let cases = vec![
        ("\n", None, Direction::Ltr),
        ("\n", Some(LTR_LEVEL), Direction::Ltr),
        ("\n", Some(RTL_LEVEL), Direction::Rtl),
    ];

    for (input, default_level, want) in cases {
        let info = BidiInfo::new(input, default_level);
        assert_eq!(info.paragraphs.len(), 1);
        let para = Paragraph::new(&info, &info.paragraphs[0]);
        assert_eq!(para.direction(), want);

        let wide = to_utf16(input);
        let info = BidiInfoU16::new(&wide, default_level);
        let para = ParagraphU16::new(&info, &info.paragraphs[0]);
        assert_eq!(para.direction(), want);
    }
}
2213
/// Checks `level_at()` on the three paragraphs of a Latin / Arabic / Latin+Arabic text,
/// for both `Paragraph` and `ParagraphU16`.
#[test]
fn test_level_at() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);

    // UTF-8.
    let info = BidiInfo::new(&all_paragraphs, None);
    assert_eq!(info.paragraphs.len(), 3);

    let latin_para = Paragraph::new(&info, &info.paragraphs[0]);
    let arabic_para = Paragraph::new(&info, &info.paragraphs[1]);
    let mixed_para = Paragraph::new(&info, &info.paragraphs[2]);

    assert_eq!(latin_para.level_at(0), LTR_LEVEL);
    assert_eq!(arabic_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
    // `info.levels` is the level vector of the shared `BidiInfo`.
    assert_eq!(mixed_para.info.levels.len(), 54);
    assert_eq!(mixed_para.para.range.start, 28);
    // Position just past the Latin prefix of the mixed paragraph.
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);

    // UTF-16.
    let wide = to_utf16(&all_paragraphs);
    let info = BidiInfoU16::new(&wide, None);
    assert_eq!(info.paragraphs.len(), 3);

    let latin_para = ParagraphU16::new(&info, &info.paragraphs[0]);
    let arabic_para = ParagraphU16::new(&info, &info.paragraphs[1]);
    let mixed_para = ParagraphU16::new(&info, &info.paragraphs[2]);

    assert_eq!(latin_para.level_at(0), LTR_LEVEL);
    assert_eq!(arabic_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);
    assert_eq!(mixed_para.info.levels.len(), 40);
    assert_eq!(mixed_para.para.range.start, 21);
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);
}
2248
/// Table-driven check of `get_base_direction` on `&str` input and on the same text
/// converted to UTF-16.
#[test]
fn test_get_base_direction() {
    // (text, expected direction)
    let cases = vec![
        ("", Direction::Mixed),
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        // Contrast with `test_get_base_direction_full`, which expects `Ltr` here.
        ("3.14\npi", Direction::Mixed),
        ("[123 'abc']", Direction::Ltr),
        ("[123 '\u{0628}' abc", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
    ];

    for (input, want) in cases {
        assert_eq!(get_base_direction(input), want);

        let wide = to_utf16(input);
        assert_eq!(get_base_direction(wide.as_slice()), want);
    }
}
2267
/// Table-driven check of `get_base_direction_full` on `&str` input and on the same
/// text converted to UTF-16.
#[test]
fn test_get_base_direction_full() {
    // (text, expected direction)
    let cases = vec![
        ("", Direction::Mixed),
        ("123[]-+\u{2019}\u{2060}\u{00bf}?", Direction::Mixed),
        // Inputs containing `\n`: the expectation follows the text after the break.
        ("3.14\npi", Direction::Ltr),
        ("3.14\n\u{05D0}", Direction::Rtl),
        ("[123 'abc']", Direction::Ltr),
        ("[123 '\u{0628}' abc", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2069}'\u{0628}]", Direction::Rtl),
        ("[123 '\u{2066}abc\u{2068}'\u{0628}]", Direction::Mixed),
        ("[123 '\u{2066}abc\u{2068}'\n\u{0628}]", Direction::Rtl),
    ];

    for (input, want) in cases {
        assert_eq!(get_base_direction_full(input), want);

        let wide = to_utf16(input);
        assert_eq!(get_base_direction_full(wide.as_slice()), want);
    }
}
2288}
2289
/// Serde round-trip tests; compiled only for test builds with both the `serde` and
/// `hardcoded-data` features enabled.
#[cfg(all(feature = "serde", feature = "hardcoded-data", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// Checks that the levels of a ten-byte text are represented as a sequence of
    /// ten `Level` newtype structs, each wrapping a `u8`.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let levels = BidiInfo::new(text, None).levels;
        assert_eq!(text.len(), 10);
        assert_eq!(levels.len(), 10);

        // Raw level expected at each of the ten positions.
        let raw_levels: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        // Token stream: Seq header, then (NewtypeStruct, U8) per level, then SeqEnd.
        let mut expected = vec![Token::Seq { len: Some(10) }];
        for raw in raw_levels.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(*raw));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}