1use std::borrow::Cow;
28
29use super::in_inclusive_range16;
30use super::in_inclusive_range32;
31use super::in_inclusive_range8;
32use super::in_range16;
33use super::in_range32;
34use super::DecoderResult;
35use crate::ascii::*;
36use crate::utf_8::*;
37
/// `debug_assert!` that is compiled out of the control flow when the crate is
/// built with `--cfg fuzzing`; all arguments are forwarded untouched.
macro_rules! non_fuzz_debug_assert {
    ($($arg:tt)*) => {
        if cfg!(not(fuzzing)) {
            debug_assert!($($arg)*);
        }
    };
}
41
42cfg_if! {
43 if #[cfg(feature = "simd-accel")] {
44 use ::std::intrinsics::likely;
45 use ::std::intrinsics::unlikely;
46 } else {
47 #[inline(always)]
48 unsafe fn likely(b: bool) -> bool {
50 b
51 }
52 #[inline(always)]
53 unsafe fn unlikely(b: bool) -> bool {
55 b
56 }
57 }
58}
59
/// Result of the combined "is it Latin1?" / "does it contain right-to-left
/// content?" checks performed by the `check_*_for_latin1_and_bidi` functions.
///
/// The enum is field-less and `#[repr(C)]`, so it is `Copy`: values can be
/// passed around and compared without moving them.
#[must_use]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(C)]
pub enum Latin1Bidi {
    /// The Latin1 check found nothing outside the Latin1 range.
    Latin1 = 0,
    /// The input left the Latin1 range, but the bidi check on the remainder
    /// reported no right-to-left content.
    LeftToRight = 1,
    /// The input left the Latin1 range and the bidi check on the remainder
    /// reported right-to-left content (or input it treats as such).
    Bidi = 2,
}
75
/// Word-sized mask with the high byte of every 16-bit lane set
/// (`0xFF00` repeated across the whole `usize`).
///
/// `!0usize / 0xFFFF` is `0x0001` repeated in every 16-bit lane, so the
/// product repeats `0xFF00` for whatever the pointer width is.
#[allow(dead_code)]
const LATIN1_MASK: usize = (!0usize / 0xFFFF) * 0xFF00;
79
/// Generates `fn $name(buffer: &[$unit]) -> bool`, a word-at-a-time scan that
/// returns `false` as soon as it sees evidence of a unit outside the accepted
/// range and `true` otherwise.
///
/// * `$unit`  - element type of the slice (`u8` or `u16` at the call sites).
/// * `$bound` - exclusive upper bound used for the per-unit comparisons.
/// * `$mask`  - `usize` constant tested against whole words; presumably it has
///   exactly the bits set that no in-range unit may have (confirm against the
///   definitions of the mask constants).
#[allow(unused_macros)]
macro_rules! by_unit_check_alu {
    ($name:ident, $unit:ty, $bound:expr, $mask:ident) => {
        #[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
        #[inline(always)]
        fn $name(buffer: &[$unit]) -> bool {
            let mut offset = 0usize;
            // OR of every unit (or word) looked at so far that has not yet
            // been tested on its own.
            let mut accu = 0usize;
            let unit_size = ::std::mem::size_of::<$unit>();
            let len = buffer.len();
            if len >= ALU_ALIGNMENT / unit_size {
                // Cheap early exit: test the very first unit before doing any
                // alignment work.
                if buffer[0] >= $bound {
                    return false;
                }
                let src = buffer.as_ptr();
                // Number of units to step over before `src` reaches an
                // `ALU_ALIGNMENT` boundary (0 when already aligned).
                let mut until_alignment = ((ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK))
                    & ALU_ALIGNMENT_MASK)
                    / unit_size;
                // Only use word reads if at least one full word remains after
                // the unaligned prefix.
                if until_alignment + ALU_ALIGNMENT / unit_size <= len {
                    if until_alignment != 0 {
                        // Unaligned prefix, one unit at a time.
                        accu |= buffer[offset] as usize;
                        offset += 1;
                        until_alignment -= 1;
                        while until_alignment != 0 {
                            accu |= buffer[offset] as usize;
                            offset += 1;
                            until_alignment -= 1;
                        }
                        if accu >= $bound {
                            return false;
                        }
                    }
                    let len_minus_stride = len - ALU_ALIGNMENT / unit_size;
                    if offset + (4 * (ALU_ALIGNMENT / unit_size)) <= len {
                        let len_minus_unroll = len - (4 * (ALU_ALIGNMENT / unit_size));
                        // Unrolled loop: four words are ORed together and
                        // tested at once.
                        // SAFETY: on every iteration
                        // `offset + 4 * (ALU_ALIGNMENT / unit_size) <= len`
                        // (checked before entry and re-established by the
                        // `offset > len_minus_unroll` break), so all four
                        // reads stay inside `buffer`. This assumes
                        // `ALU_ALIGNMENT` equals `size_of::<usize>()` -
                        // TODO confirm at its definition.
                        loop {
                            let unroll_accu = unsafe { *(src.add(offset) as *const usize) }
                                | unsafe {
                                    *(src.add(offset + (ALU_ALIGNMENT / unit_size)) as *const usize)
                                }
                                | unsafe {
                                    *(src.add(offset + (2 * (ALU_ALIGNMENT / unit_size)))
                                        as *const usize)
                                }
                                | unsafe {
                                    *(src.add(offset + (3 * (ALU_ALIGNMENT / unit_size)))
                                        as *const usize)
                                };
                            if unroll_accu & $mask != 0 {
                                return false;
                            }
                            offset += 4 * (ALU_ALIGNMENT / unit_size);
                            if offset > len_minus_unroll {
                                break;
                            }
                        }
                    }
                    // Remaining whole words, accumulated and tested at the end.
                    while offset <= len_minus_stride {
                        // SAFETY: `offset <= len - ALU_ALIGNMENT / unit_size`,
                        // so one word starting at `offset` is in bounds.
                        accu |= unsafe { *(src.add(offset) as *const usize) };
                        offset += ALU_ALIGNMENT / unit_size;
                    }
                }
            }
            // Tail (or the whole buffer when it was too short for word reads).
            for &unit in &buffer[offset..] {
                accu |= unit as usize;
            }
            accu & $mask == 0
        }
    };
}
152
/// Generates `fn $name(buffer: &[$unit]) -> bool`, a SIMD scan that returns
/// `false` as soon as a unit (or a vector rejected by `$func`) is found and
/// `true` otherwise.
///
/// * `$unit`    - element type of the slice.
/// * `$splat`   - expression producing the initial (all-zero at the call
///   sites) vector accumulator.
/// * `$simd_ty` - vector type loaded from the buffer.
/// * `$bound`   - exclusive upper bound for the scalar comparisons.
/// * `$func`    - predicate on `$simd_ty`; a `false` result rejects the input.
#[allow(unused_macros)]
macro_rules! by_unit_check_simd {
    ($name:ident, $unit:ty, $splat:expr, $simd_ty:ty, $bound:expr, $func:ident) => {
        #[inline(always)]
        fn $name(buffer: &[$unit]) -> bool {
            let mut offset = 0usize;
            // Scalar accumulator for the unaligned prefix and the tail.
            let mut accu = 0usize;
            let unit_size = ::std::mem::size_of::<$unit>();
            let len = buffer.len();
            if len >= SIMD_STRIDE_SIZE / unit_size {
                // Cheap early exit on the first unit.
                if buffer[0] >= $bound {
                    return false;
                }
                let src = buffer.as_ptr();
                // Units to step over before `src` reaches a `SIMD_ALIGNMENT`
                // boundary (0 when already aligned).
                let mut until_alignment = ((SIMD_ALIGNMENT
                    - ((src as usize) & SIMD_ALIGNMENT_MASK))
                    & SIMD_ALIGNMENT_MASK)
                    / unit_size;
                // Only use vector loads if a full stride remains after the
                // unaligned prefix.
                if until_alignment + SIMD_STRIDE_SIZE / unit_size <= len {
                    if until_alignment != 0 {
                        accu |= buffer[offset] as usize;
                        offset += 1;
                        until_alignment -= 1;
                        while until_alignment != 0 {
                            accu |= buffer[offset] as usize;
                            offset += 1;
                            until_alignment -= 1;
                        }
                        if accu >= $bound {
                            return false;
                        }
                    }
                    let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
                    if offset + (4 * (SIMD_STRIDE_SIZE / unit_size)) <= len {
                        let len_minus_unroll = len - (4 * (SIMD_STRIDE_SIZE / unit_size));
                        // Unrolled loop: four vectors ORed together and
                        // tested with a single `$func` call.
                        // SAFETY: `offset + 4 * (SIMD_STRIDE_SIZE / unit_size)
                        // <= len` holds on every iteration (entry check plus
                        // the `offset > len_minus_unroll` break), so the four
                        // loads stay inside `buffer`. This assumes
                        // `SIMD_STRIDE_SIZE` is the byte size of `$simd_ty` -
                        // TODO confirm at its definition.
                        loop {
                            let unroll_accu = unsafe { *(src.add(offset) as *const $simd_ty) }
                                | unsafe {
                                    *(src.add(offset + (SIMD_STRIDE_SIZE / unit_size))
                                        as *const $simd_ty)
                                }
                                | unsafe {
                                    *(src.add(offset + (2 * (SIMD_STRIDE_SIZE / unit_size)))
                                        as *const $simd_ty)
                                }
                                | unsafe {
                                    *(src.add(offset + (3 * (SIMD_STRIDE_SIZE / unit_size)))
                                        as *const $simd_ty)
                                };
                            if !$func(unroll_accu) {
                                return false;
                            }
                            offset += 4 * (SIMD_STRIDE_SIZE / unit_size);
                            if offset > len_minus_unroll {
                                break;
                            }
                        }
                    }
                    // Remaining whole strides, accumulated and tested once.
                    let mut simd_accu = $splat;
                    while offset <= len_minus_stride {
                        // SAFETY: `offset <= len - SIMD_STRIDE_SIZE / unit_size`,
                        // so one stride starting at `offset` is in bounds.
                        simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) };
                        offset += SIMD_STRIDE_SIZE / unit_size;
                    }
                    if !$func(simd_accu) {
                        return false;
                    }
                }
            }
            // Tail (or the whole buffer when it was too short for SIMD).
            for &unit in &buffer[offset..] {
                accu |= unit as usize;
            }
            accu < $bound
        }
    };
}
230
// Selects the SIMD or the ALU implementation of the range checks and of
// `utf16_valid_up_to_impl`, depending on the enabled feature and target.
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        use crate::simd_funcs::*;
        use packed_simd::u8x16;
        use packed_simd::u16x8;

        // Alignment (in bytes) that the vector loads below are advanced to.
        const SIMD_ALIGNMENT: usize = 16;

        // `SIMD_ALIGNMENT - 1`, used to extract the misalignment of a pointer.
        const SIMD_ALIGNMENT_MASK: usize = 15;

        by_unit_check_simd!(is_ascii_impl, u8, u8x16::splat(0), u8x16, 0x80, simd_is_ascii);
        by_unit_check_simd!(is_basic_latin_impl, u16, u16x8::splat(0), u16x8, 0x80, simd_is_basic_latin);
        by_unit_check_simd!(is_utf16_latin1_impl, u16, u16x8::splat(0), u16x8, 0x100, simd_is_latin1);

        /// Returns the length of the longest prefix of `buffer` without an
        /// unpaired surrogate, skipping surrogate-free vectors quickly and
        /// delegating everything else to `utf16_valid_up_to_alu`.
        #[inline(always)]
        fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize {
            let unit_size = ::std::mem::size_of::<u16>();
            let src = buffer.as_ptr();
            let len = buffer.len();
            let mut offset = 0usize;
            'outer: loop {
                // Code units between the current position and the next
                // `SIMD_ALIGNMENT` boundary.
                let until_alignment = ((SIMD_ALIGNMENT - ((unsafe { src.add(offset) } as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK) / unit_size;
                if until_alignment == 0 {
                    if offset + SIMD_STRIDE_SIZE / unit_size > len {
                        break;
                    }
                } else {
                    let offset_plus_until_alignment = offset + until_alignment;
                    let offset_plus_until_alignment_plus_one = offset_plus_until_alignment + 1;
                    if offset_plus_until_alignment_plus_one + SIMD_STRIDE_SIZE / unit_size > len {
                        break;
                    }
                    // Validate the unaligned prefix plus one extra unit, so a
                    // high surrogate ending the prefix can see its partner.
                    let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_until_alignment_plus_one]);
                    if up_to < until_alignment {
                        return offset + up_to;
                    }
                    if last_valid_low {
                        // The extra unit was consumed as the low half of a
                        // pair; restart (now misaligned by one unit).
                        offset = offset_plus_until_alignment_plus_one;
                        continue;
                    }
                    offset = offset_plus_until_alignment;
                }
                let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
                loop {
                    let offset_plus_stride = offset + SIMD_STRIDE_SIZE / unit_size;
                    // SAFETY: the checks above / at the bottom of this loop
                    // keep `offset + SIMD_STRIDE_SIZE / unit_size <= len`.
                    if contains_surrogates(unsafe { *(src.add(offset) as *const u16x8) }) {
                        if offset_plus_stride == len {
                            // No extra unit available; let the final ALU pass
                            // handle this last stride.
                            break 'outer;
                        }
                        let offset_plus_stride_plus_one = offset_plus_stride + 1;
                        // Re-check this stride (plus one unit) unit by unit.
                        let (up_to, last_valid_low) = utf16_valid_up_to_alu(&buffer[offset..offset_plus_stride_plus_one]);
                        if up_to < SIMD_STRIDE_SIZE / unit_size {
                            return offset + up_to;
                        }
                        if last_valid_low {
                            offset = offset_plus_stride_plus_one;
                            continue 'outer;
                        }
                    }
                    offset = offset_plus_stride;
                    if offset > len_minus_stride {
                        break 'outer;
                    }
                }
            }
            // Whatever is left is too short for a vector load.
            let (up_to, _) = utf16_valid_up_to_alu(&buffer[offset..]);
            offset + up_to
        }
    } else {
        by_unit_check_alu!(is_ascii_impl, u8, 0x80, ASCII_MASK);
        by_unit_check_alu!(is_basic_latin_impl, u16, 0x80, BASIC_LATIN_MASK);
        by_unit_check_alu!(is_utf16_latin1_impl, u16, 0x100, LATIN1_MASK);

        /// Returns the length of the longest prefix of `buffer` without an
        /// unpaired surrogate (scalar implementation).
        #[inline(always)]
        fn utf16_valid_up_to_impl(buffer: &[u16]) -> usize {
            let (up_to, _) = utf16_valid_up_to_alu(buffer);
            up_to
        }
    }
}
316
/// Scalar UTF-16 validation.
///
/// Returns `(up_to, ended_with_pair)` where `up_to` is the index of the first
/// unpaired surrogate (or `buffer.len()` if there is none) and
/// `ended_with_pair` is `true` only when the whole buffer was valid and its
/// final code unit was the low half of a surrogate pair.
#[inline(always)]
fn utf16_valid_up_to_alu(buffer: &[u16]) -> (usize, bool) {
    let mut pos = 0usize;
    let mut ended_with_pair = false;
    while pos < buffer.len() {
        // The top six bits tell surrogate halves apart from everything else.
        match buffer[pos] & 0xFC00 {
            0xD800 => {
                // High surrogate: valid only when a low surrogate follows.
                let low_follows = buffer
                    .get(pos + 1)
                    .map_or(false, |&second| second & 0xFC00 == 0xDC00);
                if !low_follows {
                    return (pos, false);
                }
                pos += 2;
                ended_with_pair = true;
            }
            // A low surrogate that was not consumed by a pair is unpaired.
            0xDC00 => return (pos, false),
            _ => {
                pos += 1;
                ended_with_pair = false;
            }
        }
    }
    (pos, ended_with_pair)
}
362
// `is_str_latin1_impl` returns `None` when no byte above 0xC3 is found, and
// otherwise `Some(offset)` of a position from which the caller continues
// checking (see `check_str_for_latin1_and_bidi`). In valid UTF-8, which `&str`
// guarantees, a byte above 0xC3 can only start a character at or above U+0100.
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        /// SIMD variant: scalar checks up to the alignment boundary, then one
        /// vector per iteration, then a scalar tail.
        #[inline(always)]
        fn is_str_latin1_impl(buffer: &str) -> Option<usize> {
            let mut offset = 0usize;
            let bytes = buffer.as_bytes();
            let len = bytes.len();
            if len >= SIMD_STRIDE_SIZE {
                let src = bytes.as_ptr();
                // Bytes until `src` reaches a `SIMD_ALIGNMENT` boundary.
                let mut until_alignment = (SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK;
                if until_alignment + SIMD_STRIDE_SIZE <= len {
                    while until_alignment != 0 {
                        if bytes[offset] > 0xC3 {
                            return Some(offset);
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - SIMD_STRIDE_SIZE;
                    loop {
                        // SAFETY: `offset <= len - SIMD_STRIDE_SIZE` here, so
                        // the load stays inside `bytes`.
                        if !simd_is_str_latin1(unsafe { *(src.add(offset) as *const u8x16) }) {
                            // The stride may begin in the middle of a
                            // character; skip continuation bytes so the
                            // returned offset lands on a lead byte.
                            while bytes[offset] & 0xC0 == 0x80 {
                                offset += 1;
                            }
                            return Some(offset);
                        }
                        offset += SIMD_STRIDE_SIZE;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail (or the whole input when it is short).
            for i in offset..len {
                if bytes[i] > 0xC3 {
                    return Some(i);
                }
            }
            None
        }
    } else {
        /// Scalar variant: `validate_ascii` skips ASCII runs; each non-ASCII
        /// lead byte is then either rejected or stepped over as a two-byte
        /// sequence.
        #[inline(always)]
        fn is_str_latin1_impl(buffer: &str) -> Option<usize> {
            let mut bytes = buffer.as_bytes();
            // Offset of `bytes[0]` within the original input.
            let mut total = 0;
            loop {
                if let Some((byte, offset)) = validate_ascii(bytes) {
                    total += offset;
                    if byte > 0xC3 {
                        return Some(total);
                    }
                    // A lead byte of at most 0xC3 in valid UTF-8 starts a
                    // two-byte sequence; skip both bytes.
                    bytes = &bytes[offset + 2..];
                    total += 2;
                } else {
                    return None;
                }
            }
        }
    }
}
425
426#[inline(always)]
427fn is_utf8_latin1_impl(buffer: &[u8]) -> Option<usize> {
428 let mut bytes = buffer;
429 let mut total = 0;
430 loop {
431 if let Some((byte, offset)) = validate_ascii(bytes) {
432 total += offset;
433 if in_inclusive_range8(byte, 0xC2, 0xC3) {
434 let next = offset + 1;
435 if next == bytes.len() {
436 return Some(total);
437 }
438 if bytes[next] & 0xC0 != 0x80 {
439 return Some(total);
440 }
441 bytes = &bytes[offset + 2..];
442 total += 2;
443 } else {
444 return Some(total);
445 }
446 } else {
447 return None;
448 }
449 }
450}
451
// `is_utf16_bidi_impl` returns `true` if any code unit of the buffer is
// classified as right-to-left by `is_utf16_code_unit_bidi` (or by the vector
// predicate `is_u16x8_bidi` in the SIMD build).
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        /// SIMD variant: scalar up to the alignment boundary, one `u16x8` per
        /// iteration afterwards, scalar tail.
        #[inline(always)]
        fn is_utf16_bidi_impl(buffer: &[u16]) -> bool {
            let mut offset = 0usize;
            let len = buffer.len();
            // `/ 2` converts byte counts to `u16` counts.
            if len >= SIMD_STRIDE_SIZE / 2 {
                let src = buffer.as_ptr();
                let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK) / 2;
                if until_alignment + (SIMD_STRIDE_SIZE / 2) <= len {
                    while until_alignment != 0 {
                        if is_utf16_code_unit_bidi(buffer[offset]) {
                            return true;
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - (SIMD_STRIDE_SIZE / 2);
                    loop {
                        // SAFETY: `offset <= len - SIMD_STRIDE_SIZE / 2`, so
                        // the vector load stays inside `buffer`.
                        if is_u16x8_bidi(unsafe { *(src.add(offset) as *const u16x8) }) {
                            return true;
                        }
                        offset += SIMD_STRIDE_SIZE / 2;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail (or the whole input when it is short).
            for &u in &buffer[offset..] {
                if is_utf16_code_unit_bidi(u) {
                    return true;
                }
            }
            false
        }
    } else {
        /// Scalar variant: tests every code unit in turn.
        #[inline(always)]
        fn is_utf16_bidi_impl(buffer: &[u16]) -> bool {
            for &u in buffer {
                if is_utf16_code_unit_bidi(u) {
                    return true;
                }
            }
            false
        }
    }
}
501
// `check_utf16_for_latin1_and_bidi_impl` scans for the first code unit above
// 0xFF. If there is none the result is `Latin1`; otherwise the rest of the
// buffer (starting at or before that unit) is scanned for right-to-left code
// units, yielding `Bidi` or `LeftToRight`.
cfg_if! {
    if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
        /// SIMD variant.
        #[inline(always)]
        fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi {
            let mut offset = 0usize;
            let len = buffer.len();
            // `/ 2` converts byte counts to `u16` counts.
            if len >= SIMD_STRIDE_SIZE / 2 {
                let src = buffer.as_ptr();
                let mut until_alignment = ((SIMD_ALIGNMENT - ((src as usize) & SIMD_ALIGNMENT_MASK)) &
                    SIMD_ALIGNMENT_MASK) / 2;
                if until_alignment + (SIMD_STRIDE_SIZE / 2) <= len {
                    // Unaligned prefix, one code unit at a time.
                    while until_alignment != 0 {
                        if buffer[offset] > 0xFF {
                            // Left Latin1 before reaching alignment: hand the
                            // remainder to the dedicated bidi scanner.
                            if is_utf16_bidi_impl(&buffer[offset..]) {
                                return Latin1Bidi::Bidi;
                            }
                            return Latin1Bidi::LeftToRight;
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - (SIMD_STRIDE_SIZE / 2);
                    loop {
                        // SAFETY: `offset <= len - SIMD_STRIDE_SIZE / 2`, so
                        // the vector load stays inside `buffer`.
                        let mut s = unsafe { *(src.add(offset) as *const u16x8) };
                        if !simd_is_latin1(s) {
                            // From here on only bidi-ness matters; keep
                            // going vector by vector without re-aligning.
                            loop {
                                if is_u16x8_bidi(s) {
                                    return Latin1Bidi::Bidi;
                                }
                                offset += SIMD_STRIDE_SIZE / 2;
                                if offset > len_minus_stride {
                                    // Scalar tail of the bidi scan.
                                    for &u in &buffer[offset..] {
                                        if is_utf16_code_unit_bidi(u) {
                                            return Latin1Bidi::Bidi;
                                        }
                                    }
                                    return Latin1Bidi::LeftToRight;
                                }
                                // SAFETY: `offset <= len_minus_stride` was
                                // just established.
                                s = unsafe { *(src.add(offset) as *const u16x8) };
                            }
                        }
                        offset += SIMD_STRIDE_SIZE / 2;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail: still in the "all Latin1 so far" state.
            let mut iter = (&buffer[offset..]).iter();
            loop {
                if let Some(&u) = iter.next() {
                    if u > 0xFF {
                        // Switch to the bidi scan, starting with `u` itself.
                        let mut inner_u = u;
                        loop {
                            if is_utf16_code_unit_bidi(inner_u) {
                                return Latin1Bidi::Bidi;
                            }
                            if let Some(&code_unit) = iter.next() {
                                inner_u = code_unit;
                            } else {
                                return Latin1Bidi::LeftToRight;
                            }
                        }
                    }
                } else {
                    return Latin1Bidi::Latin1;
                }
            }
        }
    } else {
        /// ALU variant: tests one `usize` worth of code units at a time
        /// against `LATIN1_MASK`.
        #[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
        #[inline(always)]
        fn check_utf16_for_latin1_and_bidi_impl(buffer: &[u16]) -> Latin1Bidi {
            let mut offset = 0usize;
            let len = buffer.len();
            // `/ 2` converts byte counts to `u16` counts.
            if len >= ALU_ALIGNMENT / 2 {
                let src = buffer.as_ptr();
                let mut until_alignment = ((ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK)) &
                    ALU_ALIGNMENT_MASK) / 2;
                if until_alignment + ALU_ALIGNMENT / 2 <= len {
                    // Unaligned prefix, one code unit at a time.
                    while until_alignment != 0 {
                        if buffer[offset] > 0xFF {
                            if is_utf16_bidi_impl(&buffer[offset..]) {
                                return Latin1Bidi::Bidi;
                            }
                            return Latin1Bidi::LeftToRight;
                        }
                        offset += 1;
                        until_alignment -= 1;
                    }
                    let len_minus_stride = len - ALU_ALIGNMENT / 2;
                    loop {
                        // SAFETY: `offset <= len - ALU_ALIGNMENT / 2`, so one
                        // word starting at `offset` is in bounds (assuming
                        // `ALU_ALIGNMENT == size_of::<usize>()` - TODO confirm).
                        if unsafe { *(src.add(offset) as *const usize) } & LATIN1_MASK != 0 {
                            // Some unit in this word is above 0xFF; scan the
                            // rest (including this word) for bidi.
                            if is_utf16_bidi_impl(&buffer[offset..]) {
                                return Latin1Bidi::Bidi;
                            }
                            return Latin1Bidi::LeftToRight;
                        }
                        offset += ALU_ALIGNMENT / 2;
                        if offset > len_minus_stride {
                            break;
                        }
                    }
                }
            }
            // Scalar tail: still in the "all Latin1 so far" state.
            let mut iter = (&buffer[offset..]).iter();
            loop {
                if let Some(&u) = iter.next() {
                    if u > 0xFF {
                        // Switch to the bidi scan, starting with `u` itself.
                        let mut inner_u = u;
                        loop {
                            if is_utf16_code_unit_bidi(inner_u) {
                                return Latin1Bidi::Bidi;
                            }
                            if let Some(&code_unit) = iter.next() {
                                inner_u = code_unit;
                            } else {
                                return Latin1Bidi::LeftToRight;
                            }
                        }
                    }
                } else {
                    return Latin1Bidi::Latin1;
                }
            }
        }
    }
}
632
/// Checks whether every byte of `buffer` is below 0x80, i.e. whether the
/// buffer is entirely ASCII. An empty buffer yields `true`.
pub fn is_ascii(buffer: &[u8]) -> bool {
    is_ascii_impl(buffer)
}
640
/// Checks whether every UTF-16 code unit of `buffer` is below 0x80 (the
/// Basic Latin / ASCII range). An empty buffer yields `true`.
pub fn is_basic_latin(buffer: &[u16]) -> bool {
    is_basic_latin_impl(buffer)
}
649
/// Checks whether potentially-invalid UTF-8 consists solely of ASCII bytes
/// and well-formed two-byte sequences led by 0xC2 or 0xC3 (code points below
/// U+0100). Returns `false` if anything else, including malformed input, is
/// encountered.
pub fn is_utf8_latin1(buffer: &[u8]) -> bool {
    is_utf8_latin1_impl(buffer).is_none()
}
658
/// Checks whether `buffer` contains no byte above 0xC3, which for valid UTF-8
/// means that every character is below U+0100.
pub fn is_str_latin1(buffer: &str) -> bool {
    is_str_latin1_impl(buffer).is_none()
}
667
/// Checks whether every UTF-16 code unit of `buffer` is below 0x100 (the
/// Latin1 range). An empty buffer yields `true`.
pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
    is_utf16_latin1_impl(buffer)
}
676
/// Checks potentially-invalid UTF-8 for right-to-left content.
///
/// Returns `true` if `buffer` contains a sequence that decodes into one of
/// the ranges / controls treated as right-to-left (the same boundaries that
/// `is_char_bidi` uses, expressed on UTF-8 bytes), **or** if the input is
/// malformed: every validation failure below returns `true`. Returns `false`
/// only when the whole buffer was walked without hitting either.
///
/// Structure: `validate_ascii` skips ASCII runs ('outer). While at least four
/// bytes remain after the current lead byte position, sequences are handled
/// in the 'inner loop with unchecked reads; the final (at most three) bytes
/// are handled by the second `match`, which bounds-checks before reading.
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if, cyclomatic_complexity))]
#[inline]
pub fn is_utf8_bidi(buffer: &[u8]) -> bool {
    let mut src = buffer;
    'outer: loop {
        if let Some((mut byte, mut read)) = validate_ascii(src) {
            // `byte` is the first non-ASCII byte; it sits at `src[read]`.
            if read + 4 <= src.len() {
                // SAFETY (for all `get_unchecked` reads on `src` in this
                // loop): at the top of each iteration `read + 4 <= src.len()`
                // holds (checked here and again at the bottom of the loop),
                // so `read + 1 ..= read + 3` are in bounds.
                // SAFETY (for the `UTF8_DATA.table` reads): `byte as usize +
                // 0x80` is at most 0x17F; this presumes the table has at
                // least 384 entries - TODO confirm at its definition.
                'inner: loop {
                    match byte {
                        0..=0x7F => {
                            // ASCII: go back to the fast ASCII skipper.
                            read += 1;
                            src = &src[read..];
                            continue 'outer;
                        }
                        0xC2..=0xD5 => {
                            // Two-byte sequence below U+0580: never RTL, only
                            // validate the continuation byte.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            if !in_inclusive_range8(second, 0x80, 0xBF) {
                                return true;
                            }
                            read += 2;
                        }
                        0xD6 => {
                            // Two-byte sequence U+0580..U+05BF.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            if !in_inclusive_range8(second, 0x80, 0xBF) {
                                return true;
                            }
                            // 0xD6 0x90 is U+0590, the start of the RTL range.
                            if second > 0x8F {
                                return true;
                            }
                            read += 2;
                        }
                        // Lead bytes 0xD7..=0xDF (U+05C0..U+07FF) fall to the
                        // catch-all arm and report `true`.
                        0xE1 | 0xE3..=0xEC | 0xEE => {
                            // Three-byte sequence in a range without RTL
                            // content: validation only.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            // Table-driven validity check of the second byte
                            // for this lead, combined with the top two bits
                            // of the third byte; a valid sequence yields 2.
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            read += 3;
                        }
                        0xE2 => {
                            // Three-byte sequence U+2000..U+2FFF: contains the
                            // RTL controls.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            if second == 0x80 {
                                // U+200F, U+202B, U+202E
                                if third == 0x8F || third == 0xAB || third == 0xAE {
                                    return true;
                                }
                            } else if second == 0x81 {
                                // U+2067
                                if third == 0xA7 {
                                    return true;
                                }
                            }
                            read += 3;
                        }
                        0xEF => {
                            // Three-byte sequence U+F000..U+FFFF: contains the
                            // presentation-form ranges.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            if in_inclusive_range8(second, 0xAC, 0xB7) {
                                // U+FB00..U+FDFF
                                if second == 0xAC {
                                    // 0xEF 0xAC 0x9D is U+FB1D.
                                    if third > 0x9C {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            } else if in_inclusive_range8(second, 0xB9, 0xBB) {
                                // U+FE40..U+FEFF
                                if second == 0xB9 {
                                    // 0xEF 0xB9 0xB0 is U+FE70.
                                    if third > 0xAF {
                                        return true;
                                    }
                                } else if second == 0xBB {
                                    // Everything except U+FEFF (0xEF 0xBB 0xBF).
                                    if third != 0xBF {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            }
                            read += 3;
                        }
                        0xE0 => {
                            // Three-byte sequence U+0800..U+0FFF.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            // 0xE0 0xA4 0x80 is U+0900; everything below is
                            // in the RTL range.
                            if second < 0xA4 {
                                return true;
                            }
                            read += 3;
                        }
                        0xED => {
                            // Three-byte sequence with lead 0xED: validation
                            // only (the table check is what rejects invalid
                            // second bytes for this lead).
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            if ((UTF8_DATA.table[usize::from(second)]
                                & unsafe {
                                    *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                })
                                | (third >> 6))
                                != 2
                            {
                                return true;
                            }
                            read += 3;
                        }
                        0xF1..=0xF4 => {
                            // Four-byte sequence at or above U+40000: no RTL
                            // content, validation only.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            let fourth = unsafe { *(src.get_unchecked(read + 3)) };
                            // Same table check as above, widened so that the
                            // top two bits of the fourth byte take part too;
                            // a valid sequence yields 0x202.
                            if (u16::from(
                                UTF8_DATA.table[usize::from(second)]
                                    & unsafe {
                                        *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                    },
                            ) | u16::from(third >> 6)
                                | (u16::from(fourth & 0xC0) << 2))
                                != 0x202
                            {
                                return true;
                            }
                            read += 4;
                        }
                        0xF0 => {
                            // Four-byte sequence U+10000..U+3FFFF: contains
                            // the astral RTL ranges.
                            let second = unsafe { *(src.get_unchecked(read + 1)) };
                            let third = unsafe { *(src.get_unchecked(read + 2)) };
                            let fourth = unsafe { *(src.get_unchecked(read + 3)) };
                            if (u16::from(
                                UTF8_DATA.table[usize::from(second)]
                                    & unsafe {
                                        *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
                                    },
                            ) | u16::from(third >> 6)
                                | (u16::from(fourth & 0xC0) << 2))
                                != 0x202
                            {
                                return true;
                            }
                            // 0xF0 0x90 0xA0 0x80 is U+10800 and
                            // 0xF0 0x9E 0xA0 0x80 is U+1E800.
                            if unsafe { unlikely(second == 0x90 || second == 0x9E) } {
                                let third = src[read + 2];
                                if third >= 0xA0 {
                                    return true;
                                }
                            }
                            read += 4;
                        }
                        _ => {
                            // Invalid lead byte, or a two-byte lead at or
                            // above 0xD7 (RTL range).
                            return true;
                        }
                    }
                    if read + 4 > src.len() {
                        if read == src.len() {
                            return false;
                        }
                        // Fewer than four bytes left: handle them in the
                        // bounds-checked `match` below.
                        byte = src[read];
                        break 'inner;
                    }
                    byte = src[read];
                    continue 'inner;
                }
            }
            // At most three bytes remain starting at `src[read]`, so a
            // four-byte sequence cannot be complete (four-byte leads hit the
            // catch-all arm). Each arm checks `new_read <= src.len()` before
            // its unchecked reads, which makes those reads in bounds.
            match byte {
                0..=0x7F => {
                    // ASCII: go back to the fast ASCII skipper.
                    read += 1;
                    src = &src[read..];
                    continue 'outer;
                }
                0xC2..=0xD5 => {
                    let new_read = read + 2;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    if !in_inclusive_range8(second, 0x80, 0xBF) {
                        return true;
                    }
                    read = new_read;
                    // One more byte may still follow this two-byte sequence,
                    // so loop again instead of returning.
                    src = &src[read..];
                    continue 'outer;
                }
                0xD6 => {
                    let new_read = read + 2;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    if !in_inclusive_range8(second, 0x80, 0xBF) {
                        return true;
                    }
                    // 0xD6 0x90 is U+0590, the start of the RTL range.
                    if second > 0x8F {
                        return true;
                    }
                    read = new_read;
                    // One more byte may still follow; loop again.
                    src = &src[read..];
                    continue 'outer;
                }
                0xE1 | 0xE3..=0xEC | 0xEE => {
                    // Three-byte sequence, validation only. If it is
                    // complete it ends exactly at the end of the input, so
                    // falling out of the `match` returns `false`.
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                }
                0xE2 => {
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                    if second == 0x80 {
                        // U+200F, U+202B, U+202E
                        if third == 0x8F || third == 0xAB || third == 0xAE {
                            return true;
                        }
                    } else if second == 0x81 {
                        // U+2067
                        if third == 0xA7 {
                            return true;
                        }
                    }
                }
                0xEF => {
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                    if in_inclusive_range8(second, 0xAC, 0xB7) {
                        // U+FB00..U+FDFF
                        if second == 0xAC {
                            // 0xEF 0xAC 0x9D is U+FB1D.
                            if third > 0x9C {
                                return true;
                            }
                        } else {
                            return true;
                        }
                    } else if in_inclusive_range8(second, 0xB9, 0xBB) {
                        // U+FE40..U+FEFF
                        if second == 0xB9 {
                            // 0xEF 0xB9 0xB0 is U+FE70.
                            if third > 0xAF {
                                return true;
                            }
                        } else if second == 0xBB {
                            // Everything except U+FEFF (0xEF 0xBB 0xBF).
                            if third != 0xBF {
                                return true;
                            }
                        } else {
                            return true;
                        }
                    }
                }
                0xE0 => {
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                    // 0xE0 0xA4 0x80 is U+0900; everything below is RTL range.
                    if second < 0xA4 {
                        return true;
                    }
                }
                0xED => {
                    let new_read = read + 3;
                    if new_read > src.len() {
                        return true;
                    }
                    let second = unsafe { *(src.get_unchecked(read + 1)) };
                    let third = unsafe { *(src.get_unchecked(read + 2)) };
                    if ((UTF8_DATA.table[usize::from(second)]
                        & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                        | (third >> 6))
                        != 2
                    {
                        return true;
                    }
                }
                _ => {
                    // Invalid lead, RTL two-byte lead, or a four-byte lead
                    // that cannot be complete here.
                    return true;
                }
            }
            return false;
        } else {
            // Only ASCII was left.
            return false;
        }
    }
}
1110
/// Checks a `&str` for right-to-left content.
///
/// Works like `is_utf8_bidi` but relies on `buffer` being valid UTF-8, so no
/// sequence validation is performed: the lead byte alone decides how many
/// bytes are stepped over, and trailing bytes are only inspected where the
/// result depends on them.
#[cfg_attr(feature = "cargo-clippy", allow(collapsible_if))]
#[inline]
pub fn is_str_bidi(buffer: &str) -> bool {
    let mut bytes = buffer.as_bytes();
    'outer: loop {
        if let Some((mut byte, mut read)) = validate_ascii(bytes) {
            // `byte` is the first non-ASCII byte; it sits at `bytes[read]`.
            'inner: loop {
                if byte < 0xE0 {
                    if byte >= 0x80 {
                        // Two-byte sequence.
                        if unsafe { unlikely(byte >= 0xD6) } {
                            if byte == 0xD6 {
                                // 0xD6 0x90 is U+0590, the start of the RTL
                                // range.
                                let second = bytes[read + 1];
                                if second > 0x8F {
                                    return true;
                                }
                            } else {
                                // Leads 0xD7..=0xDF: U+05C0..U+07FF.
                                return true;
                            }
                        }
                        read += 2;
                    } else {
                        // ASCII: go back to the fast ASCII skipper.
                        read += 1;
                        bytes = &bytes[read..];
                        continue 'outer;
                    }
                } else if byte < 0xF0 {
                    // Three-byte sequence. Only leads 0xE0, 0xE2 and 0xEF can
                    // encode RTL content.
                    if unsafe { unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) } {
                        let second = bytes[read + 1];
                        if byte == 0xE0 {
                            // 0xE0 0xA4 0x80 is U+0900; below that is RTL.
                            if second < 0xA4 {
                                return true;
                            }
                        } else if byte == 0xE2 {
                            let third = bytes[read + 2];
                            if second == 0x80 {
                                // U+200F, U+202B, U+202E
                                if third == 0x8F || third == 0xAB || third == 0xAE {
                                    return true;
                                }
                            } else if second == 0x81 {
                                // U+2067
                                if third == 0xA7 {
                                    return true;
                                }
                            }
                        } else {
                            debug_assert_eq!(byte, 0xEF);
                            if in_inclusive_range8(second, 0xAC, 0xB7) {
                                // U+FB00..U+FDFF
                                if second == 0xAC {
                                    // 0xEF 0xAC 0x9D is U+FB1D.
                                    let third = bytes[read + 2];
                                    if third > 0x9C {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            } else if in_inclusive_range8(second, 0xB9, 0xBB) {
                                // U+FE40..U+FEFF
                                if second == 0xB9 {
                                    // 0xEF 0xB9 0xB0 is U+FE70.
                                    let third = bytes[read + 2];
                                    if third > 0xAF {
                                        return true;
                                    }
                                } else if second == 0xBB {
                                    // Everything except U+FEFF.
                                    let third = bytes[read + 2];
                                    if third != 0xBF {
                                        return true;
                                    }
                                } else {
                                    return true;
                                }
                            }
                        }
                    }
                    read += 3;
                } else {
                    // Four-byte sequence.
                    let second = bytes[read + 1];
                    // 0xF0 0x90 0xA0 0x80 is U+10800 and
                    // 0xF0 0x9E 0xA0 0x80 is U+1E800.
                    if unsafe { unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) } {
                        let third = bytes[read + 2];
                        if third >= 0xA0 {
                            return true;
                        }
                    }
                    read += 4;
                }
                // Valid UTF-8 means `read` lands exactly on the end or on
                // the next lead byte.
                if read >= bytes.len() {
                    return false;
                }
                byte = bytes[read];
                continue 'inner;
            }
        } else {
            // Only ASCII was left.
            return false;
        }
    }
}
1266
/// Checks whether any UTF-16 code unit of `buffer` is classified as
/// right-to-left by `is_utf16_code_unit_bidi`. The check is per code unit;
/// surrogate pairing is not validated here.
pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
    is_utf16_bidi_impl(buffer)
}
1287
1288#[inline(always)]
1300pub fn is_char_bidi(c: char) -> bool {
1301 let code_point = u32::from(c);
1321 if code_point < 0x0590 {
1322 return false;
1324 }
1325 if in_range32(code_point, 0x0900, 0xFB1D) {
1326 if in_inclusive_range32(code_point, 0x200F, 0x2067) {
1328 return code_point == 0x200F
1330 || code_point == 0x202B
1331 || code_point == 0x202E
1332 || code_point == 0x2067;
1333 }
1334 return false;
1335 }
1336 if code_point > 0x1EFFF {
1337 return false;
1339 }
1340 if in_range32(code_point, 0x11000, 0x1E800) {
1341 return false;
1343 }
1344 if in_range32(code_point, 0xFEFF, 0x10800) {
1345 return false;
1348 }
1349 if in_range32(code_point, 0xFE00, 0xFE70) {
1350 return false;
1352 }
1353 true
1354}
1355
1356#[inline(always)]
1375pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
1376 if u < 0x0590 {
1377 return false;
1379 }
1380 if in_range16(u, 0x0900, 0xD802) {
1381 if in_inclusive_range16(u, 0x200F, 0x2067) {
1383 return u == 0x200F || u == 0x202B || u == 0x202E || u == 0x2067;
1385 }
1386 return false;
1387 }
1388 if in_range16(u, 0xD83C, 0xFB1D) {
1389 return false;
1392 }
1393 if in_range16(u, 0xD804, 0xD83A) {
1394 return false;
1396 }
1397 if u > 0xFEFE {
1398 return false;
1400 }
1401 if in_range16(u, 0xFE00, 0xFE70) {
1402 return false;
1404 }
1405 true
1406}
1407
1408pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi {
1417 if let Some(offset) = is_utf8_latin1_impl(buffer) {
1418 if is_utf8_bidi(&buffer[offset..]) {
1419 Latin1Bidi::Bidi
1420 } else {
1421 Latin1Bidi::LeftToRight
1422 }
1423 } else {
1424 Latin1Bidi::Latin1
1425 }
1426}
1427
1428pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi {
1437 if let Some(offset) = is_str_latin1_impl(buffer) {
1440 if is_str_bidi(&buffer[offset..]) {
1441 Latin1Bidi::Bidi
1442 } else {
1443 Latin1Bidi::LeftToRight
1444 }
1445 } else {
1446 Latin1Bidi::Latin1
1447 }
1448}
1449
/// Classifies a UTF-16 buffer in one pass: `Latin1` if no code unit exceeds
/// 0xFF, otherwise `Bidi` or `LeftToRight` depending on whether a
/// right-to-left code unit is found from that point on.
pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi {
    check_utf16_for_latin1_and_bidi_impl(buffer)
}
1461
1462pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize {
1474 assert!(dst.len() > src.len());
1477 let mut decoder = Utf8Decoder::new_inner();
1478 let mut total_read = 0usize;
1479 let mut total_written = 0usize;
1480 loop {
1481 let (result, read, written) =
1482 decoder.decode_to_utf16_raw(&src[total_read..], &mut dst[total_written..], true);
1483 total_read += read;
1484 total_written += written;
1485 match result {
1486 DecoderResult::InputEmpty => {
1487 return total_written;
1488 }
1489 DecoderResult::OutputFull => {
1490 unreachable!("The assert at the top of the function should have caught this.");
1491 }
1492 DecoderResult::Malformed(_, _) => {
1493 dst[total_written] = 0xFFFD;
1496 total_written += 1;
1497 }
1498 }
1499 }
1500}
1501
/// Converts valid UTF-8 (`&str`) to UTF-16 and returns the number of `u16`s
/// written to `dst`.
///
/// Because the input is known to be valid, sequences are decoded from the
/// lead byte alone without validation.
///
/// # Panics
///
/// Panics if `dst` is shorter than `src` (in code units vs. bytes).
pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
    assert!(
        dst.len() >= src.len(),
        "Destination must not be shorter than the source."
    );
    let bytes = src.as_bytes();
    let mut read = 0;
    let mut written = 0;
    'outer: loop {
        // Copy an ASCII run; `byte` becomes the first non-ASCII byte, which
        // is not yet counted in `read`.
        let mut byte = {
            let src_remaining = &bytes[read..];
            let dst_remaining = &mut dst[written..];
            let length = src_remaining.len();
            // SAFETY: `dst_remaining` has room for `length` units. The
            // assert gives `dst.len() >= bytes.len()`, and `written <= read`
            // since no UTF-8 sequence yields more `u16`s than it has bytes.
            match unsafe {
                ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
            } {
                None => {
                    // Everything that was left was ASCII.
                    written += length;
                    return written;
                }
                Some((non_ascii, consumed)) => {
                    read += consumed;
                    written += consumed;
                    non_ascii
                }
            }
        };
        // SAFETY (for the unchecked reads and writes in this loop): `src` is
        // valid UTF-8, so every lead byte is followed by its trail bytes
        // within `bytes`. `written` never overtakes `read`, and the assert
        // gives `dst.len() >= bytes.len()`, so the output indices stay in
        // bounds.
        'inner: loop {
            if byte < 0xE0 {
                if byte >= 0x80 {
                    // Two-byte sequence.
                    let second = unsafe { *(bytes.get_unchecked(read + 1)) };
                    let point = ((u16::from(byte) & 0x1F) << 6) | (u16::from(second) & 0x3F);
                    unsafe { *(dst.get_unchecked_mut(written)) = point };
                    read += 2;
                    written += 1;
                } else {
                    // ASCII: write it and go back to the bulk ASCII copier.
                    unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
                    read += 1;
                    written += 1;
                    continue 'outer;
                }
            } else if byte < 0xF0 {
                // Three-byte sequence.
                let second = unsafe { *(bytes.get_unchecked(read + 1)) };
                let third = unsafe { *(bytes.get_unchecked(read + 2)) };
                let point = ((u16::from(byte) & 0xF) << 12)
                    | ((u16::from(second) & 0x3F) << 6)
                    | (u16::from(third) & 0x3F);
                unsafe { *(dst.get_unchecked_mut(written)) = point };
                read += 3;
                written += 1;
            } else {
                // Four-byte sequence: becomes a surrogate pair.
                let second = unsafe { *(bytes.get_unchecked(read + 1)) };
                let third = unsafe { *(bytes.get_unchecked(read + 2)) };
                let fourth = unsafe { *(bytes.get_unchecked(read + 3)) };
                let point = ((u32::from(byte) & 0x7) << 18)
                    | ((u32::from(second) & 0x3F) << 12)
                    | ((u32::from(third) & 0x3F) << 6)
                    | (u32::from(fourth) & 0x3F);
                // 0xD7C0 is 0xD800 - (0x10000 >> 10): it folds the
                // subtraction of 0x10000 into the high-surrogate base.
                unsafe { *(dst.get_unchecked_mut(written)) = (0xD7C0 + (point >> 10)) as u16 };
                unsafe {
                    *(dst.get_unchecked_mut(written + 1)) = (0xDC00 + (point & 0x3FF)) as u16
                };
                read += 4;
                written += 2;
            }
            if read >= src.len() {
                return written;
            }
            byte = bytes[read];
            continue 'inner;
        }
    }
}
1598
1599pub fn convert_utf8_to_utf16_without_replacement(src: &[u8], dst: &mut [u16]) -> Option<usize> {
1612 assert!(
1613 dst.len() >= src.len(),
1614 "Destination must not be shorter than the source."
1615 );
1616 let (read, written) = convert_utf8_to_utf16_up_to_invalid(src, dst);
1617 if read == src.len() {
1618 return Some(written);
1619 }
1620 None
1621}
1622
1623#[inline(always)]
1649pub fn convert_utf16_to_utf8_partial(src: &[u16], dst: &mut [u8]) -> (usize, usize) {
1650 let (read, written) = convert_utf16_to_utf8_partial_inner(src, dst);
1658 if unsafe { likely(read == src.len()) } {
1659 return (read, written);
1660 }
1661 let (tail_read, tail_written) =
1662 convert_utf16_to_utf8_partial_tail(&src[read..], &mut dst[written..]);
1663 (read + tail_read, written + tail_written)
1664}
1665
1666#[inline(always)]
1684pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
1685 assert!(dst.len() >= src.len() * 3);
1686 let (read, written) = convert_utf16_to_utf8_partial(src, dst);
1687 debug_assert_eq!(read, src.len());
1688 written
1689}
1690
1691pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) {
1705 let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
1706 let (read, written) = convert_utf16_to_utf8_partial(src, bytes);
1707 let len = bytes.len();
1708 let mut trail = written;
1709 while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
1710 bytes[trail] = 0;
1711 trail += 1;
1712 }
1713 (read, written)
1714}
1715
1716#[inline(always)]
1729pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize {
1730 assert!(dst.len() >= src.len() * 3);
1731 let (read, written) = convert_utf16_to_str_partial(src, dst);
1732 debug_assert_eq!(read, src.len());
1733 written
1734}
1735
1736pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
1748 assert!(
1749 dst.len() >= src.len(),
1750 "Destination must not be shorter than the source."
1751 );
1752 unsafe {
1756 unpack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
1757 }
1758}
1759
1760pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) {
1774 let src_len = src.len();
1775 let src_ptr = src.as_ptr();
1776 let dst_ptr = dst.as_mut_ptr();
1777 let dst_len = dst.len();
1778 let mut total_read = 0usize;
1779 let mut total_written = 0usize;
1780 loop {
1781 let src_left = src_len - total_read;
1783 let dst_left = dst_len - total_written;
1784 let min_left = ::std::cmp::min(src_left, dst_left);
1785 if let Some((non_ascii, consumed)) = unsafe {
1786 ascii_to_ascii(
1787 src_ptr.add(total_read),
1788 dst_ptr.add(total_written),
1789 min_left,
1790 )
1791 } {
1792 total_read += consumed;
1793 total_written += consumed;
1794 if total_written.checked_add(2).unwrap() > dst_len {
1795 return (total_read, total_written);
1796 }
1797
1798 total_read += 1; dst[total_written] = (non_ascii >> 6) | 0xC0;
1801 total_written += 1;
1802 dst[total_written] = (non_ascii & 0x3F) | 0x80;
1803 total_written += 1;
1804 continue;
1805 }
1806 return (total_read + min_left, total_written + min_left);
1807 }
1808}
1809
1810#[inline]
1829pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
1830 assert!(
1831 dst.len() >= src.len() * 2,
1832 "Destination must not be shorter than the source times two."
1833 );
1834 let (read, written) = convert_latin1_to_utf8_partial(src, dst);
1835 debug_assert_eq!(read, src.len());
1836 written
1837}
1838
1839#[inline]
1848pub fn convert_latin1_to_str_partial(src: &[u8], dst: &mut str) -> (usize, usize) {
1849 let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
1850 let (read, written) = convert_latin1_to_utf8_partial(src, bytes);
1851 let len = bytes.len();
1852 let mut trail = written;
1853 let max = ::std::cmp::min(len, trail + MAX_STRIDE_SIZE);
1854 while trail < max {
1855 bytes[trail] = 0;
1856 trail += 1;
1857 }
1858 while trail < len && ((bytes[trail] & 0xC0) == 0x80) {
1859 bytes[trail] = 0;
1860 trail += 1;
1861 }
1862 (read, written)
1863}
1864
1865#[inline]
1878pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
1879 assert!(
1880 dst.len() >= src.len() * 2,
1881 "Destination must not be shorter than the source times two."
1882 );
1883 let (read, written) = convert_latin1_to_str_partial(src, dst);
1884 debug_assert_eq!(read, src.len());
1885 written
1886}
1887
1888pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
1912 assert!(
1913 dst.len() >= src.len(),
1914 "Destination must not be shorter than the source."
1915 );
1916 non_fuzz_debug_assert!(is_utf8_latin1(src));
1917 let src_len = src.len();
1918 let src_ptr = src.as_ptr();
1919 let dst_ptr = dst.as_mut_ptr();
1920 let mut total_read = 0usize;
1921 let mut total_written = 0usize;
1922 loop {
1923 let src_left = src_len - total_read;
1925 if let Some((non_ascii, consumed)) = unsafe {
1926 ascii_to_ascii(
1927 src_ptr.add(total_read),
1928 dst_ptr.add(total_written),
1929 src_left,
1930 )
1931 } {
1932 total_read += consumed + 1;
1933 total_written += consumed;
1934
1935 if total_read == src_len {
1936 return total_written;
1937 }
1938
1939 let trail = src[total_read];
1940 total_read += 1;
1941
1942 dst[total_written] = ((non_ascii & 0x1F) << 6) | (trail & 0x3F);
1943 total_written += 1;
1944 continue;
1945 }
1946 return total_written + src_left;
1947 }
1948}
1949
1950pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) {
1974 assert!(
1975 dst.len() >= src.len(),
1976 "Destination must not be shorter than the source."
1977 );
1978 unsafe {
1980 pack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
1981 }
1982}
1983
1984pub fn decode_latin1<'a>(bytes: &'a [u8]) -> Cow<'a, str> {
1990 let up_to = ascii_valid_up_to(bytes);
1991 if up_to >= bytes.len() {
1993 debug_assert_eq!(up_to, bytes.len());
1994 let s: &str = unsafe { ::std::str::from_utf8_unchecked(bytes) };
1995 return Cow::Borrowed(s);
1996 }
1997 let (head, tail) = bytes.split_at(up_to);
1998 let capacity = head.len() + tail.len() * 2;
1999 let mut vec = Vec::with_capacity(capacity);
2000 unsafe {
2001 vec.set_len(capacity);
2002 }
2003 (&mut vec[..up_to]).copy_from_slice(head);
2004 let written = convert_latin1_to_utf8(tail, &mut vec[up_to..]);
2005 vec.truncate(up_to + written);
2006 Cow::Owned(unsafe { String::from_utf8_unchecked(vec) })
2007}
2008
2009pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> {
2024 let bytes = string.as_bytes();
2025 let up_to = ascii_valid_up_to(bytes);
2026 if up_to >= bytes.len() {
2028 debug_assert_eq!(up_to, bytes.len());
2029 return Cow::Borrowed(bytes);
2030 }
2031 let (head, tail) = bytes.split_at(up_to);
2032 let capacity = bytes.len();
2033 let mut vec = Vec::with_capacity(capacity);
2034 unsafe {
2035 vec.set_len(capacity);
2036 }
2037 (&mut vec[..up_to]).copy_from_slice(head);
2038 let written = convert_utf8_to_latin1_lossy(tail, &mut vec[up_to..]);
2039 vec.truncate(up_to + written);
2040 Cow::Owned(vec)
2041}
2042
/// Public entry point that forwards `buffer` unchanged to
/// `utf16_valid_up_to_impl` and returns its result.
///
/// Judging by how `ensure_utf16_validity` uses the return value, it is the
/// index of the first code unit that is not part of valid UTF-16, or
/// `buffer.len()` when the whole buffer is valid.
pub fn utf16_valid_up_to(buffer: &[u16]) -> usize {
    utf16_valid_up_to_impl(buffer)
}
2048
2049pub fn utf8_latin1_up_to(buffer: &[u8]) -> usize {
2053 is_utf8_latin1_impl(buffer).unwrap_or(buffer.len())
2054}
2055
2056pub fn str_latin1_up_to(buffer: &str) -> usize {
2059 is_str_latin1_impl(buffer).unwrap_or(buffer.len())
2060}
2061
2062#[inline]
2064pub fn ensure_utf16_validity(buffer: &mut [u16]) {
2065 let mut offset = 0;
2066 loop {
2067 offset += utf16_valid_up_to(&buffer[offset..]);
2068 if offset == buffer.len() {
2069 return;
2070 }
2071 buffer[offset] = 0xFFFD;
2072 offset += 1;
2073 }
2074}
2075
2076pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize {
2088 assert!(
2089 dst.len() >= src.len(),
2090 "Destination must not be shorter than the source."
2091 );
2092 if let Some((_, consumed)) =
2093 unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2094 {
2095 consumed
2096 } else {
2097 src.len()
2098 }
2099}
2100
2101pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize {
2114 assert!(
2115 dst.len() >= src.len(),
2116 "Destination must not be shorter than the source."
2117 );
2118 if let Some((_, consumed)) =
2119 unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2120 {
2121 consumed
2122 } else {
2123 src.len()
2124 }
2125}
2126
2127pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize {
2140 assert!(
2141 dst.len() >= src.len(),
2142 "Destination must not be shorter than the source."
2143 );
2144 if let Some((_, consumed)) =
2145 unsafe { basic_latin_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
2146 {
2147 consumed
2148 } else {
2149 src.len()
2150 }
2151}
2152
2153#[cfg(test)]
2157mod tests {
2158 use super::*;
2159
2160 #[test]
2161 fn test_is_ascii_success() {
2162 let mut src: Vec<u8> = Vec::with_capacity(128);
2163 src.resize(128, 0);
2164 for i in 0..src.len() {
2165 src[i] = i as u8;
2166 }
2167 for i in 0..src.len() {
2168 assert!(is_ascii(&src[i..]));
2169 }
2170 }
2171
2172 #[test]
2173 fn test_is_ascii_fail() {
2174 let mut src: Vec<u8> = Vec::with_capacity(128);
2175 src.resize(128, 0);
2176 for i in 0..src.len() {
2177 src[i] = i as u8;
2178 }
2179 for i in 0..src.len() {
2180 let tail = &mut src[i..];
2181 for j in 0..tail.len() {
2182 tail[j] = 0xA0;
2183 assert!(!is_ascii(tail));
2184 }
2185 }
2186 }
2187
2188 #[test]
2189 fn test_is_basic_latin_success() {
2190 let mut src: Vec<u16> = Vec::with_capacity(128);
2191 src.resize(128, 0);
2192 for i in 0..src.len() {
2193 src[i] = i as u16;
2194 }
2195 for i in 0..src.len() {
2196 assert!(is_basic_latin(&src[i..]));
2197 }
2198 }
2199
2200 #[test]
2201 fn test_is_basic_latin_fail() {
2202 let mut src: Vec<u16> = Vec::with_capacity(128);
2203 src.resize(128, 0);
2204 for i in 0..src.len() {
2205 src[i] = i as u16;
2206 }
2207 for i in 0..src.len() {
2208 let tail = &mut src[i..];
2209 for j in 0..tail.len() {
2210 tail[j] = 0xA0;
2211 assert!(!is_basic_latin(tail));
2212 }
2213 }
2214 }
2215
2216 #[test]
2217 fn test_is_utf16_latin1_success() {
2218 let mut src: Vec<u16> = Vec::with_capacity(256);
2219 src.resize(256, 0);
2220 for i in 0..src.len() {
2221 src[i] = i as u16;
2222 }
2223 for i in 0..src.len() {
2224 assert!(is_utf16_latin1(&src[i..]));
2225 assert_eq!(
2226 check_utf16_for_latin1_and_bidi(&src[i..]),
2227 Latin1Bidi::Latin1
2228 );
2229 }
2230 }
2231
2232 #[test]
2233 fn test_is_utf16_latin1_fail() {
2234 let len = if cfg!(miri) { 64 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2236 src.resize(len, 0);
2237 for i in 0..src.len() {
2238 src[i] = i as u16;
2239 }
2240 for i in 0..src.len() {
2241 let tail = &mut src[i..];
2242 for j in 0..tail.len() {
2243 tail[j] = 0x100 + j as u16;
2244 assert!(!is_utf16_latin1(tail));
2245 assert_ne!(check_utf16_for_latin1_and_bidi(tail), Latin1Bidi::Latin1);
2246 }
2247 }
2248 }
2249
2250 #[test]
2251 fn test_is_str_latin1_success() {
2252 let len = if cfg!(miri) { 64 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2254 src.resize(len, 0);
2255 for i in 0..src.len() {
2256 src[i] = i as u16;
2257 }
2258 for i in 0..src.len() {
2259 let s = String::from_utf16(&src[i..]).unwrap();
2260 assert!(is_str_latin1(&s[..]));
2261 assert_eq!(check_str_for_latin1_and_bidi(&s[..]), Latin1Bidi::Latin1);
2262 }
2263 }
2264
2265 #[test]
2266 fn test_is_str_latin1_fail() {
2267 let len = if cfg!(miri) { 32 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2269 src.resize(len, 0);
2270 for i in 0..src.len() {
2271 src[i] = i as u16;
2272 }
2273 for i in 0..src.len() {
2274 let tail = &mut src[i..];
2275 for j in 0..tail.len() {
2276 tail[j] = 0x100 + j as u16;
2277 let s = String::from_utf16(tail).unwrap();
2278 assert!(!is_str_latin1(&s[..]));
2279 assert_ne!(check_str_for_latin1_and_bidi(&s[..]), Latin1Bidi::Latin1);
2280 }
2281 }
2282 }
2283
2284 #[test]
2285 fn test_is_utf8_latin1_success() {
2286 let len = if cfg!(miri) { 64 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2288 src.resize(len, 0);
2289 for i in 0..src.len() {
2290 src[i] = i as u16;
2291 }
2292 for i in 0..src.len() {
2293 let s = String::from_utf16(&src[i..]).unwrap();
2294 assert!(is_utf8_latin1(s.as_bytes()));
2295 assert_eq!(
2296 check_utf8_for_latin1_and_bidi(s.as_bytes()),
2297 Latin1Bidi::Latin1
2298 );
2299 }
2300 }
2301
2302 #[test]
2303 fn test_is_utf8_latin1_fail() {
2304 let len = if cfg!(miri) { 32 } else { 256 }; let mut src: Vec<u16> = Vec::with_capacity(len);
2306 src.resize(len, 0);
2307 for i in 0..src.len() {
2308 src[i] = i as u16;
2309 }
2310 for i in 0..src.len() {
2311 let tail = &mut src[i..];
2312 for j in 0..tail.len() {
2313 tail[j] = 0x100 + j as u16;
2314 let s = String::from_utf16(tail).unwrap();
2315 assert!(!is_utf8_latin1(s.as_bytes()));
2316 assert_ne!(
2317 check_utf8_for_latin1_and_bidi(s.as_bytes()),
2318 Latin1Bidi::Latin1
2319 );
2320 }
2321 }
2322 }
2323
2324 #[test]
2325 fn test_is_utf8_latin1_invalid() {
2326 assert!(!is_utf8_latin1(b"\xC3"));
2327 assert!(!is_utf8_latin1(b"a\xC3"));
2328 assert!(!is_utf8_latin1(b"\xFF"));
2329 assert!(!is_utf8_latin1(b"a\xFF"));
2330 assert!(!is_utf8_latin1(b"\xC3\xFF"));
2331 assert!(!is_utf8_latin1(b"a\xC3\xFF"));
2332 }
2333
2334 #[test]
2335 fn test_convert_utf8_to_utf16() {
2336 let src = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2337 let mut dst: Vec<u16> = Vec::with_capacity(src.len() + 1);
2338 dst.resize(src.len() + 1, 0);
2339 let len = convert_utf8_to_utf16(src.as_bytes(), &mut dst[..]);
2340 dst.truncate(len);
2341 let reference: Vec<u16> = src.encode_utf16().collect();
2342 assert_eq!(dst, reference);
2343 }
2344
2345 #[test]
2346 fn test_convert_str_to_utf16() {
2347 let src = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2348 let mut dst: Vec<u16> = Vec::with_capacity(src.len());
2349 dst.resize(src.len(), 0);
2350 let len = convert_str_to_utf16(src, &mut dst[..]);
2351 dst.truncate(len);
2352 let reference: Vec<u16> = src.encode_utf16().collect();
2353 assert_eq!(dst, reference);
2354 }
2355
2356 #[test]
2357 fn test_convert_utf16_to_utf8_partial() {
2358 let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2359 let src: Vec<u16> = reference.encode_utf16().collect();
2360 let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 3 + 1);
2361 dst.resize(src.len() * 3 + 1, 0);
2362 let (read, written) = convert_utf16_to_utf8_partial(&src[..], &mut dst[..24]);
2363 let len = written + convert_utf16_to_utf8(&src[read..], &mut dst[written..]);
2364 dst.truncate(len);
2365 assert_eq!(dst, reference.as_bytes());
2366 }
2367
2368 #[test]
2369 fn test_convert_utf16_to_utf8() {
2370 let reference = "abcdefghijklmnopqrstu\u{1F4A9}v\u{2603}w\u{00B6}xyzz";
2371 let src: Vec<u16> = reference.encode_utf16().collect();
2372 let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 3 + 1);
2373 dst.resize(src.len() * 3 + 1, 0);
2374 let len = convert_utf16_to_utf8(&src[..], &mut dst[..]);
2375 dst.truncate(len);
2376 assert_eq!(dst, reference.as_bytes());
2377 }
2378
2379 #[test]
2380 fn test_convert_latin1_to_utf16() {
2381 let mut src: Vec<u8> = Vec::with_capacity(256);
2382 src.resize(256, 0);
2383 let mut reference: Vec<u16> = Vec::with_capacity(256);
2384 reference.resize(256, 0);
2385 for i in 0..256 {
2386 src[i] = i as u8;
2387 reference[i] = i as u16;
2388 }
2389 let mut dst: Vec<u16> = Vec::with_capacity(src.len());
2390 dst.resize(src.len(), 0);
2391 convert_latin1_to_utf16(&src[..], &mut dst[..]);
2392 assert_eq!(dst, reference);
2393 }
2394
2395 #[test]
2396 fn test_convert_latin1_to_utf8_partial() {
2397 let mut dst = [0u8, 2];
2398 let (read, written) = convert_latin1_to_utf8_partial(b"a\xFF", &mut dst[..]);
2399 assert_eq!(read, 1);
2400 assert_eq!(written, 1);
2401 }
2402
2403 #[test]
2404 fn test_convert_latin1_to_utf8() {
2405 let mut src: Vec<u8> = Vec::with_capacity(256);
2406 src.resize(256, 0);
2407 let mut reference: Vec<u16> = Vec::with_capacity(256);
2408 reference.resize(256, 0);
2409 for i in 0..256 {
2410 src[i] = i as u8;
2411 reference[i] = i as u16;
2412 }
2413 let s = String::from_utf16(&reference[..]).unwrap();
2414 let mut dst: Vec<u8> = Vec::with_capacity(src.len() * 2);
2415 dst.resize(src.len() * 2, 0);
2416 let len = convert_latin1_to_utf8(&src[..], &mut dst[..]);
2417 dst.truncate(len);
2418 assert_eq!(&dst[..], s.as_bytes());
2419 }
2420
2421 #[test]
2422 fn test_convert_utf8_to_latin1_lossy() {
2423 let mut reference: Vec<u8> = Vec::with_capacity(256);
2424 reference.resize(256, 0);
2425 let mut src16: Vec<u16> = Vec::with_capacity(256);
2426 src16.resize(256, 0);
2427 for i in 0..256 {
2428 src16[i] = i as u16;
2429 reference[i] = i as u8;
2430 }
2431 let src = String::from_utf16(&src16[..]).unwrap();
2432 let mut dst: Vec<u8> = Vec::with_capacity(src.len());
2433 dst.resize(src.len(), 0);
2434 let len = convert_utf8_to_latin1_lossy(src.as_bytes(), &mut dst[..]);
2435 dst.truncate(len);
2436 assert_eq!(dst, reference);
2437 }
2438
2439 #[cfg(all(debug_assertions, not(fuzzing)))]
2440 #[test]
2441 #[should_panic]
2442 fn test_convert_utf8_to_latin1_lossy_panics() {
2443 let mut dst = [0u8; 16];
2444 let _ = convert_utf8_to_latin1_lossy("\u{100}".as_bytes(), &mut dst[..]);
2445 }
2446
2447 #[test]
2448 fn test_convert_utf16_to_latin1_lossy() {
2449 let mut src: Vec<u16> = Vec::with_capacity(256);
2450 src.resize(256, 0);
2451 let mut reference: Vec<u8> = Vec::with_capacity(256);
2452 reference.resize(256, 0);
2453 for i in 0..256 {
2454 src[i] = i as u16;
2455 reference[i] = i as u8;
2456 }
2457 let mut dst: Vec<u8> = Vec::with_capacity(src.len());
2458 dst.resize(src.len(), 0);
2459 convert_utf16_to_latin1_lossy(&src[..], &mut dst[..]);
2460 assert_eq!(dst, reference);
2461 }
2462
2463 #[test]
2464 fn test_convert_utf16_to_latin1_lossy_panics() {
2466 let mut dst = [0u8; 16];
2467 let _ = convert_utf16_to_latin1_lossy(&[0x0100u16], &mut dst[..]);
2468 }
2469
2470 #[test]
2471 fn test_utf16_valid_up_to() {
2472 let valid = vec![
2473 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0x2603u16,
2474 0xD83Du16, 0xDCA9u16, 0x00B6u16,
2475 ];
2476 assert_eq!(utf16_valid_up_to(&valid[..]), 16);
2477 let lone_high = vec![
2478 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2479 0x2603u16, 0xD83Du16, 0x00B6u16,
2480 ];
2481 assert_eq!(utf16_valid_up_to(&lone_high[..]), 14);
2482 let lone_low = vec![
2483 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2484 0x2603u16, 0xDCA9u16, 0x00B6u16,
2485 ];
2486 assert_eq!(utf16_valid_up_to(&lone_low[..]), 14);
2487 let lone_high_at_end = vec![
2488 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2489 0x2603u16, 0x00B6u16, 0xD83Du16,
2490 ];
2491 assert_eq!(utf16_valid_up_to(&lone_high_at_end[..]), 15);
2492 }
2493
2494 #[test]
2495 fn test_ensure_utf16_validity() {
2496 let mut src = vec![
2497 0u16, 0xD83Du16, 0u16, 0u16, 0u16, 0xD83Du16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2498 0u16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2499 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2500 ];
2501 let reference = vec![
2502 0u16, 0xFFFDu16, 0u16, 0u16, 0u16, 0xD83Du16, 0xDCA9u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2503 0u16, 0xFFFDu16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2504 0u16, 0u16, 0u16, 0u16, 0u16, 0u16,
2505 ];
2506 ensure_utf16_validity(&mut src[..]);
2507 assert_eq!(src, reference);
2508 }
2509
2510 #[test]
2511 fn test_is_char_bidi() {
2512 assert!(!is_char_bidi('a'));
2513 assert!(!is_char_bidi('\u{03B1}'));
2514 assert!(!is_char_bidi('\u{3041}'));
2515 assert!(!is_char_bidi('\u{1F4A9}'));
2516 assert!(!is_char_bidi('\u{FE00}'));
2517 assert!(!is_char_bidi('\u{202C}'));
2518 assert!(!is_char_bidi('\u{FEFF}'));
2519 assert!(is_char_bidi('\u{0590}'));
2520 assert!(is_char_bidi('\u{08FF}'));
2521 assert!(is_char_bidi('\u{061C}'));
2522 assert!(is_char_bidi('\u{FB50}'));
2523 assert!(is_char_bidi('\u{FDFF}'));
2524 assert!(is_char_bidi('\u{FE70}'));
2525 assert!(is_char_bidi('\u{FEFE}'));
2526 assert!(is_char_bidi('\u{200F}'));
2527 assert!(is_char_bidi('\u{202B}'));
2528 assert!(is_char_bidi('\u{202E}'));
2529 assert!(is_char_bidi('\u{2067}'));
2530 assert!(is_char_bidi('\u{10800}'));
2531 assert!(is_char_bidi('\u{10FFF}'));
2532 assert!(is_char_bidi('\u{1E800}'));
2533 assert!(is_char_bidi('\u{1EFFF}'));
2534 }
2535
2536 #[test]
2537 fn test_is_utf16_code_unit_bidi() {
2538 assert!(!is_utf16_code_unit_bidi(0x0062));
2539 assert!(!is_utf16_code_unit_bidi(0x03B1));
2540 assert!(!is_utf16_code_unit_bidi(0x3041));
2541 assert!(!is_utf16_code_unit_bidi(0xD801));
2542 assert!(!is_utf16_code_unit_bidi(0xFE00));
2543 assert!(!is_utf16_code_unit_bidi(0x202C));
2544 assert!(!is_utf16_code_unit_bidi(0xFEFF));
2545 assert!(is_utf16_code_unit_bidi(0x0590));
2546 assert!(is_utf16_code_unit_bidi(0x08FF));
2547 assert!(is_utf16_code_unit_bidi(0x061C));
2548 assert!(is_utf16_code_unit_bidi(0xFB1D));
2549 assert!(is_utf16_code_unit_bidi(0xFB50));
2550 assert!(is_utf16_code_unit_bidi(0xFDFF));
2551 assert!(is_utf16_code_unit_bidi(0xFE70));
2552 assert!(is_utf16_code_unit_bidi(0xFEFE));
2553 assert!(is_utf16_code_unit_bidi(0x200F));
2554 assert!(is_utf16_code_unit_bidi(0x202B));
2555 assert!(is_utf16_code_unit_bidi(0x202E));
2556 assert!(is_utf16_code_unit_bidi(0x2067));
2557 assert!(is_utf16_code_unit_bidi(0xD802));
2558 assert!(is_utf16_code_unit_bidi(0xD803));
2559 assert!(is_utf16_code_unit_bidi(0xD83A));
2560 assert!(is_utf16_code_unit_bidi(0xD83B));
2561 }
2562
2563 #[test]
2564 fn test_is_str_bidi() {
2565 assert!(!is_str_bidi("abcdefghijklmnopaabcdefghijklmnop"));
2566 assert!(!is_str_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop"));
2567 assert!(!is_str_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop"));
2568 assert!(!is_str_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"));
2569 assert!(!is_str_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"));
2570 assert!(!is_str_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"));
2571 assert!(!is_str_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"));
2572 assert!(is_str_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"));
2573 assert!(is_str_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"));
2574 assert!(is_str_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"));
2575 assert!(is_str_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"));
2576 assert!(is_str_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"));
2577 assert!(is_str_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"));
2578 assert!(is_str_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"));
2579 assert!(is_str_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"));
2580 assert!(is_str_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"));
2581 assert!(is_str_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"));
2582 assert!(is_str_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop"));
2583 assert!(is_str_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop"));
2584 assert!(is_str_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"));
2585 assert!(is_str_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop"));
2586 assert!(is_str_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"));
2587 }
2588
2589 #[test]
2590 fn test_is_utf8_bidi() {
2591 assert!(!is_utf8_bidi(
2592 "abcdefghijklmnopaabcdefghijklmnop".as_bytes()
2593 ));
2594 assert!(!is_utf8_bidi(
2595 "abcdefghijklmnop\u{03B1}abcdefghijklmnop".as_bytes()
2596 ));
2597 assert!(!is_utf8_bidi(
2598 "abcdefghijklmnop\u{3041}abcdefghijklmnop".as_bytes()
2599 ));
2600 assert!(!is_utf8_bidi(
2601 "abcdefghijklmnop\u{1F4A9}abcdefghijklmnop".as_bytes()
2602 ));
2603 assert!(!is_utf8_bidi(
2604 "abcdefghijklmnop\u{FE00}abcdefghijklmnop".as_bytes()
2605 ));
2606 assert!(!is_utf8_bidi(
2607 "abcdefghijklmnop\u{202C}abcdefghijklmnop".as_bytes()
2608 ));
2609 assert!(!is_utf8_bidi(
2610 "abcdefghijklmnop\u{FEFF}abcdefghijklmnop".as_bytes()
2611 ));
2612 assert!(is_utf8_bidi(
2613 "abcdefghijklmnop\u{0590}abcdefghijklmnop".as_bytes()
2614 ));
2615 assert!(is_utf8_bidi(
2616 "abcdefghijklmnop\u{08FF}abcdefghijklmnop".as_bytes()
2617 ));
2618 assert!(is_utf8_bidi(
2619 "abcdefghijklmnop\u{061C}abcdefghijklmnop".as_bytes()
2620 ));
2621 assert!(is_utf8_bidi(
2622 "abcdefghijklmnop\u{FB50}abcdefghijklmnop".as_bytes()
2623 ));
2624 assert!(is_utf8_bidi(
2625 "abcdefghijklmnop\u{FDFF}abcdefghijklmnop".as_bytes()
2626 ));
2627 assert!(is_utf8_bidi(
2628 "abcdefghijklmnop\u{FE70}abcdefghijklmnop".as_bytes()
2629 ));
2630 assert!(is_utf8_bidi(
2631 "abcdefghijklmnop\u{FEFE}abcdefghijklmnop".as_bytes()
2632 ));
2633 assert!(is_utf8_bidi(
2634 "abcdefghijklmnop\u{200F}abcdefghijklmnop".as_bytes()
2635 ));
2636 assert!(is_utf8_bidi(
2637 "abcdefghijklmnop\u{202B}abcdefghijklmnop".as_bytes()
2638 ));
2639 assert!(is_utf8_bidi(
2640 "abcdefghijklmnop\u{202E}abcdefghijklmnop".as_bytes()
2641 ));
2642 assert!(is_utf8_bidi(
2643 "abcdefghijklmnop\u{2067}abcdefghijklmnop".as_bytes()
2644 ));
2645 assert!(is_utf8_bidi(
2646 "abcdefghijklmnop\u{10800}abcdefghijklmnop".as_bytes()
2647 ));
2648 assert!(is_utf8_bidi(
2649 "abcdefghijklmnop\u{10FFF}abcdefghijklmnop".as_bytes()
2650 ));
2651 assert!(is_utf8_bidi(
2652 "abcdefghijklmnop\u{1E800}abcdefghijklmnop".as_bytes()
2653 ));
2654 assert!(is_utf8_bidi(
2655 "abcdefghijklmnop\u{1EFFF}abcdefghijklmnop".as_bytes()
2656 ));
2657 }
2658
2659 #[test]
2660 fn test_is_utf16_bidi() {
2661 assert!(!is_utf16_bidi(&[
2662 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0062, 0x62, 0x63, 0x64, 0x65, 0x66,
2663 0x67, 0x68, 0x69,
2664 ]));
2665 assert!(!is_utf16_bidi(&[
2666 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x03B1, 0x62, 0x63, 0x64, 0x65, 0x66,
2667 0x67, 0x68, 0x69,
2668 ]));
2669 assert!(!is_utf16_bidi(&[
2670 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x3041, 0x62, 0x63, 0x64, 0x65, 0x66,
2671 0x67, 0x68, 0x69,
2672 ]));
2673 assert!(!is_utf16_bidi(&[
2674 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD801, 0x62, 0x63, 0x64, 0x65, 0x66,
2675 0x67, 0x68, 0x69,
2676 ]));
2677 assert!(!is_utf16_bidi(&[
2678 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE00, 0x62, 0x63, 0x64, 0x65, 0x66,
2679 0x67, 0x68, 0x69,
2680 ]));
2681 assert!(!is_utf16_bidi(&[
2682 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202C, 0x62, 0x63, 0x64, 0x65, 0x66,
2683 0x67, 0x68, 0x69,
2684 ]));
2685 assert!(!is_utf16_bidi(&[
2686 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFF, 0x62, 0x63, 0x64, 0x65, 0x66,
2687 0x67, 0x68, 0x69,
2688 ]));
2689 assert!(is_utf16_bidi(&[
2690 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x62, 0x63, 0x64, 0x65, 0x66,
2691 0x67, 0x68, 0x69,
2692 ]));
2693 assert!(is_utf16_bidi(&[
2694 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x08FF, 0x62, 0x63, 0x64, 0x65, 0x66,
2695 0x67, 0x68, 0x69,
2696 ]));
2697 assert!(is_utf16_bidi(&[
2698 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x061C, 0x62, 0x63, 0x64, 0x65, 0x66,
2699 0x67, 0x68, 0x69,
2700 ]));
2701 assert!(is_utf16_bidi(&[
2702 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB1D, 0x62, 0x63, 0x64, 0x65, 0x66,
2703 0x67, 0x68, 0x69,
2704 ]));
2705 assert!(is_utf16_bidi(&[
2706 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFB50, 0x62, 0x63, 0x64, 0x65, 0x66,
2707 0x67, 0x68, 0x69,
2708 ]));
2709 assert!(is_utf16_bidi(&[
2710 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFDFF, 0x62, 0x63, 0x64, 0x65, 0x66,
2711 0x67, 0x68, 0x69,
2712 ]));
2713 assert!(is_utf16_bidi(&[
2714 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFE70, 0x62, 0x63, 0x64, 0x65, 0x66,
2715 0x67, 0x68, 0x69,
2716 ]));
2717 assert!(is_utf16_bidi(&[
2718 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xFEFE, 0x62, 0x63, 0x64, 0x65, 0x66,
2719 0x67, 0x68, 0x69,
2720 ]));
2721 assert!(is_utf16_bidi(&[
2722 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x200F, 0x62, 0x63, 0x64, 0x65, 0x66,
2723 0x67, 0x68, 0x69,
2724 ]));
2725 assert!(is_utf16_bidi(&[
2726 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202B, 0x62, 0x63, 0x64, 0x65, 0x66,
2727 0x67, 0x68, 0x69,
2728 ]));
2729 assert!(is_utf16_bidi(&[
2730 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x202E, 0x62, 0x63, 0x64, 0x65, 0x66,
2731 0x67, 0x68, 0x69,
2732 ]));
2733 assert!(is_utf16_bidi(&[
2734 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x2067, 0x62, 0x63, 0x64, 0x65, 0x66,
2735 0x67, 0x68, 0x69,
2736 ]));
2737 assert!(is_utf16_bidi(&[
2738 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD802, 0x62, 0x63, 0x64, 0x65, 0x66,
2739 0x67, 0x68, 0x69,
2740 ]));
2741 assert!(is_utf16_bidi(&[
2742 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD803, 0x62, 0x63, 0x64, 0x65, 0x66,
2743 0x67, 0x68, 0x69,
2744 ]));
2745 assert!(is_utf16_bidi(&[
2746 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83A, 0x62, 0x63, 0x64, 0x65, 0x66,
2747 0x67, 0x68, 0x69,
2748 ]));
2749 assert!(is_utf16_bidi(&[
2750 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD83B, 0x62, 0x63, 0x64, 0x65, 0x66,
2751 0x67, 0x68, 0x69,
2752 ]));
2753
2754 assert!(is_utf16_bidi(&[
2755 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63, 0x64, 0x65,
2756 0x66, 0x67, 0x68, 0x69,
2757 ]));
2758 }
2759
2760 #[test]
2761 fn test_check_str_for_latin1_and_bidi() {
2762 assert_ne!(
2763 check_str_for_latin1_and_bidi("abcdefghijklmnopaabcdefghijklmnop"),
2764 Latin1Bidi::Bidi
2765 );
2766 assert_ne!(
2767 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{03B1}abcdefghijklmnop"),
2768 Latin1Bidi::Bidi
2769 );
2770 assert_ne!(
2771 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{3041}abcdefghijklmnop"),
2772 Latin1Bidi::Bidi
2773 );
2774 assert_ne!(
2775 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"),
2776 Latin1Bidi::Bidi
2777 );
2778 assert_ne!(
2779 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FE00}abcdefghijklmnop"),
2780 Latin1Bidi::Bidi
2781 );
2782 assert_ne!(
2783 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202C}abcdefghijklmnop"),
2784 Latin1Bidi::Bidi
2785 );
2786 assert_ne!(
2787 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"),
2788 Latin1Bidi::Bidi
2789 );
2790 assert_eq!(
2791 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{0590}abcdefghijklmnop"),
2792 Latin1Bidi::Bidi
2793 );
2794 assert_eq!(
2795 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{08FF}abcdefghijklmnop"),
2796 Latin1Bidi::Bidi
2797 );
2798 assert_eq!(
2799 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{061C}abcdefghijklmnop"),
2800 Latin1Bidi::Bidi
2801 );
2802 assert_eq!(
2803 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FB50}abcdefghijklmnop"),
2804 Latin1Bidi::Bidi
2805 );
2806 assert_eq!(
2807 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"),
2808 Latin1Bidi::Bidi
2809 );
2810 assert_eq!(
2811 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FE70}abcdefghijklmnop"),
2812 Latin1Bidi::Bidi
2813 );
2814 assert_eq!(
2815 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"),
2816 Latin1Bidi::Bidi
2817 );
2818 assert_eq!(
2819 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{200F}abcdefghijklmnop"),
2820 Latin1Bidi::Bidi
2821 );
2822 assert_eq!(
2823 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202B}abcdefghijklmnop"),
2824 Latin1Bidi::Bidi
2825 );
2826 assert_eq!(
2827 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{202E}abcdefghijklmnop"),
2828 Latin1Bidi::Bidi
2829 );
2830 assert_eq!(
2831 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{2067}abcdefghijklmnop"),
2832 Latin1Bidi::Bidi
2833 );
2834 assert_eq!(
2835 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{10800}abcdefghijklmnop"),
2836 Latin1Bidi::Bidi
2837 );
2838 assert_eq!(
2839 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"),
2840 Latin1Bidi::Bidi
2841 );
2842 assert_eq!(
2843 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1E800}abcdefghijklmnop"),
2844 Latin1Bidi::Bidi
2845 );
2846 assert_eq!(
2847 check_str_for_latin1_and_bidi("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"),
2848 Latin1Bidi::Bidi
2849 );
2850 }
2851
// Exercises `check_utf8_for_latin1_and_bidi` on ASCII text that has a single
// probe character embedded in the middle.
#[test]
fn test_check_utf8_for_latin1_and_bidi() {
    let classify = |text: &str| check_utf8_for_latin1_and_bidi(text.as_bytes());

    // Probes that must not make the result `Bidi`.
    assert_ne!(classify("abcdefghijklmnopaabcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_ne!(classify("abcdefghijklmnop\u{03B1}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_ne!(classify("abcdefghijklmnop\u{3041}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_ne!(classify("abcdefghijklmnop\u{1F4A9}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_ne!(classify("abcdefghijklmnop\u{FE00}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_ne!(classify("abcdefghijklmnop\u{202C}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_ne!(classify("abcdefghijklmnop\u{FEFF}abcdefghijklmnop"), Latin1Bidi::Bidi);

    // Probes inside the U+0590..=U+08FF range.
    assert_eq!(classify("abcdefghijklmnop\u{0590}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{08FF}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{061C}abcdefghijklmnop"), Latin1Bidi::Bidi);

    // Probes in the U+FB50..=U+FDFF and U+FE70..=U+FEFE stretches.
    assert_eq!(classify("abcdefghijklmnop\u{FB50}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{FDFF}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{FE70}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{FEFE}abcdefghijklmnop"), Latin1Bidi::Bidi);

    // Individual right-to-left formatting characters.
    assert_eq!(classify("abcdefghijklmnop\u{200F}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{202B}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{202E}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{2067}abcdefghijklmnop"), Latin1Bidi::Bidi);

    // First and last code points of the two supplementary-plane ranges.
    assert_eq!(classify("abcdefghijklmnop\u{10800}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{10FFF}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{1E800}abcdefghijklmnop"), Latin1Bidi::Bidi);
    assert_eq!(classify("abcdefghijklmnop\u{1EFFF}abcdefghijklmnop"), Latin1Bidi::Bidi);
}
2943
// Exercises `check_utf16_for_latin1_and_bidi` on ASCII code units that have
// one probe code unit embedded in the middle.
#[test]
fn test_check_utf16_for_latin1_and_bidi() {
    // Seventeen code units: eight ASCII letters on either side of `probe`.
    let classify = |probe: u16| {
        check_utf16_for_latin1_and_bidi(&[
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, probe, 0x62, 0x63, 0x64, 0x65, 0x66,
            0x67, 0x68, 0x69,
        ])
    };

    // Probes that must not make the result `Bidi`.
    assert_ne!(classify(0x0062), Latin1Bidi::Bidi);
    assert_ne!(classify(0x03B1), Latin1Bidi::Bidi);
    assert_ne!(classify(0x3041), Latin1Bidi::Bidi);
    assert_ne!(classify(0xD801), Latin1Bidi::Bidi);
    assert_ne!(classify(0xFE00), Latin1Bidi::Bidi);
    assert_ne!(classify(0x202C), Latin1Bidi::Bidi);
    assert_ne!(classify(0xFEFF), Latin1Bidi::Bidi);

    // Probes inside the 0x0590..=0x08FF range.
    assert_eq!(classify(0x0590), Latin1Bidi::Bidi);
    assert_eq!(classify(0x08FF), Latin1Bidi::Bidi);
    assert_eq!(classify(0x061C), Latin1Bidi::Bidi);

    // Probes inside the 0xFB1D..=0xFDFF and 0xFE70..=0xFEFE ranges.
    assert_eq!(classify(0xFB1D), Latin1Bidi::Bidi);
    assert_eq!(classify(0xFB50), Latin1Bidi::Bidi);
    assert_eq!(classify(0xFDFF), Latin1Bidi::Bidi);
    assert_eq!(classify(0xFE70), Latin1Bidi::Bidi);
    assert_eq!(classify(0xFEFE), Latin1Bidi::Bidi);

    // Individual right-to-left formatting characters.
    assert_eq!(classify(0x200F), Latin1Bidi::Bidi);
    assert_eq!(classify(0x202B), Latin1Bidi::Bidi);
    assert_eq!(classify(0x202E), Latin1Bidi::Bidi);
    assert_eq!(classify(0x2067), Latin1Bidi::Bidi);

    // Lone lead surrogates that are expected to count as bidi on their own.
    assert_eq!(classify(0xD802), Latin1Bidi::Bidi);
    assert_eq!(classify(0xD803), Latin1Bidi::Bidi);
    assert_eq!(classify(0xD83A), Latin1Bidi::Bidi);
    assert_eq!(classify(0xD83B), Latin1Bidi::Bidi);

    // A bidi code unit immediately followed by a non-Latin1, non-bidi one.
    assert_eq!(
        check_utf16_for_latin1_and_bidi(&[
            0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x0590, 0x3041, 0x62, 0x63, 0x64,
            0x65, 0x66, 0x67, 0x68, 0x69,
        ]),
        Latin1Bidi::Bidi
    );
}
3116
/// Table-driven reference predicate: returns `true` when `c` lies in one of
/// the listed inclusive ranges or equals one of the listed single characters.
///
/// The exhaustive tests in this module compare the optimized predicates
/// against this function.
#[inline(always)]
pub fn reference_is_char_bidi(c: char) -> bool {
    // Inclusive `(first, last)` code point ranges.
    const RANGES: [(char, char); 5] = [
        ('\u{0590}', '\u{08FF}'),
        ('\u{FB1D}', '\u{FDFF}'),
        ('\u{FE70}', '\u{FEFE}'),
        ('\u{10800}', '\u{10FFF}'),
        ('\u{1E800}', '\u{1EFFF}'),
    ];
    // U+200F (RLM), U+202B (RLE), U+202E (RLO) and U+2067 (RLI).
    const CONTROLS: [char; 4] = ['\u{200F}', '\u{202B}', '\u{202E}', '\u{2067}'];

    CONTROLS.contains(&c) || RANGES.iter().any(|&(first, last)| first <= c && c <= last)
}
3132
/// Table-driven reference predicate for a single UTF-16 code unit: returns
/// `true` when `u` lies in one of the listed inclusive ranges or equals one
/// of the listed individual code units.
///
/// The exhaustive tests in this module compare the optimized predicates
/// against this function.
#[inline(always)]
pub fn reference_is_utf16_code_unit_bidi(u: u16) -> bool {
    // Inclusive `(first, last)` code unit ranges.
    const RANGES: [(u16, u16); 3] = [(0x0590, 0x08FF), (0xFB1D, 0xFDFF), (0xFE70, 0xFEFE)];
    // 0xD802/0xD803 and 0xD83A/0xD83B are the lead surrogates of
    // U+10800..=U+10FFF and U+1E800..=U+1EFFF; the remaining four are
    // U+200F, U+202B, U+202E and U+2067.
    const SINGLES: [u16; 8] = [
        0xD802, 0xD803, 0xD83A, 0xD83B, 0x200F, 0x202B, 0x202E, 0x2067,
    ];

    SINGLES.contains(&u) || RANGES.iter().any(|&(first, last)| first <= u && u <= last)
}
3150
// Compares `is_char_bidi` with the reference predicate for every `char`.
// Ignored when running under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_char_bidi_thoroughly() {
    // 0xD800..0xE000 is left out: those values are surrogates, for which
    // `char::from_u32` has no `char` to return.
    let scalar_values = (0..0xD800u32).chain(0xE000..0x110000u32);
    for code_point in scalar_values {
        let c: char = ::std::char::from_u32(code_point).unwrap();
        assert_eq!(is_char_bidi(c), reference_is_char_bidi(c));
    }
}
3163
// Compares `is_utf16_code_unit_bidi` with the reference predicate for every
// possible 16-bit code unit. Ignored when running under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_utf16_code_unit_bidi_thoroughly() {
    // Iterate in `u32` so the exclusive upper bound 0x10000 is representable.
    for unit in (0..0x10000u32).map(|wide| wide as u16) {
        assert_eq!(
            is_utf16_code_unit_bidi(unit),
            reference_is_utf16_code_unit_bidi(unit)
        );
    }
}
3175
// Compares `is_str_bidi` on each single-character string with the reference
// predicate for that character. Ignored when running under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_str_bidi_thoroughly() {
    // Four bytes hold the longest UTF-8 encoding of a single `char`.
    let mut scratch = [0; 4];
    // Every scalar value; the surrogate gap 0xD800..0xE000 is skipped.
    let scalar_values = (0..0xD800u32).chain(0xE000..0x110000u32);
    for code_point in scalar_values {
        let c: char = ::std::char::from_u32(code_point).unwrap();
        let expected = reference_is_char_bidi(c);
        assert_eq!(is_str_bidi(c.encode_utf8(&mut scratch[..])), expected);
    }
}
3195
// Compares `is_utf8_bidi` with the reference predicate for every `char`,
// once on the bare encoding and once on the encoding followed by zero bytes.
// Ignored when running under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_utf8_bidi_thoroughly() {
    let mut scratch = [0; 8];
    // Every scalar value; the surrogate gap 0xD800..0xE000 is skipped.
    let scalar_values = (0..0xD800u32).chain(0xE000..0x110000u32);
    for code_point in scalar_values {
        let c: char = ::std::char::from_u32(code_point).unwrap();
        let expected = reference_is_char_bidi(c);

        // First check: exactly the bytes of the encoded character.
        let encoded_len = c.encode_utf8(&mut scratch[..]).len();
        assert_eq!(is_utf8_bidi(&scratch[..encoded_len]), expected);

        // Clear whatever a longer, earlier encoding left behind the character.
        for stale in scratch[encoded_len..].iter_mut() {
            *stale = 0;
        }

        // Second check: the whole zero-padded buffer.
        assert_eq!(is_utf8_bidi(&scratch[..]), expected);
    }
}
3237
// Compares `is_utf16_bidi` with the reference predicate by placing every
// possible code unit at index 15 of an otherwise zero-filled 32-unit buffer.
// Ignored when running under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_is_utf16_bidi_thoroughly() {
    let mut units = [0; 32];
    // Iterate in `u32` so the exclusive upper bound 0x10000 is representable.
    for unit in (0..0x10000u32).map(|wide| wide as u16) {
        units[15] = unit;
        let expected = reference_is_utf16_code_unit_bidi(unit);
        assert_eq!(is_utf16_bidi(&units[..]), expected);
    }
}
3251
// Three-byte inputs for `is_utf8_bidi`: the same two-byte prefixes are tried
// with a trailing ASCII `a` (expected `false`) and with a trailing lone
// 0xC2 byte (expected `true`).
#[test]
fn test_is_utf8_bidi_edge_cases() {
    let expected_not_bidi: [&[u8]; 3] = [b"\xD5\xBF\x61", b"\xD6\x80\x61", b"abc"];
    let expected_bidi: [&[u8]; 3] = [b"\xD5\xBF\xC2", b"\xD6\x80\xC2", b"ab\xC2"];

    for &input in expected_not_bidi.iter() {
        assert!(!is_utf8_bidi(input));
    }
    for &input in expected_bidi.iter() {
        assert!(is_utf8_bidi(input));
    }
}
3261
// `decode_latin1` is expected to borrow for ASCII-only input and to decode
// byte 0xE4 to U+00E4.
#[test]
fn test_decode_latin1() {
    let ascii_only = decode_latin1(b"ab");
    if let Cow::Borrowed(text) = ascii_only {
        assert_eq!(text, "ab");
    } else {
        unreachable!("Should have borrowed");
    }

    let with_high_byte = decode_latin1(b"a\xE4");
    assert_eq!(with_high_byte, "a\u{E4}");
}
3274
// `encode_latin1_lossy` is expected to borrow for ASCII-only input and to
// encode U+00E4 as the single byte 0xE4.
#[test]
fn test_encode_latin1_lossy() {
    let ascii_only = encode_latin1_lossy("ab");
    if let Cow::Borrowed(bytes) = ascii_only {
        assert_eq!(bytes, b"ab");
    } else {
        unreachable!("Should have borrowed");
    }

    let with_high_char = encode_latin1_lossy("a\u{E4}");
    assert_eq!(with_high_char, &(b"a\xE4")[..]);
}
3287
// Walks `convert_utf8_to_utf16_without_replacement` through one-, two-,
// three- and four-byte sequences, reusing a single output buffer. After each
// call the first three code units are inspected, so units written by an
// earlier call may legitimately still be present.
#[test]
fn test_convert_utf8_to_utf16_without_replacement() {
    let mut buf = [0u16; 5];

    // Two ASCII bytes.
    let written = convert_utf8_to_utf16_without_replacement(b"ab", &mut buf[..2]);
    assert_eq!(written, Some(2));
    assert_eq!(buf[..3], [u16::from(b'a'), u16::from(b'b'), 0]);

    // Two-byte sequence followed by ASCII.
    let written = convert_utf8_to_utf16_without_replacement(b"\xC3\xA4c", &mut buf[..3]);
    assert_eq!(written, Some(2));
    assert_eq!(buf[..3], [0xE4, u16::from(b'c'), 0]);

    // Lone three-byte sequence; `buf[1]` still holds the `c` from the previous call.
    let written = convert_utf8_to_utf16_without_replacement(b"\xE2\x98\x83", &mut buf[..3]);
    assert_eq!(written, Some(1));
    assert_eq!(buf[..3], [0x2603, u16::from(b'c'), 0]);

    // Three-byte sequence followed by ASCII.
    let written = convert_utf8_to_utf16_without_replacement(b"\xE2\x98\x83d", &mut buf[..4]);
    assert_eq!(written, Some(2));
    assert_eq!(buf[..3], [0x2603, u16::from(b'd'), 0]);

    // Three-byte sequence followed by a two-byte sequence.
    let written =
        convert_utf8_to_utf16_without_replacement(b"\xE2\x98\x83\xC3\xA4", &mut buf[..5]);
    assert_eq!(written, Some(2));
    assert_eq!(buf[..3], [0x2603, 0xE4, 0]);

    // Four-byte sequence becomes a surrogate pair.
    let written = convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93\x8E", &mut buf[..4]);
    assert_eq!(written, Some(2));
    assert_eq!(buf[..3], [0xD83D, 0xDCCE, 0]);

    // Four-byte sequence followed by ASCII.
    let written = convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93\x8Ee", &mut buf[..5]);
    assert_eq!(written, Some(3));
    assert_eq!(buf[..3], [0xD83D, 0xDCCE, u16::from(b'e')]);

    // Truncated four-byte sequence is rejected.
    let written = convert_utf8_to_utf16_without_replacement(b"\xF0\x9F\x93", &mut buf[..5]);
    assert_eq!(written, None);
}
3345}