brotli/enc/
utf8_util.rs
1use crate::enc::floatX;
2
/// Decodes a single UTF-8 sequence from the front of `input`.
///
/// `size` is the caller's upper bound on how many bytes a multi-byte sequence
/// may span. It is additionally clamped to `input.len()`, so a `size` that
/// overstates the slice can never cause an out-of-bounds read. The lead byte
/// is inspected regardless of `size`.
///
/// Returns `(bytes_consumed, symbol)`:
/// * for a well-formed, non-overlong sequence of 1 to 4 bytes, the sequence
///   length and the decoded code point (at most `0x10_ffff`);
/// * otherwise `(1, 0x11_0000 | lead_byte)`, i.e. a value `>= 0x11_0000` that
///   marks the lead byte as invalid. A NUL byte falls into this category, and
///   so does an empty `input` (reported as `(1, 0x11_0000)`).
///
/// No surrogate-range check is performed on three-byte sequences.
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
    // Any symbol at or above this value signals "not valid UTF-8".
    const INVALID: i32 = 0x11_0000;

    let lead = match input.first() {
        Some(&byte) => i32::from(byte),
        // Nothing to decode; still report one consumed byte so callers advance.
        None => return (1, INVALID),
    };
    // Never look past the end of the slice, whatever the caller claims.
    let size = size.min(input.len());

    // Payload bits of the continuation byte at `idx`, or `None` when `idx`
    // is outside the permitted window or the byte is not of the form 10xxxxxx.
    let continuation = |idx: usize| -> Option<i32> {
        if idx < size && (input[idx] & 0xc0) == 0x80 {
            Some(i32::from(input[idx] & 0x3f))
        } else {
            None
        }
    };

    // The four lead-byte classes are mutually exclusive, so at most one of
    // the branches below can apply; anything rejected falls through.
    if (lead & 0x80) == 0 {
        // ASCII; NUL is deliberately not counted as valid text.
        if lead > 0 {
            return (1, lead);
        }
    } else if (lead & 0xe0) == 0xc0 {
        if let Some(c1) = continuation(1) {
            let symbol = (lead & 0x1f) << 6 | c1;
            // Reject overlong encodings of ASCII.
            if symbol > 0x7f {
                return (2, symbol);
            }
        }
    } else if (lead & 0xf0) == 0xe0 {
        if let (Some(c1), Some(c2)) = (continuation(1), continuation(2)) {
            let symbol = (lead & 0x0f) << 12 | c1 << 6 | c2;
            // Reject overlong encodings of one- and two-byte symbols.
            if symbol > 0x7ff {
                return (3, symbol);
            }
        }
    } else if (lead & 0xf8) == 0xf0 {
        if let (Some(c1), Some(c2), Some(c3)) =
            (continuation(1), continuation(2), continuation(3))
        {
            let symbol = (lead & 0x07) << 18 | c1 << 12 | c2 << 6 | c3;
            // Reject overlong encodings and values beyond the Unicode range.
            if symbol > 0xffff && symbol <= 0x10_ffff {
                return (4, symbol);
            }
        }
    }

    (1, INVALID | lead)
}
44
45#[deprecated(note = "Use is_mostly_utf8 instead")]
46pub fn BrotliIsMostlyUTF8(
47 data: &[u8],
48 pos: usize,
49 mask: usize,
50 length: usize,
51 min_fraction: floatX,
52) -> i32 {
53 is_mostly_utf8(data, pos, mask, length, min_fraction).into()
54}
55
56pub(crate) fn is_mostly_utf8(
57 data: &[u8],
58 pos: usize,
59 mask: usize,
60 length: usize,
61 min_fraction: floatX,
62) -> bool {
63 let mut size_utf8: usize = 0;
64 let mut i: usize = 0;
65 while i < length {
66 let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i);
67 i = i.wrapping_add(bytes_read);
68 if symbol < 0x11_0000 {
69 size_utf8 = size_utf8.wrapping_add(bytes_read);
70 }
71 }
72 size_utf8 as floatX > min_fraction * length as floatX
73}