brotli/enc/
literal_cost.rs

1use core::cmp::min;
2
3use super::util::{floatX, FastLog2f64};
4use crate::enc::utf8_util::is_mostly_utf8;
5
/// Ratio threshold handed to `is_mostly_utf8` by `BrotliEstimateBitCostsForLiterals`
/// when it chooses between the UTF-8-aware cost model and the plain byte model.
///
/// NOTE(review): presumably the minimum fraction of the input that must look like
/// UTF-8 for the UTF-8 model to be used; the exact meaning is defined by
/// `is_mostly_utf8` — confirm there.
static kMinUTF8Ratio: floatX = 0.75;
7
/// Returns the UTF-8 "position" (0, 1 or 2) of the byte that follows `c`.
///
/// * `last`  - the byte that preceded `c`.
/// * `c`     - the most recently seen byte.
/// * `clamp` - upper bound on the returned position; a caller that only models
///   `clamp + 1` positions passes it here so the result never exceeds it.
///
/// The result is `0` when the next byte is expected to start a new character,
/// `1` when it is expected to be the second byte of a multi-byte sequence and
/// `2` when it is expected to be the third byte.
fn UTF8Position(last: usize, c: usize, clamp: usize) -> usize {
    match c {
        // ASCII byte: the next byte begins a fresh character.
        0..=127 => 0,
        // Continuation-range byte (0x80..=0xBF). If the byte before it was below
        // 0xE0 the sequence is treated as finished; otherwise a third byte follows.
        128..=191 => {
            if last < 0xe0 {
                0
            } else {
                min(2, clamp)
            }
        }
        // Lead-range byte (>= 0xC0): the next byte is the second of the sequence.
        _ => min(1, clamp),
    }
}
19
20fn DecideMultiByteStatsLevel(pos: usize, len: usize, mask: usize, data: &[u8]) -> usize {
21    let mut counts = [0usize; 3];
22    let mut max_utf8: usize = 1;
23    let mut last_c: usize = 0usize;
24    let mut i: usize;
25    i = 0usize;
26    while i < len {
27        {
28            let c: usize = data[(pos.wrapping_add(i) & mask)] as usize;
29            {
30                let _rhs = 1;
31                let _lhs = &mut counts[UTF8Position(last_c, c, 2usize)];
32                *_lhs = (*_lhs).wrapping_add(_rhs as usize);
33            }
34            last_c = c;
35        }
36        i = i.wrapping_add(1);
37    }
38    if counts[2] < 500usize {
39        max_utf8 = 1;
40    }
41    if counts[1].wrapping_add(counts[2]) < 25usize {
42        max_utf8 = 0usize;
43    }
44    max_utf8
45}
46
47fn EstimateBitCostsForLiteralsUTF8(
48    pos: usize,
49    len: usize,
50    mask: usize,
51    data: &[u8],
52    cost: &mut [floatX],
53) {
54    let max_utf8: usize = DecideMultiByteStatsLevel(pos, len, mask, data);
55    let mut histogram = [[0usize; 256]; 3];
56    let window_half: usize = 495usize;
57    let in_window: usize = min(window_half, len);
58    let mut in_window_utf8 = [0usize; 3];
59    let mut i: usize;
60    {
61        let mut last_c: usize = 0usize;
62        let mut utf8_pos: usize = 0usize;
63        i = 0usize;
64        while i < in_window {
65            {
66                let c: usize = data[(pos.wrapping_add(i) & mask)] as usize;
67                {
68                    let _rhs = 1;
69                    let _lhs = &mut histogram[utf8_pos][c];
70                    *_lhs = (*_lhs).wrapping_add(_rhs as usize);
71                }
72                {
73                    let _rhs = 1;
74                    let _lhs = &mut in_window_utf8[utf8_pos];
75                    *_lhs = (*_lhs).wrapping_add(_rhs as usize);
76                }
77                utf8_pos = UTF8Position(last_c, c, max_utf8);
78                last_c = c;
79            }
80            i = i.wrapping_add(1);
81        }
82    }
83    i = 0usize;
84    while i < len {
85        {
86            if i >= window_half {
87                let c: usize = (if i < window_half.wrapping_add(1) {
88                    0i32
89                } else {
90                    data[(pos
91                        .wrapping_add(i)
92                        .wrapping_sub(window_half)
93                        .wrapping_sub(1)
94                        & mask)] as i32
95                }) as usize;
96                let last_c: usize = (if i < window_half.wrapping_add(2) {
97                    0i32
98                } else {
99                    data[(pos
100                        .wrapping_add(i)
101                        .wrapping_sub(window_half)
102                        .wrapping_sub(2)
103                        & mask)] as i32
104                }) as usize;
105                let utf8_pos2: usize = UTF8Position(last_c, c, max_utf8);
106                {
107                    let _rhs = 1;
108                    let _lhs = &mut histogram[utf8_pos2]
109                        [data[(pos.wrapping_add(i).wrapping_sub(window_half) & mask)] as usize];
110                    *_lhs = (*_lhs).wrapping_sub(_rhs as usize);
111                }
112                {
113                    let _rhs = 1;
114                    let _lhs = &mut in_window_utf8[utf8_pos2];
115                    *_lhs = (*_lhs).wrapping_sub(_rhs as usize);
116                }
117            }
118            if i.wrapping_add(window_half) < len {
119                let c: usize = data[(pos
120                    .wrapping_add(i)
121                    .wrapping_add(window_half)
122                    .wrapping_sub(1)
123                    & mask)] as usize;
124                let last_c: usize = data[(pos
125                    .wrapping_add(i)
126                    .wrapping_add(window_half)
127                    .wrapping_sub(2)
128                    & mask)] as usize;
129                let utf8_pos2: usize = UTF8Position(last_c, c, max_utf8);
130                {
131                    let _rhs = 1;
132                    let _lhs = &mut histogram[utf8_pos2]
133                        [data[(pos.wrapping_add(i).wrapping_add(window_half) & mask)] as usize];
134                    *_lhs = (*_lhs).wrapping_add(_rhs as usize);
135                }
136                {
137                    let _rhs = 1;
138                    let _lhs = &mut in_window_utf8[utf8_pos2];
139                    *_lhs = (*_lhs).wrapping_add(_rhs as usize);
140                }
141            }
142            {
143                let c: usize = (if i < 1 {
144                    0i32
145                } else {
146                    data[(pos.wrapping_add(i).wrapping_sub(1) & mask)] as i32
147                }) as usize;
148                let last_c: usize = (if i < 2usize {
149                    0i32
150                } else {
151                    data[(pos.wrapping_add(i).wrapping_sub(2) & mask)] as i32
152                }) as usize;
153                let utf8_pos: usize = UTF8Position(last_c, c, max_utf8);
154                let masked_pos: usize = pos.wrapping_add(i) & mask;
155                let mut histo: usize = histogram[utf8_pos][data[masked_pos] as usize];
156                //precision is vital here: lets keep double precision
157                let mut lit_cost: f64;
158                if histo == 0usize {
159                    histo = 1;
160                }
161                lit_cost = FastLog2f64(in_window_utf8[utf8_pos] as u64) as f64
162                    - FastLog2f64(histo as u64) as f64;
163                lit_cost += 0.02905;
164                if lit_cost < 1.0 {
165                    lit_cost *= 0.5;
166                    lit_cost += 0.5;
167                }
168                if i < 2000usize {
169                    lit_cost += (0.7 - (2000usize).wrapping_sub(i) as (f64) / 2000.0 * 0.35);
170                }
171                cost[i] = lit_cost as floatX;
172            }
173        }
174        i = i.wrapping_add(1);
175    }
176}
177
178pub fn BrotliEstimateBitCostsForLiterals(
179    pos: usize,
180    len: usize,
181    mask: usize,
182    data: &[u8],
183    cost: &mut [floatX],
184) {
185    if is_mostly_utf8(data, pos, mask, len, kMinUTF8Ratio) {
186        EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
187    } else {
188        let mut histogram: [usize; 256] = [0; 256];
189
190        let window_half: usize = 2000usize;
191        let mut in_window: usize = min(window_half, len);
192        let mut i: usize;
193        for i in 0usize..in_window {
194            let _rhs = 1;
195            let _lhs = &mut histogram[data[(pos.wrapping_add(i) & mask)] as usize];
196            *_lhs = (*_lhs).wrapping_add(_rhs as usize);
197        }
198        i = 0usize;
199        while i < len {
200            {
201                let mut histo: usize;
202                if i >= window_half {
203                    {
204                        let _rhs = 1;
205                        let _lhs = &mut histogram
206                            [data[(pos.wrapping_add(i).wrapping_sub(window_half) & mask)] as usize];
207                        *_lhs = (*_lhs).wrapping_sub(_rhs as usize);
208                    }
209                    in_window = in_window.wrapping_sub(1);
210                }
211                if i.wrapping_add(window_half) < len {
212                    {
213                        let _rhs = 1;
214                        let _lhs = &mut histogram
215                            [data[(pos.wrapping_add(i).wrapping_add(window_half) & mask)] as usize];
216                        *_lhs = (*_lhs).wrapping_add(_rhs as usize);
217                    }
218                    in_window = in_window.wrapping_add(1);
219                }
220                histo = histogram[data[(pos.wrapping_add(i) & mask)] as usize];
221                if histo == 0usize {
222                    histo = 1;
223                }
224                {
225                    //precision is vital here: lets keep double precision
226                    let mut lit_cost: f64 =
227                        FastLog2f64(in_window as u64) as f64 - FastLog2f64(histo as u64) as f64;
228                    lit_cost += 0.029;
229                    if lit_cost < 1.0 {
230                        lit_cost *= 0.5;
231                        lit_cost += 0.5;
232                    }
233                    cost[i] = lit_cost as floatX;
234                }
235            }
236            i = i.wrapping_add(1);
237        }
238    }
239}