brotli/enc/
cluster.rs

1use alloc::{Allocator, SliceWrapper, SliceWrapperMut};
2use core::cmp::min;
3
4use {alloc, core};
5
6use super::bit_cost::BrotliPopulationCost;
7use super::histogram::{
8    CostAccessors, HistogramAddHistogram, HistogramClear, HistogramSelfAddHistogram,
9};
10use super::util::FastLog2;
11use crate::enc::combined_alloc::{alloc_or_default, allocate};
12
13#[derive(Clone, Copy)]
14pub struct HistogramPair {
15    pub idx1: u32,
16    pub idx2: u32,
17    pub cost_combo: super::util::floatX,
18    pub cost_diff: super::util::floatX,
19}
20
21impl Default for HistogramPair {
22    #[inline(always)]
23    fn default() -> HistogramPair {
24        HistogramPair {
25            idx1: 0,
26            idx2: 0,
27            cost_combo: 0.0,
28            cost_diff: 0.0,
29        }
30    }
31}
32/* Returns entropy reduction of the context map when we combine two clusters. */
33#[inline(always)]
34fn ClusterCostDiff(size_a: usize, size_b: usize) -> super::util::floatX {
35    let size_c: usize = size_a.wrapping_add(size_b);
36    size_a as (super::util::floatX) * FastLog2(size_a as u64)
37        + size_b as (super::util::floatX) * FastLog2(size_b as u64)
38        - size_c as (super::util::floatX) * FastLog2(size_c as u64)
39}
40
41#[inline(always)]
42fn HistogramPairIsLess(p1: &HistogramPair, p2: &HistogramPair) -> bool {
43    if p1.cost_diff != p2.cost_diff {
44        p1.cost_diff > p2.cost_diff
45    } else {
46        p1.idx2.wrapping_sub(p1.idx1) > p2.idx2.wrapping_sub(p2.idx1)
47    }
48}
49
50/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
51it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
52fn BrotliCompareAndPushToQueue<
53    HistogramType: SliceWrapperMut<u32> + SliceWrapper<u32> + CostAccessors + Clone,
54>(
55    out: &[HistogramType],
56    cluster_size: &[u32],
57    mut idx1: u32,
58    mut idx2: u32,
59    max_num_pairs: usize,
60    scratch_space: &mut HistogramType::i32vec,
61    pairs: &mut [HistogramPair],
62    num_pairs: &mut usize,
63) {
64    let mut is_good_pair = false;
65    let mut p: HistogramPair = HistogramPair {
66        idx1: 0,
67        idx2: 0,
68        cost_combo: 0.0,
69        cost_diff: 0.0,
70    };
71    if idx1 == idx2 {
72    } else {
73        if idx2 < idx1 {
74            core::mem::swap(&mut idx2, &mut idx1);
75        }
76        p.idx1 = idx1;
77        p.idx2 = idx2;
78        p.cost_diff = 0.5
79            * ClusterCostDiff(
80                cluster_size[idx1 as usize] as usize,
81                cluster_size[idx2 as usize] as usize,
82            );
83        p.cost_diff -= (out[idx1 as usize]).bit_cost();
84        p.cost_diff -= (out[idx2 as usize]).bit_cost();
85        if (out[idx1 as usize]).total_count() == 0usize {
86            p.cost_combo = (out[idx2 as usize]).bit_cost();
87            is_good_pair = true;
88        } else if (out[idx2 as usize]).total_count() == 0usize {
89            p.cost_combo = (out[idx1 as usize]).bit_cost();
90            is_good_pair = true;
91        } else {
92            let threshold = if *num_pairs == 0 {
93                1e38
94            } else {
95                pairs[0].cost_diff.max(0.0)
96            };
97
98            let mut combo: HistogramType = out[idx1 as usize].clone();
99            HistogramAddHistogram(&mut combo, &out[idx2 as usize]);
100            let cost_combo: super::util::floatX = BrotliPopulationCost(&combo, scratch_space);
101            if cost_combo < threshold - p.cost_diff {
102                p.cost_combo = cost_combo;
103                is_good_pair = true;
104            }
105        }
106        if is_good_pair {
107            p.cost_diff += p.cost_combo;
108            if *num_pairs > 0usize && HistogramPairIsLess(&pairs[0], &p) {
109                /* Replace the top of the queue if needed. */
110                if *num_pairs < max_num_pairs {
111                    pairs[*num_pairs] = pairs[0];
112                    *num_pairs = num_pairs.wrapping_add(1);
113                }
114                pairs[0] = p;
115            } else if *num_pairs < max_num_pairs {
116                pairs[*num_pairs] = p;
117                *num_pairs = num_pairs.wrapping_add(1);
118            }
119        }
120    }
121}
122
123pub fn BrotliHistogramCombine<
124    HistogramType: SliceWrapperMut<u32> + SliceWrapper<u32> + CostAccessors + Clone,
125>(
126    out: &mut [HistogramType],
127    cluster_size: &mut [u32],
128    symbols: &mut [u32],
129    clusters: &mut [u32],
130    pairs: &mut [HistogramPair],
131    mut num_clusters: usize,
132    symbols_size: usize,
133    max_clusters: usize,
134    max_num_pairs: usize,
135    scratch_space: &mut HistogramType::i32vec,
136) -> usize {
137    let mut cost_diff_threshold: super::util::floatX = 0.0;
138    let mut min_cluster_size: usize = 1;
139    let mut num_pairs: usize = 0usize;
140    {
141        /* We maintain a vector of histogram pairs, with the property that the pair
142        with the maximum bit cost reduction is the first. */
143        for idx1 in 0..num_clusters {
144            for idx2 in idx1 + 1..num_clusters {
145                BrotliCompareAndPushToQueue(
146                    out,
147                    cluster_size,
148                    clusters[idx1],
149                    clusters[idx2],
150                    max_num_pairs,
151                    scratch_space,
152                    pairs,
153                    &mut num_pairs,
154                );
155            }
156        }
157    }
158    while num_clusters > min_cluster_size {
159        let mut i: usize;
160        if (pairs[0]).cost_diff >= cost_diff_threshold {
161            cost_diff_threshold = 1e38;
162            min_cluster_size = max_clusters;
163            {
164                continue;
165            }
166        }
167        /* Take the best pair from the top of heap. */
168        let best_idx1: u32 = (pairs[0]).idx1;
169        let best_idx2: u32 = (pairs[0]).idx2;
170        HistogramSelfAddHistogram(out, (best_idx1 as usize), (best_idx2 as usize));
171        (out[(best_idx1 as usize)]).set_bit_cost((pairs[0]).cost_combo);
172        {
173            let _rhs = cluster_size[(best_idx2 as usize)];
174            let _lhs = &mut cluster_size[(best_idx1 as usize)];
175            *_lhs = (*_lhs).wrapping_add(_rhs);
176        }
177        for i in 0usize..symbols_size {
178            if symbols[i] == best_idx2 {
179                symbols[i] = best_idx1;
180            }
181        }
182        i = 0usize;
183        'break9: while i < num_clusters {
184            {
185                if clusters[i] == best_idx2 {
186                    for offset in 0..(num_clusters - i - 1) {
187                        clusters[i + offset] = clusters[i + 1 + offset];
188                    }
189                    break 'break9;
190                }
191            }
192            i = i.wrapping_add(1);
193        }
194        num_clusters = num_clusters.wrapping_sub(1);
195        {
196            /* Remove pairs intersecting the just combined best pair. */
197            let mut copy_to_idx: usize = 0usize;
198            i = 0usize;
199            while i < num_pairs {
200                'continue12: loop {
201                    {
202                        let p: HistogramPair = pairs[i];
203                        if (p).idx1 == best_idx1
204                            || (p).idx2 == best_idx1
205                            || (p).idx1 == best_idx2
206                            || (p).idx2 == best_idx2
207                        {
208                            /* Remove invalid pair from the queue. */
209                            break 'continue12;
210                        }
211                        if HistogramPairIsLess(&pairs[0], &p) {
212                            /* Replace the top of the queue if needed. */
213                            let front: HistogramPair = pairs[0];
214                            pairs[0] = p;
215                            pairs[copy_to_idx] = front;
216                        } else {
217                            pairs[copy_to_idx] = p;
218                        }
219                        copy_to_idx = copy_to_idx.wrapping_add(1);
220                    }
221                    break;
222                }
223                i = i.wrapping_add(1);
224            }
225            num_pairs = copy_to_idx;
226        }
227        for i in 0usize..num_clusters {
228            BrotliCompareAndPushToQueue(
229                out,
230                cluster_size,
231                best_idx1,
232                clusters[i],
233                max_num_pairs,
234                scratch_space,
235                pairs,
236                &mut num_pairs,
237            );
238        }
239    }
240    num_clusters
241}
242
243/* What is the bit cost of moving histogram from cur_symbol to candidate. */
244#[inline(always)]
245pub fn BrotliHistogramBitCostDistance<
246    HistogramType: SliceWrapperMut<u32> + SliceWrapper<u32> + CostAccessors + Clone,
247>(
248    histogram: &HistogramType,
249    candidate: &HistogramType,
250    scratch_space: &mut HistogramType::i32vec,
251) -> super::util::floatX {
252    if histogram.total_count() == 0usize {
253        0.0
254    } else {
255        let mut tmp: HistogramType = histogram.clone();
256        HistogramAddHistogram(&mut tmp, candidate);
257        BrotliPopulationCost(&tmp, scratch_space) - candidate.bit_cost()
258    }
259}
260
261/* Find the best 'out' histogram for each of the 'in' histograms.
262When called, clusters[0..num_clusters) contains the unique values from
263symbols[0..in_size), but this property is not preserved in this function.
264Note: we assume that out[]->bit_cost_ is already up-to-date. */
265
266pub fn BrotliHistogramRemap<
267    HistogramType: SliceWrapperMut<u32> + SliceWrapper<u32> + CostAccessors + Clone,
268>(
269    inp: &[HistogramType],
270    in_size: usize,
271    clusters: &[u32],
272    num_clusters: usize,
273    scratch_space: &mut HistogramType::i32vec,
274    out: &mut [HistogramType],
275    symbols: &mut [u32],
276) {
277    for i in 0usize..in_size {
278        let mut best_out: u32 = if i == 0usize {
279            symbols[0]
280        } else {
281            symbols[i.wrapping_sub(1)]
282        };
283        let mut best_bits: super::util::floatX =
284            BrotliHistogramBitCostDistance(&inp[i], &mut out[(best_out as usize)], scratch_space);
285        for j in 0usize..num_clusters {
286            let cur_bits: super::util::floatX = BrotliHistogramBitCostDistance(
287                &inp[i],
288                &mut out[(clusters[j] as usize)],
289                scratch_space,
290            );
291            if cur_bits < best_bits {
292                best_bits = cur_bits;
293                best_out = clusters[j];
294            }
295        }
296        symbols[i] = best_out;
297    }
298    for i in 0usize..num_clusters {
299        HistogramClear(&mut out[(clusters[i] as usize)]);
300    }
301    for i in 0usize..in_size {
302        HistogramAddHistogram(&mut out[(symbols[i] as usize)], &inp[i]);
303    }
304}
305
306/* Reorders elements of the out[0..length) array and changes values in
307symbols[0..length) array in the following way:
308  * when called, symbols[] contains indexes into out[], and has N unique
309    values (possibly N < length)
310  * on return, symbols'[i] = f(symbols[i]) and
311               out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
312    where f is a bijection between the range of symbols[] and [0..N), and
313    the first occurrences of values in symbols'[i] come in consecutive
314    increasing order.
315Returns N, the number of unique values in symbols[]. */
316pub fn BrotliHistogramReindex<
317    HistogramType: SliceWrapperMut<u32> + SliceWrapper<u32> + CostAccessors + Clone,
318    Alloc: alloc::Allocator<u32> + alloc::Allocator<HistogramType>,
319>(
320    alloc: &mut Alloc,
321    out: &mut [HistogramType],
322    symbols: &mut [u32],
323    length: usize,
324) -> usize {
325    static kInvalidIndex: u32 = u32::MAX;
326    let mut new_index = alloc_or_default::<u32, _>(alloc, length);
327    let mut next_index: u32;
328    let mut tmp: <Alloc as Allocator<HistogramType>>::AllocatedMemory;
329    for i in 0usize..length {
330        new_index.slice_mut()[i] = kInvalidIndex;
331    }
332    next_index = 0u32;
333    for i in 0usize..length {
334        if new_index.slice()[(symbols[i] as usize)] == kInvalidIndex {
335            new_index.slice_mut()[(symbols[i] as usize)] = next_index;
336            next_index = next_index.wrapping_add(1);
337        }
338    }
339    tmp = alloc_or_default::<HistogramType, _>(alloc, next_index as usize);
340    next_index = 0u32;
341    for i in 0usize..length {
342        if new_index.slice()[(symbols[i] as usize)] == next_index {
343            tmp.slice_mut()[(next_index as usize)] = out[(symbols[i] as usize)].clone();
344            next_index = next_index.wrapping_add(1);
345        }
346        symbols[i] = new_index.slice()[(symbols[i] as usize)];
347    }
348    {
349        <Alloc as Allocator<u32>>::free_cell(alloc, new_index);
350    }
351    for i in 0usize..next_index as usize {
352        out[i] = tmp.slice()[i].clone();
353    }
354    {
355        <Alloc as Allocator<HistogramType>>::free_cell(alloc, tmp)
356    }
357    next_index as usize
358}
359
360pub fn BrotliClusterHistograms<
361    HistogramType: SliceWrapperMut<u32> + SliceWrapper<u32> + CostAccessors + Clone,
362    Alloc: alloc::Allocator<u32> + alloc::Allocator<HistogramPair> + alloc::Allocator<HistogramType>,
363>(
364    alloc: &mut Alloc,
365    inp: &[HistogramType],
366    in_size: usize,
367    max_histograms: usize,
368    scratch_space: &mut HistogramType::i32vec,
369    out: &mut [HistogramType],
370    out_size: &mut usize,
371    histogram_symbols: &mut [u32],
372) {
373    let mut cluster_size = alloc_or_default::<u32, Alloc>(alloc, in_size);
374    let mut clusters = alloc_or_default::<u32, Alloc>(alloc, in_size);
375    let mut num_clusters: usize = 0usize;
376    let max_input_histograms: usize = 64usize;
377    let pairs_capacity: usize = max_input_histograms
378        .wrapping_mul(max_input_histograms)
379        .wrapping_div(2);
380    let mut pairs = allocate::<HistogramPair, _>(alloc, pairs_capacity.wrapping_add(1));
381    let mut i: usize;
382    for i in 0usize..in_size {
383        cluster_size.slice_mut()[i] = 1u32;
384    }
385    for i in 0usize..in_size {
386        out[i] = inp[i].clone();
387        (out[i]).set_bit_cost(BrotliPopulationCost(&inp[i], scratch_space));
388        histogram_symbols[i] = i as u32;
389    }
390    i = 0usize;
391    while i < in_size {
392        {
393            let num_to_combine: usize = min(in_size.wrapping_sub(i), max_input_histograms);
394
395            for j in 0usize..num_to_combine {
396                clusters.slice_mut()[num_clusters.wrapping_add(j)] = i.wrapping_add(j) as u32;
397            }
398            let num_new_clusters: usize = BrotliHistogramCombine(
399                out,
400                cluster_size.slice_mut(),
401                &mut histogram_symbols[i..],
402                &mut clusters.slice_mut()[num_clusters..],
403                pairs.slice_mut(),
404                num_to_combine,
405                num_to_combine,
406                max_histograms,
407                pairs_capacity,
408                scratch_space,
409            );
410            num_clusters = num_clusters.wrapping_add(num_new_clusters);
411        }
412        i = i.wrapping_add(max_input_histograms);
413    }
414    {
415        let max_num_pairs: usize = min(
416            (64usize).wrapping_mul(num_clusters),
417            num_clusters.wrapping_div(2).wrapping_mul(num_clusters),
418        );
419        {
420            if pairs_capacity < max_num_pairs.wrapping_add(1) {
421                let mut _new_size: usize = if pairs_capacity == 0usize {
422                    max_num_pairs.wrapping_add(1)
423                } else {
424                    pairs_capacity
425                };
426                let mut new_array: <Alloc as Allocator<HistogramPair>>::AllocatedMemory;
427                while _new_size < max_num_pairs.wrapping_add(1) {
428                    _new_size = _new_size.wrapping_mul(2);
429                }
430                new_array = alloc_or_default::<HistogramPair, _>(alloc, _new_size);
431                new_array.slice_mut()[..pairs_capacity]
432                    .clone_from_slice(&pairs.slice()[..pairs_capacity]);
433                <Alloc as Allocator<HistogramPair>>::free_cell(
434                    alloc,
435                    core::mem::replace(&mut pairs, new_array),
436                );
437            }
438        }
439        num_clusters = BrotliHistogramCombine(
440            out,
441            cluster_size.slice_mut(),
442            histogram_symbols,
443            clusters.slice_mut(),
444            pairs.slice_mut(),
445            num_clusters,
446            in_size,
447            max_histograms,
448            max_num_pairs,
449            scratch_space,
450        );
451    }
452    <Alloc as Allocator<HistogramPair>>::free_cell(alloc, pairs);
453    <Alloc as Allocator<u32>>::free_cell(alloc, cluster_size);
454    BrotliHistogramRemap(
455        inp,
456        in_size,
457        clusters.slice(),
458        num_clusters,
459        scratch_space,
460        out,
461        histogram_symbols,
462    );
463    <Alloc as Allocator<u32>>::free_cell(alloc, clusters);
464    *out_size = BrotliHistogramReindex(alloc, out, histogram_symbols, in_size);
465}
466
467/////////// DONE //////////////////////////