roaring/bitmap/
multiops.rs

1use core::{
2    cmp::Reverse,
3    convert::Infallible,
4    mem,
5    ops::{BitOrAssign, BitXorAssign},
6};
7
8use alloc::borrow::Cow;
9
10use crate::{MultiOps, RoaringBitmap};
11
12use super::{container::Container, store::Store};
13
14#[cfg(not(feature = "std"))]
15use alloc::vec::Vec;
16
17/// When collecting bitmaps for optimizing the computation. If we don't know how many
18// elements are in the iterator we collect 10 elements.
19const BASE_COLLECT: usize = 10;
20
21/// If an iterator contain 50 elements or less we collect everything because it'll be so
22/// much faster without impacting the memory usage too much (in most cases).
23const MAX_COLLECT: usize = 50;
24
25impl<I> MultiOps<RoaringBitmap> for I
26where
27    I: IntoIterator<Item = RoaringBitmap>,
28{
29    type Output = RoaringBitmap;
30
31    fn union(self) -> Self::Output {
32        try_multi_or_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
33    }
34
35    fn intersection(self) -> Self::Output {
36        try_multi_and_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
37    }
38
39    fn difference(self) -> Self::Output {
40        try_multi_sub_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
41    }
42
43    fn symmetric_difference(self) -> Self::Output {
44        try_multi_xor_owned(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
45    }
46}
47
48impl<I, E> MultiOps<Result<RoaringBitmap, E>> for I
49where
50    I: IntoIterator<Item = Result<RoaringBitmap, E>>,
51{
52    type Output = Result<RoaringBitmap, E>;
53
54    fn union(self) -> Self::Output {
55        try_multi_or_owned(self)
56    }
57
58    fn intersection(self) -> Self::Output {
59        try_multi_and_owned(self)
60    }
61
62    fn difference(self) -> Self::Output {
63        try_multi_sub_owned(self)
64    }
65
66    fn symmetric_difference(self) -> Self::Output {
67        try_multi_xor_owned(self)
68    }
69}
70
71impl<'a, I> MultiOps<&'a RoaringBitmap> for I
72where
73    I: IntoIterator<Item = &'a RoaringBitmap>,
74{
75    type Output = RoaringBitmap;
76
77    fn union(self) -> Self::Output {
78        try_multi_or_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
79    }
80
81    fn intersection(self) -> Self::Output {
82        try_multi_and_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
83    }
84
85    fn difference(self) -> Self::Output {
86        try_multi_sub_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
87    }
88
89    fn symmetric_difference(self) -> Self::Output {
90        try_multi_xor_ref(self.into_iter().map(Ok::<_, Infallible>)).unwrap()
91    }
92}
93
94impl<'a, I, E: 'a> MultiOps<Result<&'a RoaringBitmap, E>> for I
95where
96    I: IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
97{
98    type Output = Result<RoaringBitmap, E>;
99
100    fn union(self) -> Self::Output {
101        try_multi_or_ref(self)
102    }
103
104    fn intersection(self) -> Self::Output {
105        try_multi_and_ref(self)
106    }
107
108    fn difference(self) -> Self::Output {
109        try_multi_sub_ref(self)
110    }
111
112    fn symmetric_difference(self) -> Self::Output {
113        try_multi_xor_ref(self)
114    }
115}
116
117#[inline]
118fn try_multi_and_owned<E>(
119    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
120) -> Result<RoaringBitmap, E> {
121    let mut iter = bitmaps.into_iter();
122
123    // We're going to take a bunch of elements at the start of the iterator and sort
124    // them to reduce the size of our bitmap faster.
125    let mut start = collect_starting_elements(iter.by_ref())?;
126    start.sort_unstable_by_key(|bitmap| bitmap.containers.len());
127    let mut start = start.into_iter();
128
129    if let Some(mut lhs) = start.next() {
130        for rhs in start.map(Ok).chain(iter) {
131            if lhs.is_empty() {
132                return Ok(lhs);
133            }
134            lhs &= rhs?;
135        }
136
137        Ok(lhs)
138    } else {
139        Ok(RoaringBitmap::new())
140    }
141}
142
143#[inline]
144fn try_multi_and_ref<'a, E>(
145    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
146) -> Result<RoaringBitmap, E> {
147    let mut iter = bitmaps.into_iter();
148
149    // We're going to take a bunch of elements at the start of the iterator and sort
150    // them to reduce the size of our bitmap faster.
151    let mut start = collect_starting_elements(iter.by_ref())?;
152    start.sort_unstable_by_key(|bitmap| bitmap.containers.len());
153    let mut start = start.into_iter();
154
155    if let Some(mut lhs) = start.next().cloned() {
156        for rhs in start.map(Ok).chain(iter) {
157            if lhs.is_empty() {
158                return Ok(lhs);
159            }
160            lhs &= rhs?;
161        }
162        Ok(lhs)
163    } else {
164        Ok(RoaringBitmap::new())
165    }
166}
167
168#[inline]
169fn try_multi_sub_owned<E>(
170    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
171) -> Result<RoaringBitmap, E> {
172    let mut iter = bitmaps.into_iter();
173    match iter.next().transpose()? {
174        Some(mut lhs) => {
175            for rhs in iter {
176                if lhs.is_empty() {
177                    return Ok(lhs);
178                }
179                lhs -= rhs?;
180            }
181            Ok(lhs)
182        }
183        None => Ok(RoaringBitmap::default()),
184    }
185}
186
187#[inline]
188fn try_multi_sub_ref<'a, E>(
189    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
190) -> Result<RoaringBitmap, E> {
191    let mut iter = bitmaps.into_iter();
192    match iter.next().transpose()?.cloned() {
193        Some(mut lhs) => {
194            for rhs in iter {
195                if lhs.is_empty() {
196                    return Ok(lhs);
197                }
198                lhs -= rhs?;
199            }
200
201            Ok(lhs)
202        }
203        None => Ok(RoaringBitmap::default()),
204    }
205}
206
207#[inline]
208fn try_multi_or_owned<E>(
209    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
210) -> Result<RoaringBitmap, E> {
211    let mut iter = bitmaps.into_iter();
212
213    // We're going to take a bunch of elements at the start of the iterator and
214    // move the biggest one first to grow faster.
215    let mut start = collect_starting_elements(iter.by_ref())?;
216    start.sort_unstable_by_key(|bitmap| Reverse(bitmap.containers.len()));
217    let start_size = start.len();
218    let mut start = start.into_iter();
219
220    let mut containers = if let Some(c) = start.next() {
221        if c.is_empty() {
222            // everything must be empty if the max is empty
223            start.by_ref().nth(start_size);
224        }
225        c.containers
226    } else {
227        return Ok(RoaringBitmap::new());
228    };
229
230    for bitmap in start.map(Ok).chain(iter) {
231        merge_container_owned(&mut containers, bitmap?.containers, BitOrAssign::bitor_assign);
232    }
233
234    containers.retain_mut(|container| {
235        if !container.is_empty() {
236            container.ensure_correct_store();
237            true
238        } else {
239            false
240        }
241    });
242
243    Ok(RoaringBitmap { containers })
244}
245
246#[inline]
247fn try_multi_xor_owned<E>(
248    bitmaps: impl IntoIterator<Item = Result<RoaringBitmap, E>>,
249) -> Result<RoaringBitmap, E> {
250    let mut iter = bitmaps.into_iter();
251    let mut containers = match iter.next().transpose()? {
252        None => Vec::new(),
253        Some(v) => v.containers,
254    };
255
256    for bitmap in iter {
257        merge_container_owned(&mut containers, bitmap?.containers, BitXorAssign::bitxor_assign);
258    }
259
260    containers.retain_mut(|container| {
261        if !container.is_empty() {
262            container.ensure_correct_store();
263            true
264        } else {
265            false
266        }
267    });
268
269    Ok(RoaringBitmap { containers })
270}
271
272fn merge_container_owned(
273    lhs: &mut Vec<Container>,
274    rhs: Vec<Container>,
275    op: impl Fn(&mut Store, Store),
276) {
277    for mut rhs in rhs {
278        match lhs.binary_search_by_key(&rhs.key, |c| c.key) {
279            Err(loc) => lhs.insert(loc, rhs),
280            Ok(loc) => {
281                let lhs = &mut lhs[loc];
282                match (&lhs.store, &rhs.store) {
283                    (Store::Array(..), Store::Array(..)) => lhs.store = lhs.store.to_bitmap(),
284                    (Store::Array(..), Store::Bitmap(..)) => mem::swap(lhs, &mut rhs),
285                    _ => (),
286                };
287                op(&mut lhs.store, rhs.store);
288            }
289        }
290    }
291}
292
293#[inline]
294fn try_multi_or_ref<'a, E: 'a>(
295    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
296) -> Result<RoaringBitmap, E> {
297    // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet)
298    // any others with the same key.
299    //
300    //   1. Eager cloning would create useless intermediate values that might become bitmaps
301    //   2. Eager promoting forces disjoint containers to converted back to arrays at the end
302    //
303    // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on.
304    // More memory efficient, negligible wall time difference benchmarks
305
306    // Phase 1. Borrow all the containers from the first element.
307    let mut iter = bitmaps.into_iter();
308    let mut start = collect_starting_elements(iter.by_ref())?;
309    let start_size = start.len();
310
311    start.sort_unstable_by_key(|bitmap| Reverse(bitmap.containers.len()));
312    let mut start = start.into_iter();
313    let mut containers = match start.next() {
314        Some(c) => {
315            let c: Vec<Cow<Container>> = c.containers.iter().map(Cow::Borrowed).collect();
316            if c.is_empty() {
317                // everything must be empty if the max is empty
318                start.by_ref().nth(start_size);
319            }
320            c
321        }
322        None => {
323            return Ok(RoaringBitmap::new());
324        }
325    };
326
327    // Phase 2: Operate on the remaining containers
328    for bitmap in start.map(Ok).chain(iter) {
329        merge_container_ref(&mut containers, &bitmap?.containers, |a, b| *a |= b);
330    }
331
332    // Phase 3: Clean up
333    let containers: Vec<_> = containers
334        .into_iter()
335        .filter(|container| !container.is_empty())
336        .map(|c| {
337            // Any borrowed bitmaps or arrays left over get cloned here
338            let mut container = c.into_owned();
339            container.ensure_correct_store();
340            container
341        })
342        .collect();
343
344    Ok(RoaringBitmap { containers })
345}
346
347#[inline]
348fn try_multi_xor_ref<'a, E: 'a>(
349    bitmaps: impl IntoIterator<Item = Result<&'a RoaringBitmap, E>>,
350) -> Result<RoaringBitmap, E> {
351    //
352    // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet)
353    // any others with the same key.
354    //
355    //   1. Eager cloning would create useless intermediate values that might become bitmaps
356    //   2. Eager promoting forces disjoint containers to converted back to arrays at the end
357    //
358    // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on.
359    // More memory efficient, negligible wall time difference benchmarks
360
361    // Phase 1. Borrow all the containers from the first element.
362    let mut iter = bitmaps.into_iter();
363    let mut containers: Vec<Cow<Container>> = match iter.next().transpose()? {
364        None => Vec::new(),
365        Some(v) => v.containers.iter().map(Cow::Borrowed).collect(),
366    };
367
368    // Phase 2: Operate on the remaining containers
369    for bitmap in iter {
370        merge_container_ref(&mut containers, &bitmap?.containers, |a, b| *a ^= b);
371    }
372
373    // Phase 3: Clean up
374    let containers: Vec<_> = containers
375        .into_iter()
376        .filter(|container| !container.is_empty())
377        .map(|c| {
378            // Any borrowed bitmaps or arrays left over get cloned here
379            let mut container = c.into_owned();
380            container.ensure_correct_store();
381            container
382        })
383        .collect();
384
385    Ok(RoaringBitmap { containers })
386}
387
388fn merge_container_ref<'a>(
389    containers: &mut Vec<Cow<'a, Container>>,
390    rhs: &'a [Container],
391    op: impl Fn(&mut Store, &Store),
392) {
393    for rhs in rhs {
394        match containers.binary_search_by_key(&rhs.key, |c| c.key) {
395            Err(loc) => {
396                // A container not currently in containers. Borrow it.
397                containers.insert(loc, Cow::Borrowed(rhs))
398            }
399            Ok(loc) => {
400                // A container that is in containers. Operate on it.
401                let lhs = &mut containers[loc];
402                match (&lhs.store, &rhs.store) {
403                    (Store::Array(..), Store::Array(..)) => {
404                        // We had borrowed an array. Without cloning it, create a new bitmap
405                        // Add all the elements to the new bitmap
406                        let mut store = lhs.store.to_bitmap();
407                        op(&mut store, &rhs.store);
408                        *lhs = Cow::Owned(Container { key: lhs.key, store });
409                    }
410                    (Store::Array(..), Store::Bitmap(..)) => {
411                        // We had borrowed an array. Copy the rhs bitmap, add lhs to it
412                        let mut store = rhs.store.clone();
413                        op(&mut store, &lhs.store);
414                        *lhs = Cow::Owned(Container { key: lhs.key, store });
415                    }
416                    (Store::Bitmap(..), _) => {
417                        // This might be a owned or borrowed bitmap.
418                        // If it was borrowed it will clone-on-write
419                        op(&mut lhs.to_mut().store, &rhs.store);
420                    }
421                    (Store::Run(..), Store::Run(..)) => {
422                        op(&mut lhs.to_mut().store, &rhs.store);
423                    }
424                    (Store::Run(..), _) => {
425                        op(&mut lhs.to_mut().store, &rhs.store);
426                    }
427                    (Store::Array(..), Store::Run(..)) => {
428                        op(&mut lhs.to_mut().store, &rhs.store);
429                    }
430                };
431            }
432        }
433    }
434}
435
436#[inline]
437fn collect_starting_elements<I, El, Er>(iter: I) -> Result<Vec<El>, Er>
438where
439    I: IntoIterator<Item = Result<El, Er>>,
440{
441    let iter = iter.into_iter();
442    let mut to_collect = iter.size_hint().1.unwrap_or(BASE_COLLECT);
443    if to_collect > MAX_COLLECT {
444        to_collect = BASE_COLLECT;
445    }
446
447    let mut ret = Vec::with_capacity(to_collect);
448    for el in iter.take(to_collect) {
449        ret.push(el?);
450    }
451
452    Ok(ret)
453}