rayon/slice/mergesort.rs

//! Parallel merge sort.
//!
//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
//! The only difference from the original is that the sequential `mergesort` returns
//! `MergesortResult` and leaves descending arrays intact.

use crate::iter::*;
use crate::slice::ParallelSliceMut;
use std::mem;
use std::mem::size_of;
use std::ptr;
use std::slice;

unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
    let old = *ptr;
    *ptr = ptr.offset(1);
    old
}

unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
    *ptr = ptr.offset(-1);
    *ptr
}

/// When dropped, copies from `src` into `dest` a sequence of length `len`.
struct CopyOnDrop<T> {
    src: *mut T,
    dest: *mut T,
    len: usize,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
        }
    }
}

/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
fn insert_head<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    if v.len() >= 2 && is_less(&v[1], &v[0]) {
        unsafe {
            // There are three ways to implement insertion here:
            //
            // 1. Swap adjacent elements until the first one gets to its final destination.
            //    However, this way we copy data around more than is necessary. If elements are big
            //    structures (costly to copy), this method will be slow.
            //
            // 2. Iterate until the right place for the first element is found. Then shift the
            //    elements succeeding it to make room for it and finally place it into the
            //    remaining hole. This is a good method.
            //
            // 3. Copy the first element into a temporary variable. Iterate until the right place
            //    for it is found. As we go along, copy every traversed element into the slot
            //    preceding it. Finally, copy data from the temporary variable into the remaining
            //    hole. This method is very good. Benchmarks demonstrated slightly better
            //    performance than with the 2nd method.
            //
            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
            let mut tmp = NoDrop {
                value: Some(ptr::read(&v[0])),
            };

            // Intermediate state of the insertion process is always tracked by `hole`, which
            // serves two purposes:
            // 1. Protects integrity of `v` from panics in `is_less`.
            // 2. Fills the remaining hole in `v` in the end.
            //
            // Panic safety:
            //
            // If `is_less` panics at any point during the process, `hole` will get dropped and
            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
            // initially held exactly once.
            let mut hole = InsertionHole {
                src: tmp.value.as_mut().unwrap(),
                dest: &mut v[1],
            };
            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

            for i in 2..v.len() {
                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
                    break;
                }
                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
                hole.dest = &mut v[i];
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }

    // Holds a value, but never drops it.
    struct NoDrop<T> {
        value: Option<T>,
    }

    impl<T> Drop for NoDrop<T> {
        fn drop(&mut self) {
            mem::forget(self.value.take());
        }
    }

    // When dropped, copies from `src` into `dest`.
    struct InsertionHole<T> {
        src: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for InsertionHole<T> {
        fn drop(&mut self) {
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dest, 1);
            }
        }
    }
}

/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    let len = v.len();
    let v = v.as_mut_ptr();
    let v_mid = v.add(mid);
    let v_end = v.add(len);

    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
    // copying the lesser (or greater) one into `v`.
    //
    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
    // consumed first, then we must copy whatever is left of the shorter run into the remaining
    // hole in `v`.
    //
    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
    // 1. Protects integrity of `v` from panics in `is_less`.
    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
    // object it initially held exactly once.
    let mut hole;

    if mid <= len - mid {
        // The left run is shorter.
        ptr::copy_nonoverlapping(v, buf, mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(mid),
            dest: v,
        };

        // Initially, these pointers point to the beginnings of their arrays.
        let left = &mut hole.start;
        let mut right = v_mid;
        let out = &mut hole.dest;

        while *left < hole.end && right < v_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*right, &**left) {
                get_and_increment(&mut right)
            } else {
                get_and_increment(left)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
        }
    } else {
        // The right run is shorter.
        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(len - mid),
            dest: v_mid,
        };

        // Initially, these pointers point past the ends of their arrays.
        let left = &mut hole.dest;
        let right = &mut hole.end;
        let mut out = v_end;

        while v < *left && buf < *right {
            // Consume the greater side.
            // If equal, prefer the right run to maintain stability.
            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
                decrement_and_get(left)
            } else {
                decrement_and_get(right)
            };
            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
        }
    }
    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
    // it will now be copied into the hole in `v`.

    // When dropped, copies the range `start..end` into `dest..`.
    struct MergeHole<T> {
        start: *mut T,
        end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
            // `T` is not a zero-sized type, so it's okay to divide by its size.
            let len = (self.end as usize - self.start as usize) / size_of::<T>();
            unsafe {
                ptr::copy_nonoverlapping(self.start, self.dest, len);
            }
        }
    }
}

/// The result of merge sort.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq)]
enum MergesortResult {
    /// The slice has already been sorted.
    NonDescending,
    /// The slice has been descending and therefore it was left intact.
    Descending,
    /// The slice was sorted.
    Sorted,
}

/// A sorted run that starts at index `start` and is of length `len`.
#[derive(Clone, Copy)]
struct Run {
    start: usize,
    len: usize,
}

/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
/// algorithm should continue building a new run instead, `None` is returned.
///
/// TimSort is infamous for its buggy implementations, as described here:
/// http://envisage-project.eu/timsort-specification-and-verification/
///
/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
/// Enforcing them on just top three is not sufficient to ensure that the invariants will still
/// hold for *all* runs in the stack.
///
/// This function correctly checks invariants for the top four runs. Additionally, if the top
/// run starts at index 0, it will always demand a merge operation until the stack is fully
/// collapsed, in order to complete the sort.
#[inline]
fn collapse(runs: &[Run]) -> Option<usize> {
    let n = runs.len();

    if n >= 2
        && (runs[n - 1].start == 0
            || runs[n - 2].len <= runs[n - 1].len
            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
    {
        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
            Some(n - 3)
        } else {
            Some(n - 2)
        }
    } else {
        None
    }
}

/// Sorts a slice using merge sort, unless it is already sorted or in strictly descending order.
///
/// This function doesn't modify the slice if it is already non-descending or descending.
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](https://svn.python.org/projects/python/trunk/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is `O(n log n)` worst-case.
///
/// # Safety
///
/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Very short runs are extended using insertion sort to span at least this many elements.
    const MIN_RUN: usize = 10;

    let len = v.len();

    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
    // strange decision, but consider the fact that merges more often go in the opposite direction
    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
    // backwards. To conclude, identifying runs by traversing backwards improves performance.
    let mut runs = vec![];
    let mut end = len;
    while end > 0 {
        // Find the next natural run, and reverse it if it's strictly descending.
        let mut start = end - 1;

        if start > 0 {
            start -= 1;

            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this descending run covers the whole slice, return immediately.
                if start == 0 && end == len {
                    return MergesortResult::Descending;
                } else {
                    v[start..end].reverse();
                }
            } else {
                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this non-descending run covers the whole slice, return immediately.
                if end - start == len {
                    return MergesortResult::NonDescending;
                }
            }
        }

        // Insert some more elements into the run if it's too short. Insertion sort is faster than
        // merge sort on short sequences, so this significantly improves performance.
        while start > 0 && end - start < MIN_RUN {
            start -= 1;
            insert_head(&mut v[start..end], &is_less);
        }

        // Push this run onto the stack.
        runs.push(Run {
            start,
            len: end - start,
        });
        end = start;

        // Merge some pairs of adjacent runs to satisfy the invariants.
        while let Some(r) = collapse(&runs) {
            let left = runs[r + 1];
            let right = runs[r];
            merge(
                &mut v[left.start..right.start + right.len],
                left.len,
                buf,
                &is_less,
            );

            runs[r] = Run {
                start: left.start,
                len: left.len + right.len,
            };
            runs.remove(r + 1);
        }
    }

    // Finally, exactly one run must remain in the stack.
    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);

    // The original order of the slice was neither non-descending nor descending.
    MergesortResult::Sorted
}

////////////////////////////////////////////////////////////////////////////
// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
// Everything below this line is parallelization.
////////////////////////////////////////////////////////////////////////////

/// Splits two sorted slices so that they can be merged in parallel.
///
/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
/// `left[a..]` and `right[b..]`.
fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
where
    F: Fn(&T, &T) -> bool,
{
    let left_len = left.len();
    let right_len = right.len();

    if left_len >= right_len {
        let left_mid = left_len / 2;

        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
        let mut a = 0;
        let mut b = right_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[m], &left[left_mid]) {
                a = m + 1;
            } else {
                b = m;
            }
        }

        (left_mid, a)
    } else {
        let right_mid = right_len / 2;

        // Find the first element in `left` that is greater than `right[right_mid]`.
        let mut a = 0;
        let mut b = left_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[right_mid], &left[m]) {
                b = m;
            } else {
                a = m + 1;
            }
        }

        (a, right_mid)
    }
}

/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
///
/// # Safety
///
/// The `dest` pointer must have enough space to store the result.
///
/// Even if `is_less` panics at any point during the merge process, this function will fully copy
/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
    // scheduling.
    const MAX_SEQUENTIAL: usize = 5000;

    let left_len = left.len();
    let right_len = right.len();

    // Intermediate state of the merge process, which serves two purposes:
    // 1. Protects integrity of `dest` from panics in `is_less`.
    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
    // remaining parts of `left` and `right` into `dest`.
    let mut s = State {
        left_start: left.as_mut_ptr(),
        left_end: left.as_mut_ptr().add(left_len),
        right_start: right.as_mut_ptr(),
        right_end: right.as_mut_ptr().add(right_len),
        dest,
    };

    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
        while s.left_start < s.left_end && s.right_start < s.right_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
                get_and_increment(&mut s.right_start)
            } else {
                get_and_increment(&mut s.left_start)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
        }
    } else {
        // Function `split_for_merge` might panic. If that happens, `s` will be dropped and copy
        // the whole `left` and `right` into `dest`.
        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
        let (left_l, left_r) = left.split_at_mut(left_mid);
        let (right_l, right_r) = right.split_at_mut(right_mid);

        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
        // `par_merge` happen. If one of the two calls panics, they will ensure that elements still
        // get copied into `dest_l` and `dest_r`.
        mem::forget(s);

        // Convert the pointers to `usize` because `*mut T` is not `Send`.
        let dest_l = dest as usize;
        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
        rayon_core::join(
            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
        );
    }
    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
    // all at once.

    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
    // in that order.
    struct State<T> {
        left_start: *mut T,
        left_end: *mut T,
        right_start: *mut T,
        right_end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for State<T> {
        fn drop(&mut self) {
            let size = size_of::<T>();
            let left_len = (self.left_end as usize - self.left_start as usize) / size;
            let right_len = (self.right_end as usize - self.right_start as usize) / size;

            // Copy array `left`, followed by `right`.
            unsafe {
                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
                self.dest = self.dest.add(left_len);
                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
            }
        }
    }
}

/// Recursively merges pre-sorted chunks inside `v`.
///
/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
///
/// # Safety
///
/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
/// must equal the left bound of the following chunk.
///
/// The buffer must be at least as long as `v`.
unsafe fn recurse<T, F>(
    v: *mut T,
    buf: *mut T,
    chunks: &[(usize, usize)],
    into_buf: bool,
    is_less: &F,
) where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    let len = chunks.len();
    debug_assert!(len > 0);

    // Base case of the algorithm.
    // If only one chunk is remaining, there's no more work to split and merge.
    if len == 1 {
        if into_buf {
            // Copy the chunk from `v` into `buf`.
            let (start, end) = chunks[0];
            let src = v.add(start);
            let dest = buf.add(start);
            ptr::copy_nonoverlapping(src, dest, end - start);
        }
        return;
    }

    // Split the chunks into two halves.
    let (start, _) = chunks[0];
    let (mid, _) = chunks[len / 2];
    let (_, end) = chunks[len - 1];
    let (left, right) = chunks.split_at(len / 2);

    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
    // merge chunks from `v` into `buf`, and vice versa.
    //
    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
    // level etc.
    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };

    // Panic safety:
    //
    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
    let guard = CopyOnDrop {
        src: src.add(start),
        dest: dest.add(start),
        len: end - start,
    };

    // Convert the pointers to `usize` because `*mut T` is not `Send`.
    let v = v as usize;
    let buf = buf as usize;
    rayon_core::join(
        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
    );

    // Everything went all right - recursive calls didn't panic.
    // Forget the guard in order to prevent its destructor from running.
    mem::forget(guard);

    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
    par_merge(src_left, src_right, dest.add(start), is_less);
}

/// Sorts `v` using merge sort in parallel.
///
/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case.
/// The allocated temporary buffer is the same length as `v`.
pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
    // buffer allocation.
    const MAX_INSERTION: usize = 20;
    // The length of initial chunks. This number is as small as possible but so that the overhead
    // of Rayon's task scheduling is still negligible.
    const CHUNK_LENGTH: usize = 2000;

    // Sorting has no meaningful behavior on zero-sized types.
    if size_of::<T>() == 0 {
        return;
    }

    let len = v.len();

    // Short slices get sorted in-place via insertion sort to avoid allocations.
    if len <= MAX_INSERTION {
        if len >= 2 {
            for i in (0..len - 1).rev() {
                insert_head(&mut v[i..], &is_less);
            }
        }
        return;
    }

    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
    // shallow copies of the contents of `v` without risking the dtors running on copies if
    // `is_less` panics.
    let mut buf = Vec::<T>::with_capacity(len);
    let buf = buf.as_mut_ptr();

    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
    if len <= CHUNK_LENGTH {
        let res = unsafe { mergesort(v, buf, &is_less) };
        if res == MergesortResult::Descending {
            v.reverse();
        }
        return;
    }

    // Split the slice into chunks and merge sort them in parallel.
    // However, descending chunks will not be sorted - they will be simply left intact.
    let mut iter = {
        // Convert the pointer to `usize` because `*mut T` is not `Send`.
        let buf = buf as usize;

        v.par_chunks_mut(CHUNK_LENGTH)
            .with_max_len(1)
            .enumerate()
            .map(|(i, chunk)| {
                let l = CHUNK_LENGTH * i;
                let r = l + chunk.len();
                unsafe {
                    let buf = (buf as *mut T).add(l);
                    (l, r, mergesort(chunk, buf, &is_less))
                }
            })
            .collect::<Vec<_>>()
            .into_iter()
            .peekable()
    };

    // Now attempt to concatenate adjacent chunks that were left intact.
    let mut chunks = Vec::with_capacity(iter.len());

    while let Some((a, mut b, res)) = iter.next() {
        // If this chunk was not modified by the sort procedure...
        if res != MergesortResult::Sorted {
            while let Some(&(x, y, r)) = iter.peek() {
                // If the following chunk is of the same type and can be concatenated...
                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
                    // Concatenate them.
                    b = y;
                    iter.next();
                } else {
                    break;
                }
            }
        }

        // Descending chunks must be reversed.
        if res == MergesortResult::Descending {
            v[a..b].reverse();
        }

        chunks.push((a, b));
    }

    // All chunks are properly sorted.
    // Now we just have to merge them together.
    unsafe {
        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
    }
}
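// A hedged usage sketch (an assumption about the surrounding crate, not something stated in this
// file): the public sorting methods on `ParallelSliceMut` are expected to forward to
// `par_mergesort` with a strict "less than" comparison, roughly as
//
//     par_mergesort(v, |a, b| a < b);
//
// where `v: &mut [T]` and `T: Ord + Send`.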

#[cfg(test)]
mod tests {
    use super::split_for_merge;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    #[test]
    fn test_split_for_merge() {
        fn check(left: &[u32], right: &[u32]) {
            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
            assert!(left[..l]
                .iter()
                .all(|&x| right[r..].iter().all(|&y| x <= y)));
            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
        }

        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
        check(&[1, 2, 2, 2, 2, 3], &[]);
        check(&[], &[1, 2, 2, 2, 2, 3]);

        let ref mut rng = thread_rng();

        for _ in 0..100 {
            let limit: u32 = rng.gen_range(1..21);
            let left_len: usize = rng.gen_range(0..20);
            let right_len: usize = rng.gen_range(0..20);

            let mut left = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(left_len)
                .collect::<Vec<_>>();
            let mut right = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(right_len)
                .collect::<Vec<_>>();

            left.sort();
            right.sort();
            check(&left, &right);
        }
    }
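
    // The tests below are additions, not part of the upstream test suite: hedged sanity checks
    // exercising `collapse` and `par_mergesort` directly (both are reachable here because child
    // modules can access their parent's private items).

    // `collapse` demands a merge when invariant 1 (`runs[i - 1].len > runs[i].len`) is violated,
    // or when the top run starts at index 0; otherwise it lets a new run be built.
    #[test]
    fn test_collapse_invariants() {
        use super::{collapse, Run};

        // Invariants hold: keep building runs.
        assert_eq!(
            collapse(&[Run { start: 10, len: 10 }, Run { start: 5, len: 5 }]),
            None
        );
        // Invariant 1 violated: merge the top two runs.
        assert_eq!(
            collapse(&[Run { start: 9, len: 4 }, Run { start: 4, len: 5 }]),
            Some(0)
        );
        // Top run starts at index 0: always collapse towards a single run.
        assert_eq!(
            collapse(&[Run { start: 5, len: 10 }, Run { start: 0, len: 5 }]),
            Some(0)
        );
    }

    // `par_mergesort` should agree with the standard library's sort across the insertion-sort,
    // sequential-mergesort, and parallel code paths.
    #[test]
    fn test_par_mergesort_matches_std_sort() {
        use super::par_mergesort;

        let mut rng = thread_rng();
        for &len in &[0usize, 1, 2, 20, 100, 5000] {
            let mut v: Vec<u32> = (0..len).map(|_| rng.gen_range(0..1000)).collect();
            let mut expected = v.clone();
            expected.sort();
            par_mergesort(&mut v, |a, b| a < b);
            assert_eq!(v, expected);
        }
    }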
}