arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::rank;
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181// partition indices into valid and null indices
182fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183    match array.null_count() {
184        // faster path
185        0 => ((0..(array.len() as u32)).collect(), vec![]),
186        _ => {
187            let indices = 0..(array.len() as u32);
188            indices.partition(|index| array.is_valid(*index as usize))
189        }
190    }
191}
192
193/// Whether `arrow_ord::rank` can rank an array of given data type.
194fn can_rank(data_type: &DataType) -> bool {
195    data_type.is_primitive()
196        || matches!(
197            data_type,
198            DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary
199        )
200}
201
202/// Whether `sort_to_indices` can sort an array of given data type.
203fn can_sort_to_indices(data_type: &DataType) -> bool {
204    data_type.is_primitive()
205        || matches!(
206            data_type,
207            DataType::Boolean
208                | DataType::Utf8
209                | DataType::LargeUtf8
210                | DataType::Utf8View
211                | DataType::Binary
212                | DataType::LargeBinary
213                | DataType::BinaryView
214                | DataType::FixedSizeBinary(_)
215        )
216        || match data_type {
217            DataType::List(f) if can_rank(f.data_type()) => true,
218            DataType::LargeList(f) if can_rank(f.data_type()) => true,
219            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
220            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
221            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
222            _ => false,
223        }
224}
225
226/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
227/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
228pub fn sort_to_indices(
229    array: &dyn Array,
230    options: Option<SortOptions>,
231    limit: Option<usize>,
232) -> Result<UInt32Array, ArrowError> {
233    let options = options.unwrap_or_default();
234
235    let (v, n) = partition_validity(array);
236
237    Ok(downcast_primitive_array! {
238        array => sort_primitive(array, v, n, options, limit),
239        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
240        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
241        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
242        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
243        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
244        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
245        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
246        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
247        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
248        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
249        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
250        DataType::Dictionary(_, _) => downcast_dictionary_array!{
251            array => sort_dictionary(array, v, n, options, limit)?,
252            _ => unreachable!()
253        }
254        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
255            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
256            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
257            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
258            dt => {
259                return Err(ArrowError::ComputeError(format!(
260                    "Invalid run end data type: {dt}"
261                )))
262            }
263        },
264        t => {
265            return Err(ArrowError::ComputeError(format!(
266                "Sort not supported for data type {t:?}"
267            )));
268        }
269    })
270}
271
272fn sort_boolean(
273    values: &BooleanArray,
274    value_indices: Vec<u32>,
275    null_indices: Vec<u32>,
276    options: SortOptions,
277    limit: Option<usize>,
278) -> UInt32Array {
279    let mut valids = value_indices
280        .into_iter()
281        .map(|index| (index, values.value(index as usize)))
282        .collect::<Vec<(u32, bool)>>();
283    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
284}
285
286fn sort_primitive<T: ArrowPrimitiveType>(
287    values: &PrimitiveArray<T>,
288    value_indices: Vec<u32>,
289    nulls: Vec<u32>,
290    options: SortOptions,
291    limit: Option<usize>,
292) -> UInt32Array {
293    let mut valids = value_indices
294        .into_iter()
295        .map(|index| (index, values.value(index as usize)))
296        .collect::<Vec<(u32, T::Native)>>();
297    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
298}
299
300fn sort_bytes<T: ByteArrayType>(
301    values: &GenericByteArray<T>,
302    value_indices: Vec<u32>,
303    nulls: Vec<u32>,
304    options: SortOptions,
305    limit: Option<usize>,
306) -> UInt32Array {
307    let mut valids = value_indices
308        .into_iter()
309        .map(|index| (index, values.value(index as usize).as_ref()))
310        .collect::<Vec<(u32, &[u8])>>();
311
312    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
313}
314
315fn sort_byte_view<T: ByteViewType>(
316    values: &GenericByteViewArray<T>,
317    value_indices: Vec<u32>,
318    nulls: Vec<u32>,
319    options: SortOptions,
320    limit: Option<usize>,
321) -> UInt32Array {
322    let mut valids = value_indices
323        .into_iter()
324        .map(|index| (index, values.value(index as usize).as_ref()))
325        .collect::<Vec<(u32, &[u8])>>();
326    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
327}
328
329fn sort_fixed_size_binary(
330    values: &FixedSizeBinaryArray,
331    value_indices: Vec<u32>,
332    nulls: Vec<u32>,
333    options: SortOptions,
334    limit: Option<usize>,
335) -> UInt32Array {
336    let mut valids = value_indices
337        .iter()
338        .copied()
339        .map(|index| (index, values.value(index as usize)))
340        .collect::<Vec<(u32, &[u8])>>();
341    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
342}
343
344fn sort_dictionary<K: ArrowDictionaryKeyType>(
345    dict: &DictionaryArray<K>,
346    value_indices: Vec<u32>,
347    null_indices: Vec<u32>,
348    options: SortOptions,
349    limit: Option<usize>,
350) -> Result<UInt32Array, ArrowError> {
351    let keys: &PrimitiveArray<K> = dict.keys();
352    let rank = child_rank(dict.values().as_ref(), options)?;
353
354    // create tuples that are used for sorting
355    let mut valids = value_indices
356        .into_iter()
357        .map(|index| {
358            let key: K::Native = keys.value(index as usize);
359            (index, rank[key.as_usize()])
360        })
361        .collect::<Vec<(u32, u32)>>();
362
363    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
364}
365
366fn sort_list<O: OffsetSizeTrait>(
367    array: &GenericListArray<O>,
368    value_indices: Vec<u32>,
369    null_indices: Vec<u32>,
370    options: SortOptions,
371    limit: Option<usize>,
372) -> Result<UInt32Array, ArrowError> {
373    let rank = child_rank(array.values().as_ref(), options)?;
374    let offsets = array.value_offsets();
375    let mut valids = value_indices
376        .into_iter()
377        .map(|index| {
378            let end = offsets[index as usize + 1].as_usize();
379            let start = offsets[index as usize].as_usize();
380            (index, &rank[start..end])
381        })
382        .collect::<Vec<(u32, &[u32])>>();
383    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
384}
385
386fn sort_fixed_size_list(
387    array: &FixedSizeListArray,
388    value_indices: Vec<u32>,
389    null_indices: Vec<u32>,
390    options: SortOptions,
391    limit: Option<usize>,
392) -> Result<UInt32Array, ArrowError> {
393    let rank = child_rank(array.values().as_ref(), options)?;
394    let size = array.value_length() as usize;
395    let mut valids = value_indices
396        .into_iter()
397        .map(|index| {
398            let start = index as usize * size;
399            (index, &rank[start..start + size])
400        })
401        .collect::<Vec<(u32, &[u32])>>();
402    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
403}
404
405#[inline(never)]
406fn sort_impl<T: Copy>(
407    options: SortOptions,
408    valids: &mut [(u32, T)],
409    nulls: &[u32],
410    limit: Option<usize>,
411    mut cmp: impl FnMut(T, T) -> Ordering,
412) -> Vec<u32> {
413    let v_limit = match (limit, options.nulls_first) {
414        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
415        _ => valids.len(),
416    };
417
418    match options.descending {
419        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
420        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
421    }
422
423    let len = valids.len() + nulls.len();
424    let limit = limit.unwrap_or(len).min(len);
425    let mut out = Vec::with_capacity(len);
426    match options.nulls_first {
427        true => {
428            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
429            let remaining = limit - out.len();
430            out.extend(valids.iter().map(|x| x.0).take(remaining));
431        }
432        false => {
433            out.extend(valids.iter().map(|x| x.0).take(limit));
434            let remaining = limit - out.len();
435            out.extend_from_slice(&nulls[..remaining])
436        }
437    }
438    out
439}
440
441/// Computes the rank for a set of child values
442fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
443    // If parent sort order is descending we need to invert the value of nulls_first so that
444    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
445    let value_options = Some(SortOptions {
446        descending: false,
447        nulls_first: options.nulls_first != options.descending,
448    });
449    rank(values, value_options)
450}
451
452// Sort run array and return sorted run array.
453// The output RunArray will be encoded at the same level as input run array.
454// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
455// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
456// and not RunArray { run_ends = [4,8], values = [1,2] }
457fn sort_run(
458    values: &dyn Array,
459    options: Option<SortOptions>,
460    limit: Option<usize>,
461) -> Result<ArrayRef, ArrowError> {
462    match values.data_type() {
463        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
464            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
465            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
466            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
467            dt => unreachable!("Not valid run ends data type {dt}"),
468        },
469        dt => Err(ArrowError::InvalidArgumentError(format!(
470            "Input is not a run encoded array. Input data type {dt}"
471        ))),
472    }
473}
474
475fn sort_run_downcasted<R: RunEndIndexType>(
476    values: &dyn Array,
477    options: Option<SortOptions>,
478    limit: Option<usize>,
479) -> Result<ArrayRef, ArrowError> {
480    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
481
482    // Determine the length of output run array.
483    let output_len = if let Some(limit) = limit {
484        limit.min(run_array.len())
485    } else {
486        run_array.len()
487    };
488
489    let run_ends = run_array.run_ends();
490
491    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
492    let mut new_run_end: usize = 0;
493    let mut new_physical_len: usize = 0;
494
495    let consume_runs = |run_length, _| {
496        new_run_end += run_length;
497        new_physical_len += 1;
498        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
499    };
500
501    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
502
503    let new_run_ends = unsafe {
504        // Safety:
505        // The function builds a valid run_ends array and hence need not be validated.
506        ArrayDataBuilder::new(R::DATA_TYPE)
507            .len(new_physical_len)
508            .add_buffer(new_run_ends_builder.finish())
509            .build_unchecked()
510    };
511
512    // slice the sorted value indices based on limit.
513    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
514        .slice(0, new_run_ends.len())
515        .into_data()
516        .into();
517
518    let new_values = take(&run_values, &new_values_indices, None)?;
519
520    // Build sorted run array
521    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
522        .len(new_run_end)
523        .add_child_data(new_run_ends)
524        .add_child_data(new_values.into_data());
525    let array_data: RunArray<R> = unsafe {
526        // Safety:
527        //  This function builds a valid run array and hence can skip validation.
528        builder.build_unchecked().into()
529    };
530    Ok(Arc::new(array_data))
531}
532
533// Sort to indices for run encoded array.
534// This function will be slow for run array as it decodes the physical indices to
535// logical indices and to get the run array back, the logical indices has to be
536// encoded back to run array.
537fn sort_run_to_indices<R: RunEndIndexType>(
538    values: &dyn Array,
539    options: SortOptions,
540    limit: Option<usize>,
541) -> UInt32Array {
542    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
543    let output_len = if let Some(limit) = limit {
544        limit.min(run_array.len())
545    } else {
546        run_array.len()
547    };
548    let mut result: Vec<u32> = Vec::with_capacity(output_len);
549
550    //Add all logical indices belonging to a physical index to the output
551    let consume_runs = |run_length, logical_start| {
552        result.extend(logical_start as u32..(logical_start + run_length) as u32);
553    };
554    sort_run_inner(run_array, Some(options), output_len, consume_runs);
555
556    UInt32Array::from(result)
557}
558
559fn sort_run_inner<R: RunEndIndexType, F>(
560    run_array: &RunArray<R>,
561    options: Option<SortOptions>,
562    output_len: usize,
563    mut consume_runs: F,
564) -> (PrimitiveArray<UInt32Type>, ArrayRef)
565where
566    F: FnMut(usize, usize),
567{
568    // slice the run_array.values based on offset and length.
569    let start_physical_index = run_array.get_start_physical_index();
570    let end_physical_index = run_array.get_end_physical_index();
571    let physical_len = end_physical_index - start_physical_index + 1;
572    let run_values = run_array.values().slice(start_physical_index, physical_len);
573
574    // All the values have to be sorted irrespective of input limit.
575    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
576
577    let mut remaining_len = output_len;
578
579    let run_ends = run_array.run_ends().values();
580
581    assert_eq!(
582        0,
583        values_indices.null_count(),
584        "The output of sort_to_indices should not have null values. Its values is {}",
585        values_indices.null_count()
586    );
587
588    // Calculate `run length` of sorted value indices.
589    // Find the `logical index` at which the run starts.
590    // Call the consumer using the run length and starting logical index.
591    for physical_index in values_indices.values() {
592        // As the values were sliced with offset = start_physical_index, it has to be added back
593        // before accessing `RunArray::run_ends`
594        let physical_index = *physical_index as usize + start_physical_index;
595
596        // calculate the run length and logical index of sorted values
597        let (run_length, logical_index_start) = unsafe {
598            // Safety:
599            // The index will be within bounds as its in bounds of start_physical_index
600            // and len, both of which are within bounds of run_array
601            if physical_index == start_physical_index {
602                (
603                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
604                    0,
605                )
606            } else if physical_index == end_physical_index {
607                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
608                (
609                    run_array.offset() + run_array.len() - prev_run_end,
610                    prev_run_end - run_array.offset(),
611                )
612            } else {
613                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
614                (
615                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
616                    prev_run_end - run_array.offset(),
617                )
618            }
619        };
620        let new_run_length = run_length.min(remaining_len);
621        consume_runs(new_run_length, logical_index_start);
622        remaining_len -= new_run_length;
623
624        if remaining_len == 0 {
625            break;
626        }
627    }
628
629    if remaining_len > 0 {
630        panic!("Remaining length should be zero its values is {remaining_len}")
631    }
632    (values_indices, run_values)
633}
634
635/// One column to be used in lexicographical sort
636#[derive(Clone, Debug)]
637pub struct SortColumn {
638    /// The column to sort
639    pub values: ArrayRef,
640    /// Sort options for this column
641    pub options: Option<SortOptions>,
642}
643
644/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
645///
646/// Performs a stable lexicographical sort on values and indices.
647///
648/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
649/// `lexsort_to_indices` or `take`.
650///
651/// Example:
652///
653/// ```
654/// # use std::convert::From;
655/// # use std::sync::Arc;
656/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
657/// # use arrow_array::types::Int64Type;
658/// # use arrow_array::cast::AsArray;
659/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
660///
661/// let sorted_columns = lexsort(&vec![
662///     SortColumn {
663///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
664///             None,
665///             Some(-2),
666///             Some(89),
667///             Some(-64),
668///             Some(101),
669///         ])) as ArrayRef,
670///         options: None,
671///     },
672///     SortColumn {
673///         values: Arc::new(StringArray::from(vec![
674///             Some("hello"),
675///             Some("world"),
676///             Some(","),
677///             Some("foobar"),
678///             Some("!"),
679///         ])) as ArrayRef,
680///         options: Some(SortOptions {
681///             descending: true,
682///             nulls_first: false,
683///         }),
684///     },
685/// ], None).unwrap();
686///
687/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
688/// assert!(sorted_columns[0].is_null(0));
689/// ```
690///
691/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
692/// may be significantly faster
693///
694pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
695    let indices = lexsort_to_indices(columns, limit)?;
696    columns
697        .iter()
698        .map(|c| take(c.values.as_ref(), &indices, None))
699        .collect()
700}
701
702/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
703/// (`UInt32Array`) of indices.
704///
705/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
706/// may be significantly faster
707pub fn lexsort_to_indices(
708    columns: &[SortColumn],
709    limit: Option<usize>,
710) -> Result<UInt32Array, ArrowError> {
711    if columns.is_empty() {
712        return Err(ArrowError::InvalidArgumentError(
713            "Sort requires at least one column".to_string(),
714        ));
715    }
716    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
717        // fallback to non-lexical sort
718        let column = &columns[0];
719        return sort_to_indices(&column.values, column.options, limit);
720    }
721
722    let row_count = columns[0].values.len();
723    if columns.iter().any(|item| item.values.len() != row_count) {
724        return Err(ArrowError::ComputeError(
725            "lexical sort columns have different row counts".to_string(),
726        ));
727    };
728
729    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
730    let mut len = value_indices.len();
731
732    if let Some(limit) = limit {
733        len = limit.min(len);
734    }
735
736    let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
737    // uint32 can be sorted unstably
738    sort_unstable_by(&mut value_indices, len, |a, b| {
739        lexicographical_comparator.compare(*a, *b)
740    });
741
742    Ok(UInt32Array::from_iter_values(
743        value_indices.iter().take(len).map(|i| *i as u32),
744    ))
745}
746
747/// It's unstable_sort, may not preserve the order of equal elements
748pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
749where
750    F: FnMut(&T, &T) -> Ordering,
751{
752    if let Some(n) = limit.checked_sub(1) {
753        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
754        before.sort_unstable_by(is_less);
755    }
756}
757
758/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
759/// at given two indices. The lifetime is the same at the data wrapped.
760pub struct LexicographicalComparator {
761    compare_items: Vec<DynComparator>,
762}
763
764impl LexicographicalComparator {
765    /// lexicographically compare values at the wrapped columns with given indices.
766    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
767        for comparator in &self.compare_items {
768            match comparator(a_idx, b_idx) {
769                Ordering::Equal => continue,
770                r => return r,
771            }
772        }
773        Ordering::Equal
774    }
775
776    /// Create a new lex comparator that will wrap the given sort columns and give comparison
777    /// results with two indices.
778    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
779        let compare_items = columns
780            .iter()
781            .map(|c| {
782                make_comparator(
783                    c.values.as_ref(),
784                    c.values.as_ref(),
785                    c.options.unwrap_or_default(),
786                )
787            })
788            .collect::<Result<Vec<_>, ArrowError>>()?;
789        Ok(LexicographicalComparator { compare_items })
790    }
791}
792
793#[cfg(test)]
794mod tests {
795    use super::*;
796    use arrow_array::builder::{
797        FixedSizeListBuilder, Int64Builder, ListBuilder, PrimitiveRunBuilder,
798    };
799    use arrow_buffer::{i256, NullBuffer};
800    use arrow_schema::Field;
801    use half::f16;
802    use rand::rngs::StdRng;
803    use rand::{Rng, RngCore, SeedableRng};
804
805    fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array {
806        data.into_iter()
807            .collect::<Decimal128Array>()
808            .with_precision_and_scale(23, 6)
809            .unwrap()
810    }
811
812    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
813        data.into_iter()
814            .collect::<Decimal256Array>()
815            .with_precision_and_scale(53, 6)
816            .unwrap()
817    }
818
819    fn test_sort_to_indices_decimal128_array(
820        data: Vec<Option<i128>>,
821        options: Option<SortOptions>,
822        limit: Option<usize>,
823        expected_data: Vec<u32>,
824    ) {
825        let output = create_decimal128_array(data);
826        let expected = UInt32Array::from(expected_data);
827        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
828        assert_eq!(output, expected)
829    }
830
831    fn test_sort_to_indices_decimal256_array(
832        data: Vec<Option<i256>>,
833        options: Option<SortOptions>,
834        limit: Option<usize>,
835        expected_data: Vec<u32>,
836    ) {
837        let output = create_decimal256_array(data);
838        let expected = UInt32Array::from(expected_data);
839        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
840        assert_eq!(output, expected)
841    }
842
843    fn test_sort_decimal128_array(
844        data: Vec<Option<i128>>,
845        options: Option<SortOptions>,
846        limit: Option<usize>,
847        expected_data: Vec<Option<i128>>,
848    ) {
849        let output = create_decimal128_array(data);
850        let expected = Arc::new(create_decimal128_array(expected_data)) as ArrayRef;
851        let output = match limit {
852            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
853            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
854        };
855        assert_eq!(&output, &expected)
856    }
857
858    fn test_sort_decimal256_array(
859        data: Vec<Option<i256>>,
860        options: Option<SortOptions>,
861        limit: Option<usize>,
862        expected_data: Vec<Option<i256>>,
863    ) {
864        let output = create_decimal256_array(data);
865        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
866        let output = match limit {
867            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
868            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
869        };
870        assert_eq!(&output, &expected)
871    }
872
873    fn test_sort_to_indices_boolean_arrays(
874        data: Vec<Option<bool>>,
875        options: Option<SortOptions>,
876        limit: Option<usize>,
877        expected_data: Vec<u32>,
878    ) {
879        let output = BooleanArray::from(data);
880        let expected = UInt32Array::from(expected_data);
881        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
882        assert_eq!(output, expected)
883    }
884
885    fn test_sort_to_indices_primitive_arrays<T>(
886        data: Vec<Option<T::Native>>,
887        options: Option<SortOptions>,
888        limit: Option<usize>,
889        expected_data: Vec<u32>,
890    ) where
891        T: ArrowPrimitiveType,
892        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
893    {
894        let output = PrimitiveArray::<T>::from(data);
895        let expected = UInt32Array::from(expected_data);
896        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
897        assert_eq!(output, expected)
898    }
899
900    fn test_sort_primitive_arrays<T>(
901        data: Vec<Option<T::Native>>,
902        options: Option<SortOptions>,
903        limit: Option<usize>,
904        expected_data: Vec<Option<T::Native>>,
905    ) where
906        T: ArrowPrimitiveType,
907        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
908    {
909        let output = PrimitiveArray::<T>::from(data);
910        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
911        let output = match limit {
912            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
913            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
914        };
915        assert_eq!(&output, &expected)
916    }
917
918    fn test_sort_to_indices_string_arrays(
919        data: Vec<Option<&str>>,
920        options: Option<SortOptions>,
921        limit: Option<usize>,
922        expected_data: Vec<u32>,
923    ) {
924        let output = StringArray::from(data);
925        let expected = UInt32Array::from(expected_data);
926        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
927        assert_eq!(output, expected)
928    }
929
930    /// Tests both Utf8 and LargeUtf8
931    fn test_sort_string_arrays(
932        data: Vec<Option<&str>>,
933        options: Option<SortOptions>,
934        limit: Option<usize>,
935        expected_data: Vec<Option<&str>>,
936    ) {
937        let output = StringArray::from(data.clone());
938        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
939        let output = match limit {
940            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
941            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
942        };
943        assert_eq!(&output, &expected);
944
945        let output = LargeStringArray::from(data.clone());
946        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
947        let output = match limit {
948            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
949            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
950        };
951        assert_eq!(&output, &expected);
952
953        let output = StringViewArray::from(data);
954        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
955        let output = match limit {
956            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
957            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
958        };
959        assert_eq!(&output, &expected);
960    }
961
962    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
963        data: Vec<Option<&str>>,
964        options: Option<SortOptions>,
965        limit: Option<usize>,
966        expected_data: Vec<Option<&str>>,
967    ) {
968        let array = data.into_iter().collect::<DictionaryArray<T>>();
969        let array_values = array.values().clone();
970        let dict = array_values
971            .as_any()
972            .downcast_ref::<StringArray>()
973            .expect("Unable to get dictionary values");
974
975        let sorted = match limit {
976            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
977            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
978        };
979        let sorted = sorted
980            .as_any()
981            .downcast_ref::<DictionaryArray<T>>()
982            .unwrap();
983        let sorted_values = sorted.values();
984        let sorted_dict = sorted_values
985            .as_any()
986            .downcast_ref::<StringArray>()
987            .expect("Unable to get dictionary values");
988        let sorted_keys = sorted.keys();
989
990        assert_eq!(sorted_dict, dict);
991
992        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
993            if sorted.is_valid(i) {
994                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
995            } else {
996                None
997            }
998        }));
999        let expected = StringArray::from(expected_data);
1000
1001        assert_eq!(sorted_strings, expected)
1002    }
1003
1004    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1005        keys: PrimitiveArray<K>,
1006        values: PrimitiveArray<T>,
1007        options: Option<SortOptions>,
1008        limit: Option<usize>,
1009        expected_data: Vec<Option<T::Native>>,
1010    ) where
1011        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1012    {
1013        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1014        let array_values = array.values().clone();
1015        let dict = array_values.as_primitive::<T>();
1016
1017        let sorted = match limit {
1018            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1019            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1020        };
1021        let sorted = sorted
1022            .as_any()
1023            .downcast_ref::<DictionaryArray<K>>()
1024            .unwrap();
1025        let sorted_values = sorted.values();
1026        let sorted_dict = sorted_values
1027            .as_any()
1028            .downcast_ref::<PrimitiveArray<T>>()
1029            .expect("Unable to get dictionary values");
1030        let sorted_keys = sorted.keys();
1031
1032        assert_eq!(sorted_dict, dict);
1033
1034        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1035            (0..sorted.len())
1036                .map(|i| {
1037                    let key = sorted_keys.value(i).as_usize();
1038                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1039                        Some(sorted_dict.value(key))
1040                    } else {
1041                        None
1042                    }
1043                })
1044                .collect::<Vec<Option<T::Native>>>(),
1045        );
1046        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1047
1048        assert_eq!(sorted_values, expected)
1049    }
1050
1051    fn test_sort_list_arrays<T>(
1052        data: Vec<Option<Vec<Option<T::Native>>>>,
1053        options: Option<SortOptions>,
1054        limit: Option<usize>,
1055        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1056        fixed_length: Option<i32>,
1057    ) where
1058        T: ArrowPrimitiveType,
1059        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1060    {
1061        // for FixedSizedList
1062        if let Some(length) = fixed_length {
1063            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1064                data.clone(),
1065                length,
1066            ));
1067            let sorted = match limit {
1068                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1069                _ => sort(&(input as ArrayRef), options).unwrap(),
1070            };
1071            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1072                expected_data.clone(),
1073                length,
1074            )) as ArrayRef;
1075
1076            assert_eq!(&sorted, &expected);
1077        }
1078
1079        // for List
1080        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1081        let sorted = match limit {
1082            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1083            _ => sort(&(input as ArrayRef), options).unwrap(),
1084        };
1085        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1086            expected_data.clone(),
1087        )) as ArrayRef;
1088
1089        assert_eq!(&sorted, &expected);
1090
1091        // for LargeList
1092        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1093        let sorted = match limit {
1094            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1095            _ => sort(&(input as ArrayRef), options).unwrap(),
1096        };
1097        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1098            expected_data,
1099        )) as ArrayRef;
1100
1101        assert_eq!(&sorted, &expected);
1102    }
1103
1104    fn test_lex_sort_arrays(
1105        input: Vec<SortColumn>,
1106        expected_output: Vec<ArrayRef>,
1107        limit: Option<usize>,
1108    ) {
1109        let sorted = lexsort(&input, limit).unwrap();
1110
1111        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1112            assert_eq!(result, expected);
1113        }
1114    }
1115
1116    /// slice all arrays in expected_output to offset/length
1117    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1118        expected_output
1119            .into_iter()
1120            .map(|array| array.slice(offset, length))
1121            .collect()
1122    }
1123
1124    fn test_sort_binary_arrays(
1125        data: Vec<Option<Vec<u8>>>,
1126        options: Option<SortOptions>,
1127        limit: Option<usize>,
1128        expected_data: Vec<Option<Vec<u8>>>,
1129        fixed_length: Option<i32>,
1130    ) {
1131        // Fixed size binary array
1132        if let Some(length) = fixed_length {
1133            let input = Arc::new(
1134                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1135                    .unwrap(),
1136            );
1137            let sorted = match limit {
1138                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1139                None => sort(&(input as ArrayRef), options).unwrap(),
1140            };
1141            let expected = Arc::new(
1142                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1143                    expected_data.iter().cloned(),
1144                    length,
1145                )
1146                .unwrap(),
1147            ) as ArrayRef;
1148
1149            assert_eq!(&sorted, &expected);
1150        }
1151
1152        // Generic size binary array
1153        fn make_generic_binary_array<S: OffsetSizeTrait>(
1154            data: &[Option<Vec<u8>>],
1155        ) -> Arc<GenericBinaryArray<S>> {
1156            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1157                data.iter()
1158                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1159                    .collect(),
1160            ))
1161        }
1162
1163        // BinaryArray
1164        let input = make_generic_binary_array::<i32>(&data);
1165        let sorted = match limit {
1166            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1167            None => sort(&(input as ArrayRef), options).unwrap(),
1168        };
1169        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1170        assert_eq!(&sorted, &expected);
1171
1172        // LargeBinaryArray
1173        let input = make_generic_binary_array::<i64>(&data);
1174        let sorted = match limit {
1175            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1176            None => sort(&(input as ArrayRef), options).unwrap(),
1177        };
1178        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1179        assert_eq!(&sorted, &expected);
1180    }
1181
1182    #[test]
1183    fn test_sort_to_indices_primitives() {
1184        test_sort_to_indices_primitive_arrays::<Int8Type>(
1185            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1186            None,
1187            None,
1188            vec![0, 5, 3, 1, 4, 2],
1189        );
1190        test_sort_to_indices_primitive_arrays::<Int16Type>(
1191            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1192            None,
1193            None,
1194            vec![0, 5, 3, 1, 4, 2],
1195        );
1196        test_sort_to_indices_primitive_arrays::<Int32Type>(
1197            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1198            None,
1199            None,
1200            vec![0, 5, 3, 1, 4, 2],
1201        );
1202        test_sort_to_indices_primitive_arrays::<Int64Type>(
1203            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1204            None,
1205            None,
1206            vec![0, 5, 3, 1, 4, 2],
1207        );
1208        test_sort_to_indices_primitive_arrays::<Float16Type>(
1209            vec![
1210                None,
1211                Some(f16::from_f32(-0.05)),
1212                Some(f16::from_f32(2.225)),
1213                Some(f16::from_f32(-1.01)),
1214                Some(f16::from_f32(-0.05)),
1215                None,
1216            ],
1217            None,
1218            None,
1219            vec![0, 5, 3, 1, 4, 2],
1220        );
1221        test_sort_to_indices_primitive_arrays::<Float32Type>(
1222            vec![
1223                None,
1224                Some(-0.05),
1225                Some(2.225),
1226                Some(-1.01),
1227                Some(-0.05),
1228                None,
1229            ],
1230            None,
1231            None,
1232            vec![0, 5, 3, 1, 4, 2],
1233        );
1234        test_sort_to_indices_primitive_arrays::<Float64Type>(
1235            vec![
1236                None,
1237                Some(-0.05),
1238                Some(2.225),
1239                Some(-1.01),
1240                Some(-0.05),
1241                None,
1242            ],
1243            None,
1244            None,
1245            vec![0, 5, 3, 1, 4, 2],
1246        );
1247
1248        // descending
1249        test_sort_to_indices_primitive_arrays::<Int8Type>(
1250            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1251            Some(SortOptions {
1252                descending: true,
1253                nulls_first: false,
1254            }),
1255            None,
1256            vec![2, 1, 4, 3, 0, 5],
1257        );
1258
1259        test_sort_to_indices_primitive_arrays::<Int16Type>(
1260            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1261            Some(SortOptions {
1262                descending: true,
1263                nulls_first: false,
1264            }),
1265            None,
1266            vec![2, 1, 4, 3, 0, 5],
1267        );
1268
1269        test_sort_to_indices_primitive_arrays::<Int32Type>(
1270            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1271            Some(SortOptions {
1272                descending: true,
1273                nulls_first: false,
1274            }),
1275            None,
1276            vec![2, 1, 4, 3, 0, 5],
1277        );
1278
1279        test_sort_to_indices_primitive_arrays::<Int64Type>(
1280            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1281            Some(SortOptions {
1282                descending: true,
1283                nulls_first: false,
1284            }),
1285            None,
1286            vec![2, 1, 4, 3, 0, 5],
1287        );
1288
1289        test_sort_to_indices_primitive_arrays::<Float16Type>(
1290            vec![
1291                None,
1292                Some(f16::from_f32(0.005)),
1293                Some(f16::from_f32(20.22)),
1294                Some(f16::from_f32(-10.3)),
1295                Some(f16::from_f32(0.005)),
1296                None,
1297            ],
1298            Some(SortOptions {
1299                descending: true,
1300                nulls_first: false,
1301            }),
1302            None,
1303            vec![2, 1, 4, 3, 0, 5],
1304        );
1305
1306        test_sort_to_indices_primitive_arrays::<Float32Type>(
1307            vec![
1308                None,
1309                Some(0.005),
1310                Some(20.22),
1311                Some(-10.3),
1312                Some(0.005),
1313                None,
1314            ],
1315            Some(SortOptions {
1316                descending: true,
1317                nulls_first: false,
1318            }),
1319            None,
1320            vec![2, 1, 4, 3, 0, 5],
1321        );
1322
1323        test_sort_to_indices_primitive_arrays::<Float64Type>(
1324            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1325            Some(SortOptions {
1326                descending: true,
1327                nulls_first: false,
1328            }),
1329            None,
1330            vec![2, 1, 4, 3, 0, 5],
1331        );
1332
1333        // descending, nulls first
1334        test_sort_to_indices_primitive_arrays::<Int8Type>(
1335            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1336            Some(SortOptions {
1337                descending: true,
1338                nulls_first: true,
1339            }),
1340            None,
1341            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1342        );
1343
1344        test_sort_to_indices_primitive_arrays::<Int16Type>(
1345            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1346            Some(SortOptions {
1347                descending: true,
1348                nulls_first: true,
1349            }),
1350            None,
1351            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1352        );
1353
1354        test_sort_to_indices_primitive_arrays::<Int32Type>(
1355            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1356            Some(SortOptions {
1357                descending: true,
1358                nulls_first: true,
1359            }),
1360            None,
1361            vec![0, 5, 2, 1, 4, 3],
1362        );
1363
1364        test_sort_to_indices_primitive_arrays::<Int64Type>(
1365            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1366            Some(SortOptions {
1367                descending: true,
1368                nulls_first: true,
1369            }),
1370            None,
1371            vec![0, 5, 2, 1, 4, 3],
1372        );
1373
1374        test_sort_to_indices_primitive_arrays::<Float16Type>(
1375            vec![
1376                None,
1377                Some(f16::from_f32(0.1)),
1378                Some(f16::from_f32(0.2)),
1379                Some(f16::from_f32(-1.3)),
1380                Some(f16::from_f32(0.01)),
1381                None,
1382            ],
1383            Some(SortOptions {
1384                descending: true,
1385                nulls_first: true,
1386            }),
1387            None,
1388            vec![0, 5, 2, 1, 4, 3],
1389        );
1390
1391        test_sort_to_indices_primitive_arrays::<Float32Type>(
1392            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1393            Some(SortOptions {
1394                descending: true,
1395                nulls_first: true,
1396            }),
1397            None,
1398            vec![0, 5, 2, 1, 4, 3],
1399        );
1400
1401        test_sort_to_indices_primitive_arrays::<Float64Type>(
1402            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1403            Some(SortOptions {
1404                descending: true,
1405                nulls_first: true,
1406            }),
1407            None,
1408            vec![0, 5, 2, 1, 4, 3],
1409        );
1410
1411        // valid values less than limit with extra nulls
1412        test_sort_to_indices_primitive_arrays::<Float64Type>(
1413            vec![Some(2.0), None, None, Some(1.0)],
1414            Some(SortOptions {
1415                descending: false,
1416                nulls_first: false,
1417            }),
1418            Some(3),
1419            vec![3, 0, 1],
1420        );
1421
1422        test_sort_to_indices_primitive_arrays::<Float64Type>(
1423            vec![Some(2.0), None, None, Some(1.0)],
1424            Some(SortOptions {
1425                descending: false,
1426                nulls_first: true,
1427            }),
1428            Some(3),
1429            vec![1, 2, 3],
1430        );
1431
1432        // more nulls than limit
1433        test_sort_to_indices_primitive_arrays::<Float64Type>(
1434            vec![Some(1.0), None, None, None],
1435            Some(SortOptions {
1436                descending: false,
1437                nulls_first: true,
1438            }),
1439            Some(2),
1440            vec![1, 2],
1441        );
1442
1443        test_sort_to_indices_primitive_arrays::<Float64Type>(
1444            vec![Some(1.0), None, None, None],
1445            Some(SortOptions {
1446                descending: false,
1447                nulls_first: false,
1448            }),
1449            Some(2),
1450            vec![0, 1],
1451        );
1452    }
1453
1454    #[test]
1455    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1456        test_sort_to_indices_primitive_arrays::<Int32Type>(
1457            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1458            Some(SortOptions {
1459                descending: false,
1460                nulls_first: false,
1461            }),
1462            Some(2),
1463            vec![4, 6],
1464        );
1465    }
1466
1467    #[test]
1468    fn test_sort_boolean() {
1469        // boolean
1470        test_sort_to_indices_boolean_arrays(
1471            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1472            None,
1473            None,
1474            vec![0, 5, 1, 4, 2, 3],
1475        );
1476
1477        // boolean, descending
1478        test_sort_to_indices_boolean_arrays(
1479            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1480            Some(SortOptions {
1481                descending: true,
1482                nulls_first: false,
1483            }),
1484            None,
1485            vec![2, 3, 1, 4, 0, 5],
1486        );
1487
1488        // boolean, descending, nulls first
1489        test_sort_to_indices_boolean_arrays(
1490            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1491            Some(SortOptions {
1492                descending: true,
1493                nulls_first: true,
1494            }),
1495            None,
1496            vec![0, 5, 2, 3, 1, 4],
1497        );
1498
1499        // boolean, descending, nulls first, limit
1500        test_sort_to_indices_boolean_arrays(
1501            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1502            Some(SortOptions {
1503                descending: true,
1504                nulls_first: true,
1505            }),
1506            Some(3),
1507            vec![0, 5, 2],
1508        );
1509
1510        // valid values less than limit with extra nulls
1511        test_sort_to_indices_boolean_arrays(
1512            vec![Some(true), None, None, Some(false)],
1513            Some(SortOptions {
1514                descending: false,
1515                nulls_first: false,
1516            }),
1517            Some(3),
1518            vec![3, 0, 1],
1519        );
1520
1521        test_sort_to_indices_boolean_arrays(
1522            vec![Some(true), None, None, Some(false)],
1523            Some(SortOptions {
1524                descending: false,
1525                nulls_first: true,
1526            }),
1527            Some(3),
1528            vec![1, 2, 3],
1529        );
1530
1531        // more nulls than limit
1532        test_sort_to_indices_boolean_arrays(
1533            vec![Some(true), None, None, None],
1534            Some(SortOptions {
1535                descending: false,
1536                nulls_first: true,
1537            }),
1538            Some(2),
1539            vec![1, 2],
1540        );
1541
1542        test_sort_to_indices_boolean_arrays(
1543            vec![Some(true), None, None, None],
1544            Some(SortOptions {
1545                descending: false,
1546                nulls_first: false,
1547            }),
1548            Some(2),
1549            vec![0, 1],
1550        );
1551    }
1552
1553    #[test]
1554    fn test_sort_indices_decimal128() {
1555        // decimal default
1556        test_sort_to_indices_decimal128_array(
1557            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1558            None,
1559            None,
1560            vec![0, 6, 4, 2, 3, 5, 1],
1561        );
1562        // decimal descending
1563        test_sort_to_indices_decimal128_array(
1564            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1565            Some(SortOptions {
1566                descending: true,
1567                nulls_first: false,
1568            }),
1569            None,
1570            vec![1, 5, 3, 2, 4, 0, 6],
1571        );
1572        // decimal null_first and descending
1573        test_sort_to_indices_decimal128_array(
1574            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1575            Some(SortOptions {
1576                descending: true,
1577                nulls_first: true,
1578            }),
1579            None,
1580            vec![0, 6, 1, 5, 3, 2, 4],
1581        );
1582        // decimal null_first
1583        test_sort_to_indices_decimal128_array(
1584            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1585            Some(SortOptions {
1586                descending: false,
1587                nulls_first: true,
1588            }),
1589            None,
1590            vec![0, 6, 4, 2, 3, 5, 1],
1591        );
1592        // limit
1593        test_sort_to_indices_decimal128_array(
1594            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1595            None,
1596            Some(3),
1597            vec![0, 6, 4],
1598        );
1599        // limit descending
1600        test_sort_to_indices_decimal128_array(
1601            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1602            Some(SortOptions {
1603                descending: true,
1604                nulls_first: false,
1605            }),
1606            Some(3),
1607            vec![1, 5, 3],
1608        );
1609        // limit descending null_first
1610        test_sort_to_indices_decimal128_array(
1611            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1612            Some(SortOptions {
1613                descending: true,
1614                nulls_first: true,
1615            }),
1616            Some(3),
1617            vec![0, 6, 1],
1618        );
1619        // limit null_first
1620        test_sort_to_indices_decimal128_array(
1621            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1622            Some(SortOptions {
1623                descending: false,
1624                nulls_first: true,
1625            }),
1626            Some(3),
1627            vec![0, 6, 4],
1628        );
1629    }
1630
1631    #[test]
1632    fn test_sort_indices_decimal256() {
1633        let data = vec![
1634            None,
1635            Some(i256::from_i128(5)),
1636            Some(i256::from_i128(2)),
1637            Some(i256::from_i128(3)),
1638            Some(i256::from_i128(1)),
1639            Some(i256::from_i128(4)),
1640            None,
1641        ];
1642
1643        // decimal default
1644        test_sort_to_indices_decimal256_array(data.clone(), None, None, vec![0, 6, 4, 2, 3, 5, 1]);
1645        // decimal descending
1646        test_sort_to_indices_decimal256_array(
1647            data.clone(),
1648            Some(SortOptions {
1649                descending: true,
1650                nulls_first: false,
1651            }),
1652            None,
1653            vec![1, 5, 3, 2, 4, 0, 6],
1654        );
1655        // decimal null_first and descending
1656        test_sort_to_indices_decimal256_array(
1657            data.clone(),
1658            Some(SortOptions {
1659                descending: true,
1660                nulls_first: true,
1661            }),
1662            None,
1663            vec![0, 6, 1, 5, 3, 2, 4],
1664        );
1665        // decimal null_first
1666        test_sort_to_indices_decimal256_array(
1667            data.clone(),
1668            Some(SortOptions {
1669                descending: false,
1670                nulls_first: true,
1671            }),
1672            None,
1673            vec![0, 6, 4, 2, 3, 5, 1],
1674        );
1675        // limit
1676        test_sort_to_indices_decimal256_array(data.clone(), None, Some(3), vec![0, 6, 4]);
1677        // limit descending
1678        test_sort_to_indices_decimal256_array(
1679            data.clone(),
1680            Some(SortOptions {
1681                descending: true,
1682                nulls_first: false,
1683            }),
1684            Some(3),
1685            vec![1, 5, 3],
1686        );
1687        // limit descending null_first
1688        test_sort_to_indices_decimal256_array(
1689            data.clone(),
1690            Some(SortOptions {
1691                descending: true,
1692                nulls_first: true,
1693            }),
1694            Some(3),
1695            vec![0, 6, 1],
1696        );
1697        // limit null_first
1698        test_sort_to_indices_decimal256_array(
1699            data,
1700            Some(SortOptions {
1701                descending: false,
1702                nulls_first: true,
1703            }),
1704            Some(3),
1705            vec![0, 6, 4],
1706        );
1707    }
1708
1709    #[test]
1710    fn test_sort_indices_decimal256_max_min() {
1711        let data = vec![
1712            None,
1713            Some(i256::MIN),
1714            Some(i256::from_i128(1)),
1715            Some(i256::MAX),
1716            Some(i256::from_i128(-1)),
1717        ];
1718        test_sort_to_indices_decimal256_array(
1719            data.clone(),
1720            Some(SortOptions {
1721                descending: false,
1722                nulls_first: true,
1723            }),
1724            None,
1725            vec![0, 1, 4, 2, 3],
1726        );
1727
1728        test_sort_to_indices_decimal256_array(
1729            data.clone(),
1730            Some(SortOptions {
1731                descending: true,
1732                nulls_first: true,
1733            }),
1734            None,
1735            vec![0, 3, 2, 4, 1],
1736        );
1737
1738        test_sort_to_indices_decimal256_array(
1739            data.clone(),
1740            Some(SortOptions {
1741                descending: false,
1742                nulls_first: true,
1743            }),
1744            Some(4),
1745            vec![0, 1, 4, 2],
1746        );
1747
1748        test_sort_to_indices_decimal256_array(
1749            data.clone(),
1750            Some(SortOptions {
1751                descending: true,
1752                nulls_first: true,
1753            }),
1754            Some(4),
1755            vec![0, 3, 2, 4],
1756        );
1757    }
1758
1759    #[test]
1760    fn test_sort_decimal128() {
1761        // decimal default
1762        test_sort_decimal128_array(
1763            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1764            None,
1765            None,
1766            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
1767        );
1768        // decimal descending
1769        test_sort_decimal128_array(
1770            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1771            Some(SortOptions {
1772                descending: true,
1773                nulls_first: false,
1774            }),
1775            None,
1776            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
1777        );
1778        // decimal null_first and descending
1779        test_sort_decimal128_array(
1780            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1781            Some(SortOptions {
1782                descending: true,
1783                nulls_first: true,
1784            }),
1785            None,
1786            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
1787        );
1788        // decimal null_first
1789        test_sort_decimal128_array(
1790            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1791            Some(SortOptions {
1792                descending: false,
1793                nulls_first: true,
1794            }),
1795            None,
1796            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
1797        );
1798        // limit
1799        test_sort_decimal128_array(
1800            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1801            None,
1802            Some(3),
1803            vec![None, None, Some(1)],
1804        );
1805        // limit descending
1806        test_sort_decimal128_array(
1807            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1808            Some(SortOptions {
1809                descending: true,
1810                nulls_first: false,
1811            }),
1812            Some(3),
1813            vec![Some(5), Some(4), Some(3)],
1814        );
1815        // limit descending null_first
1816        test_sort_decimal128_array(
1817            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1818            Some(SortOptions {
1819                descending: true,
1820                nulls_first: true,
1821            }),
1822            Some(3),
1823            vec![None, None, Some(5)],
1824        );
1825        // limit null_first
1826        test_sort_decimal128_array(
1827            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
1828            Some(SortOptions {
1829                descending: false,
1830                nulls_first: true,
1831            }),
1832            Some(3),
1833            vec![None, None, Some(1)],
1834        );
1835    }
1836
1837    #[test]
1838    fn test_sort_decimal256() {
1839        let data = vec![
1840            None,
1841            Some(i256::from_i128(5)),
1842            Some(i256::from_i128(2)),
1843            Some(i256::from_i128(3)),
1844            Some(i256::from_i128(1)),
1845            Some(i256::from_i128(4)),
1846            None,
1847        ];
1848        // decimal default
1849        test_sort_decimal256_array(
1850            data.clone(),
1851            None,
1852            None,
1853            [None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
1854                .iter()
1855                .map(|v| v.map(i256::from_i128))
1856                .collect(),
1857        );
1858        // decimal descending
1859        test_sort_decimal256_array(
1860            data.clone(),
1861            Some(SortOptions {
1862                descending: true,
1863                nulls_first: false,
1864            }),
1865            None,
1866            [Some(5), Some(4), Some(3), Some(2), Some(1), None, None]
1867                .iter()
1868                .map(|v| v.map(i256::from_i128))
1869                .collect(),
1870        );
1871        // decimal null_first and descending
1872        test_sort_decimal256_array(
1873            data.clone(),
1874            Some(SortOptions {
1875                descending: true,
1876                nulls_first: true,
1877            }),
1878            None,
1879            [None, None, Some(5), Some(4), Some(3), Some(2), Some(1)]
1880                .iter()
1881                .map(|v| v.map(i256::from_i128))
1882                .collect(),
1883        );
1884        // decimal null_first
1885        test_sort_decimal256_array(
1886            data.clone(),
1887            Some(SortOptions {
1888                descending: false,
1889                nulls_first: true,
1890            }),
1891            None,
1892            [None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
1893                .iter()
1894                .map(|v| v.map(i256::from_i128))
1895                .collect(),
1896        );
1897        // limit
1898        test_sort_decimal256_array(
1899            data.clone(),
1900            None,
1901            Some(3),
1902            [None, None, Some(1)]
1903                .iter()
1904                .map(|v| v.map(i256::from_i128))
1905                .collect(),
1906        );
1907        // limit descending
1908        test_sort_decimal256_array(
1909            data.clone(),
1910            Some(SortOptions {
1911                descending: true,
1912                nulls_first: false,
1913            }),
1914            Some(3),
1915            [Some(5), Some(4), Some(3)]
1916                .iter()
1917                .map(|v| v.map(i256::from_i128))
1918                .collect(),
1919        );
1920        // limit descending null_first
1921        test_sort_decimal256_array(
1922            data.clone(),
1923            Some(SortOptions {
1924                descending: true,
1925                nulls_first: true,
1926            }),
1927            Some(3),
1928            [None, None, Some(5)]
1929                .iter()
1930                .map(|v| v.map(i256::from_i128))
1931                .collect(),
1932        );
1933        // limit null_first
1934        test_sort_decimal256_array(
1935            data,
1936            Some(SortOptions {
1937                descending: false,
1938                nulls_first: true,
1939            }),
1940            Some(3),
1941            [None, None, Some(1)]
1942                .iter()
1943                .map(|v| v.map(i256::from_i128))
1944                .collect(),
1945        );
1946    }
1947
1948    #[test]
1949    fn test_sort_decimal256_max_min() {
1950        test_sort_decimal256_array(
1951            vec![
1952                None,
1953                Some(i256::MIN),
1954                Some(i256::from_i128(1)),
1955                Some(i256::MAX),
1956                Some(i256::from_i128(-1)),
1957                None,
1958            ],
1959            Some(SortOptions {
1960                descending: false,
1961                nulls_first: true,
1962            }),
1963            None,
1964            vec![
1965                None,
1966                None,
1967                Some(i256::MIN),
1968                Some(i256::from_i128(-1)),
1969                Some(i256::from_i128(1)),
1970                Some(i256::MAX),
1971            ],
1972        );
1973
1974        test_sort_decimal256_array(
1975            vec![
1976                None,
1977                Some(i256::MIN),
1978                Some(i256::from_i128(1)),
1979                Some(i256::MAX),
1980                Some(i256::from_i128(-1)),
1981                None,
1982            ],
1983            Some(SortOptions {
1984                descending: true,
1985                nulls_first: true,
1986            }),
1987            None,
1988            vec![
1989                None,
1990                None,
1991                Some(i256::MAX),
1992                Some(i256::from_i128(1)),
1993                Some(i256::from_i128(-1)),
1994                Some(i256::MIN),
1995            ],
1996        );
1997
1998        test_sort_decimal256_array(
1999            vec![
2000                None,
2001                Some(i256::MIN),
2002                Some(i256::from_i128(1)),
2003                Some(i256::MAX),
2004                Some(i256::from_i128(-1)),
2005                None,
2006            ],
2007            Some(SortOptions {
2008                descending: false,
2009                nulls_first: true,
2010            }),
2011            Some(4),
2012            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2013        );
2014
2015        test_sort_decimal256_array(
2016            vec![
2017                None,
2018                Some(i256::MIN),
2019                Some(i256::from_i128(1)),
2020                Some(i256::MAX),
2021                Some(i256::from_i128(-1)),
2022                None,
2023            ],
2024            Some(SortOptions {
2025                descending: true,
2026                nulls_first: true,
2027            }),
2028            Some(4),
2029            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2030        );
2031    }
2032
2033    #[test]
2034    fn test_sort_primitives() {
2035        // default case
2036        test_sort_primitive_arrays::<UInt8Type>(
2037            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2038            None,
2039            None,
2040            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2041        );
2042        test_sort_primitive_arrays::<UInt16Type>(
2043            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2044            None,
2045            None,
2046            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2047        );
2048        test_sort_primitive_arrays::<UInt32Type>(
2049            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2050            None,
2051            None,
2052            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2053        );
2054        test_sort_primitive_arrays::<UInt64Type>(
2055            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2056            None,
2057            None,
2058            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2059        );
2060
2061        // descending
2062        test_sort_primitive_arrays::<Int8Type>(
2063            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2064            Some(SortOptions {
2065                descending: true,
2066                nulls_first: false,
2067            }),
2068            None,
2069            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2070        );
2071        test_sort_primitive_arrays::<Int16Type>(
2072            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2073            Some(SortOptions {
2074                descending: true,
2075                nulls_first: false,
2076            }),
2077            None,
2078            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2079        );
2080        test_sort_primitive_arrays::<Int32Type>(
2081            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2082            Some(SortOptions {
2083                descending: true,
2084                nulls_first: false,
2085            }),
2086            None,
2087            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2088        );
2089        test_sort_primitive_arrays::<Int16Type>(
2090            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2091            Some(SortOptions {
2092                descending: true,
2093                nulls_first: false,
2094            }),
2095            None,
2096            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2097        );
2098
2099        // descending, nulls first
2100        test_sort_primitive_arrays::<Int8Type>(
2101            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2102            Some(SortOptions {
2103                descending: true,
2104                nulls_first: true,
2105            }),
2106            None,
2107            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2108        );
2109        test_sort_primitive_arrays::<Int16Type>(
2110            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2111            Some(SortOptions {
2112                descending: true,
2113                nulls_first: true,
2114            }),
2115            None,
2116            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2117        );
2118        test_sort_primitive_arrays::<Int32Type>(
2119            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2120            Some(SortOptions {
2121                descending: true,
2122                nulls_first: true,
2123            }),
2124            None,
2125            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2126        );
2127        test_sort_primitive_arrays::<Int64Type>(
2128            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2129            Some(SortOptions {
2130                descending: true,
2131                nulls_first: true,
2132            }),
2133            None,
2134            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2135        );
2136
2137        test_sort_primitive_arrays::<Int64Type>(
2138            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2139            Some(SortOptions {
2140                descending: true,
2141                nulls_first: true,
2142            }),
2143            Some(3),
2144            vec![None, None, Some(2)],
2145        );
2146
2147        test_sort_primitive_arrays::<Float16Type>(
2148            vec![
2149                None,
2150                Some(f16::from_f32(0.0)),
2151                Some(f16::from_f32(2.0)),
2152                Some(f16::from_f32(-1.0)),
2153                Some(f16::from_f32(0.0)),
2154                None,
2155            ],
2156            Some(SortOptions {
2157                descending: true,
2158                nulls_first: true,
2159            }),
2160            None,
2161            vec![
2162                None,
2163                None,
2164                Some(f16::from_f32(2.0)),
2165                Some(f16::from_f32(0.0)),
2166                Some(f16::from_f32(0.0)),
2167                Some(f16::from_f32(-1.0)),
2168            ],
2169        );
2170
2171        test_sort_primitive_arrays::<Float32Type>(
2172            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2173            Some(SortOptions {
2174                descending: true,
2175                nulls_first: true,
2176            }),
2177            None,
2178            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2179        );
2180        test_sort_primitive_arrays::<Float64Type>(
2181            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2182            Some(SortOptions {
2183                descending: true,
2184                nulls_first: true,
2185            }),
2186            None,
2187            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2188        );
2189        test_sort_primitive_arrays::<Float64Type>(
2190            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2191            Some(SortOptions {
2192                descending: true,
2193                nulls_first: true,
2194            }),
2195            None,
2196            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2197        );
2198
2199        // int8 nulls first
2200        test_sort_primitive_arrays::<Int8Type>(
2201            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2202            Some(SortOptions {
2203                descending: false,
2204                nulls_first: true,
2205            }),
2206            None,
2207            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2208        );
2209        test_sort_primitive_arrays::<Int16Type>(
2210            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2211            Some(SortOptions {
2212                descending: false,
2213                nulls_first: true,
2214            }),
2215            None,
2216            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2217        );
2218        test_sort_primitive_arrays::<Int32Type>(
2219            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2220            Some(SortOptions {
2221                descending: false,
2222                nulls_first: true,
2223            }),
2224            None,
2225            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2226        );
2227        test_sort_primitive_arrays::<Int64Type>(
2228            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2229            Some(SortOptions {
2230                descending: false,
2231                nulls_first: true,
2232            }),
2233            None,
2234            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2235        );
2236        test_sort_primitive_arrays::<Float16Type>(
2237            vec![
2238                None,
2239                Some(f16::from_f32(0.0)),
2240                Some(f16::from_f32(2.0)),
2241                Some(f16::from_f32(-1.0)),
2242                Some(f16::from_f32(0.0)),
2243                None,
2244            ],
2245            Some(SortOptions {
2246                descending: false,
2247                nulls_first: true,
2248            }),
2249            None,
2250            vec![
2251                None,
2252                None,
2253                Some(f16::from_f32(-1.0)),
2254                Some(f16::from_f32(0.0)),
2255                Some(f16::from_f32(0.0)),
2256                Some(f16::from_f32(2.0)),
2257            ],
2258        );
2259        test_sort_primitive_arrays::<Float32Type>(
2260            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2261            Some(SortOptions {
2262                descending: false,
2263                nulls_first: true,
2264            }),
2265            None,
2266            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2267        );
2268        test_sort_primitive_arrays::<Float64Type>(
2269            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2270            Some(SortOptions {
2271                descending: false,
2272                nulls_first: true,
2273            }),
2274            None,
2275            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2276        );
2277        test_sort_primitive_arrays::<Float64Type>(
2278            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2279            Some(SortOptions {
2280                descending: false,
2281                nulls_first: true,
2282            }),
2283            None,
2284            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2285        );
2286
2287        // limit
2288        test_sort_primitive_arrays::<Float64Type>(
2289            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2290            Some(SortOptions {
2291                descending: false,
2292                nulls_first: true,
2293            }),
2294            Some(2),
2295            vec![Some(1.0), Some(f64::NAN)],
2296        );
2297
2298        // limit with actual value
2299        test_sort_primitive_arrays::<Float64Type>(
2300            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2301            Some(SortOptions {
2302                descending: false,
2303                nulls_first: true,
2304            }),
2305            Some(3),
2306            vec![Some(1.0), Some(2.0), Some(3.0)],
2307        );
2308
2309        // valid values less than limit with extra nulls
2310        test_sort_primitive_arrays::<Float64Type>(
2311            vec![Some(2.0), None, None, Some(1.0)],
2312            Some(SortOptions {
2313                descending: false,
2314                nulls_first: false,
2315            }),
2316            Some(3),
2317            vec![Some(1.0), Some(2.0), None],
2318        );
2319
2320        test_sort_primitive_arrays::<Float64Type>(
2321            vec![Some(2.0), None, None, Some(1.0)],
2322            Some(SortOptions {
2323                descending: false,
2324                nulls_first: true,
2325            }),
2326            Some(3),
2327            vec![None, None, Some(1.0)],
2328        );
2329
2330        // more nulls than limit
2331        test_sort_primitive_arrays::<Float64Type>(
2332            vec![Some(2.0), None, None, None],
2333            Some(SortOptions {
2334                descending: false,
2335                nulls_first: true,
2336            }),
2337            Some(2),
2338            vec![None, None],
2339        );
2340
2341        test_sort_primitive_arrays::<Float64Type>(
2342            vec![Some(2.0), None, None, None],
2343            Some(SortOptions {
2344                descending: false,
2345                nulls_first: false,
2346            }),
2347            Some(2),
2348            vec![Some(2.0), None],
2349        );
2350    }
2351
2352    #[test]
2353    fn test_sort_to_indices_strings() {
2354        test_sort_to_indices_string_arrays(
2355            vec![
2356                None,
2357                Some("bad"),
2358                Some("sad"),
2359                None,
2360                Some("glad"),
2361                Some("-ad"),
2362            ],
2363            None,
2364            None,
2365            vec![0, 3, 5, 1, 4, 2],
2366        );
2367
2368        test_sort_to_indices_string_arrays(
2369            vec![
2370                None,
2371                Some("bad"),
2372                Some("sad"),
2373                None,
2374                Some("glad"),
2375                Some("-ad"),
2376            ],
2377            Some(SortOptions {
2378                descending: true,
2379                nulls_first: false,
2380            }),
2381            None,
2382            vec![2, 4, 1, 5, 0, 3],
2383        );
2384
2385        test_sort_to_indices_string_arrays(
2386            vec![
2387                None,
2388                Some("bad"),
2389                Some("sad"),
2390                None,
2391                Some("glad"),
2392                Some("-ad"),
2393            ],
2394            Some(SortOptions {
2395                descending: false,
2396                nulls_first: true,
2397            }),
2398            None,
2399            vec![0, 3, 5, 1, 4, 2],
2400        );
2401
2402        test_sort_to_indices_string_arrays(
2403            vec![
2404                None,
2405                Some("bad"),
2406                Some("sad"),
2407                None,
2408                Some("glad"),
2409                Some("-ad"),
2410            ],
2411            Some(SortOptions {
2412                descending: true,
2413                nulls_first: true,
2414            }),
2415            None,
2416            vec![0, 3, 2, 4, 1, 5],
2417        );
2418
2419        test_sort_to_indices_string_arrays(
2420            vec![
2421                None,
2422                Some("bad"),
2423                Some("sad"),
2424                None,
2425                Some("glad"),
2426                Some("-ad"),
2427            ],
2428            Some(SortOptions {
2429                descending: true,
2430                nulls_first: true,
2431            }),
2432            Some(3),
2433            vec![0, 3, 2],
2434        );
2435
2436        // valid values less than limit with extra nulls
2437        test_sort_to_indices_string_arrays(
2438            vec![Some("def"), None, None, Some("abc")],
2439            Some(SortOptions {
2440                descending: false,
2441                nulls_first: false,
2442            }),
2443            Some(3),
2444            vec![3, 0, 1],
2445        );
2446
2447        test_sort_to_indices_string_arrays(
2448            vec![Some("def"), None, None, Some("abc")],
2449            Some(SortOptions {
2450                descending: false,
2451                nulls_first: true,
2452            }),
2453            Some(3),
2454            vec![1, 2, 3],
2455        );
2456
2457        // more nulls than limit
2458        test_sort_to_indices_string_arrays(
2459            vec![Some("def"), None, None, None],
2460            Some(SortOptions {
2461                descending: false,
2462                nulls_first: true,
2463            }),
2464            Some(2),
2465            vec![1, 2],
2466        );
2467
2468        test_sort_to_indices_string_arrays(
2469            vec![Some("def"), None, None, None],
2470            Some(SortOptions {
2471                descending: false,
2472                nulls_first: false,
2473            }),
2474            Some(2),
2475            vec![0, 1],
2476        );
2477    }
2478
2479    #[test]
2480    fn test_sort_strings() {
2481        test_sort_string_arrays(
2482            vec![
2483                None,
2484                Some("bad"),
2485                Some("sad"),
2486                Some("long string longer than 12 bytes"),
2487                None,
2488                Some("glad"),
2489                Some("lang string longer than 12 bytes"),
2490                Some("-ad"),
2491            ],
2492            None,
2493            None,
2494            vec![
2495                None,
2496                None,
2497                Some("-ad"),
2498                Some("bad"),
2499                Some("glad"),
2500                Some("lang string longer than 12 bytes"),
2501                Some("long string longer than 12 bytes"),
2502                Some("sad"),
2503            ],
2504        );
2505
2506        test_sort_string_arrays(
2507            vec![
2508                None,
2509                Some("bad"),
2510                Some("sad"),
2511                Some("long string longer than 12 bytes"),
2512                None,
2513                Some("glad"),
2514                Some("lang string longer than 12 bytes"),
2515                Some("-ad"),
2516            ],
2517            Some(SortOptions {
2518                descending: true,
2519                nulls_first: false,
2520            }),
2521            None,
2522            vec![
2523                Some("sad"),
2524                Some("long string longer than 12 bytes"),
2525                Some("lang string longer than 12 bytes"),
2526                Some("glad"),
2527                Some("bad"),
2528                Some("-ad"),
2529                None,
2530                None,
2531            ],
2532        );
2533
2534        test_sort_string_arrays(
2535            vec![
2536                None,
2537                Some("bad"),
2538                Some("long string longer than 12 bytes"),
2539                Some("sad"),
2540                None,
2541                Some("glad"),
2542                Some("lang string longer than 12 bytes"),
2543                Some("-ad"),
2544            ],
2545            Some(SortOptions {
2546                descending: false,
2547                nulls_first: true,
2548            }),
2549            None,
2550            vec![
2551                None,
2552                None,
2553                Some("-ad"),
2554                Some("bad"),
2555                Some("glad"),
2556                Some("lang string longer than 12 bytes"),
2557                Some("long string longer than 12 bytes"),
2558                Some("sad"),
2559            ],
2560        );
2561
2562        test_sort_string_arrays(
2563            vec![
2564                None,
2565                Some("bad"),
2566                Some("long string longer than 12 bytes"),
2567                Some("sad"),
2568                None,
2569                Some("glad"),
2570                Some("lang string longer than 12 bytes"),
2571                Some("-ad"),
2572            ],
2573            Some(SortOptions {
2574                descending: true,
2575                nulls_first: true,
2576            }),
2577            None,
2578            vec![
2579                None,
2580                None,
2581                Some("sad"),
2582                Some("long string longer than 12 bytes"),
2583                Some("lang string longer than 12 bytes"),
2584                Some("glad"),
2585                Some("bad"),
2586                Some("-ad"),
2587            ],
2588        );
2589
2590        test_sort_string_arrays(
2591            vec![
2592                None,
2593                Some("bad"),
2594                Some("long string longer than 12 bytes"),
2595                Some("sad"),
2596                None,
2597                Some("glad"),
2598                Some("lang string longer than 12 bytes"),
2599                Some("-ad"),
2600            ],
2601            Some(SortOptions {
2602                descending: true,
2603                nulls_first: true,
2604            }),
2605            Some(3),
2606            vec![None, None, Some("sad")],
2607        );
2608
2609        // valid values less than limit with extra nulls
2610        test_sort_string_arrays(
2611            vec![
2612                Some("def long string longer than 12"),
2613                None,
2614                None,
2615                Some("abc"),
2616            ],
2617            Some(SortOptions {
2618                descending: false,
2619                nulls_first: false,
2620            }),
2621            Some(3),
2622            vec![Some("abc"), Some("def long string longer than 12"), None],
2623        );
2624
2625        test_sort_string_arrays(
2626            vec![
2627                Some("def long string longer than 12"),
2628                None,
2629                None,
2630                Some("abc"),
2631            ],
2632            Some(SortOptions {
2633                descending: false,
2634                nulls_first: true,
2635            }),
2636            Some(3),
2637            vec![None, None, Some("abc")],
2638        );
2639
2640        // more nulls than limit
2641        test_sort_string_arrays(
2642            vec![Some("def long string longer than 12"), None, None, None],
2643            Some(SortOptions {
2644                descending: false,
2645                nulls_first: true,
2646            }),
2647            Some(2),
2648            vec![None, None],
2649        );
2650
2651        test_sort_string_arrays(
2652            vec![Some("def long string longer than 12"), None, None, None],
2653            Some(SortOptions {
2654                descending: false,
2655                nulls_first: false,
2656            }),
2657            Some(2),
2658            vec![Some("def long string longer than 12"), None],
2659        );
2660    }
2661
2662    #[test]
2663    fn test_sort_run_to_run() {
2664        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
2665    }
2666
2667    #[test]
2668    fn test_sort_run_to_indices() {
2669        test_sort_run_inner(|array, sort_options, limit| {
2670            let indices = sort_to_indices(array, sort_options, limit).unwrap();
2671            take(array, &indices, None)
2672        });
2673    }
2674
2675    fn test_sort_run_inner<F>(sort_fn: F)
2676    where
2677        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2678    {
2679        // Create an input array for testing
2680        let total_len = 80;
2681        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
2682        let repeats: Vec<usize> = vec![1, 3, 2, 4];
2683        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
2684        for ix in 0_usize..32 {
2685            let repeat: usize = repeats[ix % repeats.len()];
2686            let val: Option<i32> = vals[ix % vals.len()];
2687            input_array.resize(input_array.len() + repeat, val);
2688        }
2689
2690        // create run array using input_array
2691        // Encode the input_array to run array
2692        let mut builder =
2693            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
2694        builder.extend(input_array.iter().copied());
2695        let run_array = builder.finish();
2696
2697        // slice lengths that are tested
2698        let slice_lens = [
2699            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
2700        ];
2701        for slice_len in slice_lens {
2702            test_sort_run_inner2(
2703                input_array.as_slice(),
2704                &run_array,
2705                0,
2706                slice_len,
2707                None,
2708                &sort_fn,
2709            );
2710            test_sort_run_inner2(
2711                input_array.as_slice(),
2712                &run_array,
2713                total_len - slice_len,
2714                slice_len,
2715                None,
2716                &sort_fn,
2717            );
2718            // Test with non zero limit
2719            if slice_len > 1 {
2720                test_sort_run_inner2(
2721                    input_array.as_slice(),
2722                    &run_array,
2723                    0,
2724                    slice_len,
2725                    Some(slice_len / 2),
2726                    &sort_fn,
2727                );
2728                test_sort_run_inner2(
2729                    input_array.as_slice(),
2730                    &run_array,
2731                    total_len - slice_len,
2732                    slice_len,
2733                    Some(slice_len / 2),
2734                    &sort_fn,
2735                );
2736            }
2737        }
2738    }
2739
2740    fn test_sort_run_inner2<F>(
2741        input_array: &[Option<i32>],
2742        run_array: &RunArray<Int16Type>,
2743        offset: usize,
2744        length: usize,
2745        limit: Option<usize>,
2746        sort_fn: &F,
2747    ) where
2748        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2749    {
2750        // Run the sort and build actual result
2751        let sliced_array = run_array.slice(offset, length);
2752        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
2753        let sorted_run_array = sorted_sliced_array
2754            .as_any()
2755            .downcast_ref::<RunArray<Int16Type>>()
2756            .unwrap();
2757        let typed_run_array = sorted_run_array
2758            .downcast::<PrimitiveArray<Int32Type>>()
2759            .unwrap();
2760        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
2761
2762        // build expected result.
2763        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
2764        sliced_input.sort();
2765        let expected = if let Some(limit) = limit {
2766            sliced_input.iter().take(limit).copied().collect()
2767        } else {
2768            sliced_input
2769        };
2770
2771        assert_eq!(expected, actual)
2772    }
2773
2774    #[test]
2775    fn test_sort_string_dicts() {
2776        test_sort_string_dict_arrays::<Int8Type>(
2777            vec![
2778                None,
2779                Some("bad"),
2780                Some("sad"),
2781                None,
2782                Some("glad"),
2783                Some("-ad"),
2784            ],
2785            None,
2786            None,
2787            vec![
2788                None,
2789                None,
2790                Some("-ad"),
2791                Some("bad"),
2792                Some("glad"),
2793                Some("sad"),
2794            ],
2795        );
2796
2797        test_sort_string_dict_arrays::<Int16Type>(
2798            vec![
2799                None,
2800                Some("bad"),
2801                Some("sad"),
2802                None,
2803                Some("glad"),
2804                Some("-ad"),
2805            ],
2806            Some(SortOptions {
2807                descending: true,
2808                nulls_first: false,
2809            }),
2810            None,
2811            vec![
2812                Some("sad"),
2813                Some("glad"),
2814                Some("bad"),
2815                Some("-ad"),
2816                None,
2817                None,
2818            ],
2819        );
2820
2821        test_sort_string_dict_arrays::<Int32Type>(
2822            vec![
2823                None,
2824                Some("bad"),
2825                Some("sad"),
2826                None,
2827                Some("glad"),
2828                Some("-ad"),
2829            ],
2830            Some(SortOptions {
2831                descending: false,
2832                nulls_first: true,
2833            }),
2834            None,
2835            vec![
2836                None,
2837                None,
2838                Some("-ad"),
2839                Some("bad"),
2840                Some("glad"),
2841                Some("sad"),
2842            ],
2843        );
2844
2845        test_sort_string_dict_arrays::<Int16Type>(
2846            vec![
2847                None,
2848                Some("bad"),
2849                Some("sad"),
2850                None,
2851                Some("glad"),
2852                Some("-ad"),
2853            ],
2854            Some(SortOptions {
2855                descending: true,
2856                nulls_first: true,
2857            }),
2858            None,
2859            vec![
2860                None,
2861                None,
2862                Some("sad"),
2863                Some("glad"),
2864                Some("bad"),
2865                Some("-ad"),
2866            ],
2867        );
2868
2869        test_sort_string_dict_arrays::<Int16Type>(
2870            vec![
2871                None,
2872                Some("bad"),
2873                Some("sad"),
2874                None,
2875                Some("glad"),
2876                Some("-ad"),
2877            ],
2878            Some(SortOptions {
2879                descending: true,
2880                nulls_first: true,
2881            }),
2882            Some(3),
2883            vec![None, None, Some("sad")],
2884        );
2885
2886        // valid values less than limit with extra nulls
2887        test_sort_string_dict_arrays::<Int16Type>(
2888            vec![Some("def"), None, None, Some("abc")],
2889            Some(SortOptions {
2890                descending: false,
2891                nulls_first: false,
2892            }),
2893            Some(3),
2894            vec![Some("abc"), Some("def"), None],
2895        );
2896
2897        test_sort_string_dict_arrays::<Int16Type>(
2898            vec![Some("def"), None, None, Some("abc")],
2899            Some(SortOptions {
2900                descending: false,
2901                nulls_first: true,
2902            }),
2903            Some(3),
2904            vec![None, None, Some("abc")],
2905        );
2906
2907        // more nulls than limit
2908        test_sort_string_dict_arrays::<Int16Type>(
2909            vec![Some("def"), None, None, None],
2910            Some(SortOptions {
2911                descending: false,
2912                nulls_first: true,
2913            }),
2914            Some(2),
2915            vec![None, None],
2916        );
2917
2918        test_sort_string_dict_arrays::<Int16Type>(
2919            vec![Some("def"), None, None, None],
2920            Some(SortOptions {
2921                descending: false,
2922                nulls_first: false,
2923            }),
2924            Some(2),
2925            vec![Some("def"), None],
2926        );
2927    }
2928
2929    #[test]
2930    fn test_sort_list() {
2931        test_sort_list_arrays::<Int8Type>(
2932            vec![
2933                Some(vec![Some(1)]),
2934                Some(vec![Some(4)]),
2935                Some(vec![Some(2)]),
2936                Some(vec![Some(3)]),
2937            ],
2938            Some(SortOptions {
2939                descending: false,
2940                nulls_first: false,
2941            }),
2942            None,
2943            vec![
2944                Some(vec![Some(1)]),
2945                Some(vec![Some(2)]),
2946                Some(vec![Some(3)]),
2947                Some(vec![Some(4)]),
2948            ],
2949            Some(1),
2950        );
2951
2952        test_sort_list_arrays::<Float16Type>(
2953            vec![
2954                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
2955                Some(vec![
2956                    Some(f16::from_f32(4.0)),
2957                    Some(f16::from_f32(3.0)),
2958                    Some(f16::from_f32(2.0)),
2959                    Some(f16::from_f32(1.0)),
2960                ]),
2961                Some(vec![
2962                    Some(f16::from_f32(2.0)),
2963                    Some(f16::from_f32(3.0)),
2964                    Some(f16::from_f32(4.0)),
2965                ]),
2966                Some(vec![
2967                    Some(f16::from_f32(3.0)),
2968                    Some(f16::from_f32(3.0)),
2969                    Some(f16::from_f32(3.0)),
2970                    Some(f16::from_f32(3.0)),
2971                ]),
2972                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
2973            ],
2974            Some(SortOptions {
2975                descending: false,
2976                nulls_first: false,
2977            }),
2978            None,
2979            vec![
2980                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
2981                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
2982                Some(vec![
2983                    Some(f16::from_f32(2.0)),
2984                    Some(f16::from_f32(3.0)),
2985                    Some(f16::from_f32(4.0)),
2986                ]),
2987                Some(vec![
2988                    Some(f16::from_f32(3.0)),
2989                    Some(f16::from_f32(3.0)),
2990                    Some(f16::from_f32(3.0)),
2991                    Some(f16::from_f32(3.0)),
2992                ]),
2993                Some(vec![
2994                    Some(f16::from_f32(4.0)),
2995                    Some(f16::from_f32(3.0)),
2996                    Some(f16::from_f32(2.0)),
2997                    Some(f16::from_f32(1.0)),
2998                ]),
2999            ],
3000            None,
3001        );
3002
3003        test_sort_list_arrays::<Float32Type>(
3004            vec![
3005                Some(vec![Some(1.0), Some(0.0)]),
3006                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3007                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3008                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3009                Some(vec![Some(1.0), Some(1.0)]),
3010            ],
3011            Some(SortOptions {
3012                descending: false,
3013                nulls_first: false,
3014            }),
3015            None,
3016            vec![
3017                Some(vec![Some(1.0), Some(0.0)]),
3018                Some(vec![Some(1.0), Some(1.0)]),
3019                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3020                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3021                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3022            ],
3023            None,
3024        );
3025
3026        test_sort_list_arrays::<Float64Type>(
3027            vec![
3028                Some(vec![Some(1.0), Some(0.0)]),
3029                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3030                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3031                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3032                Some(vec![Some(1.0), Some(1.0)]),
3033            ],
3034            Some(SortOptions {
3035                descending: false,
3036                nulls_first: false,
3037            }),
3038            None,
3039            vec![
3040                Some(vec![Some(1.0), Some(0.0)]),
3041                Some(vec![Some(1.0), Some(1.0)]),
3042                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3043                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3044                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3045            ],
3046            None,
3047        );
3048
3049        test_sort_list_arrays::<Int32Type>(
3050            vec![
3051                Some(vec![Some(1), Some(0)]),
3052                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3053                Some(vec![Some(2), Some(3), Some(4)]),
3054                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3055                Some(vec![Some(1), Some(1)]),
3056            ],
3057            Some(SortOptions {
3058                descending: false,
3059                nulls_first: false,
3060            }),
3061            None,
3062            vec![
3063                Some(vec![Some(1), Some(0)]),
3064                Some(vec![Some(1), Some(1)]),
3065                Some(vec![Some(2), Some(3), Some(4)]),
3066                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3067                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3068            ],
3069            None,
3070        );
3071
3072        test_sort_list_arrays::<Int32Type>(
3073            vec![
3074                None,
3075                Some(vec![Some(4), None, Some(2)]),
3076                Some(vec![Some(2), Some(3), Some(4)]),
3077                None,
3078                Some(vec![Some(3), Some(3), None]),
3079            ],
3080            Some(SortOptions {
3081                descending: false,
3082                nulls_first: false,
3083            }),
3084            None,
3085            vec![
3086                Some(vec![Some(2), Some(3), Some(4)]),
3087                Some(vec![Some(3), Some(3), None]),
3088                Some(vec![Some(4), None, Some(2)]),
3089                None,
3090                None,
3091            ],
3092            Some(3),
3093        );
3094
3095        test_sort_list_arrays::<Int32Type>(
3096            vec![
3097                Some(vec![Some(1), Some(0)]),
3098                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3099                Some(vec![Some(2), Some(3), Some(4)]),
3100                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3101                Some(vec![Some(1), Some(1)]),
3102            ],
3103            Some(SortOptions {
3104                descending: false,
3105                nulls_first: false,
3106            }),
3107            Some(2),
3108            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3109            None,
3110        );
3111
3112        // valid values less than limit with extra nulls
3113        test_sort_list_arrays::<Int32Type>(
3114            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3115            Some(SortOptions {
3116                descending: false,
3117                nulls_first: false,
3118            }),
3119            Some(3),
3120            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3121            None,
3122        );
3123
3124        test_sort_list_arrays::<Int32Type>(
3125            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3126            Some(SortOptions {
3127                descending: false,
3128                nulls_first: true,
3129            }),
3130            Some(3),
3131            vec![None, None, Some(vec![Some(1)])],
3132            None,
3133        );
3134
3135        // more nulls than limit
3136        test_sort_list_arrays::<Int32Type>(
3137            vec![Some(vec![Some(1)]), None, None, None],
3138            Some(SortOptions {
3139                descending: false,
3140                nulls_first: true,
3141            }),
3142            Some(2),
3143            vec![None, None],
3144            None,
3145        );
3146
3147        test_sort_list_arrays::<Int32Type>(
3148            vec![Some(vec![Some(1)]), None, None, None],
3149            Some(SortOptions {
3150                descending: false,
3151                nulls_first: false,
3152            }),
3153            Some(2),
3154            vec![Some(vec![Some(1)]), None],
3155            None,
3156        );
3157    }
3158
3159    #[test]
3160    fn test_sort_binary() {
3161        test_sort_binary_arrays(
3162            vec![
3163                Some(vec![0, 0, 0]),
3164                Some(vec![0, 0, 5]),
3165                Some(vec![0, 0, 3]),
3166                Some(vec![0, 0, 7]),
3167                Some(vec![0, 0, 1]),
3168            ],
3169            Some(SortOptions {
3170                descending: false,
3171                nulls_first: false,
3172            }),
3173            None,
3174            vec![
3175                Some(vec![0, 0, 0]),
3176                Some(vec![0, 0, 1]),
3177                Some(vec![0, 0, 3]),
3178                Some(vec![0, 0, 5]),
3179                Some(vec![0, 0, 7]),
3180            ],
3181            Some(3),
3182        );
3183
3184        // with nulls
3185        test_sort_binary_arrays(
3186            vec![
3187                Some(vec![0, 0, 0]),
3188                None,
3189                Some(vec![0, 0, 3]),
3190                Some(vec![0, 0, 7]),
3191                Some(vec![0, 0, 1]),
3192                None,
3193            ],
3194            Some(SortOptions {
3195                descending: false,
3196                nulls_first: false,
3197            }),
3198            None,
3199            vec![
3200                Some(vec![0, 0, 0]),
3201                Some(vec![0, 0, 1]),
3202                Some(vec![0, 0, 3]),
3203                Some(vec![0, 0, 7]),
3204                None,
3205                None,
3206            ],
3207            Some(3),
3208        );
3209
3210        test_sort_binary_arrays(
3211            vec![
3212                Some(vec![3, 5, 7]),
3213                None,
3214                Some(vec![1, 7, 1]),
3215                Some(vec![2, 7, 3]),
3216                None,
3217                Some(vec![1, 4, 3]),
3218            ],
3219            Some(SortOptions {
3220                descending: false,
3221                nulls_first: false,
3222            }),
3223            None,
3224            vec![
3225                Some(vec![1, 4, 3]),
3226                Some(vec![1, 7, 1]),
3227                Some(vec![2, 7, 3]),
3228                Some(vec![3, 5, 7]),
3229                None,
3230                None,
3231            ],
3232            Some(3),
3233        );
3234
3235        // descending
3236        test_sort_binary_arrays(
3237            vec![
3238                Some(vec![0, 0, 0]),
3239                None,
3240                Some(vec![0, 0, 3]),
3241                Some(vec![0, 0, 7]),
3242                Some(vec![0, 0, 1]),
3243                None,
3244            ],
3245            Some(SortOptions {
3246                descending: true,
3247                nulls_first: false,
3248            }),
3249            None,
3250            vec![
3251                Some(vec![0, 0, 7]),
3252                Some(vec![0, 0, 3]),
3253                Some(vec![0, 0, 1]),
3254                Some(vec![0, 0, 0]),
3255                None,
3256                None,
3257            ],
3258            Some(3),
3259        );
3260
3261        // nulls first
3262        test_sort_binary_arrays(
3263            vec![
3264                Some(vec![0, 0, 0]),
3265                None,
3266                Some(vec![0, 0, 3]),
3267                Some(vec![0, 0, 7]),
3268                Some(vec![0, 0, 1]),
3269                None,
3270            ],
3271            Some(SortOptions {
3272                descending: false,
3273                nulls_first: true,
3274            }),
3275            None,
3276            vec![
3277                None,
3278                None,
3279                Some(vec![0, 0, 0]),
3280                Some(vec![0, 0, 1]),
3281                Some(vec![0, 0, 3]),
3282                Some(vec![0, 0, 7]),
3283            ],
3284            Some(3),
3285        );
3286
3287        // limit
3288        test_sort_binary_arrays(
3289            vec![
3290                Some(vec![0, 0, 0]),
3291                None,
3292                Some(vec![0, 0, 3]),
3293                Some(vec![0, 0, 7]),
3294                Some(vec![0, 0, 1]),
3295                None,
3296            ],
3297            Some(SortOptions {
3298                descending: false,
3299                nulls_first: true,
3300            }),
3301            Some(4),
3302            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3303            Some(3),
3304        );
3305
3306        // var length
3307        test_sort_binary_arrays(
3308            vec![
3309                Some(b"Hello".to_vec()),
3310                None,
3311                Some(b"from".to_vec()),
3312                Some(b"Apache".to_vec()),
3313                Some(b"Arrow-rs".to_vec()),
3314                None,
3315            ],
3316            Some(SortOptions {
3317                descending: false,
3318                nulls_first: false,
3319            }),
3320            None,
3321            vec![
3322                Some(b"Apache".to_vec()),
3323                Some(b"Arrow-rs".to_vec()),
3324                Some(b"Hello".to_vec()),
3325                Some(b"from".to_vec()),
3326                None,
3327                None,
3328            ],
3329            None,
3330        );
3331
3332        // limit
3333        test_sort_binary_arrays(
3334            vec![
3335                Some(b"Hello".to_vec()),
3336                None,
3337                Some(b"from".to_vec()),
3338                Some(b"Apache".to_vec()),
3339                Some(b"Arrow-rs".to_vec()),
3340                None,
3341            ],
3342            Some(SortOptions {
3343                descending: false,
3344                nulls_first: true,
3345            }),
3346            Some(4),
3347            vec![
3348                None,
3349                None,
3350                Some(b"Apache".to_vec()),
3351                Some(b"Arrow-rs".to_vec()),
3352            ],
3353            None,
3354        );
3355    }
3356
3357    #[test]
3358    fn test_lex_sort_single_column() {
3359        let input = vec![SortColumn {
3360            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3361                Some(17),
3362                Some(2),
3363                Some(-1),
3364                Some(0),
3365            ])) as ArrayRef,
3366            options: None,
3367        }];
3368        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3369            Some(-1),
3370            Some(0),
3371            Some(2),
3372            Some(17),
3373        ])) as ArrayRef];
3374        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3375        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3376
3377        // Explicitly test a limit on the sort as a demonstration
3378        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3379            Some(-1),
3380            Some(0),
3381            Some(2),
3382        ])) as ArrayRef];
3383        test_lex_sort_arrays(input, expected, Some(3));
3384    }
3385
3386    #[test]
3387    fn test_lex_sort_unaligned_rows() {
3388        let input = vec![
3389            SortColumn {
3390                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3391                    as ArrayRef,
3392                options: None,
3393            },
3394            SortColumn {
3395                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3396                options: None,
3397            },
3398        ];
3399        assert!(
3400            lexsort(&input, None).is_err(),
3401            "lexsort should reject columns with different row counts"
3402        );
3403    }
3404
3405    #[test]
3406    fn test_lex_sort_mixed_types() {
3407        let input = vec![
3408            SortColumn {
3409                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3410                    Some(0),
3411                    Some(2),
3412                    Some(-1),
3413                    Some(0),
3414                ])) as ArrayRef,
3415                options: None,
3416            },
3417            SortColumn {
3418                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3419                    Some(101),
3420                    Some(8),
3421                    Some(7),
3422                    Some(102),
3423                ])) as ArrayRef,
3424                options: None,
3425            },
3426            SortColumn {
3427                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3428                    Some(-1),
3429                    Some(-2),
3430                    Some(-3),
3431                    Some(-4),
3432                ])) as ArrayRef,
3433                options: None,
3434            },
3435        ];
3436        let expected = vec![
3437            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3438                Some(-1),
3439                Some(0),
3440                Some(0),
3441                Some(2),
3442            ])) as ArrayRef,
3443            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3444                Some(7),
3445                Some(101),
3446                Some(102),
3447                Some(8),
3448            ])) as ArrayRef,
3449            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3450                Some(-3),
3451                Some(-1),
3452                Some(-4),
3453                Some(-2),
3454            ])) as ArrayRef,
3455        ];
3456        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3457        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3458
3459        // test mix of string and in64 with option
3460        let input = vec![
3461            SortColumn {
3462                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3463                    Some(0),
3464                    Some(2),
3465                    Some(-1),
3466                    Some(0),
3467                ])) as ArrayRef,
3468                options: Some(SortOptions {
3469                    descending: true,
3470                    nulls_first: true,
3471                }),
3472            },
3473            SortColumn {
3474                values: Arc::new(StringArray::from(vec![
3475                    Some("foo"),
3476                    Some("9"),
3477                    Some("7"),
3478                    Some("bar"),
3479                ])) as ArrayRef,
3480                options: Some(SortOptions {
3481                    descending: true,
3482                    nulls_first: true,
3483                }),
3484            },
3485        ];
3486        let expected = vec![
3487            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3488                Some(2),
3489                Some(0),
3490                Some(0),
3491                Some(-1),
3492            ])) as ArrayRef,
3493            Arc::new(StringArray::from(vec![
3494                Some("9"),
3495                Some("foo"),
3496                Some("bar"),
3497                Some("7"),
3498            ])) as ArrayRef,
3499        ];
3500        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3501        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3502
3503        // test sort with nulls first
3504        let input = vec![
3505            SortColumn {
3506                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3507                    None,
3508                    Some(-1),
3509                    Some(2),
3510                    None,
3511                ])) as ArrayRef,
3512                options: Some(SortOptions {
3513                    descending: true,
3514                    nulls_first: true,
3515                }),
3516            },
3517            SortColumn {
3518                values: Arc::new(StringArray::from(vec![
3519                    Some("foo"),
3520                    Some("world"),
3521                    Some("hello"),
3522                    None,
3523                ])) as ArrayRef,
3524                options: Some(SortOptions {
3525                    descending: true,
3526                    nulls_first: true,
3527                }),
3528            },
3529        ];
3530        let expected = vec![
3531            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3532                None,
3533                None,
3534                Some(2),
3535                Some(-1),
3536            ])) as ArrayRef,
3537            Arc::new(StringArray::from(vec![
3538                None,
3539                Some("foo"),
3540                Some("hello"),
3541                Some("world"),
3542            ])) as ArrayRef,
3543        ];
3544        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3545        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3546
3547        // test sort with nulls last
3548        let input = vec![
3549            SortColumn {
3550                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3551                    None,
3552                    Some(-1),
3553                    Some(2),
3554                    None,
3555                ])) as ArrayRef,
3556                options: Some(SortOptions {
3557                    descending: true,
3558                    nulls_first: false,
3559                }),
3560            },
3561            SortColumn {
3562                values: Arc::new(StringArray::from(vec![
3563                    Some("foo"),
3564                    Some("world"),
3565                    Some("hello"),
3566                    None,
3567                ])) as ArrayRef,
3568                options: Some(SortOptions {
3569                    descending: true,
3570                    nulls_first: false,
3571                }),
3572            },
3573        ];
3574        let expected = vec![
3575            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3576                Some(2),
3577                Some(-1),
3578                None,
3579                None,
3580            ])) as ArrayRef,
3581            Arc::new(StringArray::from(vec![
3582                Some("hello"),
3583                Some("world"),
3584                Some("foo"),
3585                None,
3586            ])) as ArrayRef,
3587        ];
3588        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3589        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3590
3591        // test sort with opposite options
3592        let input = vec![
3593            SortColumn {
3594                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3595                    None,
3596                    Some(-1),
3597                    Some(2),
3598                    Some(-1),
3599                    None,
3600                ])) as ArrayRef,
3601                options: Some(SortOptions {
3602                    descending: false,
3603                    nulls_first: false,
3604                }),
3605            },
3606            SortColumn {
3607                values: Arc::new(StringArray::from(vec![
3608                    Some("foo"),
3609                    Some("bar"),
3610                    Some("world"),
3611                    Some("hello"),
3612                    None,
3613                ])) as ArrayRef,
3614                options: Some(SortOptions {
3615                    descending: true,
3616                    nulls_first: true,
3617                }),
3618            },
3619        ];
3620        let expected = vec![
3621            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3622                Some(-1),
3623                Some(-1),
3624                Some(2),
3625                None,
3626                None,
3627            ])) as ArrayRef,
3628            Arc::new(StringArray::from(vec![
3629                Some("hello"),
3630                Some("bar"),
3631                Some("world"),
3632                None,
3633                Some("foo"),
3634            ])) as ArrayRef,
3635        ];
3636        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3637        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3638
3639        // Limiting by more rows than present is ok
3640        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
3641
3642        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
3643
3644        // case1
3645        let primitive_array_data = vec![
3646            Some(2),
3647            Some(3),
3648            Some(2),
3649            Some(0),
3650            None,
3651            Some(2),
3652            Some(1),
3653            Some(2),
3654        ];
3655        let list_array_data = vec![
3656            None,
3657            Some(vec![Some(4)]),
3658            Some(vec![Some(3)]),
3659            Some(vec![Some(1)]),
3660            Some(vec![Some(5)]),
3661            Some(vec![Some(0)]),
3662            Some(vec![Some(2)]),
3663            Some(vec![None]),
3664        ];
3665
3666        let expected_primitive_array_data = vec![
3667            None,
3668            Some(0),
3669            Some(1),
3670            Some(2),
3671            Some(2),
3672            Some(2),
3673            Some(2),
3674            Some(3),
3675        ];
3676        let expected_list_array_data = vec![
3677            Some(vec![Some(5)]),
3678            Some(vec![Some(1)]),
3679            Some(vec![Some(2)]),
3680            None, // <-
3681            Some(vec![None]),
3682            Some(vec![Some(0)]),
3683            Some(vec![Some(3)]), // <-
3684            Some(vec![Some(4)]),
3685        ];
3686        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3687            primitive_array_data.clone(),
3688            list_array_data.clone(),
3689            expected_primitive_array_data.clone(),
3690            expected_list_array_data,
3691            None,
3692            None,
3693        );
3694
3695        // case2
3696        let primitive_array_options = SortOptions {
3697            descending: false,
3698            nulls_first: true,
3699        };
3700        let list_array_options = SortOptions {
3701            descending: false,
3702            nulls_first: false, // has been modified
3703        };
3704        let expected_list_array_data = vec![
3705            Some(vec![Some(5)]),
3706            Some(vec![Some(1)]),
3707            Some(vec![Some(2)]),
3708            Some(vec![Some(0)]), // <-
3709            Some(vec![Some(3)]),
3710            Some(vec![None]),
3711            None, // <-
3712            Some(vec![Some(4)]),
3713        ];
3714        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3715            primitive_array_data.clone(),
3716            list_array_data.clone(),
3717            expected_primitive_array_data.clone(),
3718            expected_list_array_data,
3719            Some(primitive_array_options),
3720            Some(list_array_options),
3721        );
3722
3723        // case3
3724        let primitive_array_options = SortOptions {
3725            descending: false,
3726            nulls_first: true,
3727        };
3728        let list_array_options = SortOptions {
3729            descending: true, // has been modified
3730            nulls_first: true,
3731        };
3732        let expected_list_array_data = vec![
3733            Some(vec![Some(5)]),
3734            Some(vec![Some(1)]),
3735            Some(vec![Some(2)]),
3736            None, // <-
3737            Some(vec![None]),
3738            Some(vec![Some(3)]),
3739            Some(vec![Some(0)]), // <-
3740            Some(vec![Some(4)]),
3741        ];
3742        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
3743            primitive_array_data.clone(),
3744            list_array_data.clone(),
3745            expected_primitive_array_data,
3746            expected_list_array_data,
3747            Some(primitive_array_options),
3748            Some(list_array_options),
3749        );
3750
3751        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
3752
3753        let list_array_data = vec![
3754            Some(vec![Some(2), Some(1)]), // 0
3755            None,                         // 10
3756            Some(vec![Some(3)]),          // 1
3757            Some(vec![Some(2), Some(0)]), // 2
3758            Some(vec![None, Some(2)]),    // 3
3759            Some(vec![Some(0)]),          // none
3760            None,                         // 11
3761            Some(vec![Some(2), None]),    // 4
3762            Some(vec![None]),             // 5
3763            Some(vec![Some(2), Some(1)]), // 6
3764        ];
3765        let primitive_array_data = vec![
3766            Some(0),
3767            Some(10),
3768            Some(1),
3769            Some(2),
3770            Some(3),
3771            None,
3772            Some(11),
3773            Some(4),
3774            Some(5),
3775            Some(6),
3776        ];
3777        let expected_list_array_data = vec![
3778            None,
3779            None,
3780            Some(vec![None]),
3781            Some(vec![None, Some(2)]),
3782            Some(vec![Some(0)]),
3783            Some(vec![Some(2), None]),
3784            Some(vec![Some(2), Some(0)]),
3785            Some(vec![Some(2), Some(1)]),
3786            Some(vec![Some(2), Some(1)]),
3787            Some(vec![Some(3)]),
3788        ];
3789        let expected_primitive_array_data = vec![
3790            Some(10),
3791            Some(11),
3792            Some(5),
3793            Some(3),
3794            None,
3795            Some(4),
3796            Some(2),
3797            Some(0),
3798            Some(6),
3799            Some(1),
3800        ];
3801        test_lex_sort_mixed_types_with_list::<Int32Type>(
3802            list_array_data.clone(),
3803            primitive_array_data.clone(),
3804            expected_list_array_data,
3805            expected_primitive_array_data,
3806            None,
3807            None,
3808        );
3809    }
3810
3811    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
3812        primitive_array_data: Vec<Option<T::Native>>,
3813        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
3814        expected_primitive_array_data: Vec<Option<T::Native>>,
3815        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
3816        primitive_array_options: Option<SortOptions>,
3817        list_array_options: Option<SortOptions>,
3818    ) where
3819        T: ArrowPrimitiveType,
3820        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
3821    {
3822        let input = vec![
3823            SortColumn {
3824                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
3825                    as ArrayRef,
3826                options: primitive_array_options,
3827            },
3828            SortColumn {
3829                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
3830                    list_array_data.clone(),
3831                    1,
3832                )) as ArrayRef,
3833                options: list_array_options,
3834            },
3835        ];
3836
3837        let expected = vec![
3838            Arc::new(PrimitiveArray::<T>::from(
3839                expected_primitive_array_data.clone(),
3840            )) as ArrayRef,
3841            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
3842                expected_list_array_data.clone(),
3843                1,
3844            )) as ArrayRef,
3845        ];
3846
3847        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3848        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3849    }
3850
3851    fn test_lex_sort_mixed_types_with_list<T>(
3852        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
3853        primitive_array_data: Vec<Option<T::Native>>,
3854        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
3855        expected_primitive_array_data: Vec<Option<T::Native>>,
3856        list_array_options: Option<SortOptions>,
3857        primitive_array_options: Option<SortOptions>,
3858    ) where
3859        T: ArrowPrimitiveType,
3860        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
3861    {
3862        macro_rules! run_test {
3863            ($ARRAY_TYPE:ident) => {
3864                let input = vec![
3865                    SortColumn {
3866                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
3867                            list_array_data.clone(),
3868                        )) as ArrayRef,
3869                        options: list_array_options.clone(),
3870                    },
3871                    SortColumn {
3872                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
3873                            as ArrayRef,
3874                        options: primitive_array_options.clone(),
3875                    },
3876                ];
3877
3878                let expected = vec![
3879                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
3880                        expected_list_array_data.clone(),
3881                    )) as ArrayRef,
3882                    Arc::new(PrimitiveArray::<T>::from(
3883                        expected_primitive_array_data.clone(),
3884                    )) as ArrayRef,
3885                ];
3886
3887                test_lex_sort_arrays(input.clone(), expected.clone(), None);
3888                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3889            };
3890        }
3891        run_test!(ListArray);
3892        run_test!(LargeListArray);
3893    }
3894
3895    #[test]
3896    fn test_partial_sort() {
3897        let mut before: Vec<&str> = vec![
3898            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
3899        ];
3900        let mut d = before.clone();
3901        d.sort_unstable();
3902
3903        for last in 0..before.len() {
3904            partial_sort(&mut before, last, |a, b| a.cmp(b));
3905            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
3906        }
3907    }
3908
3909    #[test]
3910    fn test_partial_rand_sort() {
3911        let size = 1000u32;
3912        let mut rng = StdRng::seed_from_u64(42);
3913        let mut before: Vec<u32> = (0..size).map(|_| rng.gen::<u32>()).collect();
3914        let mut d = before.clone();
3915        let last = (rng.next_u32() % size) as usize;
3916        d.sort_unstable();
3917
3918        partial_sort(&mut before, last, |a, b| a.cmp(b));
3919        assert_eq!(&d[0..last], &before[0..last]);
3920    }
3921
3922    #[test]
3923    fn test_sort_int8_dicts() {
3924        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
3925        let values = Int8Array::from(vec![1, 3, 5]);
3926        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
3927            keys,
3928            values,
3929            None,
3930            None,
3931            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
3932        );
3933
3934        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
3935        let values = Int8Array::from(vec![1, 3, 5]);
3936        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
3937            keys,
3938            values,
3939            Some(SortOptions {
3940                descending: true,
3941                nulls_first: false,
3942            }),
3943            None,
3944            vec![Some(5), Some(5), Some(3), Some(1), None, None],
3945        );
3946
3947        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
3948        let values = Int8Array::from(vec![1, 3, 5]);
3949        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
3950            keys,
3951            values,
3952            Some(SortOptions {
3953                descending: false,
3954                nulls_first: false,
3955            }),
3956            None,
3957            vec![Some(1), Some(3), Some(5), Some(5), None, None],
3958        );
3959
3960        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
3961        let values = Int8Array::from(vec![1, 3, 5]);
3962        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
3963            keys,
3964            values,
3965            Some(SortOptions {
3966                descending: true,
3967                nulls_first: true,
3968            }),
3969            Some(3),
3970            vec![None, None, Some(5)],
3971        );
3972
3973        // Values have `None`.
3974        let keys = Int8Array::from(vec![
3975            Some(1_i8),
3976            None,
3977            Some(3),
3978            None,
3979            Some(2),
3980            Some(3),
3981            Some(0),
3982        ]);
3983        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
3984        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
3985            keys,
3986            values,
3987            None,
3988            None,
3989            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
3990        );
3991
3992        let keys = Int8Array::from(vec![
3993            Some(1_i8),
3994            None,
3995            Some(3),
3996            None,
3997            Some(2),
3998            Some(3),
3999            Some(0),
4000        ]);
4001        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4002        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4003            keys,
4004            values,
4005            Some(SortOptions {
4006                descending: false,
4007                nulls_first: false,
4008            }),
4009            None,
4010            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4011        );
4012
4013        let keys = Int8Array::from(vec![
4014            Some(1_i8),
4015            None,
4016            Some(3),
4017            None,
4018            Some(2),
4019            Some(3),
4020            Some(0),
4021        ]);
4022        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4023        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4024            keys,
4025            values,
4026            Some(SortOptions {
4027                descending: true,
4028                nulls_first: false,
4029            }),
4030            None,
4031            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4032        );
4033
4034        let keys = Int8Array::from(vec![
4035            Some(1_i8),
4036            None,
4037            Some(3),
4038            None,
4039            Some(2),
4040            Some(3),
4041            Some(0),
4042        ]);
4043        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4044        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4045            keys,
4046            values,
4047            Some(SortOptions {
4048                descending: true,
4049                nulls_first: true,
4050            }),
4051            None,
4052            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4053        );
4054    }
4055
4056    #[test]
4057    fn test_sort_f32_dicts() {
4058        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4059        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4060        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4061            keys,
4062            values,
4063            None,
4064            None,
4065            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4066        );
4067
4068        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4069        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4070        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4071            keys,
4072            values,
4073            Some(SortOptions {
4074                descending: true,
4075                nulls_first: false,
4076            }),
4077            None,
4078            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4079        );
4080
4081        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4082        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4083        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4084            keys,
4085            values,
4086            Some(SortOptions {
4087                descending: false,
4088                nulls_first: false,
4089            }),
4090            None,
4091            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4092        );
4093
4094        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4095        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4096        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4097            keys,
4098            values,
4099            Some(SortOptions {
4100                descending: true,
4101                nulls_first: true,
4102            }),
4103            Some(3),
4104            vec![None, None, Some(5.1)],
4105        );
4106
4107        // Values have `None`.
4108        let keys = Int8Array::from(vec![
4109            Some(1_i8),
4110            None,
4111            Some(3),
4112            None,
4113            Some(2),
4114            Some(3),
4115            Some(0),
4116        ]);
4117        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4118        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4119            keys,
4120            values,
4121            None,
4122            None,
4123            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4124        );
4125
4126        let keys = Int8Array::from(vec![
4127            Some(1_i8),
4128            None,
4129            Some(3),
4130            None,
4131            Some(2),
4132            Some(3),
4133            Some(0),
4134        ]);
4135        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4136        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4137            keys,
4138            values,
4139            Some(SortOptions {
4140                descending: false,
4141                nulls_first: false,
4142            }),
4143            None,
4144            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4145        );
4146
4147        let keys = Int8Array::from(vec![
4148            Some(1_i8),
4149            None,
4150            Some(3),
4151            None,
4152            Some(2),
4153            Some(3),
4154            Some(0),
4155        ]);
4156        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4157        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4158            keys,
4159            values,
4160            Some(SortOptions {
4161                descending: true,
4162                nulls_first: false,
4163            }),
4164            None,
4165            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4166        );
4167
4168        let keys = Int8Array::from(vec![
4169            Some(1_i8),
4170            None,
4171            Some(3),
4172            None,
4173            Some(2),
4174            Some(3),
4175            Some(0),
4176        ]);
4177        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4178        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4179            keys,
4180            values,
4181            Some(SortOptions {
4182                descending: true,
4183                nulls_first: true,
4184            }),
4185            None,
4186            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4187        );
4188    }
4189
4190    #[test]
4191    fn test_lexicographic_comparator_null_dict_values() {
4192        let values = Int32Array::new(
4193            vec![1, 2, 3, 4].into(),
4194            Some(NullBuffer::from(vec![true, false, false, true])),
4195        );
4196        let keys = Int32Array::new(
4197            vec![0, 1, 53, 3].into(),
4198            Some(NullBuffer::from(vec![true, true, false, true])),
4199        );
4200        // [1, NULL, NULL, 4]
4201        let dict = DictionaryArray::new(keys, Arc::new(values));
4202
4203        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4204            values: Arc::new(dict),
4205            options: None,
4206        }])
4207        .unwrap();
4208        // 1.cmp(NULL)
4209        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4210        // NULL.cmp(NULL)
4211        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4212        // NULL.cmp(4)
4213        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4214    }
4215
4216    #[test]
4217    fn sort_list_equal() {
4218        let a = {
4219            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4220            for value in [[1, 5], [0, 3], [1, 3]] {
4221                builder.values().append_slice(&value);
4222                builder.append(true);
4223            }
4224            builder.finish()
4225        };
4226
4227        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4228        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4229
4230        let a = {
4231            let mut builder = ListBuilder::new(Int64Builder::new());
4232            for value in [[1, 5], [0, 3], [1, 3]] {
4233                builder.values().append_slice(&value);
4234                builder.append(true);
4235            }
4236            builder.finish()
4237        };
4238
4239        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4240        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4241    }
4242
4243    #[test]
4244    fn sort_struct_fallback_to_lexsort() {
4245        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4246        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4247
4248        let struct_array = StructArray::from(vec![
4249            (
4250                Arc::new(Field::new("b", DataType::Float32, false)),
4251                float.clone() as ArrayRef,
4252            ),
4253            (
4254                Arc::new(Field::new("c", DataType::Int32, false)),
4255                int.clone() as ArrayRef,
4256            ),
4257        ]);
4258
4259        assert!(!can_sort_to_indices(struct_array.data_type()));
4260        assert!(sort_to_indices(&struct_array, None, None)
4261            .err()
4262            .unwrap()
4263            .to_string()
4264            .contains("Sort not supported for data type"));
4265
4266        let sort_columns = vec![SortColumn {
4267            values: Arc::new(struct_array.clone()) as ArrayRef,
4268            options: None,
4269        }];
4270        let sorted = lexsort(&sort_columns, None).unwrap();
4271
4272        let expected_struct_array = Arc::new(StructArray::from(vec![
4273            (
4274                Arc::new(Field::new("b", DataType::Float32, false)),
4275                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4276            ),
4277            (
4278                Arc::new(Field::new("c", DataType::Int32, false)),
4279                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4280            ),
4281        ])) as ArrayRef;
4282
4283        assert_eq!(&sorted[0], &expected_struct_array);
4284    }
4285}