Struct arrow::array::RunArray

source ·
pub struct RunArray<R>
where R: RunEndIndexType,
{ /* private fields */ }
Expand description

An array of run-end encoded values

This encoding is a variation on run-length encoding (RLE) and is good for representing data containing the same values repeated consecutively.

RunArray contains a run_ends array and a values array of the same length. The run_ends array stores the indexes at which each run ends. The values array stores the value of each run. The example below illustrates how a logical array is represented in a RunArray:

┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
  ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
│ │        A        │  │    2    │ │     │        A        │     
  ├─────────────────┤  ├─────────┤       ├─────────────────┤
│ │        D        │  │    3    │ │     │        A        │    run length of 'A' = run_ends[0] - 0 = 2
  ├─────────────────┤  ├─────────┤       ├─────────────────┤
│ │        B        │  │    6    │ │     │        D        │    run length of 'D' = run_ends[1] - run_ends[0] = 1
  └─────────────────┘  └─────────┘       ├─────────────────┤
│        values          run_ends  │     │        B        │     
                                         ├─────────────────┤
└ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
                                         ├─────────────────┤
               RunArray                  │        B        │    run length of 'B' = run_ends[2] - run_ends[1] = 3
              length = 3                 └─────────────────┘
  
                                            Logical array
                                               Contents

Implementations§

source§

impl<R> RunArray<R>
where R: RunEndIndexType,

source

pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize

Calculates the logical length of the array encoded by the given run_ends array.

source

pub fn try_new( run_ends: &PrimitiveArray<R>, values: &dyn Array, ) -> Result<RunArray<R>, ArrowError>

Attempts to create a RunArray using the given run_ends (the indexes where each run ends) and values (the value of each run). Returns an error if the given data is not compatible with the RunEndEncoded specification.

source

pub fn run_ends(&self) -> &RunEndBuffer<<R as ArrowPrimitiveType>::Native>

Returns a reference to RunEndBuffer

source

pub fn values(&self) -> &Arc<dyn Array>

Returns a reference to values array

Note: any slicing of this RunArray is not applied to the returned array and must be handled separately

source

pub fn get_start_physical_index(&self) -> usize

Returns the physical index at which the array slice starts.

source

pub fn get_end_physical_index(&self) -> usize

Returns the physical index at which the array slice ends.

source

pub fn downcast<V>(&self) -> Option<TypedRunArray<'_, R, V>>
where V: 'static,

Downcast this RunArray to a TypedRunArray

use arrow_array::{Array, ArrayAccessor, RunArray, StringArray, types::Int32Type};

let orig = [Some("a"), Some("b"), None];
let run_array = RunArray::<Int32Type>::from_iter(orig);
let typed = run_array.downcast::<StringArray>().unwrap();
assert_eq!(typed.value(0), "a");
assert_eq!(typed.value(1), "b");
assert!(typed.values().is_null(2));
source

pub fn get_physical_index(&self, logical_index: usize) -> usize

Returns the index into the physical array for the given index into the logical array. This function adjusts the input logical index based on ArrayData::offset. Performs a binary search on the run_ends array for the input index.

The result is arbitrary if logical_index >= self.len()

source

pub fn get_physical_indices<I>( &self, logical_indices: &[I], ) -> Result<Vec<usize>, ArrowError>
where I: ArrowNativeType,

Returns the physical indices of the input logical indices. Returns an error if any of the logical indices cannot be converted to a physical index. The logical indices are sorted and iterated along with the run_ends array to find the matching physical indices. This approach was chosen over finding the physical index for each logical index with a binary search via get_physical_index. Running benchmarks on both approaches showed that the approach used here scaled well for larger inputs. See https://github.com/apache/arrow-rs/pull/3622#issuecomment-1407753727 for more details.

source

pub fn slice(&self, offset: usize, length: usize) -> RunArray<R>

Returns a zero-copy slice of this array with the indicated offset and length.

Trait Implementations§

source§

impl<T> Array for RunArray<T>
where T: RunEndIndexType,

source§

fn as_any(&self) -> &(dyn Any + 'static)

Returns the array as Any so that it can be downcasted to a specific implementation. Read more
source§

fn to_data(&self) -> ArrayData

Returns the underlying data of this array
source§

fn into_data(self) -> ArrayData

Returns the underlying data of this array Read more
source§

fn data_type(&self) -> &DataType

Returns a reference to the DataType of this array. Read more
source§

fn slice(&self, offset: usize, length: usize) -> Arc<dyn Array>

Returns a zero-copy slice of this array with the indicated offset and length. Read more
source§

fn len(&self) -> usize

Returns the length (i.e., number of elements) of this array. Read more
source§

fn is_empty(&self) -> bool

Returns whether this array is empty. Read more
source§

fn offset(&self) -> usize

Returns the offset into the underlying data used by this array(-slice). Note that the underlying data can be shared by many arrays. This defaults to 0. Read more
source§

fn nulls(&self) -> Option<&NullBuffer>

Returns the null buffer of this array if any. Read more
source§

fn logical_nulls(&self) -> Option<NullBuffer>

Returns a potentially computed NullBuffer that represents the logical null values of this array, if any. Read more
source§

fn is_nullable(&self) -> bool

Returns false if the array is guaranteed to not contain any logical nulls Read more
source§

fn get_buffer_memory_size(&self) -> usize

Returns the total number of bytes of memory pointed to by this array. The buffers store bytes in the Arrow memory format, and include the data as well as the validity map. Note that this does not always correspond to the exact memory usage of an array, since multiple arrays can share the same buffers or slices thereof.
source§

fn get_array_memory_size(&self) -> usize

Returns the total number of bytes of memory occupied physically by this array. This value will always be greater than the value returned by get_buffer_memory_size() and includes the overhead of the data structures that contain the pointers to the various buffers.
source§

fn is_null(&self, index: usize) -> bool

Returns whether the element at index is null according to Array::nulls Read more
source§

fn is_valid(&self, index: usize) -> bool

Returns whether the element at index is not null, the opposite of Self::is_null. Read more
source§

fn null_count(&self) -> usize

Returns the total number of physical null values in this array. Read more
source§

impl<R> Clone for RunArray<R>
where R: RunEndIndexType,

source§

fn clone(&self) -> RunArray<R>

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl<R> Debug for RunArray<R>
where R: RunEndIndexType,

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
source§

impl<R> From<ArrayData> for RunArray<R>
where R: RunEndIndexType,

source§

fn from(data: ArrayData) -> RunArray<R>

Converts to this type from the input type.
source§

impl<R> From<RunArray<R>> for ArrayData
where R: RunEndIndexType,

source§

fn from(array: RunArray<R>) -> ArrayData

Converts to this type from the input type.
source§

impl<'a, T> FromIterator<&'a str> for RunArray<T>
where T: RunEndIndexType,

Constructs a RunArray from an iterator of strings.

§Example:

use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};

let test = vec!["a", "a", "b", "c"];
let array: RunArray<Int16Type> = test.into_iter().collect();
assert_eq!(
    "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
    format!("{:?}", array)
);
source§

fn from_iter<I>(iter: I) -> RunArray<T>
where I: IntoIterator<Item = &'a str>,

Creates a value from an iterator. Read more
source§

impl<'a, T> FromIterator<Option<&'a str>> for RunArray<T>
where T: RunEndIndexType,

Constructs a RunArray from an iterator of optional strings.

§Example:

use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};

let test = vec!["a", "a", "b", "c", "c"];
let array: RunArray<Int16Type> = test
    .iter()
    .map(|&x| if x == "b" { None } else { Some(x) })
    .collect();
assert_eq!(
    "RunArray {run_ends: [2, 3, 5], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
    format!("{:?}", array)
);
source§

fn from_iter<I>(iter: I) -> RunArray<T>
where I: IntoIterator<Item = Option<&'a str>>,

Creates a value from an iterator. Read more

Auto Trait Implementations§

§

impl<R> Freeze for RunArray<R>

§

impl<R> !RefUnwindSafe for RunArray<R>

§

impl<R> Send for RunArray<R>

§

impl<R> Sync for RunArray<R>

§

impl<R> Unpin for RunArray<R>

§

impl<R> !UnwindSafe for RunArray<R>

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> CloneToUninit for T
where T: Clone,

source§

default unsafe fn clone_to_uninit(&self, dst: *mut T)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
source§

impl<T> Datum for T
where T: Array,

source§

fn get(&self) -> (&dyn Array, bool)

Returns the value for this Datum and a boolean indicating if the value is scalar
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.