parquet::arrow::arrow_reader::statistics

Struct StatisticsConverter

pub struct StatisticsConverter<'a> { /* private fields */ }

Expand description

Extracts Parquet statistics as Arrow arrays

This is used to convert Parquet statistics to Arrow ArrayRef, with proper type conversions. This information can be used for pruning Parquet files, row groups, and data pages based on the statistics embedded in Parquet metadata.

§Schemas

The converter uses the schema of the Parquet file and the Arrow schema to convert the underlying statistics value (stored as a Parquet value) into the corresponding Arrow value. For example, decimals are stored as binary in Parquet files, and this structure handles mapping them to the i128 representation used in Arrow.

Note: The Parquet schema and Arrow schema do not have to be identical (for example, the columns may be in different orders, and either schema may have columns the other lacks). The function parquet_column is used to match the column in the Parquet schema to the column in the Arrow schema.

Struct StatisticsConverter

§Schemas

Implementations§

impl<'a> StatisticsConverter<'a>

pub fn parquet_column_index(&self) -> Option<usize>

pub fn arrow_field(&self) -> &'a Field

pub fn with_missing_null_counts_as_zero( self, missing_null_counts_as_zero: bool, ) -> Self

pub fn row_group_row_counts<I>( &self, metadatas: I, ) -> Result<Option<UInt64Array>> where I: IntoIterator<Item = &'a RowGroupMetaData>,

§Return Value

§Example

pub fn try_new<'b>( column_name: &'b str, arrow_schema: &'a Schema, parquet_schema: &'a SchemaDescriptor, ) -> Result<Self>

§Errors

pub fn row_group_mins<I>(&self, metadatas: I) -> Result<ArrayRef> where I: IntoIterator<Item = &'a RowGroupMetaData>,

§Return Value

§Errors

§Example

pub fn row_group_maxes<I>(&self, metadatas: I) -> Result<ArrayRef> where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_is_max_value_exact<I>( &self, metadatas: I, ) -> Result<BooleanArray> where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_is_min_value_exact<I>( &self, metadatas: I, ) -> Result<BooleanArray> where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_null_counts<I>(&self, metadatas: I) -> Result<UInt64Array> where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn data_page_mins<I>( &self, column_page_index: &ParquetColumnIndex, column_offset_index: &ParquetOffsetIndex, row_group_indices: I, ) -> Result<ArrayRef> where I: IntoIterator<Item = &'a usize>,

§Parameters:

§Return Value

§Errors

pub fn data_page_maxes<I>( &self, column_page_index: &ParquetColumnIndex, column_offset_index: &ParquetOffsetIndex, row_group_indices: I, ) -> Result<ArrayRef> where I: IntoIterator<Item = &'a usize>,

pub fn data_page_null_counts<I>( &self, column_page_index: &ParquetColumnIndex, column_offset_index: &ParquetOffsetIndex, row_group_indices: I, ) -> Result<UInt64Array> where I: IntoIterator<Item = &'a usize>,

pub fn data_page_row_counts<I>( &self, column_offset_index: &ParquetOffsetIndex, row_group_metadatas: &'a [RowGroupMetaData], row_group_indices: I, ) -> Result<Option<UInt64Array>> where I: IntoIterator<Item = &'a usize>,

§Parameters:

Trait Implementations§

impl<'a> Debug for StatisticsConverter<'a>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl<'a> Freeze for StatisticsConverter<'a>

impl<'a> RefUnwindSafe for StatisticsConverter<'a>

impl<'a> Send for StatisticsConverter<'a>

impl<'a> Sync for StatisticsConverter<'a>

impl<'a> Unpin for StatisticsConverter<'a>

impl<'a> UnsafeUnpin for StatisticsConverter<'a>

impl<'a> UnwindSafe for StatisticsConverter<'a>

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> Allocation for T where T: RefUnwindSafe + Send + Sync,

Struct StatisticsConverter

pub fn row_group_row_counts<I>( &self, metadatas: I, ) -> Result<Option<UInt64Array>>
where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_mins<I>(&self, metadatas: I) -> Result<ArrayRef>
where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_maxes<I>(&self, metadatas: I) -> Result<ArrayRef>
where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_is_max_value_exact<I>( &self, metadatas: I, ) -> Result<BooleanArray>
where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_is_min_value_exact<I>( &self, metadatas: I, ) -> Result<BooleanArray>
where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn row_group_null_counts<I>(&self, metadatas: I) -> Result<UInt64Array>
where I: IntoIterator<Item = &'a RowGroupMetaData>,

pub fn data_page_mins<I>( &self, column_page_index: &ParquetColumnIndex, column_offset_index: &ParquetOffsetIndex, row_group_indices: I, ) -> Result<ArrayRef>
where I: IntoIterator<Item = &'a usize>,

pub fn data_page_maxes<I>( &self, column_page_index: &ParquetColumnIndex, column_offset_index: &ParquetOffsetIndex, row_group_indices: I, ) -> Result<ArrayRef>
where I: IntoIterator<Item = &'a usize>,

pub fn data_page_null_counts<I>( &self, column_page_index: &ParquetColumnIndex, column_offset_index: &ParquetOffsetIndex, row_group_indices: I, ) -> Result<UInt64Array>
where I: IntoIterator<Item = &'a usize>,

pub fn data_page_row_counts<I>( &self, column_offset_index: &ParquetOffsetIndex, row_group_metadatas: &'a [RowGroupMetaData], row_group_indices: I, ) -> Result<Option<UInt64Array>>
where I: IntoIterator<Item = &'a usize>,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,