use std::fmt::Debug;
use arrow::array::{
Array, BinaryArray, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array,
Int64Array, Int8Array, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
use crate::stats::primitive::{
truncate_bytes, truncate_string, PrimitiveStats, TruncateBound, TRUNCATE_LEN,
};
use crate::stats::{
ColumnNullStats, ColumnStatKinds, ColumnarStats, DynStats, NoneStats, OptionStats,
};
/// A builder that derives statistics from a single Arrow column.
///
/// The type parameter `T` is not referenced by any method in this trait; it
/// only distinguishes implementations (presumably by the logical value type
/// of the column — confirm against callers).
pub trait ColumnarStatsBuilder<T>: Debug {
    /// The concrete Arrow array type this builder reads from.
    type ArrowColumn: arrow::array::Array + 'static;
    /// The stats type produced by [`ColumnarStatsBuilder::finish`].
    type FinishedStats: DynStats;

    /// Builds stats from a column of the concrete [`Self::ArrowColumn`] type.
    fn from_column(col: &Self::ArrowColumn) -> Self
    where
        Self: Sized;

    /// Builds stats from a type-erased Arrow array.
    ///
    /// Returns `None` when `col` cannot be downcast to
    /// [`Self::ArrowColumn`]; otherwise delegates to
    /// [`ColumnarStatsBuilder::from_column`].
    fn from_column_dyn(col: &dyn arrow::array::Array) -> Option<Self>
    where
        Self: Sized,
    {
        col.as_any()
            .downcast_ref::<Self::ArrowColumn>()
            .map(Self::from_column)
    }

    /// Consumes the builder and returns the finished stats.
    fn finish(self) -> Self::FinishedStats
    where
        Self::FinishedStats: Sized;
}
// Generates `ColumnarStatsBuilder<$native>` for `PrimitiveStats<$native>`.
//
// Arguments:
// - `$native`: the Rust value type stored in the stats.
// - `$arrow_col`: the Arrow array type the stats are computed from.
// - `$min_fn` / `$max_fn`: functions called with the column to obtain the
//   lower / upper bound. Each result has `unwrap_or_default()` applied, so a
//   missing aggregate falls back to `$native`'s `Default` value.
macro_rules! primitive_stats {
    ($native:ty, $arrow_col:ty, $min_fn:path, $max_fn:path) => {
        impl ColumnarStatsBuilder<$native> for PrimitiveStats<$native> {
            type FinishedStats = Self;
            type ArrowColumn = $arrow_col;

            fn from_column(col: &Self::ArrowColumn) -> Self
            where
                Self: Sized,
            {
                // Fields are evaluated in order: minimum first, then maximum.
                PrimitiveStats {
                    lower: $min_fn(col).unwrap_or_default(),
                    upper: $max_fn(col).unwrap_or_default(),
                }
            }

            // The builder is already the finished representation.
            fn finish(self) -> Self::FinishedStats {
                self
            }
        }
    };
}
// Booleans: use the boolean-specific `min_boolean` / `max_boolean` functions.
primitive_stats!(
bool,
BooleanArray,
arrow::compute::min_boolean,
arrow::compute::max_boolean
);
// Unsigned integers: use the generic `arrow::compute::{min, max}` functions.
primitive_stats!(u8, UInt8Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(u16, UInt16Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(u32, UInt32Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(u64, UInt64Array, arrow::compute::min, arrow::compute::max);
// Signed integers.
primitive_stats!(i8, Int8Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(i16, Int16Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(i32, Int32Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(i64, Int64Array, arrow::compute::min, arrow::compute::max);
// Floating point.
primitive_stats!(f32, Float32Array, arrow::compute::min, arrow::compute::max);
primitive_stats!(f64, Float64Array, arrow::compute::min, arrow::compute::max);
/// String column stats: the minimum and maximum strings of the column, each
/// passed through `truncate_string` with a limit of `TRUNCATE_LEN`.
impl ColumnarStatsBuilder<&str> for PrimitiveStats<String> {
    type ArrowColumn = StringArray;
    type FinishedStats = Self;

    /// Computes the truncated lower and upper string bounds of `col`.
    ///
    /// When the min/max aggregate yields `None`, the default (empty) string
    /// is used as the raw bound.
    ///
    /// # Panics
    ///
    /// Panics if `truncate_string` returns `None` for the lower bound.
    fn from_column(col: &Self::ArrowColumn) -> Self
    where
        Self: Sized,
    {
        let raw_min = arrow::compute::min_string(col).unwrap_or_default();
        let lower = match truncate_string(raw_min, TRUNCATE_LEN, TruncateBound::Lower) {
            Some(truncated) => truncated,
            None => panic!("lower bound should always truncate"),
        };

        let raw_max = arrow::compute::max_string(col).unwrap_or_default();
        // If no truncated upper bound is produced, keep the full maximum.
        let upper = match truncate_string(raw_max, TRUNCATE_LEN, TruncateBound::Upper) {
            Some(truncated) => truncated,
            None => raw_max.to_owned(),
        };

        PrimitiveStats { lower, upper }
    }

    /// Returns the builder itself; it is already the finished stats.
    fn finish(self) -> Self::FinishedStats
    where
        Self::FinishedStats: Sized,
    {
        self
    }
}
/// Binary column stats: the minimum and maximum byte strings of the column,
/// each passed through `truncate_bytes` with a limit of `TRUNCATE_LEN`.
impl ColumnarStatsBuilder<&[u8]> for PrimitiveStats<Vec<u8>> {
    type ArrowColumn = BinaryArray;
    type FinishedStats = Self;

    /// Computes the truncated lower and upper byte bounds of `col`.
    ///
    /// When the min/max aggregate yields `None`, the default (empty) slice
    /// is used as the raw bound.
    ///
    /// # Panics
    ///
    /// Panics if `truncate_bytes` returns `None` for the lower bound.
    fn from_column(col: &Self::ArrowColumn) -> Self
    where
        Self: Sized,
    {
        let raw_min = arrow::compute::min_binary(col).unwrap_or_default();
        let lower = match truncate_bytes(raw_min, TRUNCATE_LEN, TruncateBound::Lower) {
            Some(truncated) => truncated,
            None => panic!("lower bound should always truncate"),
        };

        let raw_max = arrow::compute::max_binary(col).unwrap_or_default();
        // If no truncated upper bound is produced, keep the full maximum.
        let upper = match truncate_bytes(raw_max, TRUNCATE_LEN, TruncateBound::Upper) {
            Some(truncated) => truncated,
            None => raw_max.to_owned(),
        };

        PrimitiveStats { lower, upper }
    }

    /// Returns the builder itself; it is already the finished stats.
    fn finish(self) -> Self::FinishedStats
    where
        Self::FinishedStats: Sized,
    {
        self
    }
}
/// Stats for a nullable column: the column's null count paired with the
/// inner builder `T`'s stats over the same column.
impl<I, T> ColumnarStatsBuilder<Option<I>> for OptionStats<T>
where
    T: ColumnarStatsBuilder<I> + DynStats,
    T::FinishedStats: Into<ColumnStatKinds>,
{
    type ArrowColumn = T::ArrowColumn;
    type FinishedStats = ColumnarStats;

    /// Records `col.null_count()` and then builds the inner stats from `col`.
    fn from_column(col: &Self::ArrowColumn) -> Self
    where
        Self: Sized,
    {
        let none = col.null_count();
        let some = T::from_column(col);
        OptionStats { none, some }
    }

    /// Converts into `ColumnarStats`, always populating `nulls` with the
    /// recorded null count and converting the inner finished stats into
    /// `ColumnStatKinds`.
    fn finish(self) -> Self::FinishedStats
    where
        Self::FinishedStats: Sized,
    {
        let OptionStats { none, some } = self;
        ColumnarStats {
            nulls: Some(ColumnNullStats { count: none }),
            values: some.finish().into(),
        }
    }
}
/// A stats builder that collects nothing.
///
/// The type parameter `A` only selects which Arrow array type the builder
/// accepts; no data of type `A` is stored.
#[derive(Debug)]
pub struct NoneStatsBuilder<A>(std::marker::PhantomData<A>);

impl<T, A: arrow::array::Array + 'static> ColumnarStatsBuilder<T> for NoneStatsBuilder<A> {
    type ArrowColumn = A;
    type FinishedStats = NoneStats;

    /// Ignores the column entirely and returns an empty builder.
    fn from_column(_col: &Self::ArrowColumn) -> Self
    where
        Self: Sized,
    {
        Self(std::marker::PhantomData)
    }

    /// Always yields `NoneStats`.
    fn finish(self) -> Self::FinishedStats
    where
        Self::FinishedStats: Sized,
    {
        NoneStats
    }
}