use crate::delta::{
add_days_datetime, add_months_datetime, shift_months, sub_days_datetime, sub_months_datetime,
};
use crate::temporal_conversions::as_datetime_with_timezone;
use crate::timezone::Tz;
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
use arrow_buffer::{i256, Buffer, OffsetBuffer};
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
use arrow_data::{validate_binary_view, validate_string_view};
use arrow_schema::{
ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE,
};
use chrono::{Duration, NaiveDate, NaiveDateTime};
use half::f16;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::ops::{Add, Sub};
/// Marker type for Arrow's boolean data type.
///
/// It carries no data; it only exposes the matching [`DataType`] as an
/// associated constant.
#[derive(Debug)]
pub struct BooleanType {}
impl BooleanType {
/// The Arrow [`DataType`] this marker stands for.
pub const DATA_TYPE: DataType = DataType::Boolean;
}
/// Links an Arrow primitive [`DataType`] to the native Rust type used to store
/// its values.
///
/// The `primitive::PrimitiveTypeSealed` supertrait lives in a private module,
/// so implementations can only be added from within this module tree.
pub trait ArrowPrimitiveType: primitive::PrimitiveTypeSealed + 'static {
/// Native Rust type holding a single value of this Arrow type.
type Native: ArrowNativeTypeOp;
/// The Arrow [`DataType`] associated with this marker type.
const DATA_TYPE: DataType;
/// Returns the size in bytes of one [`Self::Native`] value.
fn get_byte_width() -> usize {
std::mem::size_of::<Self::Native>()
}
/// Returns the `Default` value of [`Self::Native`].
fn default_value() -> Self::Native {
Default::default()
}
}
/// Private module holding the sealing trait for [`ArrowPrimitiveType`].
mod primitive {
/// Supertrait of `ArrowPrimitiveType`; because this module is private, only
/// code that can name this trait is able to add new primitive types.
pub trait PrimitiveTypeSealed {}
}
// Declares a field-less marker struct `$name` (documented with `$doc_string`)
// and implements `ArrowPrimitiveType` for it with native type `$native_ty`
// and Arrow data type `$data_ty`.
macro_rules! make_type {
($name:ident, $native_ty:ty, $data_ty:expr, $doc_string: literal) => {
#[derive(Debug)]
#[doc = $doc_string]
pub struct $name {}
impl ArrowPrimitiveType for $name {
type Native = $native_ty;
const DATA_TYPE: DataType = $data_ty;
}
// Required by the `PrimitiveTypeSealed` supertrait bound on `ArrowPrimitiveType`.
impl primitive::PrimitiveTypeSealed for $name {}
};
}
// Fixed-width numeric primitives: signed integers, unsigned integers, then
// floating point. Each invocation pairs a marker struct with its native Rust
// type and Arrow `DataType`.
make_type!(Int8Type, i8, DataType::Int8, "A signed 8-bit integer type.");
make_type!(
Int16Type,
i16,
DataType::Int16,
"A signed 16-bit integer type."
);
make_type!(
Int32Type,
i32,
DataType::Int32,
"A signed 32-bit integer type."
);
make_type!(
Int64Type,
i64,
DataType::Int64,
"A signed 64-bit integer type."
);
make_type!(
UInt8Type,
u8,
DataType::UInt8,
"An unsigned 8-bit integer type."
);
make_type!(
UInt16Type,
u16,
DataType::UInt16,
"An unsigned 16-bit integer type."
);
make_type!(
UInt32Type,
u32,
DataType::UInt32,
"An unsigned 32-bit integer type."
);
make_type!(
UInt64Type,
u64,
DataType::UInt64,
"An unsigned 64-bit integer type."
);
// `f16` comes from the `half` crate imported at the top of the file.
make_type!(
Float16Type,
f16,
DataType::Float16,
"A 16-bit floating point number type."
);
make_type!(
Float32Type,
f32,
DataType::Float32,
"A 32-bit floating point number type."
);
make_type!(
Float64Type,
f64,
DataType::Float64,
"A 64-bit floating point number type."
);
// Timestamp types, one per `TimeUnit`. All are stored as `i64`; the
// `DATA_TYPE` constant is declared with no timezone (`None`).
make_type!(
TimestampSecondType,
i64,
DataType::Timestamp(TimeUnit::Second, None),
"A timestamp second type with an optional timezone."
);
make_type!(
TimestampMillisecondType,
i64,
DataType::Timestamp(TimeUnit::Millisecond, None),
"A timestamp millisecond type with an optional timezone."
);
make_type!(
TimestampMicrosecondType,
i64,
DataType::Timestamp(TimeUnit::Microsecond, None),
"A timestamp microsecond type with an optional timezone."
);
make_type!(
TimestampNanosecondType,
i64,
DataType::Timestamp(TimeUnit::Nanosecond, None),
"A timestamp nanosecond type with an optional timezone."
);
// Date types (elapsed time since the UNIX epoch) followed by time-of-day
// types (elapsed time since midnight). The 32-bit variants use `i32`, the
// 64-bit variants use `i64`.
make_type!(
Date32Type,
i32,
DataType::Date32,
"A 32-bit date type representing the elapsed time since UNIX epoch in days(32 bits)."
);
make_type!(
Date64Type,
i64,
DataType::Date64,
"A 64-bit date type representing the elapsed time since UNIX epoch in milliseconds(64 bits)."
);
make_type!(
Time32SecondType,
i32,
DataType::Time32(TimeUnit::Second),
"A 32-bit time type representing the elapsed time since midnight in seconds."
);
make_type!(
Time32MillisecondType,
i32,
DataType::Time32(TimeUnit::Millisecond),
"A 32-bit time type representing the elapsed time since midnight in milliseconds."
);
make_type!(
Time64MicrosecondType,
i64,
DataType::Time64(TimeUnit::Microsecond),
"A 64-bit time type representing the elapsed time since midnight in microseconds."
);
make_type!(
Time64NanosecondType,
i64,
DataType::Time64(TimeUnit::Nanosecond),
"A 64-bit time type representing the elapsed time since midnight in nanoseconds."
);
// Year-month interval: a single `i32` month count (see the inherent
// `make_value` / `to_months` helpers on this type).
make_type!(
IntervalYearMonthType,
i32,
DataType::Interval(IntervalUnit::YearMonth),
"A “calendar” interval stored as the number of whole months."
);
// Day-time interval: two `i32` fields (days, milliseconds) packed into one
// `i64`. The packing itself is implemented by the inherent `make_value` /
// `to_parts` helpers on this type.
make_type!(
IntervalDayTimeType,
i64,
DataType::Interval(IntervalUnit::DayTime),
r#"A “calendar” interval type in days and milliseconds.
## Representation
This type is stored as a single 64 bit integer, interpreted as two i32 fields:
1. the number of elapsed days
2. The number of milliseconds (no leap seconds),
```text
┌──────────────┬──────────────┐
│ Days │ Milliseconds │
│ (32 bits) │ (32 bits) │
└──────────────┴──────────────┘
0 31 63 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L406-L408) for more details
## Note on Comparing and Ordering for Calendar Types
Values of `IntervalDayTimeType` are compared using their binary representation,
which can lead to surprising results. Please see the description of ordering on
[`IntervalMonthDayNanoType`] for more details
"#
);
// Month-day-nano interval: months (`i32`), days (`i32`) and nanoseconds
// (`i64`) packed into one `i128`. The raw string below becomes the rustdoc of
// the generated struct, so its lines are deliberately left unindented.
make_type!(
    IntervalMonthDayNanoType,
    i128,
    DataType::Interval(IntervalUnit::MonthDayNano),
    r#"A “calendar” interval type in months, days, and nanoseconds.
## Representation
This type is stored as a single 128 bit integer,
interpreted as three different signed integral fields:
1. The number of months (32 bits)
2. The number of days (32 bits)
3. The number of nanoseconds (64 bits).
Nanoseconds does not allow for leap seconds.
Each field is independent (e.g. there is no constraint that the quantity of
nanoseconds represents less than a day's worth of time).
```text
┌──────────────────────────────┬─────────────┬──────────────┐
│ Nanos │ Days │ Months │
│ (64 bits) │ (32 bits) │ (32 bits) │
└──────────────────────────────┴─────────────┴──────────────┘
0 63 95 127 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details
## Note on Comparing and Ordering for Calendar Types
Values of `IntervalMonthDayNanoType` are compared using their binary representation,
which can lead to surprising results.
Spans of time measured in calendar units are not fixed in absolute size (e.g.
number of seconds) which makes defining comparisons and ordering non trivial.
For example `1 month` is 28 days for February but `1 month` is 31 days
in December.
This makes the seemingly simple operation of comparing two intervals
complicated in practice. For example is `1 month` more or less than `30 days`? The
answer depends on what month you are talking about.
This crate defines comparisons for calendar types using their binary
representation which is fast and efficient, but leads
to potentially surprising results.
For example a
`IntervalMonthDayNano` of `1 month` will compare as **greater** than a
`IntervalMonthDayNano` of `100 days` because the binary representation of `1 month`
is larger than the binary representation of 100 days.
"#
);
// Duration (elapsed time) types, one per `TimeUnit`, all stored as `i64`.
make_type!(
DurationSecondType,
i64,
DataType::Duration(TimeUnit::Second),
"An elapsed time type in seconds."
);
make_type!(
DurationMillisecondType,
i64,
DataType::Duration(TimeUnit::Millisecond),
"An elapsed time type in milliseconds."
);
make_type!(
DurationMicrosecondType,
i64,
DataType::Duration(TimeUnit::Microsecond),
"An elapsed time type in microseconds."
);
make_type!(
DurationNanosecondType,
i64,
DataType::Duration(TimeUnit::Nanosecond),
"An elapsed time type in nanoseconds."
);
/// Marker trait for primitive types usable as dictionary keys.
///
/// Implemented below for every signed and unsigned integer type.
pub trait ArrowDictionaryKeyType: ArrowPrimitiveType {}
impl ArrowDictionaryKeyType for Int8Type {}
impl ArrowDictionaryKeyType for Int16Type {}
impl ArrowDictionaryKeyType for Int32Type {}
impl ArrowDictionaryKeyType for Int64Type {}
impl ArrowDictionaryKeyType for UInt8Type {}
impl ArrowDictionaryKeyType for UInt16Type {}
impl ArrowDictionaryKeyType for UInt32Type {}
impl ArrowDictionaryKeyType for UInt64Type {}
/// Marker trait for primitive types usable as run-end indices.
///
/// Implemented below for the signed 16-, 32- and 64-bit integer types only.
pub trait RunEndIndexType: ArrowPrimitiveType {}
impl RunEndIndexType for Int16Type {}
impl RunEndIndexType for Int32Type {}
impl RunEndIndexType for Int64Type {}
/// Marker trait for primitive types that represent temporal values.
///
/// Implemented below for the timestamp, date, time and duration types. The
/// interval types are not given an implementation here.
pub trait ArrowTemporalType: ArrowPrimitiveType {}
impl ArrowTemporalType for TimestampSecondType {}
impl ArrowTemporalType for TimestampMillisecondType {}
impl ArrowTemporalType for TimestampMicrosecondType {}
impl ArrowTemporalType for TimestampNanosecondType {}
impl ArrowTemporalType for Date32Type {}
impl ArrowTemporalType for Date64Type {}
impl ArrowTemporalType for Time32SecondType {}
impl ArrowTemporalType for Time32MillisecondType {}
impl ArrowTemporalType for Time64MicrosecondType {}
impl ArrowTemporalType for Time64NanosecondType {}
impl ArrowTemporalType for DurationSecondType {}
impl ArrowTemporalType for DurationMillisecondType {}
impl ArrowTemporalType for DurationMicrosecondType {}
impl ArrowTemporalType for DurationNanosecondType {}
/// A temporal type whose native value is an `i64` count of a fixed
/// [`TimeUnit`].
pub trait ArrowTimestampType: ArrowTemporalType<Native = i64> {
/// The [`TimeUnit`] in which values of this type are expressed.
const UNIT: TimeUnit;
/// Returns [`Self::UNIT`]; kept only for backwards compatibility.
#[deprecated(note = "Use Self::UNIT")]
fn get_time_unit() -> TimeUnit {
Self::UNIT
}
/// Converts `naive` into a native value expressed in [`Self::UNIT`].
///
/// Returns `None` when the implementation cannot represent the value as an
/// `i64`.
fn make_value(naive: NaiveDateTime) -> Option<i64>;
}
impl ArrowTimestampType for TimestampSecondType {
    const UNIT: TimeUnit = TimeUnit::Second;

    /// Treats `naive` as a UTC instant and returns its whole-second
    /// timestamp. This implementation never returns `None`.
    fn make_value(naive: NaiveDateTime) -> Option<i64> {
        let seconds = naive.and_utc().timestamp();
        Some(seconds)
    }
}
impl ArrowTimestampType for TimestampMillisecondType {
    const UNIT: TimeUnit = TimeUnit::Millisecond;

    /// Treats `naive` as a UTC instant and returns it as milliseconds,
    /// or `None` if the checked arithmetic overflows `i64`.
    fn make_value(naive: NaiveDateTime) -> Option<i64> {
        let instant = naive.and_utc();
        let fraction = i64::from(instant.timestamp_subsec_millis());
        instant
            .timestamp()
            .checked_mul(1_000)
            .and_then(|whole| whole.checked_add(fraction))
    }
}
impl ArrowTimestampType for TimestampMicrosecondType {
    const UNIT: TimeUnit = TimeUnit::Microsecond;

    /// Treats `naive` as a UTC instant and returns it as microseconds,
    /// or `None` if the checked arithmetic overflows `i64`.
    fn make_value(naive: NaiveDateTime) -> Option<i64> {
        let instant = naive.and_utc();
        let fraction = i64::from(instant.timestamp_subsec_micros());
        instant
            .timestamp()
            .checked_mul(1_000_000)
            .and_then(|whole| whole.checked_add(fraction))
    }
}
impl ArrowTimestampType for TimestampNanosecondType {
    const UNIT: TimeUnit = TimeUnit::Nanosecond;

    /// Treats `naive` as a UTC instant and returns it as nanoseconds since the
    /// UNIX epoch, or `None` if that count does not fit in an `i64`.
    fn make_value(naive: NaiveDateTime) -> Option<i64> {
        let utc = naive.and_utc();
        // `timestamp()` is floored and the sub-second part is non-negative, so
        // for instants inside the last partial second above `i64::MIN`
        // nanoseconds the product `secs * 1e9` alone falls below `i64::MIN`
        // even though the final sum is representable. Doing the arithmetic in
        // `i128` and narrowing once avoids rejecting those values.
        let total = i128::from(utc.timestamp()) * 1_000_000_000
            + i128::from(utc.timestamp_subsec_nanos());
        i64::try_from(total).ok()
    }
}
/// Adds the year-month interval `delta` to `timestamp`, doing the calendar
/// arithmetic in timezone `tz`.
///
/// Returns `None` if the timestamp cannot be converted to a datetime, the
/// month shift fails, or `T::make_value` cannot encode the result.
fn add_year_months<T: ArrowTimestampType>(
    timestamp: <T as ArrowPrimitiveType>::Native,
    delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
    tz: Tz,
) -> Option<<T as ArrowPrimitiveType>::Native> {
    let month_count = IntervalYearMonthType::to_months(delta);
    let start = as_datetime_with_timezone::<T>(timestamp, tz)?;
    let shifted = add_months_datetime(start, month_count)?;
    T::make_value(shifted.naive_utc())
}
/// Adds the day-time interval `delta` to `timestamp`, doing the calendar
/// arithmetic in timezone `tz`. Days are applied first, then milliseconds.
///
/// Returns `None` if any conversion or checked addition along the way fails.
fn add_day_time<T: ArrowTimestampType>(
    timestamp: <T as ArrowPrimitiveType>::Native,
    delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
    tz: Tz,
) -> Option<<T as ArrowPrimitiveType>::Native> {
    let (day_count, millis) = IntervalDayTimeType::to_parts(delta);
    let start = as_datetime_with_timezone::<T>(timestamp, tz)?;
    let after_days = add_days_datetime(start, day_count)?;
    let sub_day = Duration::try_milliseconds(i64::from(millis))?;
    let shifted = after_days.checked_add_signed(sub_day)?;
    T::make_value(shifted.naive_utc())
}
/// Adds the month-day-nano interval `delta` to `timestamp`, doing the calendar
/// arithmetic in timezone `tz`. Months are applied first, then days, then
/// nanoseconds.
///
/// Returns `None` if any conversion or checked addition along the way fails.
fn add_month_day_nano<T: ArrowTimestampType>(
    timestamp: <T as ArrowPrimitiveType>::Native,
    delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
    tz: Tz,
) -> Option<<T as ArrowPrimitiveType>::Native> {
    let (month_count, day_count, nano_count) = IntervalMonthDayNanoType::to_parts(delta);
    let start = as_datetime_with_timezone::<T>(timestamp, tz)?;
    let after_months = add_months_datetime(start, month_count)?;
    let after_days = add_days_datetime(after_months, day_count)?;
    let shifted = after_days.checked_add_signed(Duration::nanoseconds(nano_count))?;
    T::make_value(shifted.naive_utc())
}
/// Subtracts the year-month interval `delta` from `timestamp`, doing the
/// calendar arithmetic in timezone `tz`.
///
/// Returns `None` if the timestamp cannot be converted to a datetime, the
/// month shift fails, or `T::make_value` cannot encode the result.
fn subtract_year_months<T: ArrowTimestampType>(
    timestamp: <T as ArrowPrimitiveType>::Native,
    delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
    tz: Tz,
) -> Option<<T as ArrowPrimitiveType>::Native> {
    let month_count = IntervalYearMonthType::to_months(delta);
    let start = as_datetime_with_timezone::<T>(timestamp, tz)?;
    let shifted = sub_months_datetime(start, month_count)?;
    T::make_value(shifted.naive_utc())
}
/// Subtracts the day-time interval `delta` from `timestamp`, doing the
/// calendar arithmetic in timezone `tz`. Days are removed first, then
/// milliseconds.
///
/// Returns `None` if any conversion or checked subtraction along the way fails.
fn subtract_day_time<T: ArrowTimestampType>(
    timestamp: <T as ArrowPrimitiveType>::Native,
    delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
    tz: Tz,
) -> Option<<T as ArrowPrimitiveType>::Native> {
    let (day_count, millis) = IntervalDayTimeType::to_parts(delta);
    let start = as_datetime_with_timezone::<T>(timestamp, tz)?;
    let after_days = sub_days_datetime(start, day_count)?;
    let sub_day = Duration::try_milliseconds(i64::from(millis))?;
    let shifted = after_days.checked_sub_signed(sub_day)?;
    T::make_value(shifted.naive_utc())
}
/// Subtracts the month-day-nano interval `delta` from `timestamp`, doing the
/// calendar arithmetic in timezone `tz`. Months are removed first, then days,
/// then nanoseconds.
///
/// Returns `None` if any conversion or checked subtraction along the way fails.
fn subtract_month_day_nano<T: ArrowTimestampType>(
    timestamp: <T as ArrowPrimitiveType>::Native,
    delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
    tz: Tz,
) -> Option<<T as ArrowPrimitiveType>::Native> {
    let (month_count, day_count, nano_count) = IntervalMonthDayNanoType::to_parts(delta);
    let start = as_datetime_with_timezone::<T>(timestamp, tz)?;
    let after_months = sub_months_datetime(start, month_count)?;
    let after_days = sub_days_datetime(after_months, day_count)?;
    let shifted = after_days.checked_sub_signed(Duration::nanoseconds(nano_count))?;
    T::make_value(shifted.naive_utc())
}
// Interval arithmetic for second-resolution timestamps. Every method forwards
// to the module-level generic function of the same name, instantiated with
// `Self`, and returns `None` whenever that helper does.
impl TimestampSecondType {
/// Adds the year-month interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_year_months`).
pub fn add_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_year_months::<Self>(timestamp, delta, tz)
}
/// Adds the day-time interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_day_time`).
pub fn add_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_day_time::<Self>(timestamp, delta, tz)
}
/// Adds the month-day-nano interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_month_day_nano`).
pub fn add_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_month_day_nano::<Self>(timestamp, delta, tz)
}
/// Subtracts the year-month interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_year_months`).
pub fn subtract_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_year_months::<Self>(timestamp, delta, tz)
}
/// Subtracts the day-time interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_day_time`).
pub fn subtract_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_day_time::<Self>(timestamp, delta, tz)
}
/// Subtracts the month-day-nano interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_month_day_nano`).
pub fn subtract_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_month_day_nano::<Self>(timestamp, delta, tz)
}
}
// Interval arithmetic for microsecond-resolution timestamps. Every method
// forwards to the module-level generic function of the same name,
// instantiated with `Self`, and returns `None` whenever that helper does.
impl TimestampMicrosecondType {
/// Adds the year-month interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_year_months`).
pub fn add_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_year_months::<Self>(timestamp, delta, tz)
}
/// Adds the day-time interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_day_time`).
pub fn add_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_day_time::<Self>(timestamp, delta, tz)
}
/// Adds the month-day-nano interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_month_day_nano`).
pub fn add_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_month_day_nano::<Self>(timestamp, delta, tz)
}
/// Subtracts the year-month interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_year_months`).
pub fn subtract_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_year_months::<Self>(timestamp, delta, tz)
}
/// Subtracts the day-time interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_day_time`).
pub fn subtract_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_day_time::<Self>(timestamp, delta, tz)
}
/// Subtracts the month-day-nano interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_month_day_nano`).
pub fn subtract_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_month_day_nano::<Self>(timestamp, delta, tz)
}
}
// Interval arithmetic for millisecond-resolution timestamps. Every method
// forwards to the module-level generic function of the same name,
// instantiated with `Self`, and returns `None` whenever that helper does.
impl TimestampMillisecondType {
/// Adds the year-month interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_year_months`).
pub fn add_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_year_months::<Self>(timestamp, delta, tz)
}
/// Adds the day-time interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_day_time`).
pub fn add_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_day_time::<Self>(timestamp, delta, tz)
}
/// Adds the month-day-nano interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_month_day_nano`).
pub fn add_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_month_day_nano::<Self>(timestamp, delta, tz)
}
/// Subtracts the year-month interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_year_months`).
pub fn subtract_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_year_months::<Self>(timestamp, delta, tz)
}
/// Subtracts the day-time interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_day_time`).
pub fn subtract_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_day_time::<Self>(timestamp, delta, tz)
}
/// Subtracts the month-day-nano interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_month_day_nano`).
pub fn subtract_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_month_day_nano::<Self>(timestamp, delta, tz)
}
}
// Interval arithmetic for nanosecond-resolution timestamps. Every method
// forwards to the module-level generic function of the same name,
// instantiated with `Self`, and returns `None` whenever that helper does.
impl TimestampNanosecondType {
/// Adds the year-month interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_year_months`).
pub fn add_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_year_months::<Self>(timestamp, delta, tz)
}
/// Adds the day-time interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_day_time`).
pub fn add_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_day_time::<Self>(timestamp, delta, tz)
}
/// Adds the month-day-nano interval `delta` to `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `add_month_day_nano`).
pub fn add_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
add_month_day_nano::<Self>(timestamp, delta, tz)
}
/// Subtracts the year-month interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_year_months`).
pub fn subtract_year_months(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_year_months::<Self>(timestamp, delta, tz)
}
/// Subtracts the day-time interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_day_time`).
pub fn subtract_day_time(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_day_time::<Self>(timestamp, delta, tz)
}
/// Subtracts the month-day-nano interval `delta` from `timestamp`, evaluated in timezone `tz`.
///
/// Returns `None` if the computation fails (see the generic `subtract_month_day_nano`).
pub fn subtract_month_day_nano(
timestamp: <Self as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
tz: Tz,
) -> Option<<Self as ArrowPrimitiveType>::Native> {
subtract_month_day_nano::<Self>(timestamp, delta, tz)
}
}
impl IntervalYearMonthType {
/// Builds a year-month interval value as a total month count.
///
/// # Arguments
/// * `years` - number of years, counted as 12 months each
/// * `months` - additional months
///
/// The sum is computed with plain `i32` arithmetic, so very large inputs
/// overflow according to the build's integer-overflow settings.
#[inline]
pub fn make_value(
years: i32,
months: i32,
) -> <IntervalYearMonthType as ArrowPrimitiveType>::Native {
years * 12 + months
}
/// Returns the total number of months stored in `i`.
///
/// The native representation already is a month count, so the value is
/// returned unchanged.
#[inline]
pub fn to_months(i: <IntervalYearMonthType as ArrowPrimitiveType>::Native) -> i32 {
i
}
}
impl IntervalDayTimeType {
    /// Packs `days` and `millis` into a single native value.
    ///
    /// `days` occupies the upper 32 bits and `millis` the lower 32 bits; each
    /// field keeps its two's-complement bit pattern.
    #[inline]
    pub fn make_value(
        days: i32,
        millis: i32,
    ) -> <IntervalDayTimeType as ArrowPrimitiveType>::Native {
        // Reinterpreting as `u32` first keeps exactly the low 32 bits of each
        // field, which is what masking the sign-extended value achieves.
        let high = u64::from(days as u32) << 32;
        let low = u64::from(millis as u32);
        (high | low) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
    }

    /// Splits a native value back into `(days, milliseconds)`.
    #[inline]
    pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
        // Upper half holds the days; truncation keeps the lower half.
        ((i >> 32) as i32, i as i32)
    }
}
impl IntervalMonthDayNanoType {
    /// Packs `months`, `days` and `nanos` into a single native value.
    ///
    /// `months` occupies bits 96..128, `days` bits 64..96 and `nanos` the low
    /// 64 bits; each field keeps its two's-complement bit pattern.
    #[inline]
    pub fn make_value(
        months: i32,
        days: i32,
        nanos: i64,
    ) -> <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native {
        // Reinterpreting as the unsigned type of the same width keeps exactly
        // the field's own bits, matching a mask of the sign-extended value.
        let month_bits = u128::from(months as u32) << 96;
        let day_bits = u128::from(days as u32) << 64;
        let nano_bits = u128::from(nanos as u64);
        (month_bits | day_bits | nano_bits)
            as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
    }

    /// Splits a native value back into `(months, days, nanoseconds)`.
    #[inline]
    pub fn to_parts(
        i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
    ) -> (i32, i32, i64) {
        // Each cast truncates to the width of the field being extracted.
        ((i >> 96) as i32, (i >> 64) as i32, i as i64)
    }
}
impl Date32Type {
    /// Converts a day count since the UNIX epoch into a [`NaiveDate`].
    ///
    /// # Panics
    /// Panics if the day count cannot be turned into a `Duration`, or if
    /// adding it to the epoch fails inside chrono.
    pub fn to_naive_date(i: <Date32Type as ArrowPrimitiveType>::Native) -> NaiveDate {
        let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
        let elapsed = Duration::try_days(i64::from(i)).unwrap();
        unix_epoch.add(elapsed)
    }

    /// Converts a [`NaiveDate`] into a day count since the UNIX epoch.
    ///
    /// The day difference is narrowed to the native type with an `as` cast.
    pub fn from_naive_date(d: NaiveDate) -> <Date32Type as ArrowPrimitiveType>::Native {
        let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
        let elapsed = d.sub(unix_epoch);
        elapsed.num_days() as <Date32Type as ArrowPrimitiveType>::Native
    }

    /// Adds the year-month interval `delta` to `date`.
    pub fn add_year_months(
        date: <Date32Type as ArrowPrimitiveType>::Native,
        delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
    ) -> <Date32Type as ArrowPrimitiveType>::Native {
        let start = Self::to_naive_date(date);
        let shifted = shift_months(start, IntervalYearMonthType::to_months(delta));
        Self::from_naive_date(shifted)
    }

    /// Adds the day-time interval `delta` to `date`: days first, then
    /// milliseconds.
    ///
    /// NOTE(review): the intermediate value is a `NaiveDate`, so the
    /// millisecond part presumably only matters in whole days; confirm
    /// against chrono's `Add<Duration>` for `NaiveDate`.
    pub fn add_day_time(
        date: <Date32Type as ArrowPrimitiveType>::Native,
        delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
    ) -> <Date32Type as ArrowPrimitiveType>::Native {
        let (day_count, millis) = IntervalDayTimeType::to_parts(delta);
        let shifted = Self::to_naive_date(date)
            .add(Duration::try_days(i64::from(day_count)).unwrap())
            .add(Duration::try_milliseconds(i64::from(millis)).unwrap());
        Self::from_naive_date(shifted)
    }

    /// Adds the month-day-nano interval `delta` to `date`: months first, then
    /// days, then nanoseconds.
    pub fn add_month_day_nano(
        date: <Date32Type as ArrowPrimitiveType>::Native,
        delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
    ) -> <Date32Type as ArrowPrimitiveType>::Native {
        let (month_count, day_count, nano_count) = IntervalMonthDayNanoType::to_parts(delta);
        let shifted = shift_months(Self::to_naive_date(date), month_count)
            .add(Duration::try_days(i64::from(day_count)).unwrap())
            .add(Duration::nanoseconds(nano_count));
        Self::from_naive_date(shifted)
    }

    /// Subtracts the year-month interval `delta` from `date` by shifting by
    /// the negated month count.
    pub fn subtract_year_months(
        date: <Date32Type as ArrowPrimitiveType>::Native,
        delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
    ) -> <Date32Type as ArrowPrimitiveType>::Native {
        let start = Self::to_naive_date(date);
        let shifted = shift_months(start, IntervalYearMonthType::to_months(-delta));
        Self::from_naive_date(shifted)
    }

    /// Subtracts the day-time interval `delta` from `date`: days first, then
    /// milliseconds.
    pub fn subtract_day_time(
        date: <Date32Type as ArrowPrimitiveType>::Native,
        delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
    ) -> <Date32Type as ArrowPrimitiveType>::Native {
        let (day_count, millis) = IntervalDayTimeType::to_parts(delta);
        let shifted = Self::to_naive_date(date)
            .sub(Duration::try_days(i64::from(day_count)).unwrap())
            .sub(Duration::try_milliseconds(i64::from(millis)).unwrap());
        Self::from_naive_date(shifted)
    }

    /// Subtracts the month-day-nano interval `delta` from `date`: months
    /// first (as a negated shift), then days, then nanoseconds.
    pub fn subtract_month_day_nano(
        date: <Date32Type as ArrowPrimitiveType>::Native,
        delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
    ) -> <Date32Type as ArrowPrimitiveType>::Native {
        let (month_count, day_count, nano_count) = IntervalMonthDayNanoType::to_parts(delta);
        let shifted = shift_months(Self::to_naive_date(date), -month_count)
            .sub(Duration::try_days(i64::from(day_count)).unwrap())
            .sub(Duration::nanoseconds(nano_count));
        Self::from_naive_date(shifted)
    }
}
impl Date64Type {
pub fn to_naive_date(i: <Date64Type as ArrowPrimitiveType>::Native) -> NaiveDate {
let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
epoch.add(Duration::try_milliseconds(i).unwrap())
}
pub fn from_naive_date(d: NaiveDate) -> <Date64Type as ArrowPrimitiveType>::Native {
let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
d.sub(epoch).num_milliseconds() as <Date64Type as ArrowPrimitiveType>::Native
}
pub fn add_year_months(
date: <Date64Type as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
) -> <Date64Type as ArrowPrimitiveType>::Native {
let prior = Date64Type::to_naive_date(date);
let months = IntervalYearMonthType::to_months(delta);
let posterior = shift_months(prior, months);
Date64Type::from_naive_date(posterior)
}
pub fn add_day_time(
date: <Date64Type as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
) -> <Date64Type as ArrowPrimitiveType>::Native {
let (days, ms) = IntervalDayTimeType::to_parts(delta);
let res = Date64Type::to_naive_date(date);
let res = res.add(Duration::try_days(days as i64).unwrap());
let res = res.add(Duration::try_milliseconds(ms as i64).unwrap());
Date64Type::from_naive_date(res)
}
pub fn add_month_day_nano(
date: <Date64Type as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> <Date64Type as ArrowPrimitiveType>::Native {
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(delta);
let res = Date64Type::to_naive_date(date);
let res = shift_months(res, months);
let res = res.add(Duration::try_days(days as i64).unwrap());
let res = res.add(Duration::nanoseconds(nanos));
Date64Type::from_naive_date(res)
}
pub fn subtract_year_months(
date: <Date64Type as ArrowPrimitiveType>::Native,
delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
) -> <Date64Type as ArrowPrimitiveType>::Native {
let prior = Date64Type::to_naive_date(date);
let months = IntervalYearMonthType::to_months(-delta);
let posterior = shift_months(prior, months);
Date64Type::from_naive_date(posterior)
}
pub fn subtract_day_time(
date: <Date64Type as ArrowPrimitiveType>::Native,
delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
) -> <Date64Type as ArrowPrimitiveType>::Native {
let (days, ms) = IntervalDayTimeType::to_parts(delta);
let res = Date64Type::to_naive_date(date);
let res = res.sub(Duration::try_days(days as i64).unwrap());
let res = res.sub(Duration::try_milliseconds(ms as i64).unwrap());
Date64Type::from_naive_date(res)
}
pub fn subtract_month_day_nano(
date: <Date64Type as ArrowPrimitiveType>::Native,
delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> <Date64Type as ArrowPrimitiveType>::Native {
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(delta);
let res = Date64Type::to_naive_date(date);
let res = shift_months(res, -months);
let res = res.sub(Duration::try_days(days as i64).unwrap());
let res = res.sub(Duration::nanoseconds(nanos));
Date64Type::from_naive_date(res)
}
}
/// Private module holding the sealing trait for [`DecimalType`].
mod decimal {
use super::*;
/// Supertrait of `DecimalType`; implemented only for the two decimal
/// marker types below.
pub trait DecimalTypeSealed {}
impl DecimalTypeSealed for Decimal128Type {}
impl DecimalTypeSealed for Decimal256Type {}
}
/// Describes a fixed-width decimal type: its storage width, precision and
/// scale limits, and helpers for formatting and validating values.
///
/// Sealed via `decimal::DecimalTypeSealed`, which lives in a private module.
pub trait DecimalType:
'static + Send + Sync + ArrowPrimitiveType + decimal::DecimalTypeSealed
{
/// Width of the native value in bytes.
const BYTE_LENGTH: usize;
/// Largest precision this type supports.
const MAX_PRECISION: u8;
/// Largest scale this type supports.
const MAX_SCALE: i8;
/// Builds the matching [`DataType`] from a `(precision, scale)` pair.
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
/// The [`DataType`] used when no precision/scale is specified.
const DEFAULT_TYPE: DataType;
/// Short name of the type, e.g. `"Decimal128"`.
const PREFIX: &'static str;
/// Formats `value` as a decimal string using `precision` and `scale`.
fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String;
/// Checks that `value` is valid for the given `precision`, returning an
/// [`ArrowError`] otherwise.
fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>;
}
/// Checks that `precision` and `scale` form a valid combination for the
/// decimal type `T`.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] for the first rule violated,
/// checked in this order:
/// 1. `precision` is zero,
/// 2. `precision` exceeds `T::MAX_PRECISION`,
/// 3. `scale` exceeds `T::MAX_SCALE`,
/// 4. a positive `scale` exceeds `precision`.
pub fn validate_decimal_precision_and_scale<T: DecimalType>(
    precision: u8,
    scale: i8,
) -> Result<(), ArrowError> {
    let violation = if precision == 0 {
        Some(format!(
            "precision cannot be 0, has to be between [1, {}]",
            T::MAX_PRECISION
        ))
    } else if precision > T::MAX_PRECISION {
        Some(format!(
            "precision {} is greater than max {}",
            precision,
            T::MAX_PRECISION
        ))
    } else if scale > T::MAX_SCALE {
        Some(format!(
            "scale {} is greater than max {}",
            scale,
            T::MAX_SCALE
        ))
    } else if scale > 0 && scale as u8 > precision {
        // The cast is lossless here because `scale` is known to be positive.
        Some(format!(
            "scale {scale} is greater than precision {precision}"
        ))
    } else {
        None
    };

    match violation {
        Some(message) => Err(ArrowError::InvalidArgumentError(message)),
        None => Ok(()),
    }
}
/// Marker type for 128-bit decimals, stored natively as `i128`.
#[derive(Debug)]
pub struct Decimal128Type {}
impl DecimalType for Decimal128Type {
const BYTE_LENGTH: usize = 16;
const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
const DEFAULT_TYPE: DataType =
DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
const PREFIX: &'static str = "Decimal128";
// Formats the integer's decimal text through the shared string helper.
fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String {
format_decimal_str(&value.to_string(), precision as usize, scale)
}
fn validate_decimal_precision(num: i128, precision: u8) -> Result<(), ArrowError> {
// Not recursion: the unqualified name resolves to the free function
// imported from `arrow_data::decimal`.
validate_decimal_precision(num, precision)
}
}
impl ArrowPrimitiveType for Decimal128Type {
type Native = i128;
// Uses the default precision/scale since the marker type carries neither.
const DATA_TYPE: DataType = <Self as DecimalType>::DEFAULT_TYPE;
}
impl primitive::PrimitiveTypeSealed for Decimal128Type {}
/// Marker type for 256-bit decimals, stored natively as `i256`.
#[derive(Debug)]
pub struct Decimal256Type {}
impl DecimalType for Decimal256Type {
const BYTE_LENGTH: usize = 32;
const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
const DEFAULT_TYPE: DataType =
DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
const PREFIX: &'static str = "Decimal256";
// Formats the integer's decimal text through the shared string helper.
fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String {
format_decimal_str(&value.to_string(), precision as usize, scale)
}
// Delegates to the 256-bit validator imported from `arrow_data::decimal`.
fn validate_decimal_precision(num: i256, precision: u8) -> Result<(), ArrowError> {
validate_decimal256_precision(num, precision)
}
}
impl ArrowPrimitiveType for Decimal256Type {
type Native = i256;
// Uses the default precision/scale since the marker type carries neither.
const DATA_TYPE: DataType = <Self as DecimalType>::DEFAULT_TYPE;
}
impl primitive::PrimitiveTypeSealed for Decimal256Type {}
/// Renders the integer text `value_str` (optionally prefixed with `-`) as a
/// decimal number with the given `precision` and `scale`.
///
/// * At most `precision` digits (plus the sign) are kept from the front.
/// * `scale == 0` returns the kept text unchanged.
/// * A negative `scale` appends `|scale|` zeros.
/// * A positive `scale` inserts a decimal point `scale` digits from the end,
///   or emits `0.` followed by zero-padded digits when there are not more
///   digits than `scale`.
fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -> String {
    let digits = value_str.strip_prefix('-').unwrap_or(value_str);
    let sign = if digits.len() < value_str.len() { "-" } else { "" };
    // Shadow the input with its truncated form; `digits` stays untruncated.
    let value_str = &value_str[..sign.len() + digits.len().min(precision)];

    match scale {
        0 => value_str.to_string(),
        i8::MIN..=-1 => {
            let padding = value_str.len() + usize::from(scale.unsigned_abs());
            format!("{value_str:0<padding$}")
        }
        _ => {
            let width = scale as usize;
            if digits.len() > width {
                let (whole, decimal) = value_str.split_at(value_str.len() - width);
                format!("{whole}.{decimal}")
            } else {
                format!("{}0.{:0>width$}", sign, digits, width = width)
            }
        }
    }
}
/// Crate-internal support traits for variable-length byte array types.
pub(crate) mod bytes {
use super::*;
/// Sealing trait for `ByteArrayType`; implemented only for the generic
/// string and binary marker types.
pub trait ByteArrayTypeSealed {}
impl<O: OffsetSizeTrait> ByteArrayTypeSealed for GenericStringType<O> {}
impl<O: OffsetSizeTrait> ByteArrayTypeSealed for GenericBinaryType<O> {}
/// Unsized native types (`[u8]`, `str`) that can be viewed over raw bytes.
pub trait ByteArrayNativeType: std::fmt::Debug + Send + Sync {
/// Reinterprets `b` as `Self` without validating its contents.
///
/// # Safety
///
/// `b` must be a valid byte representation of `Self`.
unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self;
}
impl ByteArrayNativeType for [u8] {
/// Returns `b` unchanged; any byte slice is a valid `[u8]`.
#[inline]
unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
b
}
}
impl ByteArrayNativeType for str {
/// # Safety
///
/// `b` must be valid UTF-8, as required by `std::str::from_utf8_unchecked`.
#[inline]
unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
std::str::from_utf8_unchecked(b)
}
}
}
/// Describes a variable-length byte array type addressed through an offsets
/// buffer.
///
/// Sealed via `bytes::ByteArrayTypeSealed`.
pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed {
/// Integer type used for the offsets.
type Offset: OffsetSizeTrait;
/// Unsized native type of a single value (`str` or `[u8]` in this file).
type Native: bytes::ByteArrayNativeType + AsRef<Self::Native> + AsRef<[u8]> + ?Sized;
/// Short name of the type family, e.g. `"String"` or `"Binary"`.
const PREFIX: &'static str;
/// The Arrow [`DataType`] for this offset width.
const DATA_TYPE: DataType;
/// Checks that `offsets` are valid for `values`, returning an
/// [`ArrowError`] describing the first problem found.
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError>;
}
/// Marker type for UTF-8 string arrays with offsets of type `O`.
pub struct GenericStringType<O: OffsetSizeTrait> {
// Only records the offset type; no value of `O` is stored.
phantom: PhantomData<O>,
}
impl<O: OffsetSizeTrait> ByteArrayType for GenericStringType<O> {
    type Offset = O;
    type Native = str;
    const PREFIX: &'static str = "String";
    const DATA_TYPE: DataType = if O::IS_LARGE {
        DataType::LargeUtf8
    } else {
        DataType::Utf8
    };

    /// Checks that `values` is valid UTF-8 and that every offset falls on a
    /// character boundary inside it.
    ///
    /// # Errors
    /// Returns [`ArrowError::InvalidArgumentError`] if `values` is not UTF-8,
    /// or for the first offset that either splits a code point or lies beyond
    /// the end of `values`.
    fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError> {
        let text = match std::str::from_utf8(values) {
            Ok(text) => text,
            Err(e) => {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "Encountered non UTF-8 data: {e}"
                )))
            }
        };

        // `is_char_boundary` is false both inside a code point and past the
        // end of the string; the length comparison below tells them apart.
        let first_invalid = offsets
            .iter()
            .map(|offset| offset.as_usize())
            .find(|&o| !text.is_char_boundary(o));

        match first_invalid {
            None => Ok(()),
            Some(o) if o < text.len() => Err(ArrowError::InvalidArgumentError(format!(
                "Split UTF-8 codepoint at offset {o}"
            ))),
            Some(o) => Err(ArrowError::InvalidArgumentError(format!(
                "Offset of {o} exceeds length of values {}",
                text.len()
            ))),
        }
    }
}
/// A [`GenericStringType`] whose offsets are `i32`.
pub type Utf8Type = GenericStringType<i32>;
/// A [`GenericStringType`] whose offsets are `i64`.
pub type LargeUtf8Type = GenericStringType<i64>;
/// Type-level marker for opaque binary byte arrays, parameterised by the offset type `O`.
///
/// The only field is a `PhantomData`, so a value of this type carries no data.
pub struct GenericBinaryType<O: OffsetSizeTrait> {
// Ties the otherwise unused parameter `O` to the struct.
phantom: PhantomData<O>,
}
impl<O: OffsetSizeTrait> ByteArrayType for GenericBinaryType<O> {
    type Offset = O;
    type Native = [u8];
    const PREFIX: &'static str = "Binary";
    // Large offsets select the `LargeBinary` data type, otherwise `Binary`.
    const DATA_TYPE: DataType = if O::IS_LARGE {
        DataType::LargeBinary
    } else {
        DataType::Binary
    };

    /// Verifies that the final offset does not point past the end of `values`.
    ///
    /// Only the last offset is inspected; this treats it as the largest one, which
    /// relies on the ordering guarantee of `OffsetBuffer`.
    ///
    /// # Errors
    ///
    /// Returns `ArrowError::InvalidArgumentError` when the last offset is greater than
    /// `values.len()`.
    ///
    /// # Panics
    ///
    /// Panics if `offsets` is empty.
    fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError> {
        let max_offset = offsets.last().unwrap().as_usize();
        if max_offset <= values.len() {
            return Ok(());
        }
        Err(ArrowError::InvalidArgumentError(format!(
            "Maximum offset of {max_offset} is larger than values of length {}",
            values.len()
        )))
    }
}
/// A [`GenericBinaryType`] whose offsets are `i32`.
pub type BinaryType = GenericBinaryType<i32>;
/// A [`GenericBinaryType`] whose offsets are `i64`.
pub type LargeBinaryType = GenericBinaryType<i64>;
// Private module holding the `Sealed` supertrait of `ByteViewType`. Because the module
// is not exported, `Sealed` can only be implemented from within this crate; the two
// impls below are for the view types defined in this file.
mod byte_view {
use crate::types::{BinaryViewType, StringViewType};
/// Marker supertrait restricting which types may implement `ByteViewType`.
pub trait Sealed: Send + Sync {}
impl Sealed for StringViewType {}
impl Sealed for BinaryViewType {}
}
/// Describes a byte-view array type (string view or binary view).
///
/// The `byte_view::Sealed` supertrait lives in a private module, so implementors are
/// limited to this crate; in this file they are [`StringViewType`] and [`BinaryViewType`].
pub trait ByteViewType: byte_view::Sealed + 'static + PartialEq + Send + Sync {
/// `true` when the type holds UTF-8 strings, `false` for opaque binary data.
const IS_UTF8: bool;
/// Arrow [`DataType`] of the type. The default derives it from [`Self::IS_UTF8`]:
/// `Utf8View` when it is `true`, `BinaryView` otherwise.
const DATA_TYPE: DataType = if Self::IS_UTF8 {
DataType::Utf8View
} else {
DataType::BinaryView
};
/// Short label for the type; `"String"` and `"Binary"` for the implementors in this file.
const PREFIX: &'static str;
/// Unsized native type through which values are viewed (`str` or `[u8]` for the
/// implementors in this file); it must also be viewable as raw bytes via `AsRef<[u8]>`.
type Native: bytes::ByteArrayNativeType + AsRef<Self::Native> + AsRef<[u8]> + ?Sized;
/// Owned counterpart of [`Self::Native`] (`String` or `Vec<u8>` for the implementors
/// in this file).
type Owned: Debug + Clone + Sync + Send + AsRef<Self::Native>;
/// Checks `views` against the data `buffers`.
///
/// # Errors
///
/// Returns an [`ArrowError`] when the implementation's checks fail; what exactly is
/// checked is defined by each implementation.
fn validate(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError>;
}
/// Type-level marker for string view arrays; it has no fields and carries no data.
#[derive(PartialEq)]
pub struct StringViewType {}
impl ByteViewType for StringViewType {
// `true` makes the trait's default `DATA_TYPE` resolve to `DataType::Utf8View`.
const IS_UTF8: bool = true;
const PREFIX: &'static str = "String";
type Native = str;
type Owned = String;
// Validation is delegated entirely to `arrow_data::validate_string_view`.
fn validate(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError> {
validate_string_view(views, buffers)
}
}
/// Type-level marker for binary view arrays; it has no fields and carries no data.
#[derive(PartialEq)]
pub struct BinaryViewType {}
impl ByteViewType for BinaryViewType {
// `false` makes the trait's default `DATA_TYPE` resolve to `DataType::BinaryView`.
const IS_UTF8: bool = false;
const PREFIX: &'static str = "Binary";
type Native = [u8];
type Owned = Vec<u8>;
// Validation is delegated entirely to `arrow_data::validate_binary_view`.
fn validate(views: &[u128], buffers: &[Buffer]) -> Result<(), ArrowError> {
validate_binary_view(views, buffers)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_data::{layout, BufferSpec};

    // Interval values are packed by `make_value`; each test checks that unpacking
    // returns the original components, for both positive and negative inputs.

    #[test]
    fn month_day_nano_should_roundtrip() {
        let value = IntervalMonthDayNanoType::make_value(1, 2, 3);
        assert_eq!(IntervalMonthDayNanoType::to_parts(value), (1, 2, 3));
    }

    #[test]
    fn month_day_nano_should_roundtrip_neg() {
        let value = IntervalMonthDayNanoType::make_value(-1, -2, -3);
        assert_eq!(IntervalMonthDayNanoType::to_parts(value), (-1, -2, -3));
    }

    #[test]
    fn day_time_should_roundtrip() {
        let value = IntervalDayTimeType::make_value(1, 2);
        assert_eq!(IntervalDayTimeType::to_parts(value), (1, 2));
    }

    #[test]
    fn day_time_should_roundtrip_neg() {
        let value = IntervalDayTimeType::make_value(-1, -2);
        assert_eq!(IntervalDayTimeType::to_parts(value), (-1, -2));
    }

    // Year-month intervals are read back as a month count: 1 year + 2 months = 14 months.
    #[test]
    fn year_month_should_roundtrip() {
        let value = IntervalYearMonthType::make_value(1, 2);
        assert_eq!(IntervalYearMonthType::to_months(value), 14);
    }

    #[test]
    fn year_month_should_roundtrip_neg() {
        let value = IntervalYearMonthType::make_value(-1, -2);
        assert_eq!(IntervalYearMonthType::to_months(value), -14);
    }

    /// Asserts that the layout reported for `T::DATA_TYPE` consists of exactly one
    /// fixed-width buffer whose byte width and alignment equal those of `T::Native`.
    fn test_layout<T: ArrowPrimitiveType>() {
        let layout = layout(&T::DATA_TYPE);
        assert_eq!(layout.buffers.len(), 1);
        let spec = &layout.buffers[0];
        assert_eq!(
            spec,
            &BufferSpec::FixedWidth {
                byte_width: std::mem::size_of::<T::Native>(),
                alignment: std::mem::align_of::<T::Native>(),
            }
        );
    }

    /// Runs the layout check once for each primitive type listed below.
    #[test]
    fn test_layouts() {
        test_layout::<Int8Type>();
        test_layout::<Int16Type>();
        test_layout::<Int32Type>();
        test_layout::<Int64Type>();
        test_layout::<UInt8Type>();
        test_layout::<UInt16Type>();
        test_layout::<UInt32Type>();
        test_layout::<UInt64Type>();
        test_layout::<Float16Type>();
        test_layout::<Float32Type>();
        test_layout::<Float64Type>();
        test_layout::<Decimal128Type>();
        test_layout::<Decimal256Type>();
        // Each timestamp unit is checked once (the nanosecond type used to be listed twice).
        test_layout::<TimestampSecondType>();
        test_layout::<TimestampMillisecondType>();
        test_layout::<TimestampMicrosecondType>();
        test_layout::<TimestampNanosecondType>();
        test_layout::<Date32Type>();
        test_layout::<Date64Type>();
        test_layout::<Time32SecondType>();
        test_layout::<Time32MillisecondType>();
        test_layout::<Time64MicrosecondType>();
        test_layout::<Time64NanosecondType>();
        test_layout::<IntervalMonthDayNanoType>();
        test_layout::<IntervalDayTimeType>();
        test_layout::<IntervalYearMonthType>();
        test_layout::<DurationNanosecondType>();
        test_layout::<DurationMicrosecondType>();
        test_layout::<DurationMillisecondType>();
        test_layout::<DurationSecondType>();
    }
}