use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::ArrowNativeTypeOp;
use arrow_buffer::ArrowNativeType;
use arrow_schema::ArrowError;
use chrono::prelude::*;
use half::f16;
use std::str::FromStr;
/// Interprets the first `N` entries of `digits` as the leading digits of a
/// fractional second and returns the value in nanoseconds.
///
/// `O` is subtracted (wrapping) from every entry before use: pass `b'0'` for
/// ASCII text and `0` when the entries already hold numeric values.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for entry in &digits[..N] {
        value = value * 10 + u32::from(entry.wrapping_sub(O));
    }
    // Pad on the right so that `N` digits fill the top of the nine-digit range.
    let padding = 10_u32.pow((9 - N) as u32);
    value * padding
}
/// Pre-processed view of the first 32 bytes of a timestamp string.
struct TimestampParser {
/// Input bytes with `b'0'` subtracted (wrapping); positions past the input stay 0.
digits: [u8; 32],
/// Bit `i` is set when `digits[i]` is below 10, i.e. the input byte was an ASCII digit.
mask: u32,
}
impl TimestampParser {
    /// Builds a parser over at most the first 32 bytes of `bytes`.
    fn new(bytes: &[u8]) -> Self {
        let mut digits = [0_u8; 32];
        let mut mask = 0_u32;
        // Every byte is treated the same way: store it relative to `b'0'` and
        // flag its position in `mask` when the result is a decimal digit.
        for (pos, byte) in bytes.iter().take(digits.len()).enumerate() {
            let value = byte.wrapping_sub(b'0');
            digits[pos] = value;
            if value < 10 {
                mask |= 1 << pos;
            }
        }
        Self { digits, mask }
    }

    /// Returns true if the input byte at position `idx` was `b`.
    fn test(&self, idx: usize, b: u8) -> bool {
        b.wrapping_sub(b'0') == self.digits[idx]
    }

    /// Parses a `YYYY-MM-DD` date from positions 0 to 9.
    fn date(&self) -> Option<NaiveDate> {
        // Positions 0-3, 5-6 and 8-9 must be digits, 4 and 7 must be hyphens.
        let shape_ok = self.mask & 0b1111111111 == 0b1101101111;
        let separators_ok = self.test(4, b'-') && self.test(7, b'-');
        if !(shape_ok && separators_ok) {
            return None;
        }
        let d = &self.digits;
        let year = d[..4]
            .iter()
            .fold(0_u16, |acc, &digit| acc * 10 + u16::from(digit));
        let month = d[5] * 10 + d[6];
        let day = d[8] * 10 + d[9];
        NaiveDate::from_ymd_opt(i32::from(year), u32::from(month), u32::from(day))
    }

    /// Parses the time that starts at position 11.
    ///
    /// Accepts `HH:MM:SS` with an optional `.fraction`, or the compact `HHMMSS`
    /// form. Returns the time together with the position of the first byte
    /// after it.
    fn time(&self) -> Option<(NaiveTime, usize)> {
        // A seconds value of 60 is passed on as second 59 plus one extra
        // second's worth of nanoseconds.
        let build = |hour: u8, minute: u8, second: u8, nanos: u32| {
            if second == 60 {
                NaiveTime::from_hms_nano_opt(
                    u32::from(hour),
                    u32::from(minute),
                    59,
                    1_000_000_000 + nanos,
                )
            } else {
                NaiveTime::from_hms_nano_opt(
                    u32::from(hour),
                    u32::from(minute),
                    u32::from(second),
                    nanos,
                )
            }
        };
        // Two-digit number starting at position `i`.
        let pair = |i: usize| self.digits[i] * 10 + self.digits[i + 1];

        let shape = (self.mask >> 11) & 0b11111111;
        if shape == 0b11011011 && self.test(13, b':') && self.test(16, b':') {
            let (hour, minute, second) = (pair(11), pair(14), pair(17));
            if !self.test(19, b'.') {
                return Some((build(hour, minute, second, 0)?, 19));
            }
            // Number of consecutive digits following the decimal point.
            let count = (self.mask >> 20).trailing_ones();
            let fraction = &self.digits[20..];
            let nanos = match count {
                0 => return None,
                1 => parse_nanos::<1, 0>(fraction),
                2 => parse_nanos::<2, 0>(fraction),
                3 => parse_nanos::<3, 0>(fraction),
                4 => parse_nanos::<4, 0>(fraction),
                5 => parse_nanos::<5, 0>(fraction),
                6 => parse_nanos::<6, 0>(fraction),
                7 => parse_nanos::<7, 0>(fraction),
                8 => parse_nanos::<8, 0>(fraction),
                // Digits beyond the ninth are below nanosecond resolution.
                _ => parse_nanos::<9, 0>(fraction),
            };
            Some((build(hour, minute, second, nanos)?, 20 + count as usize))
        } else if shape == 0b111111 {
            let time = build(pair(11), pair(13), pair(15), 0)?;
            Some((time, 17))
        } else {
            None
        }
    }
}
pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
let err =
|ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
let bytes = s.as_bytes();
if bytes.len() < 10 {
return Err(err("timestamp must contain at least 10 characters"));
}
let parser = TimestampParser::new(bytes);
let date = parser.date().ok_or_else(|| err("error parsing date"))?;
if bytes.len() == 10 {
let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
return timezone
.from_local_datetime(&datetime)
.single()
.ok_or_else(|| err("error computing timezone offset"));
}
if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
return Err(err("invalid timestamp separator"));
}
let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
let datetime = date.and_time(time);
if tz_offset == 32 {
while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
tz_offset += 1;
}
}
if bytes.len() <= tz_offset {
return timezone
.from_local_datetime(&datetime)
.single()
.ok_or_else(|| err("error computing timezone offset"));
}
if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
return Ok(timezone.from_utc_datetime(&datetime));
}
let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
let parsed = parsed_tz
.from_local_datetime(&datetime)
.single()
.ok_or_else(|| err("error computing timezone offset"))?;
Ok(parsed.with_timezone(timezone))
}
/// Parses `s` with [`string_to_datetime`] in UTC and returns nanoseconds since
/// the Unix epoch.
///
/// # Errors
///
/// Fails if the string cannot be parsed or the instant does not fit in an
/// `i64` nanosecond count.
#[inline]
pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
    let parsed = string_to_datetime(&Utc, s)?;
    to_timestamp_nanos(parsed.naive_utc())
}
/// Converts a naive datetime, taken as UTC, to nanoseconds since the Unix epoch.
///
/// Returns a parse error when the value is outside the range chrono can
/// express as `i64` nanoseconds.
#[inline]
fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
    match dt.and_utc().timestamp_nanos_opt() {
        Some(nanos) => Ok(nanos),
        None => Err(ArrowError::ParseError(
            ERR_NANOSECONDS_NOT_SUPPORTED.to_string(),
        )),
    }
}
/// Parses a time-of-day string and returns nanoseconds since midnight.
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] when `s` is not a recognised time.
pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
    let Some(time) = string_to_time(s) else {
        return Err(ArrowError::ParseError(format!(
            "Failed to parse \'{s}\' as time"
        )));
    };
    let whole_seconds = i64::from(time.num_seconds_from_midnight());
    Ok(whole_seconds * 1_000_000_000 + i64::from(time.nanosecond()))
}
/// Parses a time of day such as `H:MM`, `HH:MM`, `HH:MM:SS` or
/// `HH:MM:SS.fraction`, optionally followed by an ` AM` / ` PM` suffix in any
/// letter case.
///
/// Returns `None` for anything that does not match or is out of range.
fn string_to_time(s: &str) -> Option<NaiveTime> {
    let raw = s.as_bytes();
    if raw.len() < 4 {
        return None;
    }

    // Split off a meridiem suffix if there is one.
    let (body, suffix) = raw.split_at(raw.len() - 3);
    let (am, bytes) = if suffix.eq_ignore_ascii_case(b" am") {
        (Some(true), body)
    } else if suffix.eq_ignore_ascii_case(b" pm") {
        (Some(false), body)
    } else {
        (None, raw)
    };
    if bytes.len() < 4 {
        return None;
    }

    // ASCII digits for HHMMSS; unspecified positions stay '0'.
    let mut digits = [b'0'; 6];

    // The hour is one or two characters, terminated by ':'.
    let rest = match bytes {
        [hour, b':', rest @ ..] => {
            digits[1] = *hour;
            rest
        }
        [tens, ones, b':', rest @ ..] => {
            digits[0] = *tens;
            digits[1] = *ones;
            rest
        }
        _ => return None,
    };

    // The minute is always two characters.
    let [minute_tens, minute_ones, tail @ ..] = rest else {
        return None;
    };
    digits[2] = *minute_tens;
    digits[3] = *minute_ones;

    // Optional ":SS" and, after that, an optional ".fraction".
    let nanoseconds = match tail {
        [] => 0,
        [b':', second_tens, second_ones, fraction @ ..] => {
            digits[4] = *second_tens;
            digits[5] = *second_ones;
            match fraction {
                [] => 0,
                [b'.', decimal @ ..] => {
                    if !decimal.iter().all(u8::is_ascii_digit) {
                        return None;
                    }
                    match decimal.len() {
                        0 => return None,
                        1 => parse_nanos::<1, b'0'>(decimal),
                        2 => parse_nanos::<2, b'0'>(decimal),
                        3 => parse_nanos::<3, b'0'>(decimal),
                        4 => parse_nanos::<4, b'0'>(decimal),
                        5 => parse_nanos::<5, b'0'>(decimal),
                        6 => parse_nanos::<6, b'0'>(decimal),
                        7 => parse_nanos::<7, b'0'>(decimal),
                        8 => parse_nanos::<8, b'0'>(decimal),
                        _ => parse_nanos::<9, b'0'>(decimal),
                    }
                }
                _ => return None,
            }
        }
        _ => return None,
    };

    // Convert from ASCII and reject anything that was not a digit.
    for digit in digits.iter_mut() {
        *digit = digit.wrapping_sub(b'0');
    }
    if digits.iter().any(|digit| *digit > 9) {
        return None;
    }

    let raw_hour = digits[0] * 10 + digits[1];
    let hour = match am {
        None => raw_hour,
        // With a meridiem suffix only hours 1 through 12 are meaningful.
        Some(is_am) => match (raw_hour, is_am) {
            (12, true) => 0,
            (1..=11, true) => raw_hour,
            (12, false) => 12,
            (1..=11, false) => raw_hour + 12,
            _ => return None,
        },
    };
    let minute = digits[2] * 10 + digits[3];
    // A seconds value of 60 is passed on as second 59 plus one extra second's
    // worth of nanoseconds.
    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
        60 => (59, nanoseconds + 1_000_000_000),
        other => (other, nanoseconds),
    };

    NaiveTime::from_hms_nano_opt(
        u32::from(hour),
        u32::from(minute),
        u32::from(second),
        nanoseconds,
    )
}
/// Parses strings into the native value of an Arrow primitive type.
pub trait Parser: ArrowPrimitiveType {
/// Parses `string`, returning `None` when it is not a valid value.
fn parse(string: &str) -> Option<Self::Native>;
/// Parses `string` according to `format`.
///
/// The default implementation ignores `format` and delegates to [`Parser::parse`].
fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
Self::parse(string)
}
}
impl Parser for Float16Type {
    /// Parses the text as an `f32` and converts the result to `f16`.
    fn parse(string: &str) -> Option<f16> {
        let wide: f32 = lexical_core::parse(string.as_bytes()).ok()?;
        Some(f16::from_f32(wide))
    }
}
impl Parser for Float32Type {
    /// Parses the whole string as an `f32` using `lexical_core`.
    fn parse(string: &str) -> Option<f32> {
        match lexical_core::parse::<f32>(string.as_bytes()) {
            Ok(value) => Some(value),
            Err(_) => None,
        }
    }
}
impl Parser for Float64Type {
    /// Parses the whole string as an `f64` using `lexical_core`.
    fn parse(string: &str) -> Option<f64> {
        match lexical_core::parse::<f64>(string.as_bytes()) {
            Ok(value) => Some(value),
            Err(_) => None,
        }
    }
}
/// Implements [`Parser`] for an integer Arrow type using `atoi`.
macro_rules! parser_primitive {
    ($t:ty) => {
        impl Parser for $t {
            fn parse(string: &str) -> Option<Self::Native> {
                let bytes = string.as_bytes();
                // Empty input, or input whose final byte is not an ASCII digit,
                // is rejected before `atoi` is consulted.
                if !bytes.last().is_some_and(|byte| byte.is_ascii_digit()) {
                    return None;
                }
                let (value, consumed): (Option<Self::Native>, usize) =
                    atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(bytes);
                // Only accept the value when the entire string was consumed.
                value.filter(|_| consumed == bytes.len())
            }
        }
    };
}
// `Parser` implementations for the unsigned and signed integer types,
// generated by `parser_primitive!`.
parser_primitive!(UInt64Type);
parser_primitive!(UInt32Type);
parser_primitive!(UInt16Type);
parser_primitive!(UInt8Type);
parser_primitive!(Int64Type);
parser_primitive!(Int32Type);
parser_primitive!(Int16Type);
parser_primitive!(Int8Type);
impl Parser for TimestampNanosecondType {
    /// Parses a timestamp string into nanoseconds since the Unix epoch.
    fn parse(string: &str) -> Option<i64> {
        match string_to_timestamp_nanos(string) {
            Ok(nanos) => Some(nanos),
            Err(_) => None,
        }
    }
}
impl Parser for TimestampMicrosecondType {
    /// Parses a timestamp string into microseconds since the Unix epoch.
    ///
    /// The value is parsed at nanosecond resolution and divided down, so the
    /// integer division truncates toward zero.
    fn parse(string: &str) -> Option<i64> {
        let nanos = string_to_timestamp_nanos(string).ok()?;
        Some(nanos / 1000)
    }
}
impl Parser for TimestampMillisecondType {
    /// Parses a timestamp string into milliseconds since the Unix epoch.
    ///
    /// The value is parsed at nanosecond resolution and divided down, so the
    /// integer division truncates toward zero.
    fn parse(string: &str) -> Option<i64> {
        let nanos = string_to_timestamp_nanos(string).ok()?;
        Some(nanos / 1_000_000)
    }
}
impl Parser for TimestampSecondType {
    /// Parses a timestamp string into seconds since the Unix epoch.
    ///
    /// The value is parsed at nanosecond resolution and divided down, so the
    /// integer division truncates toward zero.
    fn parse(string: &str) -> Option<i64> {
        let nanos = string_to_timestamp_nanos(string).ok()?;
        Some(nanos / 1_000_000_000)
    }
}
impl Parser for Time64NanosecondType {
    /// Parses a time of day into nanoseconds since midnight, falling back to
    /// reading the string as a plain integer.
    fn parse(string: &str) -> Option<Self::Native> {
        if let Ok(nanos) = string_to_time_nanoseconds(string) {
            return Some(nanos);
        }
        string.parse::<Self::Native>().ok()
    }

    /// Parses a time of day using a chrono format string.
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
        let time = NaiveTime::parse_from_str(string, format).ok()?;
        let seconds = time.num_seconds_from_midnight() as i64;
        Some(seconds * 1_000_000_000 + time.nanosecond() as i64)
    }
}
impl Parser for Time64MicrosecondType {
    /// Parses a time of day into microseconds since midnight, falling back to
    /// reading the string as a plain integer.
    fn parse(string: &str) -> Option<Self::Native> {
        match string_to_time_nanoseconds(string) {
            Ok(nanos) => Some(nanos / 1_000),
            Err(_) => string.parse::<Self::Native>().ok(),
        }
    }

    /// Parses a time of day using a chrono format string.
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
        let time = NaiveTime::parse_from_str(string, format).ok()?;
        let whole = time.num_seconds_from_midnight() as i64 * 1_000_000;
        let fraction = time.nanosecond() as i64 / 1_000;
        Some(whole + fraction)
    }
}
impl Parser for Time32MillisecondType {
    /// Parses a time of day into milliseconds since midnight, falling back to
    /// reading the string as a plain integer.
    fn parse(string: &str) -> Option<Self::Native> {
        match string_to_time_nanoseconds(string) {
            Ok(nanos) => Some((nanos / 1_000_000) as i32),
            Err(_) => string.parse::<Self::Native>().ok(),
        }
    }

    /// Parses a time of day using a chrono format string.
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
        let time = NaiveTime::parse_from_str(string, format).ok()?;
        let whole = time.num_seconds_from_midnight() as i32 * 1_000;
        let fraction = time.nanosecond() as i32 / 1_000_000;
        Some(whole + fraction)
    }
}
impl Parser for Time32SecondType {
    /// Parses a time of day into seconds since midnight, falling back to
    /// reading the string as a plain integer.
    fn parse(string: &str) -> Option<Self::Native> {
        match string_to_time_nanoseconds(string) {
            Ok(nanos) => Some((nanos / 1_000_000_000) as i32),
            Err(_) => string.parse::<Self::Native>().ok(),
        }
    }

    /// Parses a time of day using a chrono format string.
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
        let time = NaiveTime::parse_from_str(string, format).ok()?;
        let whole = time.num_seconds_from_midnight() as i32;
        // Contributes one extra second once the nanosecond field reaches a
        // full second.
        let carry = time.nanosecond() as i32 / 1_000_000_000;
        Some(whole + carry)
    }
}
/// Subtracted from chrono's `num_days_from_ce()` to produce a `Date32` value.
/// NOTE(review): presumably the day number of 1970-01-01 counted from the start
/// of the common era — confirm.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;
/// Error message used when a datetime cannot be expressed as `i64` nanoseconds.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
/// Parses a calendar date.
///
/// Accepted forms:
/// * `YYYY-MM-DD`, where month and day may each be one or two digits
/// * the compact `YYYYMMDD`
/// * anything longer than 10 bytes is handed to [`string_to_datetime`] and the
///   date part of the result is returned
///
/// The string must match one of these forms exactly; trailing bytes after the
/// date are rejected. Returns `None` for unparseable or out-of-range dates.
fn parse_date(string: &str) -> Option<NaiveDate> {
    if string.len() > 10 {
        // Too long for a bare date: parse as a datetime and keep the date part.
        return string_to_datetime(&Utc, string)
            .map(|dt| dt.date_naive())
            .ok();
    }

    let mut digits = [0_u8; 10];
    let mut mask = 0_u16;
    // Treat every byte the same way: store it relative to `b'0'` and record in
    // `mask` whether it was a decimal digit.
    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
        *o = i.wrapping_sub(b'0');
        mask |= ((*o < 10) as u16) << idx;
    }

    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');

    // Only call once the mask has confirmed that positions 0-3 are digits,
    // otherwise the arithmetic could overflow `u16`.
    let year = |d: &[u8; 10]| {
        d[0] as u16 * 1000 + d[1] as u16 * 100 + d[2] as u16 * 10 + d[3] as u16
    };

    if digits[4] != HYPHEN {
        // Compact form without separators.
        return match (mask, string.len()) {
            (0b11111111, 8) => NaiveDate::from_ymd_opt(
                year(&digits) as _,
                (digits[4] * 10 + digits[5]) as _,
                (digits[6] * 10 + digits[7]) as _,
            ),
            _ => None,
        };
    }

    // The mask alone cannot distinguish "end of input" from "a non-digit byte",
    // so the length is matched as well; without it `2020-09-8x` would be
    // accepted as `2020-09-8`.
    let (month, day) = match (mask, string.len()) {
        // YYYY-MM-DD
        (0b1101101111, 10) if digits[7] == HYPHEN => {
            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
        }
        // YYYY-MM-D
        (0b101101111, 9) if digits[7] == HYPHEN => (digits[5] * 10 + digits[6], digits[8]),
        // YYYY-M-DD
        (0b110101111, 9) if digits[6] == HYPHEN => (digits[5], digits[7] * 10 + digits[8]),
        // YYYY-M-D
        (0b10101111, 8) if digits[6] == HYPHEN => (digits[5], digits[7]),
        _ => return None,
    };

    NaiveDate::from_ymd_opt(year(&digits) as _, month as _, day as _)
}
impl Parser for Date32Type {
    /// Parses a date into a day count relative to `EPOCH_DAYS_FROM_CE`.
    fn parse(string: &str) -> Option<i32> {
        parse_date(string).map(|date| date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
    }

    /// Parses a date using a chrono format string.
    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
        match NaiveDate::parse_from_str(string, format) {
            Ok(date) => Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE),
            Err(_) => None,
        }
    }
}
impl Parser for Date64Type {
    /// Parses a date or timestamp into milliseconds since the Unix epoch.
    ///
    /// Strings of up to 10 bytes are treated as a bare date at midnight UTC;
    /// longer strings go through [`string_to_datetime`].
    fn parse(string: &str) -> Option<i64> {
        if string.len() > 10 {
            return string_to_datetime(&Utc, string)
                .ok()
                .map(|date_time| date_time.timestamp_millis());
        }
        let date = parse_date(string)?;
        let midnight = NaiveDateTime::new(date, NaiveTime::default());
        Some(midnight.and_utc().timestamp_millis())
    }

    /// Parses using a chrono format string.
    ///
    /// When the format contains a timezone item the string is parsed as a
    /// zoned datetime; otherwise it is parsed as a naive datetime and taken as
    /// UTC.
    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
        use chrono::format::Fixed;
        use chrono::format::StrftimeItems;

        let has_zone = StrftimeItems::new(format).any(|item| {
            matches!(
                item,
                chrono::format::Item::Fixed(
                    Fixed::RFC2822
                        | Fixed::RFC3339
                        | Fixed::TimezoneName
                        | Fixed::TimezoneOffsetColon
                        | Fixed::TimezoneOffsetColonZ
                        | Fixed::TimezoneOffset
                        | Fixed::TimezoneOffsetZ
                )
            )
        });

        if !has_zone {
            let naive = NaiveDateTime::parse_from_str(string, format).ok()?;
            return Some(naive.and_utc().timestamp_millis());
        }
        let zoned = chrono::DateTime::parse_from_str(string, format).ok()?;
        Some(zoned.timestamp_millis())
    }
}
/// Parses a decimal string into the native integer representation of `T`,
/// scaled by `10^scale`.
///
/// An optional leading `+` or `-` is accepted. Leading zeros are ignored.
/// Fractional digits beyond `scale` are validated but otherwise dropped.
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] if the string is not a valid decimal or
/// if the value needs more than `precision` digits.
pub fn parse_decimal<T: DecimalType>(
    s: &str,
    precision: u8,
    scale: i8,
) -> Result<T::Native, ArrowError> {
    let invalid =
        || ArrowError::ParseError(format!("can't parse the string value {s} to decimal"));
    let overflow = || ArrowError::ParseError("parse decimal overflow".to_string());

    // The counters are deliberately wider than `precision` / `scale`: with
    // `u8` / `i8` counters a very long input would overflow them (panicking in
    // debug builds, wrapping in release builds) and the precision check below
    // could then let a garbage value through.
    let precision = usize::from(precision);
    let scale = i64::from(scale);
    let mut fractionals: i64 = 0;
    let mut digits: usize = 0;

    let mut result = T::Native::usize_as(0);
    let base = T::Native::usize_as(10);

    let bs = s.as_bytes();
    let (bs, negative) = match bs.first() {
        Some(b'-') => (&bs[1..], true),
        Some(b'+') => (&bs[1..], false),
        _ => (bs, false),
    };

    if bs.is_empty() {
        return Err(invalid());
    }

    let mut bs = bs.iter();
    // The arithmetic wraps on purpose; the digit count is compared against
    // `precision` once the whole string has been read.
    while let Some(b) = bs.next() {
        match b {
            b'0'..=b'9' => {
                if digits == 0 && *b == b'0' {
                    // Ignore leading zeros.
                    continue;
                }
                digits += 1;
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
            }
            b'.' => {
                for b in bs.by_ref() {
                    if !b.is_ascii_digit() {
                        return Err(invalid());
                    }
                    if fractionals == scale {
                        // Enough fractional digits were consumed; the rest only
                        // needs to be validated.
                        continue;
                    }
                    fractionals += 1;
                    digits += 1;
                    result = result.mul_wrapping(base);
                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                }

                // No significant digit at all, e.g. "." or "0.".
                if digits == 0 {
                    return Err(invalid());
                }
            }
            _ => return Err(invalid()),
        }
    }

    if fractionals < scale {
        // Pad with zeros up to the requested scale. `exp` lies in 1..=127 here.
        let exp = scale - fractionals;
        if exp as usize + digits > precision {
            return Err(overflow());
        }
        let mul = base.pow_wrapping(exp as u32);
        result = result.mul_wrapping(mul);
    } else if digits > precision {
        return Err(overflow());
    }

    Ok(if negative {
        result.neg_wrapping()
    } else {
        result
    })
}
/// Parses an interval string into an `IntervalYearMonth` value.
///
/// A bare amount without a unit is read as years.
///
/// # Errors
///
/// Fails if the string cannot be parsed, or with [`ArrowError::CastError`] when
/// the interval has a day or sub-day component.
pub fn parse_interval_year_month(
    value: &str,
) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
    let config = IntervalParseConfig::new(IntervalUnit::Year);
    let months = match Interval::parse(value, &config)?.to_year_months() {
        Ok(months) => months,
        Err(_) => {
            return Err(ArrowError::CastError(format!(
                "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
            )))
        }
    };
    Ok(IntervalYearMonthType::make_value(0, months))
}
/// Parses an interval string into an `IntervalDayTime` value.
///
/// A bare amount without a unit is read as days.
///
/// # Errors
///
/// Fails if the string cannot be parsed, or with [`ArrowError::CastError`] when
/// the interval cannot be converted to days and milliseconds.
pub fn parse_interval_day_time(
    value: &str,
) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
    let config = IntervalParseConfig::new(IntervalUnit::Day);
    let interval = Interval::parse(value, &config)?;
    match interval.to_day_time() {
        Ok((days, millis)) => Ok(IntervalDayTimeType::make_value(days, millis)),
        Err(_) => Err(ArrowError::CastError(format!(
            "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
        ))),
    }
}
/// Parses an interval string into an `IntervalMonthDayNano` value.
///
/// A bare amount without a unit is read as months.
///
/// # Errors
///
/// Fails if the string cannot be parsed as an interval.
pub fn parse_interval_month_day_nano(
    value: &str,
) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
    let config = IntervalParseConfig::new(IntervalUnit::Month);
    let parsed = Interval::parse(value, &config)?;
    let (months, days, nanos) = parsed.to_month_day_nanos();
    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
}
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one 24-hour day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
/// Units that may follow an amount in an interval string.
///
/// Every variant occupies its own bit, so a set of units can be held in a
/// `u16` mask; `parse_interval_components` uses this to detect repeated units.
#[rustfmt::skip]
#[derive(Clone, Copy)]
#[repr(u16)]
enum IntervalUnit {
Century = 0b_0000_0000_0001,
Decade = 0b_0000_0000_0010,
Year = 0b_0000_0000_0100,
Month = 0b_0000_0000_1000,
Week = 0b_0000_0001_0000,
Day = 0b_0000_0010_0000,
Hour = 0b_0000_0100_0000,
Minute = 0b_0000_1000_0000,
Second = 0b_0001_0000_0000,
Millisecond = 0b_0010_0000_0000,
Microsecond = 0b_0100_0000_0000,
Nanosecond = 0b_1000_0000_0000,
}
impl FromStr for IntervalUnit {
    type Err = ArrowError;

    /// Parses a singular or plural unit name, ignoring letter case.
    fn from_str(s: &str) -> Result<Self, ArrowError> {
        let lowered = s.to_lowercase();
        let unit = match lowered.as_str() {
            "century" | "centuries" => Self::Century,
            "decade" | "decades" => Self::Decade,
            "year" | "years" => Self::Year,
            "month" | "months" => Self::Month,
            "week" | "weeks" => Self::Week,
            "day" | "days" => Self::Day,
            "hour" | "hours" => Self::Hour,
            "minute" | "minutes" => Self::Minute,
            "second" | "seconds" => Self::Second,
            "millisecond" | "milliseconds" => Self::Millisecond,
            "microsecond" | "microseconds" => Self::Microsecond,
            "nanosecond" | "nanoseconds" => Self::Nanosecond,
            _ => {
                return Err(ArrowError::NotYetImplemented(format!(
                    "Unknown interval type: {s}"
                )))
            }
        };
        Ok(unit)
    }
}
/// Interval expressed as a tuple of three integers.
/// NOTE(review): presumably ordered as (months, days, nanoseconds), matching
/// `Interval::to_month_day_nanos` — confirm.
pub type MonthDayNano = (i32, i32, i64);
/// Number of decimal digits kept for the fractional part of an interval amount;
/// `IntervalAmount::frac` is scaled by `10^INTERVAL_PRECISION`.
const INTERVAL_PRECISION: u32 = 15;
/// A decimal amount from an interval string, split into whole and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
/// Whole part of the amount.
integer: i64,
/// Fractional part scaled by `10^INTERVAL_PRECISION`; negative when the amount is negative.
frac: i64,
}
#[cfg(test)]
impl IntervalAmount {
/// Test-only constructor from an already-scaled `integer` / `frac` pair.
fn new(integer: i64, frac: i64) -> Self {
Self { integer, frac }
}
}
impl FromStr for IntervalAmount {
    type Err = ArrowError;

    /// Parses an amount such as `3`, `-1.5` or `.25`.
    ///
    /// The fraction may hold at most `INTERVAL_PRECISION` digits and must
    /// consist of ASCII digits only. It takes the sign of the integer part.
    ///
    /// # Errors
    ///
    /// Returns [`ArrowError::ParseError`] for malformed input or for a fraction
    /// with more digits than `INTERVAL_PRECISION`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let invalid =
            || ArrowError::ParseError(format!("Failed to parse {s} as interval amount"));

        // No decimal point: a plain integer.
        let Some((integer, frac)) = s.split_once('.') else {
            let integer = s.parse::<i64>().map_err(|_| invalid())?;
            return Ok(Self { integer, frac: 0 });
        };

        if frac.starts_with('-') {
            return Err(invalid());
        }
        if frac.len() > INTERVAL_PRECISION as usize {
            return Err(ArrowError::ParseError(format!(
                "{s} exceeds the precision available for interval amount"
            )));
        }
        // `i64::from_str` accepts a leading '+', which would let "1.+5" through
        // as 1.05, so the fraction is required to be digits only. A trailing
        // '.' with nothing after it is rejected as well.
        if frac.is_empty() || !frac.bytes().all(|b| b.is_ascii_digit()) {
            return Err(invalid());
        }

        // "-.5" and "-0.5" have a zero integer part but are still negative.
        let explicit_neg = integer.starts_with('-');
        let integer = if integer.is_empty() || integer == "-" {
            0
        } else {
            integer.parse::<i64>().map_err(|_| invalid())?
        };

        let frac_unscaled = frac.parse::<i64>().map_err(|_| invalid())?;
        // Scale up so the fraction is always expressed in units of
        // 10^-INTERVAL_PRECISION.
        let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
        let frac = if integer < 0 || explicit_neg {
            -frac
        } else {
            frac
        };

        Ok(Self { integer, frac })
    }
}
/// An interval held as separate month, day and nanosecond components.
#[derive(Debug, Default, PartialEq)]
struct Interval {
/// Month component.
months: i32,
/// Day component.
days: i32,
/// Nanosecond component.
nanos: i64,
}
impl Interval {
/// Creates an interval from its three components.
fn new(months: i32, days: i32, nanos: i64) -> Self {
Self {
months,
days,
nanos,
}
}
/// Returns the month count, or an error if the interval has a non-zero day
/// or nanosecond component.
fn to_year_months(&self) -> Result<i32, ArrowError> {
match (self.months, self.days, self.nanos) {
(months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
_ => Err(ArrowError::InvalidArgumentError(format!(
"Unable to represent interval with days and nanos as year-months: {:?}",
self
))),
}
}
/// Converts to `(days, milliseconds)`, counting every month as 30 days.
///
/// Fails if the day count overflows, if the nanoseconds are not a whole
/// number of milliseconds, or if the milliseconds do not fit in an `i32`.
fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
let days = self.months.mul_checked(30)?.add_checked(self.days)?;
match self.nanos {
nanos if nanos % NANOS_PER_MILLIS == 0 => {
let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
self.nanos
))
})?;
Ok((days, millis))
}
nanos => Err(ArrowError::InvalidArgumentError(format!(
"Unable to represent {nanos} as milliseconds"
))),
}
}
/// Returns the components as a `(months, days, nanos)` tuple.
fn to_month_day_nanos(&self) -> (i32, i32, i64) {
(self.months, self.days, self.nanos)
}
/// Parses `value` into `(amount, unit)` components and sums them, starting
/// from the zero interval.
fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
let components = parse_interval_components(value, config)?;
components
.into_iter()
.try_fold(Self::default(), |result, (amount, unit)| {
result.add(amount, unit)
})
}
/// Returns a new interval equal to `self` plus `amount` of `unit`.
///
/// `amount.frac` is scaled by `10^INTERVAL_PRECISION`, so each arm divides by
/// a power of ten that also folds in the unit conversion. Fractions of
/// centuries, decades and years become whole months (truncated), fractions
/// of a month become days at 30 days per month, and fractions of weeks and
/// days become nanoseconds.
fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
let result = match unit {
IntervalUnit::Century => {
let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
// frac / 10^15 centuries * 1200 months, with the factor 100 folded into the divisor.
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
let months = months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} centuries as months in a signed 32-bit integer",
&amount.integer
))
})?;
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
}
IntervalUnit::Decade => {
let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
// frac / 10^15 decades * 120 months, with the factor 10 folded into the divisor.
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
let months = months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} decades as months in a signed 32-bit integer",
&amount.integer
))
})?;
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
}
IntervalUnit::Year => {
let months_int = amount.integer.mul_checked(12)?;
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
let months = months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} years as months in a signed 32-bit integer",
&amount.integer
))
})?;
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
}
IntervalUnit::Month => {
let months = amount.integer.try_into().map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} months in a signed 32-bit integer",
&amount.integer
))
})?;
// Fraction of a month expressed in days: frac / 10^15 * 30.
let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
let days = days.try_into().map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} months as days in a signed 32-bit integer",
amount.frac / 10_i64.pow(INTERVAL_PRECISION)
))
})?;
Self::new(
self.months.add_checked(months)?,
self.days.add_checked(days)?,
self.nanos,
)
}
IntervalUnit::Week => {
let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} weeks as days in a signed 32-bit integer",
&amount.integer
))
})?;
// Fraction of a week in nanoseconds: 7 * 24 * 3600 * 10^9 / 10^15,
// written with the powers of ten cancelled.
let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
Self::new(
self.months,
self.days.add_checked(days)?,
self.nanos.add_checked(nanos)?,
)
}
IntervalUnit::Day => {
let days = amount.integer.try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"Unable to represent {} days in a signed 32-bit integer",
amount.integer
))
})?;
// Fraction of a day in nanoseconds.
let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
Self::new(
self.months,
self.days.add_checked(days)?,
self.nanos.add_checked(nanos)?,
)
}
IntervalUnit::Hour => {
let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
}
IntervalUnit::Minute => {
let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
}
IntervalUnit::Second => {
let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
}
IntervalUnit::Millisecond => {
let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
}
IntervalUnit::Microsecond => {
let nanos_int = amount.integer.mul_checked(1_000)?;
let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
}
IntervalUnit::Nanosecond => {
let nanos_int = amount.integer;
// Anything below one nanosecond is truncated away.
let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
}
};
Ok(result)
}
}
/// Options for parsing interval strings.
struct IntervalParseConfig {
/// Unit applied when the string consists of a single amount with no unit.
default_unit: IntervalUnit,
}
impl IntervalParseConfig {
/// Creates a configuration with the given default unit.
fn new(default_unit: IntervalUnit) -> Self {
Self { default_unit }
}
}
/// Splits an interval string into `(amount, unit)` pairs.
///
/// The string is read as whitespace-separated tokens that alternate between
/// an amount and a unit. A single amount with no unit is paired with
/// `config.default_unit`. Each unit may appear at most once.
///
/// NOTE(review): when the string ends with an amount that has no unit and
/// other pairs precede it (e.g. `1 year 2`), that last amount is dropped by the
/// final `zip` rather than reported.
fn parse_interval_components(
    value: &str,
    config: &IntervalParseConfig,
) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
    let tokens: Vec<&str> = value.split_whitespace().collect();

    // Even positions hold amounts; all of them are checked before any unit.
    let amounts = tokens
        .iter()
        .step_by(2)
        .map(|token| IntervalAmount::from_str(token))
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| {
            ArrowError::NotYetImplemented(format!(
                "Unsupported Interval Expression with value {value:?}"
            ))
        })?;

    // Odd positions hold units.
    let raw_units: Vec<&str> = tokens.iter().skip(1).step_by(2).copied().collect();
    let units = raw_units
        .iter()
        .map(|token| IntervalUnit::from_str(token))
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| {
            ArrowError::ParseError(format!(
                "Invalid input syntax for type interval: {value:?}"
            ))
        })?;

    if let ([only], []) = (amounts.as_slice(), units.as_slice()) {
        return Ok(vec![(*only, config.default_unit)]);
    }

    // Every unit has its own bit, so a repeat shows up as an already-set bit.
    let mut seen = 0_u16;
    for (unit, raw_unit) in units.iter().zip(&raw_units) {
        let bit = *unit as u16;
        if seen & bit != 0 {
            return Err(ArrowError::ParseError(format!(
                "Invalid input syntax for type interval: {value:?}. Repeated type '{raw_unit}'",
            )));
        }
        seen |= bit;
    }

    Ok(amounts.into_iter().zip(units).collect())
}
#[cfg(test)]
mod tests {
use super::*;
use arrow_array::temporal_conversions::date32_to_datetime;
use arrow_buffer::i256;
#[test]
fn test_parse_nanos() {
assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
}
#[test]
fn string_to_timestamp_timezone() {
assert_eq!(
1599572549190855000,
parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
);
assert_eq!(
1599572549190855000,
parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
);
assert_eq!(
1599572549000000000,
parse_timestamp("2020-09-08T13:42:29Z").unwrap()
); assert_eq!(
1599590549190855000,
parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
);
}
#[test]
fn string_to_timestamp_timezone_space() {
assert_eq!(
1599572549190855000,
parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
);
assert_eq!(
1599572549190855000,
parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
);
assert_eq!(
1599572549000000000,
parse_timestamp("2020-09-08 13:42:29Z").unwrap()
); assert_eq!(
1599590549190855000,
parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
);
}
#[test]
#[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
let naive_datetime = NaiveDateTime::new(
NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
);
assert_eq!(
naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
);
assert_eq!(
naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
);
let datetime_whole_secs = NaiveDateTime::new(
NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
)
.and_utc();
assert_eq!(
datetime_whole_secs.timestamp_nanos_opt().unwrap(),
parse_timestamp("2020-09-08T13:42:29").unwrap()
);
assert_eq!(
datetime_whole_secs.timestamp_nanos_opt().unwrap(),
parse_timestamp("2020-09-08 13:42:29").unwrap()
);
let datetime_no_time = NaiveDateTime::new(
NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
)
.and_utc();
assert_eq!(
datetime_no_time.timestamp_nanos_opt().unwrap(),
parse_timestamp("2020-09-08").unwrap()
)
}
#[test]
fn string_to_timestamp_chrono() {
let cases = [
"2020-09-08T13:42:29Z",
"1969-01-01T00:00:00.1Z",
"2020-09-08T12:00:12.12345678+00:00",
"2020-09-08T12:00:12+00:00",
"2020-09-08T12:00:12.1+00:00",
"2020-09-08T12:00:12.12+00:00",
"2020-09-08T12:00:12.123+00:00",
"2020-09-08T12:00:12.1234+00:00",
"2020-09-08T12:00:12.12345+00:00",
"2020-09-08T12:00:12.123456+00:00",
"2020-09-08T12:00:12.1234567+00:00",
"2020-09-08T12:00:12.12345678+00:00",
"2020-09-08T12:00:12.123456789+00:00",
"2020-09-08T12:00:12.12345678912z",
"2020-09-08T12:00:12.123456789123Z",
"2020-09-08T12:00:12.123456789123+02:00",
"2020-09-08T12:00:12.12345678912345Z",
"2020-09-08T12:00:12.1234567891234567+02:00",
"2020-09-08T12:00:60Z",
"2020-09-08T12:00:60.123Z",
"2020-09-08T12:00:60.123456+02:00",
"2020-09-08T12:00:60.1234567891234567+02:00",
"2020-09-08T12:00:60.999999999+02:00",
"2020-09-08t12:00:12.12345678+00:00",
"2020-09-08t12:00:12+00:00",
"2020-09-08t12:00:12Z",
];
for case in cases {
let chrono = DateTime::parse_from_rfc3339(case).unwrap();
let chrono_utc = chrono.with_timezone(&Utc);
let custom = string_to_datetime(&Utc, case).unwrap();
assert_eq!(chrono_utc, custom)
}
}
#[test]
fn string_to_timestamp_naive() {
let cases = [
"2018-11-13T17:11:10.011375885995",
"2030-12-04T17:11:10.123",
"2030-12-04T17:11:10.1234",
"2030-12-04T17:11:10.123456",
];
for case in cases {
let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
let custom = string_to_datetime(&Utc, case).unwrap();
assert_eq!(chrono, custom.naive_utc())
}
}
#[test]
fn string_to_timestamp_invalid() {
let cases = [
("", "timestamp must contain at least 10 characters"),
("SS", "timestamp must contain at least 10 characters"),
("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
("1997-01-31 09:26:56.123Z", "error parsing time"),
("1997:01:31T09:26:56.123Z", "error parsing date"),
("1997:1:31T09:26:56.123Z", "error parsing date"),
("1997-01-32T09:26:56.123Z", "error parsing date"),
("1997-13-32T09:26:56.123Z", "error parsing date"),
("1997-02-29T09:26:56.123Z", "error parsing date"),
("2015-02-30T17:35:20-08:00", "error parsing date"),
("1997-01-10T9:26:56.123Z", "error parsing time"),
("2015-01-20T25:35:20-08:00", "error parsing time"),
("1997-01-10T09:61:56.123Z", "error parsing time"),
("1997-01-10T09:61:90.123Z", "error parsing time"),
("1997-01-10T12:00:6.123Z", "error parsing time"),
("1997-01-31T092656.123Z", "error parsing time"),
("1997-01-10T12:00:06.", "error parsing time"),
("1997-01-10T12:00:06. ", "error parsing time"),
];
for (s, ctx) in cases {
let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
assert_eq!(actual, expected)
}
}
fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
let result = string_to_timestamp_nanos(s);
if let Err(e) = &result {
eprintln!("Error parsing timestamp '{s}': {e:?}");
}
result
}
/// Checks how timestamps lacking an explicit offset are interpreted: as UTC by
/// `parse_timestamp`, and as local time in the supplied timezone by
/// `string_to_datetime`, whereas a trailing `Z` always denotes UTC.
#[test]
fn string_without_timezone_to_timestamp() {
    let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();

    // Fractional seconds, with both the `T` and the space separator.
    let fractional = NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap();
    let fractional_dt = NaiveDateTime::new(day, fractional);
    for input in ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"] {
        assert_eq!(
            fractional_dt.and_utc().timestamp_nanos_opt().unwrap(),
            parse_timestamp(input).unwrap()
        );
    }

    // Whole seconds, again with both separators.
    let whole = NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap();
    let whole_dt = NaiveDateTime::new(day, whole);
    for input in ["2020-09-08T13:42:29", "2020-09-08 13:42:29"] {
        assert_eq!(
            whole_dt.and_utc().timestamp_nanos_opt().unwrap(),
            parse_timestamp(input).unwrap()
        );
    }

    // No offset in the string: the wall-clock time belongs to the given zone.
    let plus_two: Tz = "+02:00".parse().unwrap();
    let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29").unwrap();
    assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 11:42:29");
    assert_eq!(parsed.naive_local().to_string(), "2020-09-08 13:42:29");

    // Trailing `Z`: the wall-clock time is UTC and gets shifted into the zone.
    let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29Z").unwrap();
    assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 13:42:29");
    assert_eq!(parsed.naive_local().to_string(), "2020-09-08 15:42:29");

    // The same two rules, cross-checked against chrono with a +08:00 zone.
    let reference =
        NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
    let plus_eight: Tz = "+08:00".parse().unwrap();

    let with_z = string_to_datetime(&plus_eight, "2020-09-08T13:42:29Z").unwrap();
    assert_eq!(reference, with_z.naive_utc());
    assert_ne!(reference, with_z.naive_local());

    let without_z = string_to_datetime(&plus_eight, "2020-09-08 13:42:29").unwrap();
    assert_eq!(reference, without_z.naive_local());
    assert_ne!(reference, without_z.naive_utc());
}
/// `Date32Type::parse` accepts dates with one- or two-digit month/day and a
/// fully formed trailing time, and rejects everything in the second list.
#[test]
fn parse_date32() {
    let accepted = [
        "2020-09-08",
        "2020-9-8",
        "2020-09-8",
        "2020-9-08",
        "2020-12-1",
        "1690-2-5",
        "2020-09-08 01:02:03",
    ];
    for input in accepted {
        let days = Date32Type::parse(input).unwrap();
        let parsed = date32_to_datetime(days).unwrap();
        // chrono is the reference; fall back to the date-time format only when
        // the plain date format does not match.
        let reference = NaiveDate::parse_from_str(input, "%Y-%m-%d")
            .or_else(|_| NaiveDate::parse_from_str(input, "%Y-%m-%d %H:%M:%S"))
            .unwrap();
        assert_eq!(parsed.date(), reference);
    }

    let rejected = [
        "",
        "80-01-01",
        "342",
        "Foo",
        "2020-09-08-03",
        "2020--04-03",
        "2020--",
        "2020-09-08 01",
        "2020-09-08 01:02",
        "2020-09-08 01-02-03",
        "2020-9-8 01:02:03",
        "2020-09-08 1:2:3",
    ];
    for input in rejected {
        assert_eq!(Date32Type::parse(input), None);
    }
}
/// Time-of-day strings parsed as `Time64NanosecondType`: 24-hour and 12-hour
/// (AM/PM in either case) forms, optional seconds and fraction, a bare integer,
/// a leap second, and an explicit format string.
#[test]
fn parse_time64_nanos() {
    // (input, expected value)
    let cases = [
        // Fractional digits beyond the ninth are dropped.
        ("02:10:01.1234567899999999", 7_801_123_456_789),
        ("02:10:01.1234567", 7_801_123_456_700),
        ("2:10:01.1234567", 7_801_123_456_700),
        ("12:10:01.123456789 AM", 601_123_456_789),
        ("12:10:01.123456789 am", 601_123_456_789),
        ("2:10:01.12345678 PM", 51_001_123_456_780),
        ("2:10:01.12345678 pm", 51_001_123_456_780),
        // Whole seconds.
        ("02:10:01", 7_801_000_000_000),
        ("2:10:01", 7_801_000_000_000),
        ("12:10:01 AM", 601_000_000_000),
        ("12:10:01 am", 601_000_000_000),
        ("2:10:01 PM", 51_001_000_000_000),
        ("2:10:01 pm", 51_001_000_000_000),
        // Hours and minutes only.
        ("02:10", 7_800_000_000_000),
        ("2:10", 7_800_000_000_000),
        ("12:10 AM", 600_000_000_000),
        ("12:10 am", 600_000_000_000),
        ("2:10 PM", 51_000_000_000_000),
        ("2:10 pm", 51_000_000_000_000),
        // A bare integer is returned as-is.
        ("1", 1),
        // Leap second.
        ("23:59:60", 86_400_000_000_000),
    ];
    for (input, expected) in cases {
        assert_eq!(Time64NanosecondType::parse(input), Some(expected), "{input}");
    }

    let formatted =
        Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_123_456_700));
}
/// Time-of-day strings parsed as `Time64MicrosecondType`: 24-hour and 12-hour
/// (AM/PM in either case) forms, optional seconds and fraction, a bare integer,
/// a leap second, and an explicit format string.
#[test]
fn parse_time64_micros() {
    // (input, expected value)
    let cases = [
        // With fractional seconds.
        ("02:10:01.1234", 7_801_123_400),
        ("2:10:01.1234", 7_801_123_400),
        ("12:10:01.123456 AM", 601_123_456),
        ("12:10:01.123456 am", 601_123_456),
        ("2:10:01.12345 PM", 51_001_123_450),
        ("2:10:01.12345 pm", 51_001_123_450),
        // Whole seconds.
        ("02:10:01", 7_801_000_000),
        ("2:10:01", 7_801_000_000),
        ("12:10:01 AM", 601_000_000),
        ("12:10:01 am", 601_000_000),
        ("2:10:01 PM", 51_001_000_000),
        ("2:10:01 pm", 51_001_000_000),
        // Hours and minutes only.
        ("02:10", 7_800_000_000),
        ("2:10", 7_800_000_000),
        ("12:10 AM", 600_000_000),
        ("12:10 am", 600_000_000),
        ("2:10 PM", 51_000_000_000),
        ("2:10 pm", 51_000_000_000),
        // A bare integer is returned as-is.
        ("1", 1),
        // Leap second.
        ("23:59:60", 86_400_000_000),
    ];
    for (input, expected) in cases {
        assert_eq!(Time64MicrosecondType::parse(input), Some(expected), "{input}");
    }

    let formatted =
        Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_123_400));
}
/// Time-of-day strings parsed as `Time32MillisecondType`: 24-hour and 12-hour
/// (AM/PM in either case) forms, optional seconds and fraction, a bare integer,
/// a leap second, and an explicit format string.
#[test]
fn parse_time32_millis() {
    // (input, expected value)
    let cases = [
        // With fractional seconds.
        ("02:10:01.1", 7_801_100),
        ("2:10:01.1", 7_801_100),
        ("12:10:01.123 AM", 601_123),
        ("12:10:01.123 am", 601_123),
        ("2:10:01.12 PM", 51_001_120),
        ("2:10:01.12 pm", 51_001_120),
        // Whole seconds.
        ("02:10:01", 7_801_000),
        ("2:10:01", 7_801_000),
        ("12:10:01 AM", 601_000),
        ("12:10:01 am", 601_000),
        ("2:10:01 PM", 51_001_000),
        ("2:10:01 pm", 51_001_000),
        // Hours and minutes only.
        ("02:10", 7_800_000),
        ("2:10", 7_800_000),
        ("12:10 AM", 600_000),
        ("12:10 am", 600_000),
        ("2:10 PM", 51_000_000),
        ("2:10 pm", 51_000_000),
        // A bare integer is returned as-is.
        ("1", 1),
        // Leap second.
        ("23:59:60", 86_400_000),
    ];
    for (input, expected) in cases {
        assert_eq!(Time32MillisecondType::parse(input), Some(expected), "{input}");
    }

    let formatted =
        Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_100));
}
/// Time-of-day strings parsed as `Time32SecondType`: 24-hour and 12-hour
/// (AM/PM in either case) forms, a bare integer, a leap second, and an
/// explicit format string.
#[test]
fn parse_time32_secs() {
    // (input, expected value)
    let cases = [
        // The fractional part does not contribute to the result.
        ("02:10:01.1", 7_801),
        ("02:10:01", 7_801),
        ("2:10:01", 7_801),
        ("12:10:01 AM", 601),
        ("12:10:01 am", 601),
        ("2:10:01 PM", 51_001),
        ("2:10:01 pm", 51_001),
        // Hours and minutes only.
        ("02:10", 7_800),
        ("2:10", 7_800),
        ("12:10 AM", 600),
        ("12:10 am", 600),
        ("2:10 PM", 51_000),
        ("2:10 pm", 51_000),
        // A bare integer is returned as-is.
        ("1", 1),
        // Leap second.
        ("23:59:60", 86400),
    ];
    for (input, expected) in cases {
        assert_eq!(Time32SecondType::parse(input), Some(expected), "{input}");
    }

    let formatted = Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S");
    assert_eq!(formatted, Some(7_801));
}
/// Every string listed here must be rejected by `string_to_time`.
#[test]
fn test_string_to_time_invalid() {
    const REJECTED: &[&str] = &[
        "25:00",
        "9:00:",
        "009:00",
        "09:0:00",
        "25:00:00",
        "13:00 AM",
        "13:00 PM",
        "12:00. AM",
        "09:0:00",
        "09:01:0",
        "09:01:1",
        "9:1:0",
        "09:01:0",
        "1:00.123",
        "1:00:00.123f",
        " 9:00:00",
        ":09:00",
        "T9:00:00",
        "AM",
    ];
    for &case in REJECTED {
        let parsed = string_to_time(case);
        // The input is the failure message so the offending case is visible.
        assert!(parsed.is_none(), "{case}");
    }
}
/// `string_to_time` must agree with chrono's `NaiveTime::parse_from_str` on
/// every input below, both when chrono succeeds and when it fails.
#[test]
fn test_string_to_time_chrono() {
    // (input, chrono format string used as the reference)
    let cases = [
        // 24-hour, hours and minutes.
        ("1:00", "%H:%M"),
        ("12:00", "%H:%M"),
        ("13:00", "%H:%M"),
        ("24:00", "%H:%M"),
        // 24-hour with seconds.
        ("1:00:00", "%H:%M:%S"),
        ("12:00:30", "%H:%M:%S"),
        ("13:00:59", "%H:%M:%S"),
        ("24:00:60", "%H:%M:%S"),
        ("09:00:00", "%H:%M:%S%.f"),
        ("0:00:30.123456", "%H:%M:%S%.f"),
        // 12-hour with an AM/PM marker in various cases.
        ("0:00 AM", "%I:%M %P"),
        ("1:00 AM", "%I:%M %P"),
        ("12:00 AM", "%I:%M %P"),
        ("13:00 AM", "%I:%M %P"),
        ("0:00 PM", "%I:%M %P"),
        ("1:00 PM", "%I:%M %P"),
        ("12:00 PM", "%I:%M %P"),
        ("13:00 PM", "%I:%M %P"),
        ("1:00 pM", "%I:%M %P"),
        ("1:00 Pm", "%I:%M %P"),
        ("1:00 aM", "%I:%M %P"),
        ("1:00 Am", "%I:%M %P"),
        // 12-hour with fractional seconds of varying length.
        ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
    ];
    for (s, chrono_format) in cases {
        let reference = NaiveTime::parse_from_str(s, chrono_format).ok();
        let parsed = string_to_time(s);
        assert_eq!(reference, parsed, "{s}");
    }
}
/// Parses a range of interval strings with `IntervalUnit::Month` as the
/// configured unit and compares each against the expected
/// `Interval::new(months, days, nanos)` value.
#[test]
fn test_parse_interval() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);
    // Asserts that `input` parses successfully to exactly `expected`.
    let check = |expected: Interval, input: &str| {
        assert_eq!(expected, Interval::parse(input, &config).unwrap(), "{input}");
    };

    // Whole and fractional months.
    check(Interval::new(1, 0, 0), "1 month");
    check(Interval::new(2, 0, 0), "2 month");
    check(
        Interval::new(-1, -18, -(NANOS_PER_DAY / 5)),
        "-1.5 months -3.2 days",
    );
    check(Interval::new(0, 15, 0), "0.5 months");
    check(Interval::new(0, 15, 0), ".5 months");
    check(Interval::new(0, -15, 0), "-0.5 months");
    check(Interval::new(0, -15, 0), "-.5 months");
    check(
        Interval::new(2, 10, 9 * NANOS_PER_HOUR),
        "2.1 months 7.25 days 3 hours",
    );

    // A misspelled unit is a syntax error.
    let failure = Interval::parse("1 centurys 1 month", &config).unwrap_err();
    assert_eq!(
        failure.to_string(),
        r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
    );

    // Years combined with months, in every sign combination.
    check(Interval::new(37, 0, 0), "3 year 1 month");
    check(Interval::new(35, 0, 0), "3 year -1 month");
    check(Interval::new(-37, 0, 0), "-3 year -1 month");
    check(Interval::new(-35, 0, 0), "-3 year 1 month");

    // Days combined with sub-day units.
    check(Interval::new(0, 5, 0), "5 days");
    check(Interval::new(0, 7, 3 * NANOS_PER_HOUR), "7 days 3 hours");
    check(Interval::new(0, 7, 5 * NANOS_PER_MINUTE), "7 days 5 minutes");
    check(
        Interval::new(0, 7, -5 * NANOS_PER_MINUTE),
        "7 days -5 minutes",
    );
    check(Interval::new(0, -7, 5 * NANOS_PER_HOUR), "-7 days 5 hours");
    check(
        Interval::new(
            0,
            -7,
            -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND,
        ),
        "-7 days -5 hours -5 minutes -5 seconds",
    );

    // Sub-second units.
    check(
        Interval::new(12, 0, 25 * NANOS_PER_MILLIS),
        "1 year 25 millisecond",
    );
    check(
        Interval::new(
            12,
            1,
            (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64,
        ),
        "1 year 1 day 0.000000001 seconds",
    );
    check(
        Interval::new(12, 1, NANOS_PER_MILLIS / 10),
        "1 year 1 day 0.1 milliseconds",
    );
    check(Interval::new(12, 1, 1000), "1 year 1 day 1 microsecond");
    check(Interval::new(12, 1, 1), "1 year 1 day 1 nanoseconds");
    check(Interval::new(1, 0, -NANOS_PER_SECOND), "1 month -1 second");

    // Every unit at once, all negative.
    check(
        Interval::new(
            -13,
            -8,
            -NANOS_PER_HOUR
                - NANOS_PER_MINUTE
                - NANOS_PER_SECOND
                - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64,
        ),
        "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
    );
}
/// Mentioning the same unit twice in one interval string is an error that
/// names the repeated unit.
#[test]
fn test_duplicate_interval_type() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);
    // (input, expected `Debug` rendering of the error)
    let cases = [
        (
            "1 month 1 second 1 second",
            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
        ),
        (
            "1 century 2 centuries",
            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
        ),
    ];
    for (input, expected) in cases {
        let err =
            Interval::parse(input, &config).expect_err("parsing interval should have failed");
        assert_eq!(expected, format!("{err:?}"));
    }
}
/// `IntervalAmount::from_str` splits a decimal string into an integer part and
/// a fractional part scaled by `INTERVAL_PRECISION`, and rejects a dangling
/// dot or a sign after the dot.
#[test]
fn test_interval_amount_parsing() {
    // The value of the first fractional digit.
    let one_tenth = 10_i64.pow(INTERVAL_PRECISION - 1);

    assert_eq!(
        IntervalAmount::from_str("123").unwrap(),
        IntervalAmount::new(123, 0)
    );
    assert_eq!(
        IntervalAmount::from_str("0.3").unwrap(),
        IntervalAmount::new(0, 3 * one_tenth)
    );
    // A negative amount carries its sign on both components.
    assert_eq!(
        IntervalAmount::from_str("-3.5").unwrap(),
        IntervalAmount::new(-3, -5 * one_tenth)
    );

    for malformed in ["3.", "3.-5"] {
        assert!(IntervalAmount::from_str(malformed).is_err());
    }
}
/// A large day count with a fractional part must keep the fraction exactly:
/// the extra tenth of a day ends up in the nanosecond component.
#[test]
fn test_interval_precision() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);
    let parsed = Interval::parse("100000.1 days", &config).unwrap();
    assert_eq!(parsed, Interval::new(0, 100_000, NANOS_PER_DAY / 10));
}
/// Adds one `IntervalAmount` of each unit to a starting `Interval` and checks
/// the resulting months/days/nanos components.
#[test]
fn test_interval_addition() {
    // `frac(d, p)` computes `d * 10^(INTERVAL_PRECISION - p)`, the fractional
    // component of an `IntervalAmount`.
    let frac = |digits: i64, places: u32| digits * 10_i64.pow(INTERVAL_PRECISION - places);

    // (start, amount to add, unit of the amount, expected result)
    let cases = [
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(4, frac(1, 1)),
            IntervalUnit::Century,
            Interval::new(4921, 2, 3),
        ),
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(10, frac(25, 2)),
            IntervalUnit::Decade,
            Interval::new(1231, 2, 3),
        ),
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(30, frac(3, 1)),
            IntervalUnit::Year,
            Interval::new(364, 2, 3),
        ),
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(1, frac(5, 1)),
            IntervalUnit::Month,
            Interval::new(2, 17, 3),
        ),
        (
            Interval::new(1, 25, 3),
            IntervalAmount::new(-2, 0),
            IntervalUnit::Week,
            Interval::new(1, 11, 3),
        ),
        (
            Interval::new(12, 15, 3),
            IntervalAmount::new(2, frac(2, 1)),
            IntervalUnit::Day,
            Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND),
        ),
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(12, frac(5, 1)),
            IntervalUnit::Hour,
            Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND),
        ),
        (
            Interval::new(0, 0, -3),
            IntervalAmount::new(-1, frac(-5, 1)),
            IntervalUnit::Minute,
            Interval::new(0, 0, -90_000_000_000 - 3),
        ),
    ];
    for (start, amount, unit, expected) in cases {
        let sum = start.add(amount, unit).unwrap();
        assert_eq!(sum, expected);
    }
}
/// The 1677 timestamp below must be rejected with an error ending in
/// `ERR_NANOSECONDS_NOT_SUPPORTED`.
#[test]
fn string_to_timestamp_old() {
    let failure = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(failure.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
/// Exercises `parse_decimal` for both `Decimal128Type` and `Decimal256Type`
/// with explicit precision and scale: well-formed values, unparseable strings,
/// overflow, and values at the maximum number of digits.
#[test]
fn test_parse_decimal_with_parameter() {
    // Well-formed inputs at precision 20, scale 3: (input, expected unscaled value).
    let well_formed = [
        ("0", 0i128),
        ("123.123", 123123i128),
        ("123.1234", 123123i128),
        ("123.1", 123100i128),
        ("123", 123000i128),
        ("-123.123", -123123i128),
        ("-123.1234", -123123i128),
        ("-123.1", -123100i128),
        ("-123", -123000i128),
        ("0.0000123", 0i128),
        ("12.", 12000i128),
        ("-12.", -12000i128),
        ("00.1", 100i128),
        ("-00.1", -100i128),
        ("12345678912345678.1234", 12345678912345678123i128),
        ("-12345678912345678.1234", -12345678912345678123i128),
        ("99999999999999999.999", 99999999999999999999i128),
        ("-99999999999999999.999", -99999999999999999999i128),
        (".123", 123i128),
        ("-.123", -123i128),
        ("123.", 123000i128),
        ("-123.", -123000i128),
    ];
    for (input, unscaled) in well_formed {
        let parsed_128 = parse_decimal::<Decimal128Type>(input, 20, 3).unwrap();
        assert_eq!(unscaled, parsed_128);
        let parsed_256 = parse_decimal::<Decimal256Type>(input, 20, 3).unwrap();
        assert_eq!(i256::from_i128(unscaled), parsed_256);
    }

    // Strings that are not decimals at all.
    for s in ["123,123", ".", "123.123.123", "", "+", "-"] {
        let failure_128 = parse_decimal::<Decimal128Type>(s, 20, 3).unwrap_err();
        assert_eq!(
            format!("Parser error: can't parse the string value {s} to decimal"),
            failure_128.to_string()
        );
        let failure_256 = parse_decimal::<Decimal256Type>(s, 20, 3).unwrap_err();
        assert_eq!(
            format!("Parser error: can't parse the string value {s} to decimal"),
            failure_256.to_string()
        );
    }

    // Values with more digits than precision 10 / scale 3 can hold.
    for s in ["12345678", "12345678.9", "99999999.99"] {
        let expected_128 = "Parser error: parse decimal overflow";
        let actual_128 = parse_decimal::<Decimal128Type>(s, 10, 3)
            .unwrap_err()
            .to_string();
        assert!(
            actual_128.contains(expected_128),
            "actual: '{actual_128}', expected: '{expected_128}'"
        );
        let expected_256 = "Parser error: parse decimal overflow";
        let actual_256 = parse_decimal::<Decimal256Type>(s, 10, 3)
            .unwrap_err()
            .to_string();
        assert!(
            actual_256.contains(expected_256),
            "actual: '{actual_256}', expected: '{expected_256}'"
        );
    }

    // 128-bit values at precision 38: (input, expected unscaled value, scale).
    let widest_128 = [
        (
            "99999999999999999999999999999999999999",
            99999999999999999999999999999999999999i128,
            0,
        ),
        (
            "999999999999999999999999999999999999.99",
            99999999999999999999999999999999999999i128,
            2,
        ),
        (
            "9999999999999999999999999.9999999999999",
            99999999999999999999999999999999999999i128,
            13,
        ),
        (
            "9999999999999999999999999",
            99999999999999999999999990000000000000i128,
            13,
        ),
        (
            "0.99999999999999999999999999999999999999",
            99999999999999999999999999999999999999i128,
            38,
        ),
    ];
    for (input, unscaled, scale) in widest_128 {
        let parsed = parse_decimal::<Decimal128Type>(input, 38, scale).unwrap();
        assert_eq!(unscaled, parsed);
    }

    // 256-bit values at precision 76: (input, expected unscaled value, scale).
    let wide = |digits: &str| i256::from_string(digits).unwrap();
    let widest_256 = [
        (
            "9999999999999999999999999999999999999999999999999999999999999999999999999999",
            wide("9999999999999999999999999999999999999999999999999999999999999999999999999999"),
            0,
        ),
        (
            "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
            wide("9999999999999999999999999999999999999999999999999999999999999999999999999999"),
            4,
        ),
        (
            "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
            wide("9999999999999999999999999999999999999999999999999999999999999999999999999999"),
            26,
        ),
        (
            "99999999999999999999999999999999999999999999999999",
            wide("9999999999999999999999999999999999999999999999999900000000000000000000000000"),
            26,
        ),
    ];
    for (input, unscaled, scale) in widest_256 {
        let parsed = parse_decimal::<Decimal256Type>(input, 76, scale).unwrap();
        assert_eq!(unscaled, parsed);
    }
}
/// The empty string and a lone `+` are not valid numbers, and the empty
/// string is not a valid timestamp or date: each parse must return `None`.
#[test]
fn test_parse_empty() {
    // Each input is tried against every integer and float type in turn.
    for input in ["", "+"] {
        assert_eq!(Int32Type::parse(input), None);
        assert_eq!(Int64Type::parse(input), None);
        assert_eq!(UInt32Type::parse(input), None);
        assert_eq!(UInt64Type::parse(input), None);
        assert_eq!(Float32Type::parse(input), None);
        assert_eq!(Float64Type::parse(input), None);
    }
    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
}