1use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` fractional-second digits of `digits` into nanoseconds.
///
/// `O` is subtracted (wrapping) from every byte before it is used as a decimal
/// digit: pass `b'0'` for raw ASCII input, or `0` when the bytes already hold
/// digit values. The `N`-digit number is then scaled by `10^(9 - N)`.
///
/// Panics if `digits` holds fewer than `N` bytes.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &raw in &digits[..N] {
        value = value * 10 + u32::from(raw.wrapping_sub(O));
    }
    // Pad on the right so that e.g. ".5" becomes 500_000_000 ns.
    let scale = 10_u32.pow((9 - N) as u32);
    value * scale
}
38
/// Scratch state used to parse a timestamp by inspecting fixed byte positions.
struct TimestampParser {
    /// The first (up to) 32 input bytes, each with `b'0'` subtracted (wrapping),
    /// so ASCII digits become `0..=9`. Positions beyond the input remain `0`.
    digits: [u8; 32],
    /// Bit `i` is set when `digits[i] < 10`, i.e. input byte `i` was an ASCII digit.
    mask: u32,
}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306 let bytes = s.as_bytes();
307 if bytes.len() < 4 {
308 return None;
309 }
310
311 let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312 Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313 Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314 _ => (None, bytes),
315 };
316
317 if bytes.len() < 4 {
318 return None;
319 }
320
321 let mut digits = [b'0'; 6];
322
323 let bytes = match (bytes[1], bytes[2]) {
325 (b':', _) => {
326 digits[1] = bytes[0];
327 &bytes[2..]
328 }
329 (_, b':') => {
330 digits[0] = bytes[0];
331 digits[1] = bytes[1];
332 &bytes[3..]
333 }
334 _ => return None,
335 };
336
337 if bytes.len() < 2 {
338 return None; }
340
341 digits[2] = bytes[0];
343 digits[3] = bytes[1];
344
345 let nanoseconds = match bytes.get(2) {
346 Some(b':') => {
347 if bytes.len() < 5 {
348 return None;
349 }
350
351 digits[4] = bytes[3];
353 digits[5] = bytes[4];
354
355 match bytes.get(5) {
357 Some(b'.') => {
358 let decimal = &bytes[6..];
359 if decimal.iter().any(|x| !x.is_ascii_digit()) {
360 return None;
361 }
362 match decimal.len() {
363 0 => return None,
364 1 => parse_nanos::<1, b'0'>(decimal),
365 2 => parse_nanos::<2, b'0'>(decimal),
366 3 => parse_nanos::<3, b'0'>(decimal),
367 4 => parse_nanos::<4, b'0'>(decimal),
368 5 => parse_nanos::<5, b'0'>(decimal),
369 6 => parse_nanos::<6, b'0'>(decimal),
370 7 => parse_nanos::<7, b'0'>(decimal),
371 8 => parse_nanos::<8, b'0'>(decimal),
372 _ => parse_nanos::<9, b'0'>(decimal),
373 }
374 }
375 Some(_) => return None,
376 None => 0,
377 }
378 }
379 Some(_) => return None,
380 None => 0,
381 };
382
383 digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384 if digits.iter().any(|x| *x > 9) {
385 return None;
386 }
387
388 let hour = match (digits[0] * 10 + digits[1], am) {
389 (12, Some(true)) => 0, (h @ 1..=11, Some(true)) => h, (12, Some(false)) => 12, (h @ 1..=11, Some(false)) => h + 12, (_, Some(_)) => return None,
394 (h, None) => h,
395 };
396
397 let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399 60 => (59, nanoseconds + 1_000_000_000),
400 s => (s, nanoseconds),
401 };
402
403 NaiveTime::from_hms_nano_opt(
404 hour as _,
405 (digits[2] * 10 + digits[3]) as _,
406 second as _,
407 nanoseconds,
408 )
409}
410
/// String parsing for Arrow primitive types: converts text into the type's
/// native value.
pub trait Parser: ArrowPrimitiveType {
    /// Parses `string` into `Self::Native`, returning `None` if it cannot be parsed.
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parses `string` according to `_format`.
    ///
    /// The default implementation ignores the format and delegates to
    /// [`Parser::parse`]; implementors may override it.
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
/// Returns `true` when `opt` holds a value and `f` returns `true` for it.
/// `f` is not called when `opt` is `None`.
#[inline(always)]
fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
    opt.map_or(false, f)
}
474
475macro_rules! parser_primitive {
476 ($t:ty) => {
477 impl Parser for $t {
478 fn parse(string: &str) -> Option<Self::Native> {
479 if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
480 return None;
481 }
482 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
483 string.as_bytes(),
484 ) {
485 (Some(n), x) if x == string.len() => Some(n),
486 _ => None,
487 }
488 }
489 }
490 };
491}
492parser_primitive!(UInt64Type);
493parser_primitive!(UInt32Type);
494parser_primitive!(UInt16Type);
495parser_primitive!(UInt8Type);
496parser_primitive!(Int64Type);
497parser_primitive!(Int32Type);
498parser_primitive!(Int16Type);
499parser_primitive!(Int8Type);
500parser_primitive!(DurationNanosecondType);
501parser_primitive!(DurationMicrosecondType);
502parser_primitive!(DurationMillisecondType);
503parser_primitive!(DurationSecondType);
504
505impl Parser for TimestampNanosecondType {
506 fn parse(string: &str) -> Option<i64> {
507 string_to_timestamp_nanos(string).ok()
508 }
509}
510
511impl Parser for TimestampMicrosecondType {
512 fn parse(string: &str) -> Option<i64> {
513 let nanos = string_to_timestamp_nanos(string).ok();
514 nanos.map(|x| x / 1000)
515 }
516}
517
518impl Parser for TimestampMillisecondType {
519 fn parse(string: &str) -> Option<i64> {
520 let nanos = string_to_timestamp_nanos(string).ok();
521 nanos.map(|x| x / 1_000_000)
522 }
523}
524
525impl Parser for TimestampSecondType {
526 fn parse(string: &str) -> Option<i64> {
527 let nanos = string_to_timestamp_nanos(string).ok();
528 nanos.map(|x| x / 1_000_000_000)
529 }
530}
531
532impl Parser for Time64NanosecondType {
533 fn parse(string: &str) -> Option<Self::Native> {
535 string_to_time_nanoseconds(string)
536 .ok()
537 .or_else(|| string.parse::<Self::Native>().ok())
538 }
539
540 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
541 let nt = NaiveTime::parse_from_str(string, format).ok()?;
542 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
543 }
544}
545
546impl Parser for Time64MicrosecondType {
547 fn parse(string: &str) -> Option<Self::Native> {
549 string_to_time_nanoseconds(string)
550 .ok()
551 .map(|nanos| nanos / 1_000)
552 .or_else(|| string.parse::<Self::Native>().ok())
553 }
554
555 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
556 let nt = NaiveTime::parse_from_str(string, format).ok()?;
557 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
558 }
559}
560
561impl Parser for Time32MillisecondType {
562 fn parse(string: &str) -> Option<Self::Native> {
564 string_to_time_nanoseconds(string)
565 .ok()
566 .map(|nanos| (nanos / 1_000_000) as i32)
567 .or_else(|| string.parse::<Self::Native>().ok())
568 }
569
570 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
571 let nt = NaiveTime::parse_from_str(string, format).ok()?;
572 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
573 }
574}
575
576impl Parser for Time32SecondType {
577 fn parse(string: &str) -> Option<Self::Native> {
579 string_to_time_nanoseconds(string)
580 .ok()
581 .map(|nanos| (nanos / 1_000_000_000) as i32)
582 .or_else(|| string.parse::<Self::Native>().ok())
583 }
584
585 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
586 let nt = NaiveTime::parse_from_str(string, format).ok()?;
587 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
588 }
589}
590
/// Subtracted from `NaiveDate::num_days_from_ce()` to obtain a `Date32` value.
/// Presumably the CE day number of 1970-01-01 (the Unix epoch) — TODO confirm.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a datetime cannot be expressed as `i64` nanoseconds.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
596
597fn parse_date(string: &str) -> Option<NaiveDate> {
598 if string.len() > 10 {
599 return string_to_datetime(&Utc, string)
601 .map(|dt| dt.date_naive())
602 .ok();
603 };
604 let mut digits = [0; 10];
605 let mut mask = 0;
606
607 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
609 *o = i.wrapping_sub(b'0');
610 mask |= ((*o < 10) as u16) << idx
611 }
612
613 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
614
615 if digits[4] != HYPHEN {
617 let (year, month, day) = match (mask, string.len()) {
618 (0b11111111, 8) => (
619 digits[0] as u16 * 1000
620 + digits[1] as u16 * 100
621 + digits[2] as u16 * 10
622 + digits[3] as u16,
623 digits[4] * 10 + digits[5],
624 digits[6] * 10 + digits[7],
625 ),
626 _ => return None,
627 };
628 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
629 }
630
631 let (month, day) = match mask {
632 0b1101101111 => {
633 if digits[7] != HYPHEN {
634 return None;
635 }
636 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
637 }
638 0b101101111 => {
639 if digits[7] != HYPHEN {
640 return None;
641 }
642 (digits[5] * 10 + digits[6], digits[8])
643 }
644 0b110101111 => {
645 if digits[6] != HYPHEN {
646 return None;
647 }
648 (digits[5], digits[7] * 10 + digits[8])
649 }
650 0b10101111 => {
651 if digits[6] != HYPHEN {
652 return None;
653 }
654 (digits[5], digits[7])
655 }
656 _ => return None,
657 };
658
659 let year =
660 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
661
662 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
663}
664
665impl Parser for Date32Type {
666 fn parse(string: &str) -> Option<i32> {
667 let date = parse_date(string)?;
668 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
669 }
670
671 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
672 let date = NaiveDate::parse_from_str(string, format).ok()?;
673 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
674 }
675}
676
677impl Parser for Date64Type {
678 fn parse(string: &str) -> Option<i64> {
679 if string.len() <= 10 {
680 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
681 Some(datetime.and_utc().timestamp_millis())
682 } else {
683 let date_time = string_to_datetime(&Utc, string).ok()?;
684 Some(date_time.timestamp_millis())
685 }
686 }
687
688 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
689 use chrono::format::Fixed;
690 use chrono::format::StrftimeItems;
691 let fmt = StrftimeItems::new(format);
692 let has_zone = fmt.into_iter().any(|item| match item {
693 chrono::format::Item::Fixed(fixed_item) => matches!(
694 fixed_item,
695 Fixed::RFC2822
696 | Fixed::RFC3339
697 | Fixed::TimezoneName
698 | Fixed::TimezoneOffsetColon
699 | Fixed::TimezoneOffsetColonZ
700 | Fixed::TimezoneOffset
701 | Fixed::TimezoneOffsetZ
702 ),
703 _ => false,
704 });
705 if has_zone {
706 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
707 Some(date_time.timestamp_millis())
708 } else {
709 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
710 Some(date_time.and_utc().timestamp_millis())
711 }
712 }
713}
714
715fn parse_e_notation<T: DecimalType>(
716 s: &str,
717 mut digits: u16,
718 mut fractionals: i16,
719 mut result: T::Native,
720 index: usize,
721 precision: u16,
722 scale: i16,
723) -> Result<T::Native, ArrowError> {
724 let mut exp: i16 = 0;
725 let base = T::Native::usize_as(10);
726
727 let mut exp_start: bool = false;
728 let mut pos_shift_direction: bool = true;
730
731 let mut bs;
733 if fractionals > 0 {
734 bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
736 } else {
737 bs = s.as_bytes().iter().skip(index);
739 }
740
741 while let Some(b) = bs.next() {
742 match b {
743 b'0'..=b'9' => {
744 result = result.mul_wrapping(base);
745 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
746 if fractionals > 0 {
747 fractionals += 1;
748 }
749 digits += 1;
750 }
751 &b'e' | &b'E' => {
752 exp_start = true;
753 }
754 _ => {
755 return Err(ArrowError::ParseError(format!(
756 "can't parse the string value {s} to decimal"
757 )));
758 }
759 };
760
761 if exp_start {
762 pos_shift_direction = match bs.next() {
763 Some(&b'-') => false,
764 Some(&b'+') => true,
765 Some(b) => {
766 if !b.is_ascii_digit() {
767 return Err(ArrowError::ParseError(format!(
768 "can't parse the string value {s} to decimal"
769 )));
770 }
771
772 exp *= 10;
773 exp += (b - b'0') as i16;
774
775 true
776 }
777 None => {
778 return Err(ArrowError::ParseError(format!(
779 "can't parse the string value {s} to decimal"
780 )))
781 }
782 };
783
784 for b in bs.by_ref() {
785 if !b.is_ascii_digit() {
786 return Err(ArrowError::ParseError(format!(
787 "can't parse the string value {s} to decimal"
788 )));
789 }
790 exp *= 10;
791 exp += (b - b'0') as i16;
792 }
793 }
794 }
795
796 if digits == 0 && fractionals == 0 && exp == 0 {
797 return Err(ArrowError::ParseError(format!(
798 "can't parse the string value {s} to decimal"
799 )));
800 }
801
802 if !pos_shift_direction {
803 if exp - (digits as i16 + scale) > 0 {
806 return Ok(T::Native::usize_as(0));
807 }
808 exp *= -1;
809 }
810
811 exp = fractionals - exp;
813 if !pos_shift_direction && exp > digits as i16 {
815 digits = exp as u16;
816 }
817 exp = scale - exp;
819
820 if (digits as i16 + exp) as u16 > precision {
821 return Err(ArrowError::ParseError(format!(
822 "parse decimal overflow ({s})"
823 )));
824 }
825
826 if exp < 0 {
827 result = result.div_wrapping(base.pow_wrapping(-exp as _));
828 } else {
829 result = result.mul_wrapping(base.pow_wrapping(exp as _));
830 }
831
832 Ok(result)
833}
834
835pub fn parse_decimal<T: DecimalType>(
838 s: &str,
839 precision: u8,
840 scale: i8,
841) -> Result<T::Native, ArrowError> {
842 let mut result = T::Native::usize_as(0);
843 let mut fractionals: i8 = 0;
844 let mut digits: u8 = 0;
845 let base = T::Native::usize_as(10);
846
847 let bs = s.as_bytes();
848 let (signed, negative) = match bs.first() {
849 Some(b'-') => (true, true),
850 Some(b'+') => (true, false),
851 _ => (false, false),
852 };
853
854 if bs.is_empty() || signed && bs.len() == 1 {
855 return Err(ArrowError::ParseError(format!(
856 "can't parse the string value {s} to decimal"
857 )));
858 }
859
860 let mut bs = bs.iter().enumerate().skip(signed as usize);
862
863 let mut is_e_notation = false;
864
865 while let Some((index, b)) = bs.next() {
868 match b {
869 b'0'..=b'9' => {
870 if digits == 0 && *b == b'0' {
871 continue;
873 }
874 digits += 1;
875 result = result.mul_wrapping(base);
876 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
877 }
878 b'.' => {
879 let point_index = index;
880
881 for (_, b) in bs.by_ref() {
882 if !b.is_ascii_digit() {
883 if *b == b'e' || *b == b'E' {
884 result = match parse_e_notation::<T>(
885 s,
886 digits as u16,
887 fractionals as i16,
888 result,
889 point_index,
890 precision as u16,
891 scale as i16,
892 ) {
893 Err(e) => return Err(e),
894 Ok(v) => v,
895 };
896
897 is_e_notation = true;
898
899 break;
900 }
901 return Err(ArrowError::ParseError(format!(
902 "can't parse the string value {s} to decimal"
903 )));
904 }
905 if fractionals == scale && scale != 0 {
906 continue;
910 }
911 fractionals += 1;
912 digits += 1;
913 result = result.mul_wrapping(base);
914 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
915 }
916
917 if is_e_notation {
918 break;
919 }
920
921 if digits == 0 {
923 return Err(ArrowError::ParseError(format!(
924 "can't parse the string value {s} to decimal"
925 )));
926 }
927 }
928 b'e' | b'E' => {
929 result = match parse_e_notation::<T>(
930 s,
931 digits as u16,
932 fractionals as i16,
933 result,
934 index,
935 precision as u16,
936 scale as i16,
937 ) {
938 Err(e) => return Err(e),
939 Ok(v) => v,
940 };
941
942 is_e_notation = true;
943
944 break;
945 }
946 _ => {
947 return Err(ArrowError::ParseError(format!(
948 "can't parse the string value {s} to decimal"
949 )));
950 }
951 }
952 }
953
954 if !is_e_notation {
955 if fractionals < scale {
956 let exp = scale - fractionals;
957 if exp as u8 + digits > precision {
958 return Err(ArrowError::ParseError(format!(
959 "parse decimal overflow ({s})"
960 )));
961 }
962 let mul = base.pow_wrapping(exp as _);
963 result = result.mul_wrapping(mul);
964 } else if digits > precision {
965 return Err(ArrowError::ParseError(format!(
966 "parse decimal overflow ({s})"
967 )));
968 }
969 }
970
971 Ok(if negative {
972 result.neg_wrapping()
973 } else {
974 result
975 })
976}
977
978pub fn parse_interval_year_month(
980 value: &str,
981) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
982 let config = IntervalParseConfig::new(IntervalUnit::Year);
983 let interval = Interval::parse(value, &config)?;
984
985 let months = interval.to_year_months().map_err(|_| {
986 ArrowError::CastError(format!(
987 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
988 ))
989 })?;
990
991 Ok(IntervalYearMonthType::make_value(0, months))
992}
993
994pub fn parse_interval_day_time(
996 value: &str,
997) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
998 let config = IntervalParseConfig::new(IntervalUnit::Day);
999 let interval = Interval::parse(value, &config)?;
1000
1001 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1002 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1003 )))?;
1004
1005 Ok(IntervalDayTimeType::make_value(days, millis))
1006}
1007
1008pub fn parse_interval_month_day_nano_config(
1010 value: &str,
1011 config: IntervalParseConfig,
1012) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1013 let interval = Interval::parse(value, &config)?;
1014
1015 let (months, days, nanos) = interval.to_month_day_nanos();
1016
1017 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1018}
1019
1020pub fn parse_interval_month_day_nano(
1022 value: &str,
1023) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1025}
1026
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1033
/// Options that control how interval strings are parsed.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// Unit applied to an amount that has no explicit unit in the input.
    default_unit: IntervalUnit,
}

impl IntervalParseConfig {
    /// Creates a configuration that uses `default_unit` for amounts without a unit.
    pub fn new(default_unit: IntervalUnit) -> Self {
        Self { default_unit }
    }
}
1050
/// Units of time accepted in interval strings.
///
/// Each variant's discriminant is a distinct single bit of a `u16`.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A century.
    Century     = 0b_0000_0000_0001,
    /// A decade.
    Decade      = 0b_0000_0000_0010,
    /// A year.
    Year        = 0b_0000_0000_0100,
    /// A month.
    Month       = 0b_0000_0000_1000,
    /// A week.
    Week        = 0b_0000_0001_0000,
    /// A day.
    Day         = 0b_0000_0010_0000,
    /// An hour.
    Hour        = 0b_0000_0100_0000,
    /// A minute.
    Minute      = 0b_0000_1000_0000,
    /// A second.
    Second      = 0b_0001_0000_0000,
    /// A millisecond.
    Millisecond = 0b_0010_0000_0000,
    /// A microsecond.
    Microsecond = 0b_0100_0000_0000,
    /// A nanosecond.
    Nanosecond  = 0b_1000_0000_0000,
}
1082
1083impl FromStr for IntervalUnit {
1088 type Err = ArrowError;
1089
1090 fn from_str(s: &str) -> Result<Self, ArrowError> {
1091 match s.to_lowercase().as_str() {
1092 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1093 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1094 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1095 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1096 "w" | "week" | "weeks" => Ok(Self::Week),
1097 "d" | "day" | "days" => Ok(Self::Day),
1098 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1099 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1100 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1101 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1102 Ok(Self::Millisecond)
1103 }
1104 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1105 Ok(Self::Microsecond)
1106 }
1107 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1108 _ => Err(ArrowError::InvalidArgumentError(format!(
1109 "Unknown interval type: {s}"
1110 ))),
1111 }
1112 }
1113}
1114
1115impl IntervalUnit {
1116 fn from_str_or_config(
1117 s: Option<&str>,
1118 config: &IntervalParseConfig,
1119 ) -> Result<Self, ArrowError> {
1120 match s {
1121 Some(s) => s.parse(),
1122 None => Ok(config.default_unit),
1123 }
1124 }
1125}
1126
/// Tuple form of an interval — presumably `(months, days, nanoseconds)`,
/// matching the field order of `Interval`; TODO confirm against callers.
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal digits kept for the fractional part of an interval
/// amount; `IntervalAmount::frac` is scaled by `10^INTERVAL_PRECISION`.
const INTERVAL_PRECISION: u32 = 15;
1132
/// A decimal amount from an interval string, split into integer and
/// fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer part of the amount.
    integer: i64,
    /// The fractional part scaled by `10^INTERVAL_PRECISION`; negative when
    /// the amount is negative.
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor that takes the two parts directly.
    fn new(integer: i64, frac: i64) -> Self {
        Self { integer, frac }
    }
}
1147
1148impl FromStr for IntervalAmount {
1149 type Err = ArrowError;
1150
1151 fn from_str(s: &str) -> Result<Self, Self::Err> {
1152 match s.split_once('.') {
1153 Some((integer, frac))
1154 if frac.len() <= INTERVAL_PRECISION as usize
1155 && !frac.is_empty()
1156 && !frac.starts_with('-') =>
1157 {
1158 let explicit_neg = integer.starts_with('-');
1161 let integer = if integer.is_empty() || integer == "-" {
1162 Ok(0)
1163 } else {
1164 integer.parse::<i64>().map_err(|_| {
1165 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1166 })
1167 }?;
1168
1169 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1170 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1171 })?;
1172
1173 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1175
1176 let frac = if integer < 0 || explicit_neg {
1178 -frac
1179 } else {
1180 frac
1181 };
1182
1183 let result = Self { integer, frac };
1184
1185 Ok(result)
1186 }
1187 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1188 "Failed to parse {s} as interval amount"
1189 ))),
1190 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1191 Err(ArrowError::ParseError(format!(
1192 "{s} exceeds the precision available for interval amount"
1193 )))
1194 }
1195 Some(_) | None => {
1196 let integer = s.parse::<i64>().map_err(|_| {
1197 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1198 })?;
1199
1200 let result = Self { integer, frac: 0 };
1201 Ok(result)
1202 }
1203 }
1204 }
1205}
1206
/// An interval accumulated from parsed components, kept as separate month,
/// day and nanosecond counts.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Whole months.
    months: i32,
    /// Whole days.
    days: i32,
    /// Nanoseconds.
    nanos: i64,
}
1213
impl Interval {
    /// Creates an interval from its three components.
    fn new(months: i32, days: i32, nanos: i64) -> Self {
        Self {
            months,
            days,
            nanos,
        }
    }

    /// Returns the month count, or an error if the interval has a non-zero
    /// day or nanosecond component.
    fn to_year_months(&self) -> Result<i32, ArrowError> {
        match (self.months, self.days, self.nanos) {
            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
            _ => Err(ArrowError::InvalidArgumentError(format!(
                "Unable to represent interval with days and nanos as year-months: {:?}",
                self
            ))),
        }
    }

    /// Converts the interval to `(days, milliseconds)`, counting each month as
    /// 30 days.
    ///
    /// Fails if the day count overflows, if the nanoseconds are not a whole
    /// number of milliseconds, or if the milliseconds do not fit in an `i32`.
    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
        let days = self.months.mul_checked(30)?.add_checked(self.days)?;

        match self.nanos {
            nanos if nanos % NANOS_PER_MILLIS == 0 => {
                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
                    ArrowError::InvalidArgumentError(format!(
                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
                        self.nanos
                    ))
                })?;

                Ok((days, millis))
            }
            nanos => Err(ArrowError::InvalidArgumentError(format!(
                "Unable to represent {nanos} as milliseconds"
            ))),
        }
    }

    /// Returns the components as a `(months, days, nanos)` tuple.
    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
        (self.months, self.days, self.nanos)
    }

    /// Parses `value` into its `(amount, unit)` components and sums them into
    /// a single interval, starting from the all-zero interval.
    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
        let components = parse_interval_components(value, config)?;

        components
            .into_iter()
            .try_fold(Self::default(), |result, (amount, unit)| {
                result.add(amount, unit)
            })
    }

    /// Returns a new interval equal to `self` plus `amount` of `unit`.
    ///
    /// `amount.frac` is scaled by `10^INTERVAL_PRECISION`. Each arm multiplies
    /// it by the unit's conversion factor and divides by the matching power of
    /// ten, truncating any remainder. A fraction of a unit spills into the
    /// next smaller field where one exists (months into days, weeks and days
    /// into nanoseconds).
    ///
    /// Fails when a component overflows its integer type.
    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
        let result = match unit {
            IntervalUnit::Century => {
                // 1 century = 100 * 12 months.
                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
                // frac / 10^15 * 1200 == frac * 12 / 10^13
                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
                let months = months_int
                    .add_checked(month_frac)?
                    .try_into()
                    .map_err(|_| {
                        ArrowError::ParseError(format!(
                            "Unable to represent {} centuries as months in a signed 32-bit integer",
                            &amount.integer
                        ))
                    })?;

                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
            }
            IntervalUnit::Decade => {
                // 1 decade = 10 * 12 months.
                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;

                // frac / 10^15 * 120 == frac * 12 / 10^14
                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
                let months = months_int
                    .add_checked(month_frac)?
                    .try_into()
                    .map_err(|_| {
                        ArrowError::ParseError(format!(
                            "Unable to represent {} decades as months in a signed 32-bit integer",
                            &amount.integer
                        ))
                    })?;

                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
            }
            IntervalUnit::Year => {
                let months_int = amount.integer.mul_checked(12)?;
                // Fractional years become whole months (truncated).
                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
                let months = months_int
                    .add_checked(month_frac)?
                    .try_into()
                    .map_err(|_| {
                        ArrowError::ParseError(format!(
                            "Unable to represent {} years as months in a signed 32-bit integer",
                            &amount.integer
                        ))
                    })?;

                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
            }
            IntervalUnit::Month => {
                let months = amount.integer.try_into().map_err(|_| {
                    ArrowError::ParseError(format!(
                        "Unable to represent {} months in a signed 32-bit integer",
                        &amount.integer
                    ))
                })?;

                // Fractional months become days at 30 days per month:
                // frac / 10^15 * 30 == frac * 3 / 10^14
                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
                let days = days.try_into().map_err(|_| {
                    ArrowError::ParseError(format!(
                        "Unable to represent {} months as days in a signed 32-bit integer",
                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
                    ))
                })?;

                Self::new(
                    self.months.add_checked(months)?,
                    self.days.add_checked(days)?,
                    self.nanos,
                )
            }
            IntervalUnit::Week => {
                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
                    ArrowError::ParseError(format!(
                        "Unable to represent {} weeks as days in a signed 32-bit integer",
                        &amount.integer
                    ))
                })?;

                // Fractional weeks become nanoseconds:
                // frac / 10^15 * 7 * 24 * 3600 * 10^9 == frac * 7 * 24 * 36 / 10^4
                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);

                Self::new(
                    self.months,
                    self.days.add_checked(days)?,
                    self.nanos.add_checked(nanos)?,
                )
            }
            IntervalUnit::Day => {
                let days = amount.integer.try_into().map_err(|_| {
                    ArrowError::InvalidArgumentError(format!(
                        "Unable to represent {} days in a signed 32-bit integer",
                        amount.integer
                    ))
                })?;

                // Fractional days become nanoseconds:
                // frac / 10^15 * 24 * 3600 * 10^9 == frac * 24 * 36 / 10^4
                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);

                Self::new(
                    self.months,
                    self.days.add_checked(days)?,
                    self.nanos.add_checked(nanos)?,
                )
            }
            IntervalUnit::Hour => {
                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
                // frac / 10^15 * 3600 * 10^9 == frac * 36 / 10^4
                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
                let nanos = nanos_int.add_checked(nanos_frac)?;

                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
            }
            IntervalUnit::Minute => {
                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
                // frac / 10^15 * 60 * 10^9 == frac * 6 / 10^5
                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);

                let nanos = nanos_int.add_checked(nanos_frac)?;

                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
            }
            IntervalUnit::Second => {
                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
                // frac / 10^15 * 10^9 == frac / 10^6
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
                let nanos = nanos_int.add_checked(nanos_frac)?;

                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
            }
            IntervalUnit::Millisecond => {
                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
                // frac / 10^15 * 10^6 == frac / 10^9
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
                let nanos = nanos_int.add_checked(nanos_frac)?;

                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
            }
            IntervalUnit::Microsecond => {
                let nanos_int = amount.integer.mul_checked(1_000)?;
                // frac / 10^15 * 10^3 == frac / 10^12
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
                let nanos = nanos_int.add_checked(nanos_frac)?;

                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
            }
            IntervalUnit::Nanosecond => {
                let nanos_int = amount.integer;
                // A fraction of a nanosecond always truncates to zero here.
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
                let nanos = nanos_int.add_checked(nanos_frac)?;

                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
            }
        };

        Ok(result)
    }
}
1425
1426fn parse_interval_components(
1428 value: &str,
1429 config: &IntervalParseConfig,
1430) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1431 let raw_pairs = split_interval_components(value);
1432
1433 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1435 .iter()
1436 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1437 .collect()
1438 else {
1439 return Err(ArrowError::ParseError(format!(
1440 "Invalid input syntax for type interval: {value:?}"
1441 )));
1442 };
1443
1444 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1446
1447 let mut observed_interval_types = 0;
1449 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1450 if observed_interval_types & (*unit as u16) != 0 {
1451 return Err(ArrowError::ParseError(format!(
1452 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1453 value,
1454 raw_unit.unwrap_or_default(),
1455 )));
1456 }
1457
1458 observed_interval_types |= *unit as u16;
1459 }
1460
1461 let result = amounts.iter().copied().zip(units.iter().copied());
1462
1463 Ok(result.collect::<Vec<_>>())
1464}
1465
1466fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1472 let mut result = vec![];
1473 let mut words = value.split(char::is_whitespace);
1474 while let Some(word) = words.next() {
1475 if let Some(split_word_at) = word.find(not_interval_amount) {
1476 let (amount, unit) = word.split_at(split_word_at);
1477 result.push((amount, Some(unit)));
1478 } else if let Some(unit) = words.next() {
1479 result.push((word, Some(unit)));
1480 } else {
1481 result.push((word, None));
1482 break;
1483 }
1484 }
1485 result
1486}
1487
/// Returns `true` when `c` is none of an ASCII digit, `'.'` or `'-'`.
fn not_interval_amount(c: char) -> bool {
    !(c.is_ascii_digit() || matches!(c, '.' | '-'))
}
1492
1493#[cfg(test)]
1494mod tests {
1495 use super::*;
1496 use arrow_array::temporal_conversions::date32_to_datetime;
1497 use arrow_buffer::i256;
1498
#[test]
fn test_parse_nanos() {
    // Offset 0: the slice already holds digit values rather than ASCII bytes.
    let three_digits = parse_nanos::<3, 0>(&[1, 2, 3]);
    assert_eq!(three_digits, 123_000_000);

    let five_digits = parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]);
    assert_eq!(five_digits, 123_450_000);

    // Offset b'0': the slice holds ASCII digits.
    let six_ascii_digits = parse_nanos::<6, b'0'>(b"123456");
    assert_eq!(six_ascii_digits, 123_456_000);
}
1505
#[test]
fn string_to_timestamp_timezone() {
    // `T`-separated inputs carrying an explicit offset or `Z`, paired with the
    // expected value returned by `parse_timestamp`.
    let cases = [
        ("2020-09-08T13:42:29.190855+00:00", 1599572549190855000),
        ("2020-09-08T13:42:29.190855Z", 1599572549190855000),
        ("2020-09-08T13:42:29Z", 1599572549000000000),
        // A -05:00 offset moves the expected value five hours later.
        ("2020-09-08T13:42:29.190855-05:00", 1599590549190855000),
    ];

    for (input, expected) in cases {
        assert_eq!(expected, parse_timestamp(input).unwrap());
    }
}
1526
#[test]
fn string_to_timestamp_timezone_space() {
    // Same instants as the `T`-separated variant of this test, but with a single
    // space between date and time.
    let cases = [
        ("2020-09-08 13:42:29.190855+00:00", 1599572549190855000),
        ("2020-09-08 13:42:29.190855Z", 1599572549190855000),
        ("2020-09-08 13:42:29Z", 1599572549000000000),
        ("2020-09-08 13:42:29.190855-05:00", 1599590549190855000),
    ];

    for (input, expected) in cases {
        assert_eq!(expected, parse_timestamp(input).unwrap());
    }
}
1547
#[test]
#[cfg_attr(miri, ignore)]
fn string_to_timestamp_no_timezone() {
    let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();

    // Reference value: the naive date/time read as UTC, in nanoseconds.
    let utc_nanos = |time: NaiveTime| {
        NaiveDateTime::new(day, time)
            .and_utc()
            .timestamp_nanos_opt()
            .unwrap()
    };

    // Fractional seconds, with `T` and with a space as separator.
    let with_fraction = utc_nanos(NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap());
    for input in ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"] {
        assert_eq!(with_fraction, parse_timestamp(input).unwrap());
    }

    // Whole seconds only.
    let whole_seconds = utc_nanos(NaiveTime::from_hms_opt(13, 42, 29).unwrap());
    for input in ["2020-09-08T13:42:29", "2020-09-08 13:42:29"] {
        assert_eq!(whole_seconds, parse_timestamp(input).unwrap());
    }

    // A bare date is expected to resolve to midnight of that day.
    let midnight = utc_nanos(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
    assert_eq!(midnight, parse_timestamp("2020-09-08").unwrap())
}
1603
#[test]
fn string_to_timestamp_chrono() {
    // Every input must resolve to the same instant that chrono's RFC 3339 parser
    // produces for it.
    let inputs = [
        "2020-09-08T13:42:29Z",
        "1969-01-01T00:00:00.1Z",
        "2020-09-08T12:00:12.12345678+00:00",
        "2020-09-08T12:00:12+00:00",
        "2020-09-08T12:00:12.1+00:00",
        "2020-09-08T12:00:12.12+00:00",
        "2020-09-08T12:00:12.123+00:00",
        "2020-09-08T12:00:12.1234+00:00",
        "2020-09-08T12:00:12.12345+00:00",
        "2020-09-08T12:00:12.123456+00:00",
        "2020-09-08T12:00:12.1234567+00:00",
        "2020-09-08T12:00:12.12345678+00:00",
        "2020-09-08T12:00:12.123456789+00:00",
        "2020-09-08T12:00:12.12345678912z",
        "2020-09-08T12:00:12.123456789123Z",
        "2020-09-08T12:00:12.123456789123+02:00",
        "2020-09-08T12:00:12.12345678912345Z",
        "2020-09-08T12:00:12.1234567891234567+02:00",
        "2020-09-08T12:00:60Z",
        "2020-09-08T12:00:60.123Z",
        "2020-09-08T12:00:60.123456+02:00",
        "2020-09-08T12:00:60.1234567891234567+02:00",
        "2020-09-08T12:00:60.999999999+02:00",
        "2020-09-08t12:00:12.12345678+00:00",
        "2020-09-08t12:00:12+00:00",
        "2020-09-08t12:00:12Z",
    ];

    for input in inputs {
        let reference = DateTime::parse_from_rfc3339(input)
            .unwrap()
            .with_timezone(&Utc);
        let parsed = string_to_datetime(&Utc, input).unwrap();
        assert_eq!(reference, parsed)
    }
}
1643
#[test]
fn string_to_timestamp_naive() {
    // Inputs without an offset, checked against chrono's naive parser with
    // optional fractional seconds.
    const CHRONO_FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.f";

    let inputs = [
        "2018-11-13T17:11:10.011375885995",
        "2030-12-04T17:11:10.123",
        "2030-12-04T17:11:10.1234",
        "2030-12-04T17:11:10.123456",
    ];

    for input in inputs {
        let reference = NaiveDateTime::parse_from_str(input, CHRONO_FORMAT).unwrap();
        let parsed = string_to_datetime(&Utc, input).unwrap();
        assert_eq!(reference, parsed.naive_utc())
    }
}
1658
#[test]
fn string_to_timestamp_invalid() {
    // Each malformed input is paired with the context suffix expected at the end
    // of the error message.
    let cases = [
        ("", "timestamp must contain at least 10 characters"),
        ("SS", "timestamp must contain at least 10 characters"),
        ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
        ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
        // Two spaces between date and time. A single space is an accepted
        // separator (see `string_to_timestamp_timezone_space`), so a single-space
        // input would parse successfully and `unwrap_err` below would panic.
        ("1997-01-31  09:26:56.123Z", "error parsing time"),
        ("1997:01:31T09:26:56.123Z", "error parsing date"),
        ("1997:1:31T09:26:56.123Z", "error parsing date"),
        ("1997-01-32T09:26:56.123Z", "error parsing date"),
        ("1997-13-32T09:26:56.123Z", "error parsing date"),
        ("1997-02-29T09:26:56.123Z", "error parsing date"),
        ("2015-02-30T17:35:20-08:00", "error parsing date"),
        ("1997-01-10T9:26:56.123Z", "error parsing time"),
        ("2015-01-20T25:35:20-08:00", "error parsing time"),
        ("1997-01-10T09:61:56.123Z", "error parsing time"),
        ("1997-01-10T09:61:90.123Z", "error parsing time"),
        ("1997-01-10T12:00:6.123Z", "error parsing time"),
        ("1997-01-31T092656.123Z", "error parsing time"),
        ("1997-01-10T12:00:06.", "error parsing time"),
        ("1997-01-10T12:00:06. ", "error parsing time"),
    ];

    for (s, ctx) in cases {
        let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
        let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
        assert_eq!(actual, expected)
    }
}
1690
1691 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1693 let result = string_to_timestamp_nanos(s);
1694 if let Err(e) = &result {
1695 eprintln!("Error parsing timestamp '{s}': {e:?}");
1696 }
1697 result
1698 }
1699
#[test]
fn string_without_timezone_to_timestamp() {
    let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();

    // Offset-less inputs via `parse_timestamp` must match the naive value read as
    // UTC, for both separators, with and without fractional seconds.
    let groups = [
        (
            190855000,
            ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"],
        ),
        (0, ["2020-09-08T13:42:29", "2020-09-08 13:42:29"]),
    ];
    for (nanos, inputs) in groups {
        let naive =
            NaiveDateTime::new(day, NaiveTime::from_hms_nano_opt(13, 42, 29, nanos).unwrap());
        for input in inputs {
            assert_eq!(
                naive.and_utc().timestamp_nanos_opt().unwrap(),
                parse_timestamp(input).unwrap()
            );
        }
    }

    // With a +02:00 zone passed in, an offset-less input is taken as local time.
    let plus_two: Tz = "+02:00".parse().unwrap();
    let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29").unwrap();
    assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 11:42:29");
    assert_eq!(parsed.naive_local().to_string(), "2020-09-08 13:42:29");

    // A trailing `Z` pins the input to UTC; the local view is shifted instead.
    let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29Z").unwrap();
    assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 13:42:29");
    assert_eq!(parsed.naive_local().to_string(), "2020-09-08 15:42:29");

    let reference =
        NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
    let plus_eight: Tz = "+08:00".parse().unwrap();

    // Explicit `Z`: the UTC view equals the reference, the local view does not.
    let parsed = string_to_datetime(&plus_eight, "2020-09-08T13:42:29Z").unwrap();
    assert_eq!(reference, parsed.naive_utc());
    assert_ne!(reference, parsed.naive_local());

    // No offset: the local view equals the reference, the UTC view does not.
    let parsed = string_to_datetime(&plus_eight, "2020-09-08 13:42:29").unwrap();
    assert_eq!(reference, parsed.naive_local());
    assert_ne!(reference, parsed.naive_utc());
}
1763
#[test]
fn parse_date32() {
    // Accepted inputs: the parsed date must agree with chrono, using either the
    // date-only format or the date-plus-time format.
    let accepted = [
        "2020-09-08",
        "2020-9-8",
        "2020-09-8",
        "2020-9-08",
        "2020-12-1",
        "1690-2-5",
        "2020-09-08 01:02:03",
    ];
    for input in accepted {
        let days = Date32Type::parse(input).unwrap();
        let parsed = date32_to_datetime(days).unwrap().date();
        let reference = NaiveDate::parse_from_str(input, "%Y-%m-%d")
            .or(NaiveDate::parse_from_str(input, "%Y-%m-%d %H:%M:%S"))
            .unwrap();
        assert_eq!(parsed, reference);
    }

    // Rejected inputs: parsing must yield `None`.
    let rejected = [
        "",
        "80-01-01",
        "342",
        "Foo",
        "2020-09-08-03",
        "2020--04-03",
        "2020--",
        "2020-09-08 01",
        "2020-09-08 01:02",
        "2020-09-08 01-02-03",
        "2020-9-8 01:02:03",
        "2020-09-08 1:2:3",
    ];
    for input in rejected {
        assert_eq!(Date32Type::parse(input), None);
    }
}
1801
#[test]
fn parse_time64_nanos() {
    // (input, expected nanosecond value)
    let cases = [
        ("02:10:01.1234567899999999", 7_801_123_456_789),
        ("02:10:01.1234567", 7_801_123_456_700),
        ("2:10:01.1234567", 7_801_123_456_700),
        ("12:10:01.123456789 AM", 601_123_456_789),
        ("12:10:01.123456789 am", 601_123_456_789),
        ("2:10:01.12345678 PM", 51_001_123_456_780),
        ("2:10:01.12345678 pm", 51_001_123_456_780),
        ("02:10:01", 7_801_000_000_000),
        ("2:10:01", 7_801_000_000_000),
        ("12:10:01 AM", 601_000_000_000),
        ("12:10:01 am", 601_000_000_000),
        ("2:10:01 PM", 51_001_000_000_000),
        ("2:10:01 pm", 51_001_000_000_000),
        ("02:10", 7_800_000_000_000),
        ("2:10", 7_800_000_000_000),
        ("12:10 AM", 600_000_000_000),
        ("12:10 am", 600_000_000_000),
        ("2:10 PM", 51_000_000_000_000),
        ("2:10 pm", 51_000_000_000_000),
        // A bare integer is expected to come back as that same number.
        ("1", 1),
        // Second 60 is accepted and lands on a full day's worth of nanoseconds.
        ("23:59:60", 86_400_000_000_000),
    ];
    for (input, expected) in cases {
        assert_eq!(Time64NanosecondType::parse(input), Some(expected));
    }

    // Caller-supplied format string.
    let formatted =
        Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_123_456_700));
}
1893
#[test]
fn parse_time64_micros() {
    // (input, expected microsecond value)
    let cases = [
        ("02:10:01.1234", 7_801_123_400),
        ("2:10:01.1234", 7_801_123_400),
        ("12:10:01.123456 AM", 601_123_456),
        ("12:10:01.123456 am", 601_123_456),
        ("2:10:01.12345 PM", 51_001_123_450),
        ("2:10:01.12345 pm", 51_001_123_450),
        ("02:10:01", 7_801_000_000),
        ("2:10:01", 7_801_000_000),
        ("12:10:01 AM", 601_000_000),
        ("12:10:01 am", 601_000_000),
        ("2:10:01 PM", 51_001_000_000),
        ("2:10:01 pm", 51_001_000_000),
        ("02:10", 7_800_000_000),
        ("2:10", 7_800_000_000),
        ("12:10 AM", 600_000_000),
        ("12:10 am", 600_000_000),
        ("2:10 PM", 51_000_000_000),
        ("2:10 pm", 51_000_000_000),
        // A bare integer is expected to come back as that same number.
        ("1", 1),
        // Second 60 is accepted and lands on a full day's worth of microseconds.
        ("23:59:60", 86_400_000_000),
    ];
    for (input, expected) in cases {
        assert_eq!(Time64MicrosecondType::parse(input), Some(expected));
    }

    // Caller-supplied format string.
    let formatted =
        Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_123_400));
}
1970
#[test]
fn parse_time32_millis() {
    // (input, expected millisecond value)
    let cases = [
        ("02:10:01.1", 7_801_100),
        ("2:10:01.1", 7_801_100),
        ("12:10:01.123 AM", 601_123),
        ("12:10:01.123 am", 601_123),
        ("2:10:01.12 PM", 51_001_120),
        ("2:10:01.12 pm", 51_001_120),
        ("02:10:01", 7_801_000),
        ("2:10:01", 7_801_000),
        ("12:10:01 AM", 601_000),
        ("12:10:01 am", 601_000),
        ("2:10:01 PM", 51_001_000),
        ("2:10:01 pm", 51_001_000),
        ("02:10", 7_800_000),
        ("2:10", 7_800_000),
        ("12:10 AM", 600_000),
        ("12:10 am", 600_000),
        ("2:10 PM", 51_000_000),
        ("2:10 pm", 51_000_000),
        // A bare integer is expected to come back as that same number.
        ("1", 1),
        // Second 60 is accepted and lands on a full day's worth of milliseconds.
        ("23:59:60", 86_400_000),
    ];
    for (input, expected) in cases {
        assert_eq!(Time32MillisecondType::parse(input), Some(expected));
    }

    // Caller-supplied format string.
    let formatted =
        Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_100));
}
2017
#[test]
fn parse_time32_secs() {
    // (input, expected second value)
    let cases = [
        // The fractional part does not show up in the expected second count.
        ("02:10:01.1", 7_801),
        ("02:10:01", 7_801),
        ("2:10:01", 7_801),
        ("12:10:01 AM", 601),
        ("12:10:01 am", 601),
        ("2:10:01 PM", 51_001),
        ("2:10:01 pm", 51_001),
        ("02:10", 7_800),
        ("2:10", 7_800),
        ("12:10 AM", 600),
        ("12:10 am", 600),
        ("2:10 PM", 51_000),
        ("2:10 pm", 51_000),
        // A bare integer is expected to come back as that same number.
        ("1", 1),
        // Second 60 is accepted and lands on a full day's worth of seconds.
        ("23:59:60", 86400),
    ];
    for (input, expected) in cases {
        assert_eq!(Time32SecondType::parse(input), Some(expected));
    }

    // Caller-supplied format string.
    let formatted = Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S");
    assert_eq!(formatted, Some(7_801));
}
2047
#[test]
fn test_string_to_time_invalid() {
    // None of these may be accepted by `string_to_time`. The failing input is
    // used as the assertion message.
    let rejected = [
        "25:00",
        "9:00:",
        "009:00",
        "09:0:00",
        "25:00:00",
        "13:00 AM",
        "13:00 PM",
        "12:00. AM",
        "09:0:00",
        "09:01:0",
        "09:01:1",
        "9:1:0",
        "09:01:0",
        "1:00.123",
        "1:00:00.123f",
        " 9:00:00",
        ":09:00",
        "T9:00:00",
        "AM",
    ];

    rejected
        .into_iter()
        .for_each(|case| assert!(string_to_time(case).is_none(), "{case}"));
}
2075
#[test]
fn test_string_to_time_chrono() {
    // Differential test: for each (input, chrono format) pair, `string_to_time`
    // must agree with chrono on both the accepted value and on rejection.
    let table = [
        ("1:00", "%H:%M"),
        ("12:00", "%H:%M"),
        ("13:00", "%H:%M"),
        ("24:00", "%H:%M"),
        ("1:00:00", "%H:%M:%S"),
        ("12:00:30", "%H:%M:%S"),
        ("13:00:59", "%H:%M:%S"),
        ("24:00:60", "%H:%M:%S"),
        ("09:00:00", "%H:%M:%S%.f"),
        ("0:00:30.123456", "%H:%M:%S%.f"),
        ("0:00 AM", "%I:%M %P"),
        ("1:00 AM", "%I:%M %P"),
        ("12:00 AM", "%I:%M %P"),
        ("13:00 AM", "%I:%M %P"),
        ("0:00 PM", "%I:%M %P"),
        ("1:00 PM", "%I:%M %P"),
        ("12:00 PM", "%I:%M %P"),
        ("13:00 PM", "%I:%M %P"),
        ("1:00 pM", "%I:%M %P"),
        ("1:00 Pm", "%I:%M %P"),
        ("1:00 aM", "%I:%M %P"),
        ("1:00 Am", "%I:%M %P"),
        ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
    ];

    for (s, chrono_format) in table {
        let reference = NaiveTime::parse_from_str(s, chrono_format).ok();
        assert_eq!(reference, string_to_time(s), "{s}");
    }
}
2115
#[test]
fn test_parse_interval() {
    // A missing unit falls back to months with this configuration (see the bare
    // "1" and "42" cases below).
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    // (expected interval, input) pairs that must parse successfully.
    let accepted = [
        (Interval::new(1, 0, 0), "1 month"),
        (Interval::new(2, 0, 0), "2 month"),
        (
            Interval::new(-1, -18, -(NANOS_PER_DAY / 5)),
            "-1.5 months -3.2 days",
        ),
        // Fractional months, with and without a leading zero or sign.
        (Interval::new(0, 15, 0), "0.5 months"),
        (Interval::new(0, 15, 0), ".5 months"),
        (Interval::new(0, -15, 0), "-0.5 months"),
        (Interval::new(0, -15, 0), "-.5 months"),
        (
            Interval::new(2, 10, 9 * NANOS_PER_HOUR),
            "2.1 months 7.25 days 3 hours",
        ),
        // Years and months combine into the month field.
        (Interval::new(37, 0, 0), "3 year 1 month"),
        (Interval::new(35, 0, 0), "3 year -1 month"),
        (Interval::new(-37, 0, 0), "-3 year -1 month"),
        (Interval::new(-35, 0, 0), "-3 year 1 month"),
        // Days and sub-day units.
        (Interval::new(0, 5, 0), "5 days"),
        (Interval::new(0, 7, 3 * NANOS_PER_HOUR), "7 days 3 hours"),
        (Interval::new(0, 7, 5 * NANOS_PER_MINUTE), "7 days 5 minutes"),
        (
            Interval::new(0, 7, -5 * NANOS_PER_MINUTE),
            "7 days -5 minutes",
        ),
        (Interval::new(0, -7, 5 * NANOS_PER_HOUR), "-7 days 5 hours"),
        (
            Interval::new(
                0,
                -7,
                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND,
            ),
            "-7 days -5 hours -5 minutes -5 seconds",
        ),
        (
            Interval::new(12, 0, 25 * NANOS_PER_MILLIS),
            "1 year 25 millisecond",
        ),
        (
            Interval::new(
                12,
                1,
                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64,
            ),
            "1 year 1 day 0.000000001 seconds",
        ),
        (
            Interval::new(12, 1, NANOS_PER_MILLIS / 10),
            "1 year 1 day 0.1 milliseconds",
        ),
        (Interval::new(12, 1, 1000), "1 year 1 day 1 microsecond"),
        (Interval::new(12, 1, 1), "1 year 1 day 1 nanoseconds"),
        (Interval::new(1, 0, -NANOS_PER_SECOND), "1 month -1 second"),
        (
            Interval::new(
                -13,
                -8,
                -NANOS_PER_HOUR
                    - NANOS_PER_MINUTE
                    - NANOS_PER_SECOND
                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64,
            ),
            "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
        ),
        // No unit given: the configured default unit applies.
        (Interval::new(1, 0, 0), "1"),
        (Interval::new(42, 0, 0), "42"),
        // Abbreviated unit names.
        (Interval::new(1, 0, 0), "1 mon"),
        (Interval::new(1, 0, 0), "1 mons"),
        (Interval::new(0, 0, 1_000_000), "1 ms"),
        (Interval::new(0, 0, 1_000), "1 us"),
        // Amount and unit written without a separating space.
        (Interval::new(0, 0, 1_000), "1us"),
        (Interval::new(0, 0, NANOS_PER_SECOND), "1s"),
        (
            Interval::new(1, 2, 10_864_000_000_000),
            "1mon 2days 3hr 1min 4sec",
        ),
        (
            Interval::new(
                -13,
                -8,
                -NANOS_PER_HOUR
                    - NANOS_PER_MINUTE
                    - NANOS_PER_SECOND
                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64,
            ),
            "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
        ),
    ];
    for (expected, input) in accepted {
        assert_eq!(expected, Interval::parse(input, &config).unwrap());
    }

    // The same unit-less input under a different default unit.
    assert_eq!(
        Interval::new(0, 0, 42_000_000_000),
        Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
    );

    // Inputs that must be rejected, with the exact rendered error.
    let rejected = [
        (
            "1 centurys 1 month",
            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#,
        ),
        (
            "1h s",
            r#"Parser error: Invalid input syntax for type interval: "1h s""#,
        ),
        (
            "1XX",
            r#"Parser error: Invalid input syntax for type interval: "1XX""#,
        ),
    ];
    for (input, message) in rejected {
        assert_eq!(
            Interval::parse(input, &config).unwrap_err().to_string(),
            message
        );
    }
}
2343
#[test]
fn test_duplicate_interval_type() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    // (input, expected `Debug` rendering of the error)
    let cases = [
        (
            "1 month 1 second 1 second",
            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
        ),
        // Different spellings of one unit still count as a repeat; the message
        // names the spelling at the repeated position.
        (
            "1 century 2 centuries",
            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
        ),
    ];

    for (input, expected) in cases {
        let err = Interval::parse(input, &config)
            .expect_err("parsing interval should have failed");
        assert_eq!(expected, format!("{err:?}"));
    }
}
2363
#[test]
fn test_interval_amount_parsing() {
    // Value of one tenth in the fractional field.
    let tenth = 10_i64.pow(INTERVAL_PRECISION - 1);

    // (input, expected amount) for inputs that parse.
    let accepted = [
        // Integer only.
        ("123", IntervalAmount::new(123, 0)),
        // Positive value with a fractional part.
        ("0.3", IntervalAmount::new(0, 3 * tenth)),
        // Negative value: both fields carry the sign.
        ("-3.5", IntervalAmount::new(-3, -5 * tenth)),
    ];
    for (input, expected) in accepted {
        let result = IntervalAmount::from_str(input).unwrap();
        assert_eq!(result, expected);
    }

    // A dot with nothing after it, and a sign inside the fractional part, are
    // both rejected.
    for input in ["3.", "3.-5"] {
        assert!(IntervalAmount::from_str(input).is_err());
    }
}
2392
#[test]
fn test_interval_precision() {
    // A large day count with one fractional digit: the tenth of a day must come
    // out as exactly `NANOS_PER_DAY / 10`.
    let parsed = Interval::parse(
        "100000.1 days",
        &IntervalParseConfig::new(IntervalUnit::Month),
    )
    .unwrap();

    assert_eq!(parsed, Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10));
}
2402
#[test]
fn test_interval_addition() {
    // Fractional-field values for 0.1 and 0.01.
    let tenth = 10_i64.pow(INTERVAL_PRECISION - 1);
    let hundredth = 10_i64.pow(INTERVAL_PRECISION - 2);

    // (start, amount to add, unit, expected result)
    let cases = [
        // 4.1 centuries
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(4, tenth),
            IntervalUnit::Century,
            Interval::new(4921, 2, 3),
        ),
        // 10.25 decades
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(10, 25 * hundredth),
            IntervalUnit::Decade,
            Interval::new(1231, 2, 3),
        ),
        // 30.3 years: only the month field changes
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(30, 3 * tenth),
            IntervalUnit::Year,
            Interval::new(364, 2, 3),
        ),
        // 1.5 months: the half month shows up as 15 days
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(1, 5 * tenth),
            IntervalUnit::Month,
            Interval::new(2, 17, 3),
        ),
        // -2 weeks
        (
            Interval::new(1, 25, 3),
            IntervalAmount::new(-2, 0),
            IntervalUnit::Week,
            Interval::new(1, 11, 3),
        ),
        // 2.2 days: the fraction lands in the nanosecond field
        (
            Interval::new(12, 15, 3),
            IntervalAmount::new(2, 2 * tenth),
            IntervalUnit::Day,
            Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND),
        ),
        // 12.5 hours
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(12, 5 * tenth),
            IntervalUnit::Hour,
            Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND),
        ),
        // -1.5 minutes
        (
            Interval::new(0, 0, -3),
            IntervalAmount::new(-1, -5 * tenth),
            IntervalUnit::Minute,
            Interval::new(0, 0, -90_000_000_000 - 3),
        ),
    ];

    for (start, amount, unit, expected) in cases {
        let result = start.add(amount, unit).unwrap();
        assert_eq!(result, expected);
    }
}
2506
#[test]
fn string_to_timestamp_old() {
    // This 1677 timestamp must be rejected, and the error text must end with the
    // shared "nanoseconds not supported" message.
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2513
2514 #[test]
2515 fn test_parse_decimal_with_parameter() {
2516 let tests = [
2517 ("0", 0i128),
2518 ("123.123", 123123i128),
2519 ("123.1234", 123123i128),
2520 ("123.1", 123100i128),
2521 ("123", 123000i128),
2522 ("-123.123", -123123i128),
2523 ("-123.1234", -123123i128),
2524 ("-123.1", -123100i128),
2525 ("-123", -123000i128),
2526 ("0.0000123", 0i128),
2527 ("12.", 12000i128),
2528 ("-12.", -12000i128),
2529 ("00.1", 100i128),
2530 ("-00.1", -100i128),
2531 ("12345678912345678.1234", 12345678912345678123i128),
2532 ("-12345678912345678.1234", -12345678912345678123i128),
2533 ("99999999999999999.999", 99999999999999999999i128),
2534 ("-99999999999999999.999", -99999999999999999999i128),
2535 (".123", 123i128),
2536 ("-.123", -123i128),
2537 ("123.", 123000i128),
2538 ("-123.", -123000i128),
2539 ];
2540 for (s, i) in tests {
2541 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2542 assert_eq!(i, result_128.unwrap());
2543 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2544 assert_eq!(i256::from_i128(i), result_256.unwrap());
2545 }
2546
2547 let e_notation_tests = [
2548 ("1.23e3", "1230.0", 2),
2549 ("5.6714e+2", "567.14", 4),
2550 ("5.6714e-2", "0.056714", 4),
2551 ("5.6714e-2", "0.056714", 3),
2552 ("5.6741214125e2", "567.41214125", 4),
2553 ("8.91E4", "89100.0", 2),
2554 ("3.14E+5", "314000.0", 2),
2555 ("2.718e0", "2.718", 2),
2556 ("9.999999e-1", "0.9999999", 4),
2557 ("1.23e+3", "1230", 2),
2558 ("1.234559e+3", "1234.559", 2),
2559 ("1.00E-10", "0.0000000001", 11),
2560 ("1.23e-4", "0.000123", 2),
2561 ("9.876e7", "98760000.0", 2),
2562 ("5.432E+8", "543200000.0", 10),
2563 ("1.234567e9", "1234567000.0", 2),
2564 ("1.234567e2", "123.45670000", 2),
2565 ("4749.3e-5", "0.047493", 10),
2566 ("4749.3e+5", "474930000", 10),
2567 ("4749.3e-5", "0.047493", 1),
2568 ("4749.3e+5", "474930000", 1),
2569 ("0E-8", "0", 10),
2570 ("0E+6", "0", 10),
2571 ("1E-8", "0.00000001", 10),
2572 ("12E+6", "12000000", 10),
2573 ("12E-6", "0.000012", 10),
2574 ("0.1e-6", "0.0000001", 10),
2575 ("0.1e+6", "100000", 10),
2576 ("0.12e-6", "0.00000012", 10),
2577 ("0.12e+6", "120000", 10),
2578 ("000000000001e0", "000000000001", 3),
2579 ("000001.1034567002e0", "000001.1034567002", 3),
2580 ("1.234e16", "12340000000000000", 0),
2581 ("123.4e16", "1234000000000000000", 0),
2582 ];
2583 for (e, d, scale) in e_notation_tests {
2584 let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2585 let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2586 assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2587 let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2588 let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2589 assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2590 }
2591 let can_not_parse_tests = [
2592 "123,123",
2593 ".",
2594 "123.123.123",
2595 "",
2596 "+",
2597 "-",
2598 "e",
2599 "1.3e+e3",
2600 "5.6714ee-2",
2601 "4.11ee-+4",
2602 "4.11e++4",
2603 "1.1e.12",
2604 "1.23e+3.",
2605 "1.23e+3.1",
2606 ];
2607 for s in can_not_parse_tests {
2608 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2609 assert_eq!(
2610 format!("Parser error: can't parse the string value {s} to decimal"),
2611 result_128.unwrap_err().to_string()
2612 );
2613 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2614 assert_eq!(
2615 format!("Parser error: can't parse the string value {s} to decimal"),
2616 result_256.unwrap_err().to_string()
2617 );
2618 }
2619 let overflow_parse_tests = [
2620 ("12345678", 3),
2621 ("1.2345678e7", 3),
2622 ("12345678.9", 3),
2623 ("1.23456789e+7", 3),
2624 ("99999999.99", 3),
2625 ("9.999999999e7", 3),
2626 ("12345678908765.123456", 3),
2627 ("123456789087651234.56e-4", 3),
2628 ("1234560000000", 0),
2629 ("1.23456e12", 0),
2630 ];
2631 for (s, scale) in overflow_parse_tests {
2632 let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2633 let expected_128 = "Parser error: parse decimal overflow";
2634 let actual_128 = result_128.unwrap_err().to_string();
2635
2636 assert!(
2637 actual_128.contains(expected_128),
2638 "actual: '{actual_128}', expected: '{expected_128}'"
2639 );
2640
2641 let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2642 let expected_256 = "Parser error: parse decimal overflow";
2643 let actual_256 = result_256.unwrap_err().to_string();
2644
2645 assert!(
2646 actual_256.contains(expected_256),
2647 "actual: '{actual_256}', expected: '{expected_256}'"
2648 );
2649 }
2650
2651 let edge_tests_128 = [
2652 (
2653 "99999999999999999999999999999999999999",
2654 99999999999999999999999999999999999999i128,
2655 0,
2656 ),
2657 (
2658 "999999999999999999999999999999999999.99",
2659 99999999999999999999999999999999999999i128,
2660 2,
2661 ),
2662 (
2663 "9999999999999999999999999.9999999999999",
2664 99999999999999999999999999999999999999i128,
2665 13,
2666 ),
2667 (
2668 "9999999999999999999999999",
2669 99999999999999999999999990000000000000i128,
2670 13,
2671 ),
2672 (
2673 "0.99999999999999999999999999999999999999",
2674 99999999999999999999999999999999999999i128,
2675 38,
2676 ),
2677 (
2678 "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2679 0i128,
2680 15,
2681 ),
2682 (
2683 "1.016744e-320",
2684 0i128,
2685 15,
2686 ),
2687 (
2688 "-1e3",
2689 -1000000000i128,
2690 6,
2691 ),
2692 (
2693 "+1e3",
2694 1000000000i128,
2695 6,
2696 ),
2697 (
2698 "-1e31",
2699 -10000000000000000000000000000000000000i128,
2700 6,
2701 ),
2702 ];
2703 for (s, i, scale) in edge_tests_128 {
2704 let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2705 assert_eq!(i, result_128.unwrap());
2706 }
2707 let edge_tests_256 = [
2708 (
2709 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2710 i256::from_string(
2711 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2712 )
2713 .unwrap(),
2714 0,
2715 ),
2716 (
2717 "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2718 i256::from_string(
2719 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2720 )
2721 .unwrap(),
2722 4,
2723 ),
2724 (
2725 "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2726 i256::from_string(
2727 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2728 )
2729 .unwrap(),
2730 26,
2731 ),
2732 (
2733 "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2734 i256::from_string(
2735 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2736 )
2737 .unwrap(),
2738 26,
2739 ),
2740 (
2741 "99999999999999999999999999999999999999999999999999",
2742 i256::from_string(
2743 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2744 )
2745 .unwrap(),
2746 26,
2747 ),
2748 (
2749 "9.9999999999999999999999999999999999999999999999999e+49",
2750 i256::from_string(
2751 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2752 )
2753 .unwrap(),
2754 26,
2755 ),
2756 ];
2757 for (s, i, scale) in edge_tests_256 {
2758 let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2759 assert_eq!(i, result.unwrap());
2760 }
2761 }
2762
#[test]
fn test_parse_empty() {
    // Neither the empty string nor a lone plus sign parses as any of the
    // integer or floating-point types checked here.
    for input in ["", "+"] {
        assert_eq!(Int32Type::parse(input), None);
        assert_eq!(Int64Type::parse(input), None);
        assert_eq!(UInt32Type::parse(input), None);
        assert_eq!(UInt64Type::parse(input), None);
        assert_eq!(Float32Type::parse(input), None);
        assert_eq!(Float64Type::parse(input), None);
    }

    // The timestamp and date parsers reject the empty string as well.
    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
2780
#[test]
fn test_parse_interval_month_day_nano_config() {
    // With a config built from `IntervalUnit::Second`, the bare string "1"
    // must parse to zero months, zero days and `NANOS_PER_SECOND` nanoseconds.
    let config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", config).unwrap();

    assert_eq!(
        (parsed.months, parsed.days, parsed.nanoseconds),
        (0, 0, NANOS_PER_SECOND)
    );
}
2792}