arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a nanosecond count
///
/// Each entry has the offset `O` subtracted before being treated as a decimal digit, so the
/// caller can pass raw ASCII (`O = b'0'`) or already shifted values (`O = 0`). The `N` digits
/// are read as the leading digits of a nine digit fraction, i.e. the result is scaled by
/// `10^(9 - N)`.
///
/// Callers must ensure `N <= 9`, `digits.len() >= N` and that every consumed entry is a
/// valid digit once `O` has been subtracted.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &raw in &digits[..N] {
        value = value * 10 + u32::from(raw.wrapping_sub(O));
    }
    // Pad on the right so that e.g. ".5" becomes 500_000_000 ns
    value * 10_u32.pow((9 - N) as u32)
}
38
/// Helper for parsing RFC3339 timestamps
///
/// Holds a fixed-size, pre-processed copy of (at most) the first 32 bytes of the input,
/// so that the individual fields can be validated with bit masks and read with plain
/// integer arithmetic.
struct TimestampParser {
    /// The timestamp bytes to parse minus `b'0'`
    ///
    /// This makes interpretation as an integer inexpensive
    digits: [u8; 32],
    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
    ///
    /// Bit `i` describes `digits[i]`
    mask: u32,
}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306    let bytes = s.as_bytes();
307    if bytes.len() < 4 {
308        return None;
309    }
310
311    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314        _ => (None, bytes),
315    };
316
317    if bytes.len() < 4 {
318        return None;
319    }
320
321    let mut digits = [b'0'; 6];
322
323    // Extract hour
324    let bytes = match (bytes[1], bytes[2]) {
325        (b':', _) => {
326            digits[1] = bytes[0];
327            &bytes[2..]
328        }
329        (_, b':') => {
330            digits[0] = bytes[0];
331            digits[1] = bytes[1];
332            &bytes[3..]
333        }
334        _ => return None,
335    };
336
337    if bytes.len() < 2 {
338        return None; // Minutes required
339    }
340
341    // Extract minutes
342    digits[2] = bytes[0];
343    digits[3] = bytes[1];
344
345    let nanoseconds = match bytes.get(2) {
346        Some(b':') => {
347            if bytes.len() < 5 {
348                return None;
349            }
350
351            // Extract seconds
352            digits[4] = bytes[3];
353            digits[5] = bytes[4];
354
355            // Extract sub-seconds if any
356            match bytes.get(5) {
357                Some(b'.') => {
358                    let decimal = &bytes[6..];
359                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360                        return None;
361                    }
362                    match decimal.len() {
363                        0 => return None,
364                        1 => parse_nanos::<1, b'0'>(decimal),
365                        2 => parse_nanos::<2, b'0'>(decimal),
366                        3 => parse_nanos::<3, b'0'>(decimal),
367                        4 => parse_nanos::<4, b'0'>(decimal),
368                        5 => parse_nanos::<5, b'0'>(decimal),
369                        6 => parse_nanos::<6, b'0'>(decimal),
370                        7 => parse_nanos::<7, b'0'>(decimal),
371                        8 => parse_nanos::<8, b'0'>(decimal),
372                        _ => parse_nanos::<9, b'0'>(decimal),
373                    }
374                }
375                Some(_) => return None,
376                None => 0,
377            }
378        }
379        Some(_) => return None,
380        None => 0,
381    };
382
383    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384    if digits.iter().any(|x| *x > 9) {
385        return None;
386    }
387
388    let hour = match (digits[0] * 10 + digits[1], am) {
389        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393        (_, Some(_)) => return None,
394        (h, None) => h,
395    };
396
397    // Handle leap second
398    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399        60 => (59, nanoseconds + 1_000_000_000),
400        s => (s, nanoseconds),
401    };
402
403    NaiveTime::from_hms_nano_opt(
404        hour as _,
405        (digits[2] * 10 + digits[3]) as _,
406        second as _,
407        nanoseconds,
408    )
409}
410
/// Specialized parsing implementations to convert strings to Arrow types.
///
/// This is used by csv and json reader and can be used directly as well.
///
/// Both methods return `None` when the string cannot be converted.
///
/// # Example
///
/// To parse a string to a [`Date32Type`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::Date32Type;
/// let date = Date32Type::parse("2021-01-01").unwrap();
/// assert_eq!(date, 18628);
/// ```
///
/// To parse a string to a [`TimestampNanosecondType`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::TimestampNanosecondType;
/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
/// assert_eq!(ts, 1609459200123456789);
/// ```
pub trait Parser: ArrowPrimitiveType {
    /// Parse a string to the native type
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parse a string to the native type with a format string
    ///
    /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse)
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        // Default: ignore the format and defer to the unformatted parser
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
/// Returns `true` if `opt` holds a value for which `f` returns `true`
///
/// Stand-in for `Option::is_some_and`, which is only stable since Rust 1.70 and so can't be
/// used while the current MSRV is lower
#[inline(always)]
fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
    opt.map_or(false, f)
}
474
475macro_rules! parser_primitive {
476    ($t:ty) => {
477        impl Parser for $t {
478            fn parse(string: &str) -> Option<Self::Native> {
479                if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
480                    return None;
481                }
482                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
483                    string.as_bytes(),
484                ) {
485                    (Some(n), x) if x == string.len() => Some(n),
486                    _ => None,
487                }
488            }
489        }
490    };
491}
492parser_primitive!(UInt64Type);
493parser_primitive!(UInt32Type);
494parser_primitive!(UInt16Type);
495parser_primitive!(UInt8Type);
496parser_primitive!(Int64Type);
497parser_primitive!(Int32Type);
498parser_primitive!(Int16Type);
499parser_primitive!(Int8Type);
500parser_primitive!(DurationNanosecondType);
501parser_primitive!(DurationMicrosecondType);
502parser_primitive!(DurationMillisecondType);
503parser_primitive!(DurationSecondType);
504
505impl Parser for TimestampNanosecondType {
506    fn parse(string: &str) -> Option<i64> {
507        string_to_timestamp_nanos(string).ok()
508    }
509}
510
511impl Parser for TimestampMicrosecondType {
512    fn parse(string: &str) -> Option<i64> {
513        let nanos = string_to_timestamp_nanos(string).ok();
514        nanos.map(|x| x / 1000)
515    }
516}
517
518impl Parser for TimestampMillisecondType {
519    fn parse(string: &str) -> Option<i64> {
520        let nanos = string_to_timestamp_nanos(string).ok();
521        nanos.map(|x| x / 1_000_000)
522    }
523}
524
525impl Parser for TimestampSecondType {
526    fn parse(string: &str) -> Option<i64> {
527        let nanos = string_to_timestamp_nanos(string).ok();
528        nanos.map(|x| x / 1_000_000_000)
529    }
530}
531
532impl Parser for Time64NanosecondType {
533    // Will truncate any fractions of a nanosecond
534    fn parse(string: &str) -> Option<Self::Native> {
535        string_to_time_nanoseconds(string)
536            .ok()
537            .or_else(|| string.parse::<Self::Native>().ok())
538    }
539
540    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
541        let nt = NaiveTime::parse_from_str(string, format).ok()?;
542        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
543    }
544}
545
546impl Parser for Time64MicrosecondType {
547    // Will truncate any fractions of a microsecond
548    fn parse(string: &str) -> Option<Self::Native> {
549        string_to_time_nanoseconds(string)
550            .ok()
551            .map(|nanos| nanos / 1_000)
552            .or_else(|| string.parse::<Self::Native>().ok())
553    }
554
555    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
556        let nt = NaiveTime::parse_from_str(string, format).ok()?;
557        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
558    }
559}
560
561impl Parser for Time32MillisecondType {
562    // Will truncate any fractions of a millisecond
563    fn parse(string: &str) -> Option<Self::Native> {
564        string_to_time_nanoseconds(string)
565            .ok()
566            .map(|nanos| (nanos / 1_000_000) as i32)
567            .or_else(|| string.parse::<Self::Native>().ok())
568    }
569
570    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
571        let nt = NaiveTime::parse_from_str(string, format).ok()?;
572        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
573    }
574}
575
576impl Parser for Time32SecondType {
577    // Will truncate any fractions of a second
578    fn parse(string: &str) -> Option<Self::Native> {
579        string_to_time_nanoseconds(string)
580            .ok()
581            .map(|nanos| (nanos / 1_000_000_000) as i32)
582            .or_else(|| string.parse::<Self::Native>().ok())
583    }
584
585    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
586        let nt = NaiveTime::parse_from_str(string, format).ok()?;
587        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
588    }
589}
590
/// Number of days between 0001-01-01 and 1970-01-01
///
/// Subtracted from `num_days_from_ce()` to obtain a day count relative to the UNIX epoch
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a timestamp cannot be represented as `i64` nanoseconds
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
596
597fn parse_date(string: &str) -> Option<NaiveDate> {
598    if string.len() > 10 {
599        // Try to parse as datetime and return just the date part
600        return string_to_datetime(&Utc, string)
601            .map(|dt| dt.date_naive())
602            .ok();
603    };
604    let mut digits = [0; 10];
605    let mut mask = 0;
606
607    // Treating all bytes the same way, helps LLVM vectorise this correctly
608    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
609        *o = i.wrapping_sub(b'0');
610        mask |= ((*o < 10) as u16) << idx
611    }
612
613    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
614
615    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
616    if digits[4] != HYPHEN {
617        let (year, month, day) = match (mask, string.len()) {
618            (0b11111111, 8) => (
619                digits[0] as u16 * 1000
620                    + digits[1] as u16 * 100
621                    + digits[2] as u16 * 10
622                    + digits[3] as u16,
623                digits[4] * 10 + digits[5],
624                digits[6] * 10 + digits[7],
625            ),
626            _ => return None,
627        };
628        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
629    }
630
631    let (month, day) = match mask {
632        0b1101101111 => {
633            if digits[7] != HYPHEN {
634                return None;
635            }
636            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
637        }
638        0b101101111 => {
639            if digits[7] != HYPHEN {
640                return None;
641            }
642            (digits[5] * 10 + digits[6], digits[8])
643        }
644        0b110101111 => {
645            if digits[6] != HYPHEN {
646                return None;
647            }
648            (digits[5], digits[7] * 10 + digits[8])
649        }
650        0b10101111 => {
651            if digits[6] != HYPHEN {
652                return None;
653            }
654            (digits[5], digits[7])
655        }
656        _ => return None,
657    };
658
659    let year =
660        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
661
662    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
663}
664
665impl Parser for Date32Type {
666    fn parse(string: &str) -> Option<i32> {
667        let date = parse_date(string)?;
668        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
669    }
670
671    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
672        let date = NaiveDate::parse_from_str(string, format).ok()?;
673        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
674    }
675}
676
677impl Parser for Date64Type {
678    fn parse(string: &str) -> Option<i64> {
679        if string.len() <= 10 {
680            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
681            Some(datetime.and_utc().timestamp_millis())
682        } else {
683            let date_time = string_to_datetime(&Utc, string).ok()?;
684            Some(date_time.timestamp_millis())
685        }
686    }
687
688    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
689        use chrono::format::Fixed;
690        use chrono::format::StrftimeItems;
691        let fmt = StrftimeItems::new(format);
692        let has_zone = fmt.into_iter().any(|item| match item {
693            chrono::format::Item::Fixed(fixed_item) => matches!(
694                fixed_item,
695                Fixed::RFC2822
696                    | Fixed::RFC3339
697                    | Fixed::TimezoneName
698                    | Fixed::TimezoneOffsetColon
699                    | Fixed::TimezoneOffsetColonZ
700                    | Fixed::TimezoneOffset
701                    | Fixed::TimezoneOffsetZ
702            ),
703            _ => false,
704        });
705        if has_zone {
706            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
707            Some(date_time.timestamp_millis())
708        } else {
709            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
710            Some(date_time.and_utc().timestamp_millis())
711        }
712    }
713}
714
/// Finishes parsing a decimal written in exponent notation (e.g. `1.23e4`, `5E-2`)
///
/// The function resumes scanning `s`, folds any remaining mantissa digits into `result`,
/// reads the exponent, and finally rescales `result` to the requested `scale`.
///
/// # Arguments
///
/// * `s` - the complete input string, also used in error messages
/// * `digits` - number of digits the caller has already folded into `result`
/// * `fractionals` - number of fractional digits already consumed; a value `> 0` is taken to
///   mean that `index` points at the decimal point
/// * `result` - the value accumulated so far
/// * `index` - byte index in `s` to resume from (see `fractionals`)
/// * `precision` - maximum number of digits the rescaled value may have
/// * `scale` - number of fractional digits of the target decimal type
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] if an unexpected byte is found, if the exponent has no
/// digits, if nothing at all was parsed, or if the rescaled value exceeds `precision`.
///
/// NOTE(review): `exp` is an `i16` built with unchecked `*=` / `+=`, so an input with a long
/// run of exponent digits looks like it can overflow it — confirm and consider bounding it.
fn parse_e_notation<T: DecimalType>(
    s: &str,
    mut digits: u16,
    mut fractionals: i16,
    mut result: T::Native,
    index: usize,
    precision: u16,
    scale: i16,
) -> Result<T::Native, ArrowError> {
    // Magnitude of the exponent; its sign is tracked in `pos_shift_direction`
    let mut exp: i16 = 0;
    let base = T::Native::usize_as(10);

    // Set once the `e` / `E` marker has been seen
    let mut exp_start: bool = false;
    // e has a plus sign
    let mut pos_shift_direction: bool = true;

    // skip to point or exponent index
    let mut bs;
    if fractionals > 0 {
        // it's a fraction, so the point index needs to be skipped, so +1
        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
    } else {
        // it's actually an integer that is already written into the result, so let's skip on to e
        bs = s.as_bytes().iter().skip(index);
    }

    while let Some(b) = bs.next() {
        match b {
            // Mantissa digit that the caller has not consumed yet
            b'0'..=b'9' => {
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                if fractionals > 0 {
                    fractionals += 1;
                }
                digits += 1;
            }
            &b'e' | &b'E' => {
                exp_start = true;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        };

        if exp_start {
            // The byte after the marker is either a sign or the first exponent digit
            pos_shift_direction = match bs.next() {
                Some(&b'-') => false,
                Some(&b'+') => true,
                Some(b) => {
                    if !b.is_ascii_digit() {
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }

                    exp *= 10;
                    exp += (b - b'0') as i16;

                    true
                }
                None => {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )))
                }
            };

            // Everything that remains must be exponent digits; this drains the iterator,
            // which also ends the enclosing `while let`
            for b in bs.by_ref() {
                if !b.is_ascii_digit() {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )));
                }
                exp *= 10;
                exp += (b - b'0') as i16;
            }
        }
    }

    // Nothing usable was parsed at all
    if digits == 0 && fractionals == 0 && exp == 0 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    if !pos_shift_direction {
        // exponent has a large negative sign
        // 1.12345e-30 => 0.0{29}12345, scale = 5
        if exp - (digits as i16 + scale) > 0 {
            return Ok(T::Native::usize_as(0));
        }
        exp *= -1;
    }

    // point offset
    exp = fractionals - exp;
    // We have zeros on the left, we need to count them
    if !pos_shift_direction && exp > digits as i16 {
        digits = exp as u16;
    }
    // Number of numbers to be removed or added
    exp = scale - exp;

    // A negative sum wraps to a large `u16` here and is therefore reported as overflow too
    if (digits as i16 + exp) as u16 > precision {
        return Err(ArrowError::ParseError(format!(
            "parse decimal overflow ({s})"
        )));
    }

    // Rescale: drop surplus fractional digits or pad with zeros
    if exp < 0 {
        result = result.div_wrapping(base.pow_wrapping(-exp as _));
    } else {
        result = result.mul_wrapping(base.pow_wrapping(exp as _));
    }

    Ok(result)
}
834
835/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
836/// The result value can't be out of bounds.
837pub fn parse_decimal<T: DecimalType>(
838    s: &str,
839    precision: u8,
840    scale: i8,
841) -> Result<T::Native, ArrowError> {
842    let mut result = T::Native::usize_as(0);
843    let mut fractionals: i8 = 0;
844    let mut digits: u8 = 0;
845    let base = T::Native::usize_as(10);
846
847    let bs = s.as_bytes();
848    let (signed, negative) = match bs.first() {
849        Some(b'-') => (true, true),
850        Some(b'+') => (true, false),
851        _ => (false, false),
852    };
853
854    if bs.is_empty() || signed && bs.len() == 1 {
855        return Err(ArrowError::ParseError(format!(
856            "can't parse the string value {s} to decimal"
857        )));
858    }
859
860    // Iterate over the raw input bytes, skipping the sign if any
861    let mut bs = bs.iter().enumerate().skip(signed as usize);
862
863    let mut is_e_notation = false;
864
865    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
866    // Thus, if we validate the precision correctly, we can skip overflow checks.
867    while let Some((index, b)) = bs.next() {
868        match b {
869            b'0'..=b'9' => {
870                if digits == 0 && *b == b'0' {
871                    // Ignore leading zeros.
872                    continue;
873                }
874                digits += 1;
875                result = result.mul_wrapping(base);
876                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
877            }
878            b'.' => {
879                let point_index = index;
880
881                for (_, b) in bs.by_ref() {
882                    if !b.is_ascii_digit() {
883                        if *b == b'e' || *b == b'E' {
884                            result = match parse_e_notation::<T>(
885                                s,
886                                digits as u16,
887                                fractionals as i16,
888                                result,
889                                point_index,
890                                precision as u16,
891                                scale as i16,
892                            ) {
893                                Err(e) => return Err(e),
894                                Ok(v) => v,
895                            };
896
897                            is_e_notation = true;
898
899                            break;
900                        }
901                        return Err(ArrowError::ParseError(format!(
902                            "can't parse the string value {s} to decimal"
903                        )));
904                    }
905                    if fractionals == scale && scale != 0 {
906                        // We have processed all the digits that we need. All that
907                        // is left is to validate that the rest of the string contains
908                        // valid digits.
909                        continue;
910                    }
911                    fractionals += 1;
912                    digits += 1;
913                    result = result.mul_wrapping(base);
914                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
915                }
916
917                if is_e_notation {
918                    break;
919                }
920
921                // Fail on "."
922                if digits == 0 {
923                    return Err(ArrowError::ParseError(format!(
924                        "can't parse the string value {s} to decimal"
925                    )));
926                }
927            }
928            b'e' | b'E' => {
929                result = match parse_e_notation::<T>(
930                    s,
931                    digits as u16,
932                    fractionals as i16,
933                    result,
934                    index,
935                    precision as u16,
936                    scale as i16,
937                ) {
938                    Err(e) => return Err(e),
939                    Ok(v) => v,
940                };
941
942                is_e_notation = true;
943
944                break;
945            }
946            _ => {
947                return Err(ArrowError::ParseError(format!(
948                    "can't parse the string value {s} to decimal"
949                )));
950            }
951        }
952    }
953
954    if !is_e_notation {
955        if fractionals < scale {
956            let exp = scale - fractionals;
957            if exp as u8 + digits > precision {
958                return Err(ArrowError::ParseError(format!(
959                    "parse decimal overflow ({s})"
960                )));
961            }
962            let mul = base.pow_wrapping(exp as _);
963            result = result.mul_wrapping(mul);
964        } else if digits > precision {
965            return Err(ArrowError::ParseError(format!(
966                "parse decimal overflow ({s})"
967            )));
968        }
969    }
970
971    Ok(if negative {
972        result.neg_wrapping()
973    } else {
974        result
975    })
976}
977
978/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
979pub fn parse_interval_year_month(
980    value: &str,
981) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
982    let config = IntervalParseConfig::new(IntervalUnit::Year);
983    let interval = Interval::parse(value, &config)?;
984
985    let months = interval.to_year_months().map_err(|_| {
986        ArrowError::CastError(format!(
987            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
988        ))
989    })?;
990
991    Ok(IntervalYearMonthType::make_value(0, months))
992}
993
994/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
995pub fn parse_interval_day_time(
996    value: &str,
997) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
998    let config = IntervalParseConfig::new(IntervalUnit::Day);
999    let interval = Interval::parse(value, &config)?;
1000
1001    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1002        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1003    )))?;
1004
1005    Ok(IntervalDayTimeType::make_value(days, millis))
1006}
1007
1008/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1009pub fn parse_interval_month_day_nano_config(
1010    value: &str,
1011    config: IntervalParseConfig,
1012) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1013    let interval = Interval::parse(value, &config)?;
1014
1015    let (months, days, nanos) = interval.to_month_day_nanos();
1016
1017    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1018}
1019
1020/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1021pub fn parse_interval_month_day_nano(
1022    value: &str,
1023) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1025}
1026
/// Nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second
const NANOS_PER_SECOND: i64 = NANOS_PER_MILLIS * 1_000;
/// Nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = NANOS_PER_SECOND * 60;
/// Nanoseconds in one hour
const NANOS_PER_HOUR: i64 = NANOS_PER_MINUTE * 60;
/// Nanoseconds in one day (only needed by the tests)
#[cfg(test)]
const NANOS_PER_DAY: i64 = NANOS_PER_HOUR * 24;
1033
/// Configuration for parsing interval strings.
///
/// Currently it only stores the `default_unit`, which is applied to an amount that is
/// not followed by a unit in the input string.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// The unit to assume when an amount has no unit of its own,
    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
    default_unit: IntervalUnit,
}
1043
1044impl IntervalParseConfig {
1045    /// Create a new [IntervalParseConfig] with the given default unit
1046    pub fn new(default_unit: IntervalUnit) -> Self {
1047        Self { default_unit }
1048    }
1049}
1050
/// The units an interval amount can be expressed in.
///
/// Every variant occupies its own bit, so a set of units can be tracked in a single
/// `u16` bitfield.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 1 << 0,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1082
1083/// Logic for parsing interval unit strings
1084///
1085/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1086/// for a list of unit names supported by PostgreSQL which we try to match here.
1087impl FromStr for IntervalUnit {
1088    type Err = ArrowError;
1089
1090    fn from_str(s: &str) -> Result<Self, ArrowError> {
1091        match s.to_lowercase().as_str() {
1092            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1093            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1094            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1095            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1096            "w" | "week" | "weeks" => Ok(Self::Week),
1097            "d" | "day" | "days" => Ok(Self::Day),
1098            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1099            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1100            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1101            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1102                Ok(Self::Millisecond)
1103            }
1104            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1105                Ok(Self::Microsecond)
1106            }
1107            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1108            _ => Err(ArrowError::InvalidArgumentError(format!(
1109                "Unknown interval type: {s}"
1110            ))),
1111        }
1112    }
1113}
1114
1115impl IntervalUnit {
1116    fn from_str_or_config(
1117        s: Option<&str>,
1118        config: &IntervalParseConfig,
1119    ) -> Result<Self, ArrowError> {
1120        match s {
1121            Some(s) => s.parse(),
1122            None => Ok(config.default_unit),
1123        }
1124    }
1125}
1126
/// A tuple representing (months, days, nanoseconds) in an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal digits kept for the fractional part of an interval amount.
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds
const INTERVAL_PRECISION: u32 = 15;
1132
/// A numeric interval amount, stored as an integer part plus a fixed-point fraction
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer component of the interval amount
    integer: i64,
    /// The fractional component multiplied by 10^INTERVAL_PRECISION
    frac: i64,
}
1140
#[cfg(test)]
impl IntervalAmount {
    /// Test helper building an amount directly from its two components
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1147
1148impl FromStr for IntervalAmount {
1149    type Err = ArrowError;
1150
1151    fn from_str(s: &str) -> Result<Self, Self::Err> {
1152        match s.split_once('.') {
1153            Some((integer, frac))
1154                if frac.len() <= INTERVAL_PRECISION as usize
1155                    && !frac.is_empty()
1156                    && !frac.starts_with('-') =>
1157            {
1158                // integer will be "" for values like ".5"
1159                // and "-" for values like "-.5"
1160                let explicit_neg = integer.starts_with('-');
1161                let integer = if integer.is_empty() || integer == "-" {
1162                    Ok(0)
1163                } else {
1164                    integer.parse::<i64>().map_err(|_| {
1165                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1166                    })
1167                }?;
1168
1169                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1170                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1171                })?;
1172
1173                // scale fractional part by interval precision
1174                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1175
1176                // propagate the sign of the integer part to the fractional part
1177                let frac = if integer < 0 || explicit_neg {
1178                    -frac
1179                } else {
1180                    frac
1181                };
1182
1183                let result = Self { integer, frac };
1184
1185                Ok(result)
1186            }
1187            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1188                "Failed to parse {s} as interval amount"
1189            ))),
1190            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1191                Err(ArrowError::ParseError(format!(
1192                    "{s} exceeds the precision available for interval amount"
1193                )))
1194            }
1195            Some(_) | None => {
1196                let integer = s.parse::<i64>().map_err(|_| {
1197                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1198                })?;
1199
1200                let result = Self { integer, frac: 0 };
1201                Ok(result)
1202            }
1203        }
1204    }
1205}
1206
/// An interval broken down into independent month, day and nanosecond parts
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Number of months
    months: i32,
    /// Number of days
    days: i32,
    /// Number of nanoseconds
    nanos: i64,
}
1213
1214impl Interval {
1215    fn new(months: i32, days: i32, nanos: i64) -> Self {
1216        Self {
1217            months,
1218            days,
1219            nanos,
1220        }
1221    }
1222
1223    fn to_year_months(&self) -> Result<i32, ArrowError> {
1224        match (self.months, self.days, self.nanos) {
1225            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1226            _ => Err(ArrowError::InvalidArgumentError(format!(
1227                "Unable to represent interval with days and nanos as year-months: {:?}",
1228                self
1229            ))),
1230        }
1231    }
1232
1233    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1234        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1235
1236        match self.nanos {
1237            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1238                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1239                    ArrowError::InvalidArgumentError(format!(
1240                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1241                        self.nanos
1242                    ))
1243                })?;
1244
1245                Ok((days, millis))
1246            }
1247            nanos => Err(ArrowError::InvalidArgumentError(format!(
1248                "Unable to represent {nanos} as milliseconds"
1249            ))),
1250        }
1251    }
1252
1253    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1254        (self.months, self.days, self.nanos)
1255    }
1256
1257    /// Parse string value in traditional Postgres format such as
1258    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1259    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1260        let components = parse_interval_components(value, config)?;
1261
1262        components
1263            .into_iter()
1264            .try_fold(Self::default(), |result, (amount, unit)| {
1265                result.add(amount, unit)
1266            })
1267    }
1268
1269    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1270    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1271    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1272    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1273    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1274    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1275    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1276        let result = match unit {
1277            IntervalUnit::Century => {
1278                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1279                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1280                let months = months_int
1281                    .add_checked(month_frac)?
1282                    .try_into()
1283                    .map_err(|_| {
1284                        ArrowError::ParseError(format!(
1285                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1286                            &amount.integer
1287                        ))
1288                    })?;
1289
1290                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1291            }
1292            IntervalUnit::Decade => {
1293                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1294
1295                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1296                let months = months_int
1297                    .add_checked(month_frac)?
1298                    .try_into()
1299                    .map_err(|_| {
1300                        ArrowError::ParseError(format!(
1301                            "Unable to represent {} decades as months in a signed 32-bit integer",
1302                            &amount.integer
1303                        ))
1304                    })?;
1305
1306                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1307            }
1308            IntervalUnit::Year => {
1309                let months_int = amount.integer.mul_checked(12)?;
1310                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1311                let months = months_int
1312                    .add_checked(month_frac)?
1313                    .try_into()
1314                    .map_err(|_| {
1315                        ArrowError::ParseError(format!(
1316                            "Unable to represent {} years as months in a signed 32-bit integer",
1317                            &amount.integer
1318                        ))
1319                    })?;
1320
1321                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1322            }
1323            IntervalUnit::Month => {
1324                let months = amount.integer.try_into().map_err(|_| {
1325                    ArrowError::ParseError(format!(
1326                        "Unable to represent {} months in a signed 32-bit integer",
1327                        &amount.integer
1328                    ))
1329                })?;
1330
1331                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1332                let days = days.try_into().map_err(|_| {
1333                    ArrowError::ParseError(format!(
1334                        "Unable to represent {} months as days in a signed 32-bit integer",
1335                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1336                    ))
1337                })?;
1338
1339                Self::new(
1340                    self.months.add_checked(months)?,
1341                    self.days.add_checked(days)?,
1342                    self.nanos,
1343                )
1344            }
1345            IntervalUnit::Week => {
1346                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1347                    ArrowError::ParseError(format!(
1348                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1349                        &amount.integer
1350                    ))
1351                })?;
1352
1353                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1354
1355                Self::new(
1356                    self.months,
1357                    self.days.add_checked(days)?,
1358                    self.nanos.add_checked(nanos)?,
1359                )
1360            }
1361            IntervalUnit::Day => {
1362                let days = amount.integer.try_into().map_err(|_| {
1363                    ArrowError::InvalidArgumentError(format!(
1364                        "Unable to represent {} days in a signed 32-bit integer",
1365                        amount.integer
1366                    ))
1367                })?;
1368
1369                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1370
1371                Self::new(
1372                    self.months,
1373                    self.days.add_checked(days)?,
1374                    self.nanos.add_checked(nanos)?,
1375                )
1376            }
1377            IntervalUnit::Hour => {
1378                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1379                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1380                let nanos = nanos_int.add_checked(nanos_frac)?;
1381
1382                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1383            }
1384            IntervalUnit::Minute => {
1385                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1386                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1387
1388                let nanos = nanos_int.add_checked(nanos_frac)?;
1389
1390                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1391            }
1392            IntervalUnit::Second => {
1393                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1394                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1395                let nanos = nanos_int.add_checked(nanos_frac)?;
1396
1397                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1398            }
1399            IntervalUnit::Millisecond => {
1400                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1401                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1402                let nanos = nanos_int.add_checked(nanos_frac)?;
1403
1404                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1405            }
1406            IntervalUnit::Microsecond => {
1407                let nanos_int = amount.integer.mul_checked(1_000)?;
1408                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1409                let nanos = nanos_int.add_checked(nanos_frac)?;
1410
1411                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1412            }
1413            IntervalUnit::Nanosecond => {
1414                let nanos_int = amount.integer;
1415                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1416                let nanos = nanos_int.add_checked(nanos_frac)?;
1417
1418                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1419            }
1420        };
1421
1422        Ok(result)
1423    }
1424}
1425
1426/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1427fn parse_interval_components(
1428    value: &str,
1429    config: &IntervalParseConfig,
1430) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1431    let raw_pairs = split_interval_components(value);
1432
1433    // parse amounts and units
1434    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1435        .iter()
1436        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1437        .collect()
1438    else {
1439        return Err(ArrowError::ParseError(format!(
1440            "Invalid input syntax for type interval: {value:?}"
1441        )));
1442    };
1443
1444    // collect parsed results
1445    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1446
1447    // duplicate units?
1448    let mut observed_interval_types = 0;
1449    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1450        if observed_interval_types & (*unit as u16) != 0 {
1451            return Err(ArrowError::ParseError(format!(
1452                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1453                value,
1454                raw_unit.unwrap_or_default(),
1455            )));
1456        }
1457
1458        observed_interval_types |= *unit as u16;
1459    }
1460
1461    let result = amounts.iter().copied().zip(units.iter().copied());
1462
1463    Ok(result.collect::<Vec<_>>())
1464}
1465
1466/// Split an interval into a vec of amounts and units.
1467///
1468/// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space.
1469///
1470/// This should match the behavior of PostgreSQL's interval parser.
1471fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1472    let mut result = vec![];
1473    let mut words = value.split(char::is_whitespace);
1474    while let Some(word) = words.next() {
1475        if let Some(split_word_at) = word.find(not_interval_amount) {
1476            let (amount, unit) = word.split_at(split_word_at);
1477            result.push((amount, Some(unit)));
1478        } else if let Some(unit) = words.next() {
1479            result.push((word, Some(unit)));
1480        } else {
1481            result.push((word, None));
1482            break;
1483        }
1484    }
1485    result
1486}
1487
/// Returns `true` when `c` cannot be part of an interval numeric amount, i.e. when it is
/// neither an ASCII digit, a `.` nor a `-`
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1492
1493#[cfg(test)]
1494mod tests {
1495    use super::*;
1496    use arrow_array::temporal_conversions::date32_to_datetime;
1497    use arrow_buffer::i256;
1498
1499    #[test]
1500    fn test_parse_nanos() {
1501        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1502        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1503        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1504    }
1505
1506    #[test]
1507    fn string_to_timestamp_timezone() {
1508        // Explicit timezone
1509        assert_eq!(
1510            1599572549190855000,
1511            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1512        );
1513        assert_eq!(
1514            1599572549190855000,
1515            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1516        );
1517        assert_eq!(
1518            1599572549000000000,
1519            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1520        ); // no fractional part
1521        assert_eq!(
1522            1599590549190855000,
1523            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1524        );
1525    }
1526
1527    #[test]
1528    fn string_to_timestamp_timezone_space() {
1529        // Ensure space rather than T between time and date is accepted
1530        assert_eq!(
1531            1599572549190855000,
1532            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1533        );
1534        assert_eq!(
1535            1599572549190855000,
1536            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1537        );
1538        assert_eq!(
1539            1599572549000000000,
1540            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1541        ); // no fractional part
1542        assert_eq!(
1543            1599590549190855000,
1544            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1545        );
1546    }
1547
1548    #[test]
1549    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1550    fn string_to_timestamp_no_timezone() {
1551        // This test is designed to succeed in regardless of the local
1552        // timezone the test machine is running. Thus it is still
1553        // somewhat susceptible to bugs in the use of chrono
1554        let naive_datetime = NaiveDateTime::new(
1555            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1556            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1557        );
1558
1559        // Ensure both T and ' ' variants work
1560        assert_eq!(
1561            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1562            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1563        );
1564
1565        assert_eq!(
1566            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1567            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1568        );
1569
1570        // Also ensure that parsing timestamps with no fractional
1571        // second part works as well
1572        let datetime_whole_secs = NaiveDateTime::new(
1573            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1574            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1575        )
1576        .and_utc();
1577
1578        // Ensure both T and ' ' variants work
1579        assert_eq!(
1580            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1581            parse_timestamp("2020-09-08T13:42:29").unwrap()
1582        );
1583
1584        assert_eq!(
1585            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1586            parse_timestamp("2020-09-08 13:42:29").unwrap()
1587        );
1588
1589        // ensure without time work
1590        // no time, should be the nano second at
1591        // 2020-09-08 0:0:0
1592        let datetime_no_time = NaiveDateTime::new(
1593            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1594            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1595        )
1596        .and_utc();
1597
1598        assert_eq!(
1599            datetime_no_time.timestamp_nanos_opt().unwrap(),
1600            parse_timestamp("2020-09-08").unwrap()
1601        )
1602    }
1603
1604    #[test]
1605    fn string_to_timestamp_chrono() {
1606        let cases = [
1607            "2020-09-08T13:42:29Z",
1608            "1969-01-01T00:00:00.1Z",
1609            "2020-09-08T12:00:12.12345678+00:00",
1610            "2020-09-08T12:00:12+00:00",
1611            "2020-09-08T12:00:12.1+00:00",
1612            "2020-09-08T12:00:12.12+00:00",
1613            "2020-09-08T12:00:12.123+00:00",
1614            "2020-09-08T12:00:12.1234+00:00",
1615            "2020-09-08T12:00:12.12345+00:00",
1616            "2020-09-08T12:00:12.123456+00:00",
1617            "2020-09-08T12:00:12.1234567+00:00",
1618            "2020-09-08T12:00:12.12345678+00:00",
1619            "2020-09-08T12:00:12.123456789+00:00",
1620            "2020-09-08T12:00:12.12345678912z",
1621            "2020-09-08T12:00:12.123456789123Z",
1622            "2020-09-08T12:00:12.123456789123+02:00",
1623            "2020-09-08T12:00:12.12345678912345Z",
1624            "2020-09-08T12:00:12.1234567891234567+02:00",
1625            "2020-09-08T12:00:60Z",
1626            "2020-09-08T12:00:60.123Z",
1627            "2020-09-08T12:00:60.123456+02:00",
1628            "2020-09-08T12:00:60.1234567891234567+02:00",
1629            "2020-09-08T12:00:60.999999999+02:00",
1630            "2020-09-08t12:00:12.12345678+00:00",
1631            "2020-09-08t12:00:12+00:00",
1632            "2020-09-08t12:00:12Z",
1633        ];
1634
1635        for case in cases {
1636            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1637            let chrono_utc = chrono.with_timezone(&Utc);
1638
1639            let custom = string_to_datetime(&Utc, case).unwrap();
1640            assert_eq!(chrono_utc, custom)
1641        }
1642    }
1643
1644    #[test]
1645    fn string_to_timestamp_naive() {
1646        let cases = [
1647            "2018-11-13T17:11:10.011375885995",
1648            "2030-12-04T17:11:10.123",
1649            "2030-12-04T17:11:10.1234",
1650            "2030-12-04T17:11:10.123456",
1651        ];
1652        for case in cases {
1653            let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1654            let custom = string_to_datetime(&Utc, case).unwrap();
1655            assert_eq!(chrono, custom.naive_utc())
1656        }
1657    }
1658
1659    #[test]
1660    fn string_to_timestamp_invalid() {
1661        // Test parsing invalid formats
1662        let cases = [
1663            ("", "timestamp must contain at least 10 characters"),
1664            ("SS", "timestamp must contain at least 10 characters"),
1665            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1666            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1667            ("1997-01-31  09:26:56.123Z", "error parsing time"),
1668            ("1997:01:31T09:26:56.123Z", "error parsing date"),
1669            ("1997:1:31T09:26:56.123Z", "error parsing date"),
1670            ("1997-01-32T09:26:56.123Z", "error parsing date"),
1671            ("1997-13-32T09:26:56.123Z", "error parsing date"),
1672            ("1997-02-29T09:26:56.123Z", "error parsing date"),
1673            ("2015-02-30T17:35:20-08:00", "error parsing date"),
1674            ("1997-01-10T9:26:56.123Z", "error parsing time"),
1675            ("2015-01-20T25:35:20-08:00", "error parsing time"),
1676            ("1997-01-10T09:61:56.123Z", "error parsing time"),
1677            ("1997-01-10T09:61:90.123Z", "error parsing time"),
1678            ("1997-01-10T12:00:6.123Z", "error parsing time"),
1679            ("1997-01-31T092656.123Z", "error parsing time"),
1680            ("1997-01-10T12:00:06.", "error parsing time"),
1681            ("1997-01-10T12:00:06. ", "error parsing time"),
1682        ];
1683
1684        for (s, ctx) in cases {
1685            let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1686            let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1687            assert_eq!(actual, expected)
1688        }
1689    }
1690
1691    // Parse a timestamp to timestamp int with a useful human readable error message
1692    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1693        let result = string_to_timestamp_nanos(s);
1694        if let Err(e) = &result {
1695            eprintln!("Error parsing timestamp '{s}': {e:?}");
1696        }
1697        result
1698    }
1699
1700    #[test]
1701    fn string_without_timezone_to_timestamp() {
1702        // string without timezone should always output the same regardless the local or session timezone
1703
1704        let naive_datetime = NaiveDateTime::new(
1705            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1706            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1707        );
1708
1709        // Ensure both T and ' ' variants work
1710        assert_eq!(
1711            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1712            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1713        );
1714
1715        assert_eq!(
1716            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1717            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1718        );
1719
1720        let naive_datetime = NaiveDateTime::new(
1721            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1722            NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1723        );
1724
1725        // Ensure both T and ' ' variants work
1726        assert_eq!(
1727            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1728            parse_timestamp("2020-09-08T13:42:29").unwrap()
1729        );
1730
1731        assert_eq!(
1732            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1733            parse_timestamp("2020-09-08 13:42:29").unwrap()
1734        );
1735
1736        let tz: Tz = "+02:00".parse().unwrap();
1737        let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1738        let utc = date.naive_utc().to_string();
1739        assert_eq!(utc, "2020-09-08 11:42:29");
1740        let local = date.naive_local().to_string();
1741        assert_eq!(local, "2020-09-08 13:42:29");
1742
1743        let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1744        let utc = date.naive_utc().to_string();
1745        assert_eq!(utc, "2020-09-08 13:42:29");
1746        let local = date.naive_local().to_string();
1747        assert_eq!(local, "2020-09-08 15:42:29");
1748
1749        let dt =
1750            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1751        let local: Tz = "+08:00".parse().unwrap();
1752
1753        // Parsed as offset from UTC
1754        let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1755        assert_eq!(dt, date.naive_utc());
1756        assert_ne!(dt, date.naive_local());
1757
1758        // Parsed as offset from local
1759        let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1760        assert_eq!(dt, date.naive_local());
1761        assert_ne!(dt, date.naive_utc());
1762    }
1763
1764    #[test]
1765    fn parse_date32() {
1766        let cases = [
1767            "2020-09-08",
1768            "2020-9-8",
1769            "2020-09-8",
1770            "2020-9-08",
1771            "2020-12-1",
1772            "1690-2-5",
1773            "2020-09-08 01:02:03",
1774        ];
1775        for case in cases {
1776            let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1777            let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1778                .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1779                .unwrap();
1780            assert_eq!(v.date(), expected);
1781        }
1782
1783        let err_cases = [
1784            "",
1785            "80-01-01",
1786            "342",
1787            "Foo",
1788            "2020-09-08-03",
1789            "2020--04-03",
1790            "2020--",
1791            "2020-09-08 01",
1792            "2020-09-08 01:02",
1793            "2020-09-08 01-02-03",
1794            "2020-9-8 01:02:03",
1795            "2020-09-08 1:2:3",
1796        ];
1797        for case in err_cases {
1798            assert_eq!(Date32Type::parse(case), None);
1799        }
1800    }
1801
1802    #[test]
1803    fn parse_time64_nanos() {
1804        assert_eq!(
1805            Time64NanosecondType::parse("02:10:01.1234567899999999"),
1806            Some(7_801_123_456_789)
1807        );
1808        assert_eq!(
1809            Time64NanosecondType::parse("02:10:01.1234567"),
1810            Some(7_801_123_456_700)
1811        );
1812        assert_eq!(
1813            Time64NanosecondType::parse("2:10:01.1234567"),
1814            Some(7_801_123_456_700)
1815        );
1816        assert_eq!(
1817            Time64NanosecondType::parse("12:10:01.123456789 AM"),
1818            Some(601_123_456_789)
1819        );
1820        assert_eq!(
1821            Time64NanosecondType::parse("12:10:01.123456789 am"),
1822            Some(601_123_456_789)
1823        );
1824        assert_eq!(
1825            Time64NanosecondType::parse("2:10:01.12345678 PM"),
1826            Some(51_001_123_456_780)
1827        );
1828        assert_eq!(
1829            Time64NanosecondType::parse("2:10:01.12345678 pm"),
1830            Some(51_001_123_456_780)
1831        );
1832        assert_eq!(
1833            Time64NanosecondType::parse("02:10:01"),
1834            Some(7_801_000_000_000)
1835        );
1836        assert_eq!(
1837            Time64NanosecondType::parse("2:10:01"),
1838            Some(7_801_000_000_000)
1839        );
1840        assert_eq!(
1841            Time64NanosecondType::parse("12:10:01 AM"),
1842            Some(601_000_000_000)
1843        );
1844        assert_eq!(
1845            Time64NanosecondType::parse("12:10:01 am"),
1846            Some(601_000_000_000)
1847        );
1848        assert_eq!(
1849            Time64NanosecondType::parse("2:10:01 PM"),
1850            Some(51_001_000_000_000)
1851        );
1852        assert_eq!(
1853            Time64NanosecondType::parse("2:10:01 pm"),
1854            Some(51_001_000_000_000)
1855        );
1856        assert_eq!(
1857            Time64NanosecondType::parse("02:10"),
1858            Some(7_800_000_000_000)
1859        );
1860        assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1861        assert_eq!(
1862            Time64NanosecondType::parse("12:10 AM"),
1863            Some(600_000_000_000)
1864        );
1865        assert_eq!(
1866            Time64NanosecondType::parse("12:10 am"),
1867            Some(600_000_000_000)
1868        );
1869        assert_eq!(
1870            Time64NanosecondType::parse("2:10 PM"),
1871            Some(51_000_000_000_000)
1872        );
1873        assert_eq!(
1874            Time64NanosecondType::parse("2:10 pm"),
1875            Some(51_000_000_000_000)
1876        );
1877
1878        // parse directly as nanoseconds
1879        assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1880
1881        // leap second
1882        assert_eq!(
1883            Time64NanosecondType::parse("23:59:60"),
1884            Some(86_400_000_000_000)
1885        );
1886
1887        // custom format
1888        assert_eq!(
1889            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1890            Some(7_801_123_456_700)
1891        );
1892    }
1893
1894    #[test]
1895    fn parse_time64_micros() {
1896        // expected formats
1897        assert_eq!(
1898            Time64MicrosecondType::parse("02:10:01.1234"),
1899            Some(7_801_123_400)
1900        );
1901        assert_eq!(
1902            Time64MicrosecondType::parse("2:10:01.1234"),
1903            Some(7_801_123_400)
1904        );
1905        assert_eq!(
1906            Time64MicrosecondType::parse("12:10:01.123456 AM"),
1907            Some(601_123_456)
1908        );
1909        assert_eq!(
1910            Time64MicrosecondType::parse("12:10:01.123456 am"),
1911            Some(601_123_456)
1912        );
1913        assert_eq!(
1914            Time64MicrosecondType::parse("2:10:01.12345 PM"),
1915            Some(51_001_123_450)
1916        );
1917        assert_eq!(
1918            Time64MicrosecondType::parse("2:10:01.12345 pm"),
1919            Some(51_001_123_450)
1920        );
1921        assert_eq!(
1922            Time64MicrosecondType::parse("02:10:01"),
1923            Some(7_801_000_000)
1924        );
1925        assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1926        assert_eq!(
1927            Time64MicrosecondType::parse("12:10:01 AM"),
1928            Some(601_000_000)
1929        );
1930        assert_eq!(
1931            Time64MicrosecondType::parse("12:10:01 am"),
1932            Some(601_000_000)
1933        );
1934        assert_eq!(
1935            Time64MicrosecondType::parse("2:10:01 PM"),
1936            Some(51_001_000_000)
1937        );
1938        assert_eq!(
1939            Time64MicrosecondType::parse("2:10:01 pm"),
1940            Some(51_001_000_000)
1941        );
1942        assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1943        assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1944        assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1945        assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1946        assert_eq!(
1947            Time64MicrosecondType::parse("2:10 PM"),
1948            Some(51_000_000_000)
1949        );
1950        assert_eq!(
1951            Time64MicrosecondType::parse("2:10 pm"),
1952            Some(51_000_000_000)
1953        );
1954
1955        // parse directly as microseconds
1956        assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1957
1958        // leap second
1959        assert_eq!(
1960            Time64MicrosecondType::parse("23:59:60"),
1961            Some(86_400_000_000)
1962        );
1963
1964        // custom format
1965        assert_eq!(
1966            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1967            Some(7_801_123_400)
1968        );
1969    }
1970
1971    #[test]
1972    fn parse_time32_millis() {
1973        // expected formats
1974        assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1975        assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1976        assert_eq!(
1977            Time32MillisecondType::parse("12:10:01.123 AM"),
1978            Some(601_123)
1979        );
1980        assert_eq!(
1981            Time32MillisecondType::parse("12:10:01.123 am"),
1982            Some(601_123)
1983        );
1984        assert_eq!(
1985            Time32MillisecondType::parse("2:10:01.12 PM"),
1986            Some(51_001_120)
1987        );
1988        assert_eq!(
1989            Time32MillisecondType::parse("2:10:01.12 pm"),
1990            Some(51_001_120)
1991        );
1992        assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
1993        assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
1994        assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
1995        assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
1996        assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
1997        assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
1998        assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
1999        assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2000        assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2001        assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2002        assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2003        assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2004
2005        // parse directly as milliseconds
2006        assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2007
2008        // leap second
2009        assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2010
2011        // custom format
2012        assert_eq!(
2013            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2014            Some(7_801_100)
2015        );
2016    }
2017
2018    #[test]
2019    fn parse_time32_secs() {
2020        // expected formats
2021        assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2022        assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2023        assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2024        assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2025        assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2026        assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2027        assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2028        assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2029        assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2030        assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2031        assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2032        assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2033        assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2034
2035        // parse directly as seconds
2036        assert_eq!(Time32SecondType::parse("1"), Some(1));
2037
2038        // leap second
2039        assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2040
2041        // custom format
2042        assert_eq!(
2043            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2044            Some(7_801)
2045        );
2046    }
2047
2048    #[test]
2049    fn test_string_to_time_invalid() {
2050        let cases = [
2051            "25:00",
2052            "9:00:",
2053            "009:00",
2054            "09:0:00",
2055            "25:00:00",
2056            "13:00 AM",
2057            "13:00 PM",
2058            "12:00. AM",
2059            "09:0:00",
2060            "09:01:0",
2061            "09:01:1",
2062            "9:1:0",
2063            "09:01:0",
2064            "1:00.123",
2065            "1:00:00.123f",
2066            " 9:00:00",
2067            ":09:00",
2068            "T9:00:00",
2069            "AM",
2070        ];
2071        for case in cases {
2072            assert!(string_to_time(case).is_none(), "{case}");
2073        }
2074    }
2075
2076    #[test]
2077    fn test_string_to_time_chrono() {
2078        let cases = [
2079            ("1:00", "%H:%M"),
2080            ("12:00", "%H:%M"),
2081            ("13:00", "%H:%M"),
2082            ("24:00", "%H:%M"),
2083            ("1:00:00", "%H:%M:%S"),
2084            ("12:00:30", "%H:%M:%S"),
2085            ("13:00:59", "%H:%M:%S"),
2086            ("24:00:60", "%H:%M:%S"),
2087            ("09:00:00", "%H:%M:%S%.f"),
2088            ("0:00:30.123456", "%H:%M:%S%.f"),
2089            ("0:00 AM", "%I:%M %P"),
2090            ("1:00 AM", "%I:%M %P"),
2091            ("12:00 AM", "%I:%M %P"),
2092            ("13:00 AM", "%I:%M %P"),
2093            ("0:00 PM", "%I:%M %P"),
2094            ("1:00 PM", "%I:%M %P"),
2095            ("12:00 PM", "%I:%M %P"),
2096            ("13:00 PM", "%I:%M %P"),
2097            ("1:00 pM", "%I:%M %P"),
2098            ("1:00 Pm", "%I:%M %P"),
2099            ("1:00 aM", "%I:%M %P"),
2100            ("1:00 Am", "%I:%M %P"),
2101            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2102            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2103            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2104            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2105            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2106            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2107            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2108        ];
2109        for (s, format) in cases {
2110            let chrono = NaiveTime::parse_from_str(s, format).ok();
2111            let custom = string_to_time(s);
2112            assert_eq!(chrono, custom, "{s}");
2113        }
2114    }
2115
2116    #[test]
2117    fn test_parse_interval() {
2118        let config = IntervalParseConfig::new(IntervalUnit::Month);
2119
2120        assert_eq!(
2121            Interval::new(1i32, 0i32, 0i64),
2122            Interval::parse("1 month", &config).unwrap(),
2123        );
2124
2125        assert_eq!(
2126            Interval::new(2i32, 0i32, 0i64),
2127            Interval::parse("2 month", &config).unwrap(),
2128        );
2129
2130        assert_eq!(
2131            Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2132            Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2133        );
2134
2135        assert_eq!(
2136            Interval::new(0i32, 15i32, 0),
2137            Interval::parse("0.5 months", &config).unwrap(),
2138        );
2139
2140        assert_eq!(
2141            Interval::new(0i32, 15i32, 0),
2142            Interval::parse(".5 months", &config).unwrap(),
2143        );
2144
2145        assert_eq!(
2146            Interval::new(0i32, -15i32, 0),
2147            Interval::parse("-0.5 months", &config).unwrap(),
2148        );
2149
2150        assert_eq!(
2151            Interval::new(0i32, -15i32, 0),
2152            Interval::parse("-.5 months", &config).unwrap(),
2153        );
2154
2155        assert_eq!(
2156            Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2157            Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2158        );
2159
2160        assert_eq!(
2161            Interval::parse("1 centurys 1 month", &config)
2162                .unwrap_err()
2163                .to_string(),
2164            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2165        );
2166
2167        assert_eq!(
2168            Interval::new(37i32, 0i32, 0i64),
2169            Interval::parse("3 year 1 month", &config).unwrap(),
2170        );
2171
2172        assert_eq!(
2173            Interval::new(35i32, 0i32, 0i64),
2174            Interval::parse("3 year -1 month", &config).unwrap(),
2175        );
2176
2177        assert_eq!(
2178            Interval::new(-37i32, 0i32, 0i64),
2179            Interval::parse("-3 year -1 month", &config).unwrap(),
2180        );
2181
2182        assert_eq!(
2183            Interval::new(-35i32, 0i32, 0i64),
2184            Interval::parse("-3 year 1 month", &config).unwrap(),
2185        );
2186
2187        assert_eq!(
2188            Interval::new(0i32, 5i32, 0i64),
2189            Interval::parse("5 days", &config).unwrap(),
2190        );
2191
2192        assert_eq!(
2193            Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2194            Interval::parse("7 days 3 hours", &config).unwrap(),
2195        );
2196
2197        assert_eq!(
2198            Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2199            Interval::parse("7 days 5 minutes", &config).unwrap(),
2200        );
2201
2202        assert_eq!(
2203            Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2204            Interval::parse("7 days -5 minutes", &config).unwrap(),
2205        );
2206
2207        assert_eq!(
2208            Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2209            Interval::parse("-7 days 5 hours", &config).unwrap(),
2210        );
2211
2212        assert_eq!(
2213            Interval::new(
2214                0i32,
2215                -7i32,
2216                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2217            ),
2218            Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2219        );
2220
2221        assert_eq!(
2222            Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2223            Interval::parse("1 year 25 millisecond", &config).unwrap(),
2224        );
2225
2226        assert_eq!(
2227            Interval::new(
2228                12i32,
2229                1i32,
2230                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2231            ),
2232            Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2233        );
2234
2235        assert_eq!(
2236            Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2237            Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2238        );
2239
2240        assert_eq!(
2241            Interval::new(12i32, 1i32, 1000i64),
2242            Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2243        );
2244
2245        assert_eq!(
2246            Interval::new(12i32, 1i32, 1i64),
2247            Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2248        );
2249
2250        assert_eq!(
2251            Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2252            Interval::parse("1 month -1 second", &config).unwrap(),
2253        );
2254
2255        assert_eq!(
2256            Interval::new(
2257                -13i32,
2258                -8i32,
2259                -NANOS_PER_HOUR
2260                    - NANOS_PER_MINUTE
2261                    - NANOS_PER_SECOND
2262                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2263            ),
2264            Interval::parse(
2265                "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2266                &config
2267            )
2268            .unwrap(),
2269        );
2270
2271        // no units
2272        assert_eq!(
2273            Interval::new(1, 0, 0),
2274            Interval::parse("1", &config).unwrap()
2275        );
2276        assert_eq!(
2277            Interval::new(42, 0, 0),
2278            Interval::parse("42", &config).unwrap()
2279        );
2280        assert_eq!(
2281            Interval::new(0, 0, 42_000_000_000),
2282            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2283        );
2284
2285        // shorter units
2286        assert_eq!(
2287            Interval::new(1, 0, 0),
2288            Interval::parse("1 mon", &config).unwrap()
2289        );
2290        assert_eq!(
2291            Interval::new(1, 0, 0),
2292            Interval::parse("1 mons", &config).unwrap()
2293        );
2294        assert_eq!(
2295            Interval::new(0, 0, 1_000_000),
2296            Interval::parse("1 ms", &config).unwrap()
2297        );
2298        assert_eq!(
2299            Interval::new(0, 0, 1_000),
2300            Interval::parse("1 us", &config).unwrap()
2301        );
2302
2303        // no space
2304        assert_eq!(
2305            Interval::new(0, 0, 1_000),
2306            Interval::parse("1us", &config).unwrap()
2307        );
2308        assert_eq!(
2309            Interval::new(0, 0, NANOS_PER_SECOND),
2310            Interval::parse("1s", &config).unwrap()
2311        );
2312        assert_eq!(
2313            Interval::new(1, 2, 10_864_000_000_000),
2314            Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2315        );
2316
2317        assert_eq!(
2318            Interval::new(
2319                -13i32,
2320                -8i32,
2321                -NANOS_PER_HOUR
2322                    - NANOS_PER_MINUTE
2323                    - NANOS_PER_SECOND
2324                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2325            ),
2326            Interval::parse(
2327                "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2328                &config
2329            )
2330            .unwrap(),
2331        );
2332
2333        assert_eq!(
2334            Interval::parse("1h s", &config).unwrap_err().to_string(),
2335            r#"Parser error: Invalid input syntax for type interval: "1h s""#
2336        );
2337
2338        assert_eq!(
2339            Interval::parse("1XX", &config).unwrap_err().to_string(),
2340            r#"Parser error: Invalid input syntax for type interval: "1XX""#
2341        );
2342    }
2343
2344    #[test]
2345    fn test_duplicate_interval_type() {
2346        let config = IntervalParseConfig::new(IntervalUnit::Month);
2347
2348        let err = Interval::parse("1 month 1 second 1 second", &config)
2349            .expect_err("parsing interval should have failed");
2350        assert_eq!(
2351            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2352            format!("{err:?}")
2353        );
2354
2355        // test with singular and plural forms
2356        let err = Interval::parse("1 century 2 centuries", &config)
2357            .expect_err("parsing interval should have failed");
2358        assert_eq!(
2359            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2360            format!("{err:?}")
2361        );
2362    }
2363
2364    #[test]
2365    fn test_interval_amount_parsing() {
2366        // integer
2367        let result = IntervalAmount::from_str("123").unwrap();
2368        let expected = IntervalAmount::new(123, 0);
2369
2370        assert_eq!(result, expected);
2371
2372        // positive w/ fractional
2373        let result = IntervalAmount::from_str("0.3").unwrap();
2374        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2375
2376        assert_eq!(result, expected);
2377
2378        // negative w/ fractional
2379        let result = IntervalAmount::from_str("-3.5").unwrap();
2380        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2381
2382        assert_eq!(result, expected);
2383
2384        // invalid: missing fractional
2385        let result = IntervalAmount::from_str("3.");
2386        assert!(result.is_err());
2387
2388        // invalid: sign in fractional
2389        let result = IntervalAmount::from_str("3.-5");
2390        assert!(result.is_err());
2391    }
2392
2393    #[test]
2394    fn test_interval_precision() {
2395        let config = IntervalParseConfig::new(IntervalUnit::Month);
2396
2397        let result = Interval::parse("100000.1 days", &config).unwrap();
2398        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2399
2400        assert_eq!(result, expected);
2401    }
2402
2403    #[test]
2404    fn test_interval_addition() {
2405        // add 4.1 centuries
2406        let start = Interval::new(1, 2, 3);
2407        let expected = Interval::new(4921, 2, 3);
2408
2409        let result = start
2410            .add(
2411                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2412                IntervalUnit::Century,
2413            )
2414            .unwrap();
2415
2416        assert_eq!(result, expected);
2417
2418        // add 10.25 decades
2419        let start = Interval::new(1, 2, 3);
2420        let expected = Interval::new(1231, 2, 3);
2421
2422        let result = start
2423            .add(
2424                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2425                IntervalUnit::Decade,
2426            )
2427            .unwrap();
2428
2429        assert_eq!(result, expected);
2430
2431        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2432        let start = Interval::new(1, 2, 3);
2433        let expected = Interval::new(364, 2, 3);
2434
2435        let result = start
2436            .add(
2437                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2438                IntervalUnit::Year,
2439            )
2440            .unwrap();
2441
2442        assert_eq!(result, expected);
2443
2444        // add 1.5 months
2445        let start = Interval::new(1, 2, 3);
2446        let expected = Interval::new(2, 17, 3);
2447
2448        let result = start
2449            .add(
2450                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2451                IntervalUnit::Month,
2452            )
2453            .unwrap();
2454
2455        assert_eq!(result, expected);
2456
2457        // add -2 weeks
2458        let start = Interval::new(1, 25, 3);
2459        let expected = Interval::new(1, 11, 3);
2460
2461        let result = start
2462            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2463            .unwrap();
2464
2465        assert_eq!(result, expected);
2466
2467        // add 2.2 days
2468        let start = Interval::new(12, 15, 3);
2469        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2470
2471        let result = start
2472            .add(
2473                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2474                IntervalUnit::Day,
2475            )
2476            .unwrap();
2477
2478        assert_eq!(result, expected);
2479
2480        // add 12.5 hours
2481        let start = Interval::new(1, 2, 3);
2482        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2483
2484        let result = start
2485            .add(
2486                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2487                IntervalUnit::Hour,
2488            )
2489            .unwrap();
2490
2491        assert_eq!(result, expected);
2492
2493        // add -1.5 minutes
2494        let start = Interval::new(0, 0, -3);
2495        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2496
2497        let result = start
2498            .add(
2499                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2500                IntervalUnit::Minute,
2501            )
2502            .unwrap();
2503
2504        assert_eq!(result, expected);
2505    }
2506
2507    #[test]
2508    fn string_to_timestamp_old() {
2509        parse_timestamp("1677-06-14T07:29:01.256")
2510            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2511            .unwrap_err();
2512    }
2513
2514    #[test]
2515    fn test_parse_decimal_with_parameter() {
2516        let tests = [
2517            ("0", 0i128),
2518            ("123.123", 123123i128),
2519            ("123.1234", 123123i128),
2520            ("123.1", 123100i128),
2521            ("123", 123000i128),
2522            ("-123.123", -123123i128),
2523            ("-123.1234", -123123i128),
2524            ("-123.1", -123100i128),
2525            ("-123", -123000i128),
2526            ("0.0000123", 0i128),
2527            ("12.", 12000i128),
2528            ("-12.", -12000i128),
2529            ("00.1", 100i128),
2530            ("-00.1", -100i128),
2531            ("12345678912345678.1234", 12345678912345678123i128),
2532            ("-12345678912345678.1234", -12345678912345678123i128),
2533            ("99999999999999999.999", 99999999999999999999i128),
2534            ("-99999999999999999.999", -99999999999999999999i128),
2535            (".123", 123i128),
2536            ("-.123", -123i128),
2537            ("123.", 123000i128),
2538            ("-123.", -123000i128),
2539        ];
2540        for (s, i) in tests {
2541            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2542            assert_eq!(i, result_128.unwrap());
2543            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2544            assert_eq!(i256::from_i128(i), result_256.unwrap());
2545        }
2546
2547        let e_notation_tests = [
2548            ("1.23e3", "1230.0", 2),
2549            ("5.6714e+2", "567.14", 4),
2550            ("5.6714e-2", "0.056714", 4),
2551            ("5.6714e-2", "0.056714", 3),
2552            ("5.6741214125e2", "567.41214125", 4),
2553            ("8.91E4", "89100.0", 2),
2554            ("3.14E+5", "314000.0", 2),
2555            ("2.718e0", "2.718", 2),
2556            ("9.999999e-1", "0.9999999", 4),
2557            ("1.23e+3", "1230", 2),
2558            ("1.234559e+3", "1234.559", 2),
2559            ("1.00E-10", "0.0000000001", 11),
2560            ("1.23e-4", "0.000123", 2),
2561            ("9.876e7", "98760000.0", 2),
2562            ("5.432E+8", "543200000.0", 10),
2563            ("1.234567e9", "1234567000.0", 2),
2564            ("1.234567e2", "123.45670000", 2),
2565            ("4749.3e-5", "0.047493", 10),
2566            ("4749.3e+5", "474930000", 10),
2567            ("4749.3e-5", "0.047493", 1),
2568            ("4749.3e+5", "474930000", 1),
2569            ("0E-8", "0", 10),
2570            ("0E+6", "0", 10),
2571            ("1E-8", "0.00000001", 10),
2572            ("12E+6", "12000000", 10),
2573            ("12E-6", "0.000012", 10),
2574            ("0.1e-6", "0.0000001", 10),
2575            ("0.1e+6", "100000", 10),
2576            ("0.12e-6", "0.00000012", 10),
2577            ("0.12e+6", "120000", 10),
2578            ("000000000001e0", "000000000001", 3),
2579            ("000001.1034567002e0", "000001.1034567002", 3),
2580            ("1.234e16", "12340000000000000", 0),
2581            ("123.4e16", "1234000000000000000", 0),
2582        ];
2583        for (e, d, scale) in e_notation_tests {
2584            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2585            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2586            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2587            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2588            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2589            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2590        }
2591        let can_not_parse_tests = [
2592            "123,123",
2593            ".",
2594            "123.123.123",
2595            "",
2596            "+",
2597            "-",
2598            "e",
2599            "1.3e+e3",
2600            "5.6714ee-2",
2601            "4.11ee-+4",
2602            "4.11e++4",
2603            "1.1e.12",
2604            "1.23e+3.",
2605            "1.23e+3.1",
2606        ];
2607        for s in can_not_parse_tests {
2608            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2609            assert_eq!(
2610                format!("Parser error: can't parse the string value {s} to decimal"),
2611                result_128.unwrap_err().to_string()
2612            );
2613            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2614            assert_eq!(
2615                format!("Parser error: can't parse the string value {s} to decimal"),
2616                result_256.unwrap_err().to_string()
2617            );
2618        }
2619        let overflow_parse_tests = [
2620            ("12345678", 3),
2621            ("1.2345678e7", 3),
2622            ("12345678.9", 3),
2623            ("1.23456789e+7", 3),
2624            ("99999999.99", 3),
2625            ("9.999999999e7", 3),
2626            ("12345678908765.123456", 3),
2627            ("123456789087651234.56e-4", 3),
2628            ("1234560000000", 0),
2629            ("1.23456e12", 0),
2630        ];
2631        for (s, scale) in overflow_parse_tests {
2632            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2633            let expected_128 = "Parser error: parse decimal overflow";
2634            let actual_128 = result_128.unwrap_err().to_string();
2635
2636            assert!(
2637                actual_128.contains(expected_128),
2638                "actual: '{actual_128}', expected: '{expected_128}'"
2639            );
2640
2641            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2642            let expected_256 = "Parser error: parse decimal overflow";
2643            let actual_256 = result_256.unwrap_err().to_string();
2644
2645            assert!(
2646                actual_256.contains(expected_256),
2647                "actual: '{actual_256}', expected: '{expected_256}'"
2648            );
2649        }
2650
2651        let edge_tests_128 = [
2652            (
2653                "99999999999999999999999999999999999999",
2654                99999999999999999999999999999999999999i128,
2655                0,
2656            ),
2657            (
2658                "999999999999999999999999999999999999.99",
2659                99999999999999999999999999999999999999i128,
2660                2,
2661            ),
2662            (
2663                "9999999999999999999999999.9999999999999",
2664                99999999999999999999999999999999999999i128,
2665                13,
2666            ),
2667            (
2668                "9999999999999999999999999",
2669                99999999999999999999999990000000000000i128,
2670                13,
2671            ),
2672            (
2673                "0.99999999999999999999999999999999999999",
2674                99999999999999999999999999999999999999i128,
2675                38,
2676            ),
2677            (
2678                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2679                0i128,
2680                15,
2681            ),
2682            (
2683                "1.016744e-320",
2684                0i128,
2685                15,
2686            ),
2687            (
2688                "-1e3",
2689                -1000000000i128,
2690                6,
2691            ),
2692            (
2693                "+1e3",
2694                1000000000i128,
2695                6,
2696            ),
2697            (
2698                "-1e31",
2699                -10000000000000000000000000000000000000i128,
2700                6,
2701            ),
2702        ];
2703        for (s, i, scale) in edge_tests_128 {
2704            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2705            assert_eq!(i, result_128.unwrap());
2706        }
2707        let edge_tests_256 = [
2708            (
2709                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2710                i256::from_string(
2711                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2712                )
2713                .unwrap(),
2714                0,
2715            ),
2716            (
2717                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2718                i256::from_string(
2719                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2720                )
2721                .unwrap(),
2722                4,
2723            ),
2724            (
2725                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2726                i256::from_string(
2727                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2728                )
2729                .unwrap(),
2730                26,
2731            ),
2732            (
2733                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2734                i256::from_string(
2735                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2736                )
2737                .unwrap(),
2738                26,
2739            ),
2740            (
2741                "99999999999999999999999999999999999999999999999999",
2742                i256::from_string(
2743                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2744                )
2745                .unwrap(),
2746                26,
2747            ),
2748            (
2749                "9.9999999999999999999999999999999999999999999999999e+49",
2750                i256::from_string(
2751                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2752                )
2753                .unwrap(),
2754                26,
2755            ),
2756        ];
2757        for (s, i, scale) in edge_tests_256 {
2758            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2759            assert_eq!(i, result.unwrap());
2760        }
2761    }
2762
2763    #[test]
2764    fn test_parse_empty() {
2765        assert_eq!(Int32Type::parse(""), None);
2766        assert_eq!(Int64Type::parse(""), None);
2767        assert_eq!(UInt32Type::parse(""), None);
2768        assert_eq!(UInt64Type::parse(""), None);
2769        assert_eq!(Float32Type::parse(""), None);
2770        assert_eq!(Float64Type::parse(""), None);
2771        assert_eq!(Int32Type::parse("+"), None);
2772        assert_eq!(Int64Type::parse("+"), None);
2773        assert_eq!(UInt32Type::parse("+"), None);
2774        assert_eq!(UInt64Type::parse("+"), None);
2775        assert_eq!(Float32Type::parse("+"), None);
2776        assert_eq!(Float64Type::parse("+"), None);
2777        assert_eq!(TimestampNanosecondType::parse(""), None);
2778        assert_eq!(Date32Type::parse(""), None);
2779    }
2780
2781    #[test]
2782    fn test_parse_interval_month_day_nano_config() {
2783        let interval = parse_interval_month_day_nano_config(
2784            "1",
2785            IntervalParseConfig::new(IntervalUnit::Second),
2786        )
2787        .unwrap();
2788        assert_eq!(interval.months, 0);
2789        assert_eq!(interval.days, 0);
2790        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2791    }
2792}