arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::ArrowNativeTypeOp;
22use arrow_array::timezone::Tz;
23use arrow_array::types::*;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the leading `N` entries of `digits` into a nanosecond count
///
/// The entries are read as the first `N` decimal places of a fractional second,
/// so the accumulated value is scaled by `10^(9 - N)`. The offset `O` is
/// subtracted (wrapping) from every entry first: pass `b'0'` for raw ASCII
/// digits and `0` for values that have already been reduced to `0..=9`.
///
/// Indexing panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for d in &digits[..N] {
        value = value * 10 + u32::from(d.wrapping_sub(O));
    }
    // Pad the N parsed places out to the full nine places of a nanosecond
    let padding = 10_u32.pow((9 - N) as u32);
    value * padding
}
38
39/// Helper for parsing RFC3339 timestamps
40struct TimestampParser {
41    /// The timestamp bytes to parse minus `b'0'`
42    ///
43    /// This makes interpretation as an integer inexpensive
44    digits: [u8; 32],
45    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
46    mask: u32,
47}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306    let bytes = s.as_bytes();
307    if bytes.len() < 4 {
308        return None;
309    }
310
311    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314        _ => (None, bytes),
315    };
316
317    if bytes.len() < 4 {
318        return None;
319    }
320
321    let mut digits = [b'0'; 6];
322
323    // Extract hour
324    let bytes = match (bytes[1], bytes[2]) {
325        (b':', _) => {
326            digits[1] = bytes[0];
327            &bytes[2..]
328        }
329        (_, b':') => {
330            digits[0] = bytes[0];
331            digits[1] = bytes[1];
332            &bytes[3..]
333        }
334        _ => return None,
335    };
336
337    if bytes.len() < 2 {
338        return None; // Minutes required
339    }
340
341    // Extract minutes
342    digits[2] = bytes[0];
343    digits[3] = bytes[1];
344
345    let nanoseconds = match bytes.get(2) {
346        Some(b':') => {
347            if bytes.len() < 5 {
348                return None;
349            }
350
351            // Extract seconds
352            digits[4] = bytes[3];
353            digits[5] = bytes[4];
354
355            // Extract sub-seconds if any
356            match bytes.get(5) {
357                Some(b'.') => {
358                    let decimal = &bytes[6..];
359                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360                        return None;
361                    }
362                    match decimal.len() {
363                        0 => return None,
364                        1 => parse_nanos::<1, b'0'>(decimal),
365                        2 => parse_nanos::<2, b'0'>(decimal),
366                        3 => parse_nanos::<3, b'0'>(decimal),
367                        4 => parse_nanos::<4, b'0'>(decimal),
368                        5 => parse_nanos::<5, b'0'>(decimal),
369                        6 => parse_nanos::<6, b'0'>(decimal),
370                        7 => parse_nanos::<7, b'0'>(decimal),
371                        8 => parse_nanos::<8, b'0'>(decimal),
372                        _ => parse_nanos::<9, b'0'>(decimal),
373                    }
374                }
375                Some(_) => return None,
376                None => 0,
377            }
378        }
379        Some(_) => return None,
380        None => 0,
381    };
382
383    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384    if digits.iter().any(|x| *x > 9) {
385        return None;
386    }
387
388    let hour = match (digits[0] * 10 + digits[1], am) {
389        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393        (_, Some(_)) => return None,
394        (h, None) => h,
395    };
396
397    // Handle leap second
398    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399        60 => (59, nanoseconds + 1_000_000_000),
400        s => (s, nanoseconds),
401    };
402
403    NaiveTime::from_hms_nano_opt(
404        hour as _,
405        (digits[2] * 10 + digits[3]) as _,
406        second as _,
407        nanoseconds,
408    )
409}
410
411/// Specialized parsing implementations to convert strings to Arrow types.
412///
413/// This is used by csv and json reader and can be used directly as well.
414///
415/// # Example
416///
417/// To parse a string to a [`Date32Type`]:
418///
419/// ```
420/// use arrow_cast::parse::Parser;
421/// use arrow_array::types::Date32Type;
422/// let date = Date32Type::parse("2021-01-01").unwrap();
423/// assert_eq!(date, 18628);
424/// ```
425///
426/// To parse a string to a [`TimestampNanosecondType`]:
427///
428/// ```
429/// use arrow_cast::parse::Parser;
430/// use arrow_array::types::TimestampNanosecondType;
431/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
432/// assert_eq!(ts, 1609459200123456789);
433/// ```
434pub trait Parser: ArrowPrimitiveType {
435    /// Parse a string to the native type
436    fn parse(string: &str) -> Option<Self::Native>;
437
438    /// Parse a string to the native type with a format string
439    ///
440    /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse)
441    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
442        Self::parse(string)
443    }
444}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
466macro_rules! parser_primitive {
467    ($t:ty) => {
468        impl Parser for $t {
469            fn parse(string: &str) -> Option<Self::Native> {
470                if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471                    return None;
472                }
473                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474                    string.as_bytes(),
475                ) {
476                    (Some(n), x) if x == string.len() => Some(n),
477                    _ => None,
478                }
479            }
480        }
481    };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497    fn parse(string: &str) -> Option<i64> {
498        string_to_timestamp_nanos(string).ok()
499    }
500}
501
502impl Parser for TimestampMicrosecondType {
503    fn parse(string: &str) -> Option<i64> {
504        let nanos = string_to_timestamp_nanos(string).ok();
505        nanos.map(|x| x / 1000)
506    }
507}
508
509impl Parser for TimestampMillisecondType {
510    fn parse(string: &str) -> Option<i64> {
511        let nanos = string_to_timestamp_nanos(string).ok();
512        nanos.map(|x| x / 1_000_000)
513    }
514}
515
516impl Parser for TimestampSecondType {
517    fn parse(string: &str) -> Option<i64> {
518        let nanos = string_to_timestamp_nanos(string).ok();
519        nanos.map(|x| x / 1_000_000_000)
520    }
521}
522
523impl Parser for Time64NanosecondType {
524    // Will truncate any fractions of a nanosecond
525    fn parse(string: &str) -> Option<Self::Native> {
526        string_to_time_nanoseconds(string)
527            .ok()
528            .or_else(|| string.parse::<Self::Native>().ok())
529    }
530
531    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532        let nt = NaiveTime::parse_from_str(string, format).ok()?;
533        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534    }
535}
536
537impl Parser for Time64MicrosecondType {
538    // Will truncate any fractions of a microsecond
539    fn parse(string: &str) -> Option<Self::Native> {
540        string_to_time_nanoseconds(string)
541            .ok()
542            .map(|nanos| nanos / 1_000)
543            .or_else(|| string.parse::<Self::Native>().ok())
544    }
545
546    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547        let nt = NaiveTime::parse_from_str(string, format).ok()?;
548        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549    }
550}
551
552impl Parser for Time32MillisecondType {
553    // Will truncate any fractions of a millisecond
554    fn parse(string: &str) -> Option<Self::Native> {
555        string_to_time_nanoseconds(string)
556            .ok()
557            .map(|nanos| (nanos / 1_000_000) as i32)
558            .or_else(|| string.parse::<Self::Native>().ok())
559    }
560
561    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562        let nt = NaiveTime::parse_from_str(string, format).ok()?;
563        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564    }
565}
566
567impl Parser for Time32SecondType {
568    // Will truncate any fractions of a second
569    fn parse(string: &str) -> Option<Self::Native> {
570        string_to_time_nanoseconds(string)
571            .ok()
572            .map(|nanos| (nanos / 1_000_000_000) as i32)
573            .or_else(|| string.parse::<Self::Native>().ok())
574    }
575
576    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577        let nt = NaiveTime::parse_from_str(string, format).ok()?;
578        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579    }
580}
581
/// Day number of 1970-01-01 in the proleptic Gregorian calendar, counting
/// 0001-01-01 as day 1
///
/// Subtracted from `NaiveDate::num_days_from_ce` to obtain days since the Unix epoch
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a timestamp lies outside the interval that can be
/// expressed as 64-bit nanoseconds since the Unix epoch
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588fn parse_date(string: &str) -> Option<NaiveDate> {
589    // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06"
590    //
591    // According to [ISO 8601], years have:
592    //  Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by
593    //  zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol.
594    //
595    // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
596    if string.starts_with('+') || string.starts_with('-') {
597        // Skip the sign and look for the hyphen that terminates the year digits.
598        // According to ISO 8601 the unsigned part must be at least 4 digits.
599        let rest = &string[1..];
600        let hyphen = rest.find('-')?;
601        if hyphen < 4 {
602            return None;
603        }
604        // The year substring is the sign and the digits (but not the separator)
605        // e.g. for "+10999-12-31", hyphen is 5 and s[..6] is "+10999"
606        let year: i32 = string[..hyphen + 1].parse().ok()?;
607        // The remainder should begin with a '-' which we strip off, leaving the month-day part.
608        let remainder = string[hyphen + 1..].strip_prefix('-')?;
609        let mut parts = remainder.splitn(2, '-');
610        let month: u32 = parts.next()?.parse().ok()?;
611        let day: u32 = parts.next()?.parse().ok()?;
612        return NaiveDate::from_ymd_opt(year, month, day);
613    }
614
615    if string.len() > 10 {
616        // Try to parse as datetime and return just the date part
617        return string_to_datetime(&Utc, string)
618            .map(|dt| dt.date_naive())
619            .ok();
620    };
621    let mut digits = [0; 10];
622    let mut mask = 0;
623
624    // Treating all bytes the same way, helps LLVM vectorise this correctly
625    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626        *o = i.wrapping_sub(b'0');
627        mask |= ((*o < 10) as u16) << idx
628    }
629
630    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
633    if digits[4] != HYPHEN {
634        let (year, month, day) = match (mask, string.len()) {
635            (0b11111111, 8) => (
636                digits[0] as u16 * 1000
637                    + digits[1] as u16 * 100
638                    + digits[2] as u16 * 10
639                    + digits[3] as u16,
640                digits[4] * 10 + digits[5],
641                digits[6] * 10 + digits[7],
642            ),
643            _ => return None,
644        };
645        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646    }
647
648    let (month, day) = match mask {
649        0b1101101111 => {
650            if digits[7] != HYPHEN {
651                return None;
652            }
653            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654        }
655        0b101101111 => {
656            if digits[7] != HYPHEN {
657                return None;
658            }
659            (digits[5] * 10 + digits[6], digits[8])
660        }
661        0b110101111 => {
662            if digits[6] != HYPHEN {
663                return None;
664            }
665            (digits[5], digits[7] * 10 + digits[8])
666        }
667        0b10101111 => {
668            if digits[6] != HYPHEN {
669                return None;
670            }
671            (digits[5], digits[7])
672        }
673        _ => return None,
674    };
675
676    let year =
677        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680}
681
682impl Parser for Date32Type {
683    fn parse(string: &str) -> Option<i32> {
684        let date = parse_date(string)?;
685        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686    }
687
688    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689        let date = NaiveDate::parse_from_str(string, format).ok()?;
690        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691    }
692}
693
694impl Parser for Date64Type {
695    fn parse(string: &str) -> Option<i64> {
696        if string.len() <= 10 {
697            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698            Some(datetime.and_utc().timestamp_millis())
699        } else {
700            let date_time = string_to_datetime(&Utc, string).ok()?;
701            Some(date_time.timestamp_millis())
702        }
703    }
704
705    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706        use chrono::format::Fixed;
707        use chrono::format::StrftimeItems;
708        let fmt = StrftimeItems::new(format);
709        let has_zone = fmt.into_iter().any(|item| match item {
710            chrono::format::Item::Fixed(fixed_item) => matches!(
711                fixed_item,
712                Fixed::RFC2822
713                    | Fixed::RFC3339
714                    | Fixed::TimezoneName
715                    | Fixed::TimezoneOffsetColon
716                    | Fixed::TimezoneOffsetColonZ
717                    | Fixed::TimezoneOffset
718                    | Fixed::TimezoneOffsetZ
719            ),
720            _ => false,
721        });
722        if has_zone {
723            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724            Some(date_time.timestamp_millis())
725        } else {
726            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727            Some(date_time.and_utc().timestamp_millis())
728        }
729    }
730}
731
732fn parse_e_notation<T: DecimalType>(
733    s: &str,
734    mut digits: u16,
735    mut fractionals: i16,
736    mut result: T::Native,
737    index: usize,
738    precision: u16,
739    scale: i16,
740) -> Result<T::Native, ArrowError> {
741    let mut exp: i16 = 0;
742    let base = T::Native::usize_as(10);
743
744    let mut exp_start: bool = false;
745    // e has a plus sign
746    let mut pos_shift_direction: bool = true;
747
748    // skip to point or exponent index
749    let mut bs;
750    if fractionals > 0 {
751        // it's a fraction, so the point index needs to be skipped, so +1
752        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
753    } else {
754        // it's actually an integer that is already written into the result, so let's skip on to e
755        bs = s.as_bytes().iter().skip(index);
756    }
757
758    while let Some(b) = bs.next() {
759        match b {
760            b'0'..=b'9' => {
761                result = result.mul_wrapping(base);
762                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
763                if fractionals > 0 {
764                    fractionals += 1;
765                }
766                digits += 1;
767            }
768            &b'e' | &b'E' => {
769                exp_start = true;
770            }
771            _ => {
772                return Err(ArrowError::ParseError(format!(
773                    "can't parse the string value {s} to decimal"
774                )));
775            }
776        };
777
778        if exp_start {
779            pos_shift_direction = match bs.next() {
780                Some(&b'-') => false,
781                Some(&b'+') => true,
782                Some(b) => {
783                    if !b.is_ascii_digit() {
784                        return Err(ArrowError::ParseError(format!(
785                            "can't parse the string value {s} to decimal"
786                        )));
787                    }
788
789                    exp *= 10;
790                    exp += (b - b'0') as i16;
791
792                    true
793                }
794                None => {
795                    return Err(ArrowError::ParseError(format!(
796                        "can't parse the string value {s} to decimal"
797                    )));
798                }
799            };
800
801            for b in bs.by_ref() {
802                if !b.is_ascii_digit() {
803                    return Err(ArrowError::ParseError(format!(
804                        "can't parse the string value {s} to decimal"
805                    )));
806                }
807                exp *= 10;
808                exp += (b - b'0') as i16;
809            }
810        }
811    }
812
813    if digits == 0 && fractionals == 0 && exp == 0 {
814        return Err(ArrowError::ParseError(format!(
815            "can't parse the string value {s} to decimal"
816        )));
817    }
818
819    if !pos_shift_direction {
820        // exponent has a large negative sign
821        // 1.12345e-30 => 0.0{29}12345, scale = 5
822        if exp - (digits as i16 + scale) > 0 {
823            return Ok(T::Native::usize_as(0));
824        }
825        exp *= -1;
826    }
827
828    // point offset
829    exp = fractionals - exp;
830    // We have zeros on the left, we need to count them
831    if !pos_shift_direction && exp > digits as i16 {
832        digits = exp as u16;
833    }
834    // Number of numbers to be removed or added
835    exp = scale - exp;
836
837    if (digits as i16 + exp) as u16 > precision {
838        return Err(ArrowError::ParseError(format!(
839            "parse decimal overflow ({s})"
840        )));
841    }
842
843    if exp < 0 {
844        result = result.div_wrapping(base.pow_wrapping(-exp as _));
845    } else {
846        result = result.mul_wrapping(base.pow_wrapping(exp as _));
847    }
848
849    Ok(result)
850}
851
852/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
853/// The result value can't be out of bounds.
854pub fn parse_decimal<T: DecimalType>(
855    s: &str,
856    precision: u8,
857    scale: i8,
858) -> Result<T::Native, ArrowError> {
859    let mut result = T::Native::usize_as(0);
860    let mut fractionals: i8 = 0;
861    let mut digits: u8 = 0;
862    let base = T::Native::usize_as(10);
863
864    let bs = s.as_bytes();
865    let (signed, negative) = match bs.first() {
866        Some(b'-') => (true, true),
867        Some(b'+') => (true, false),
868        _ => (false, false),
869    };
870
871    if bs.is_empty() || signed && bs.len() == 1 {
872        return Err(ArrowError::ParseError(format!(
873            "can't parse the string value {s} to decimal"
874        )));
875    }
876
877    // Iterate over the raw input bytes, skipping the sign if any
878    let mut bs = bs.iter().enumerate().skip(signed as usize);
879
880    let mut is_e_notation = false;
881
882    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
883    // Thus, if we validate the precision correctly, we can skip overflow checks.
884    while let Some((index, b)) = bs.next() {
885        match b {
886            b'0'..=b'9' => {
887                if digits == 0 && *b == b'0' {
888                    // Ignore leading zeros.
889                    continue;
890                }
891                digits += 1;
892                result = result.mul_wrapping(base);
893                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
894            }
895            b'.' => {
896                let point_index = index;
897
898                for (_, b) in bs.by_ref() {
899                    if !b.is_ascii_digit() {
900                        if *b == b'e' || *b == b'E' {
901                            result = parse_e_notation::<T>(
902                                s,
903                                digits as u16,
904                                fractionals as i16,
905                                result,
906                                point_index,
907                                precision as u16,
908                                scale as i16,
909                            )?;
910
911                            is_e_notation = true;
912
913                            break;
914                        }
915                        return Err(ArrowError::ParseError(format!(
916                            "can't parse the string value {s} to decimal"
917                        )));
918                    }
919                    if fractionals == scale && scale != 0 {
920                        // We have processed all the digits that we need. All that
921                        // is left is to validate that the rest of the string contains
922                        // valid digits.
923                        continue;
924                    }
925                    fractionals += 1;
926                    digits += 1;
927                    result = result.mul_wrapping(base);
928                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
929                }
930
931                if is_e_notation {
932                    break;
933                }
934
935                // Fail on "."
936                if digits == 0 {
937                    return Err(ArrowError::ParseError(format!(
938                        "can't parse the string value {s} to decimal"
939                    )));
940                }
941            }
942            b'e' | b'E' => {
943                result = parse_e_notation::<T>(
944                    s,
945                    digits as u16,
946                    fractionals as i16,
947                    result,
948                    index,
949                    precision as u16,
950                    scale as i16,
951                )?;
952
953                is_e_notation = true;
954
955                break;
956            }
957            _ => {
958                return Err(ArrowError::ParseError(format!(
959                    "can't parse the string value {s} to decimal"
960                )));
961            }
962        }
963    }
964
965    if !is_e_notation {
966        if fractionals < scale {
967            let exp = scale - fractionals;
968            if exp as u8 + digits > precision {
969                return Err(ArrowError::ParseError(format!(
970                    "parse decimal overflow ({s})"
971                )));
972            }
973            let mul = base.pow_wrapping(exp as _);
974            result = result.mul_wrapping(mul);
975        } else if digits > precision {
976            return Err(ArrowError::ParseError(format!(
977                "parse decimal overflow ({s})"
978            )));
979        }
980    }
981
982    Ok(if negative {
983        result.neg_wrapping()
984    } else {
985        result
986    })
987}
988
989/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
990pub fn parse_interval_year_month(
991    value: &str,
992) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
993    let config = IntervalParseConfig::new(IntervalUnit::Year);
994    let interval = Interval::parse(value, &config)?;
995
996    let months = interval.to_year_months().map_err(|_| {
997        ArrowError::CastError(format!(
998            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
999        ))
1000    })?;
1001
1002    Ok(IntervalYearMonthType::make_value(0, months))
1003}
1004
1005/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
1006pub fn parse_interval_day_time(
1007    value: &str,
1008) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1009    let config = IntervalParseConfig::new(IntervalUnit::Day);
1010    let interval = Interval::parse(value, &config)?;
1011
1012    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1013        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1014    )))?;
1015
1016    Ok(IntervalDayTimeType::make_value(days, millis))
1017}
1018
1019/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1020pub fn parse_interval_month_day_nano_config(
1021    value: &str,
1022    config: IntervalParseConfig,
1023) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024    let interval = Interval::parse(value, &config)?;
1025
1026    let (months, days, nanos) = interval.to_month_day_nanos();
1027
1028    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1029}
1030
1031/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1032pub fn parse_interval_month_day_nano(
1033    value: &str,
1034) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1035    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1036}
1037
/// Number of nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Number of nanoseconds in one second
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Number of nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Number of nanoseconds in one hour
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Number of nanoseconds in one day; only compiled for tests
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1044
/// Config to parse interval strings
///
/// Currently stores only the `default_unit`, which is applied to an amount
/// that is not followed by a unit in the interval string.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// The default unit to use if none is specified
    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
    default_unit: IntervalUnit,
}
1054
1055impl IntervalParseConfig {
1056    /// Create a new [IntervalParseConfig] with the given default unit
1057    pub fn new(default_unit: IntervalUnit) -> Self {
1058        Self { default_unit }
1059    }
1060}
1061
/// The units an interval amount can be expressed in.
///
/// Each variant occupies its own bit, so a set of units can be tracked
/// in a single `u16` bitfield.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 1,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1093
1094/// Logic for parsing interval unit strings
1095///
1096/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1097/// for a list of unit names supported by PostgreSQL which we try to match here.
1098impl FromStr for IntervalUnit {
1099    type Err = ArrowError;
1100
1101    fn from_str(s: &str) -> Result<Self, ArrowError> {
1102        match s.to_lowercase().as_str() {
1103            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1104            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1105            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1106            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1107            "w" | "week" | "weeks" => Ok(Self::Week),
1108            "d" | "day" | "days" => Ok(Self::Day),
1109            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1110            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1111            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1112            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1113                Ok(Self::Millisecond)
1114            }
1115            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1116                Ok(Self::Microsecond)
1117            }
1118            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1119            _ => Err(ArrowError::InvalidArgumentError(format!(
1120                "Unknown interval type: {s}"
1121            ))),
1122        }
1123    }
1124}
1125
1126impl IntervalUnit {
1127    fn from_str_or_config(
1128        s: Option<&str>,
1129        config: &IntervalParseConfig,
1130    ) -> Result<Self, ArrowError> {
1131        match s {
1132            Some(s) => s.parse(),
1133            None => Ok(config.default_unit),
1134        }
1135    }
1136}
1137
/// A tuple representing (months, days, nanoseconds) in an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal digits retained for the fractional part of an interval amount.
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds
const INTERVAL_PRECISION: u32 = 15;
1143
/// A decimal interval amount split into its integer and fractional components,
/// e.g. the `1.5` in `1.5 days`.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer component of the interval amount
    integer: i64,
    /// The fractional component multiplied by 10^INTERVAL_PRECISION
    frac: i64,
}
1151
#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the integer part and the already scaled fraction
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1158
1159impl FromStr for IntervalAmount {
1160    type Err = ArrowError;
1161
1162    fn from_str(s: &str) -> Result<Self, Self::Err> {
1163        match s.split_once('.') {
1164            Some((integer, frac))
1165                if frac.len() <= INTERVAL_PRECISION as usize
1166                    && !frac.is_empty()
1167                    && !frac.starts_with('-') =>
1168            {
1169                // integer will be "" for values like ".5"
1170                // and "-" for values like "-.5"
1171                let explicit_neg = integer.starts_with('-');
1172                let integer = if integer.is_empty() || integer == "-" {
1173                    Ok(0)
1174                } else {
1175                    integer.parse::<i64>().map_err(|_| {
1176                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1177                    })
1178                }?;
1179
1180                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1181                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1182                })?;
1183
1184                // scale fractional part by interval precision
1185                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1186
1187                // propagate the sign of the integer part to the fractional part
1188                let frac = if integer < 0 || explicit_neg {
1189                    -frac
1190                } else {
1191                    frac
1192                };
1193
1194                let result = Self { integer, frac };
1195
1196                Ok(result)
1197            }
1198            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1199                "Failed to parse {s} as interval amount"
1200            ))),
1201            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1202                Err(ArrowError::ParseError(format!(
1203                    "{s} exceeds the precision available for interval amount"
1204                )))
1205            }
1206            Some(_) | None => {
1207                let integer = s.parse::<i64>().map_err(|_| {
1208                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1209                })?;
1210
1211                let result = Self { integer, frac: 0 };
1212                Ok(result)
1213            }
1214        }
1215    }
1216}
1217
/// An interval accumulated while parsing, kept as separate month, day and
/// nanosecond components. The default value has all components set to zero.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// The month component
    months: i32,
    /// The day component
    days: i32,
    /// The nanosecond component
    nanos: i64,
}
1224
1225impl Interval {
1226    fn new(months: i32, days: i32, nanos: i64) -> Self {
1227        Self {
1228            months,
1229            days,
1230            nanos,
1231        }
1232    }
1233
1234    fn to_year_months(&self) -> Result<i32, ArrowError> {
1235        match (self.months, self.days, self.nanos) {
1236            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1237            _ => Err(ArrowError::InvalidArgumentError(format!(
1238                "Unable to represent interval with days and nanos as year-months: {self:?}"
1239            ))),
1240        }
1241    }
1242
1243    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1244        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1245
1246        match self.nanos {
1247            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1248                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1249                    ArrowError::InvalidArgumentError(format!(
1250                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1251                        self.nanos
1252                    ))
1253                })?;
1254
1255                Ok((days, millis))
1256            }
1257            nanos => Err(ArrowError::InvalidArgumentError(format!(
1258                "Unable to represent {nanos} as milliseconds"
1259            ))),
1260        }
1261    }
1262
1263    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1264        (self.months, self.days, self.nanos)
1265    }
1266
1267    /// Parse string value in traditional Postgres format such as
1268    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1269    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1270        let components = parse_interval_components(value, config)?;
1271
1272        components
1273            .into_iter()
1274            .try_fold(Self::default(), |result, (amount, unit)| {
1275                result.add(amount, unit)
1276            })
1277    }
1278
1279    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1280    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1281    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1282    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1283    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1284    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1285    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1286        let result = match unit {
1287            IntervalUnit::Century => {
1288                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1289                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1290                let months = months_int
1291                    .add_checked(month_frac)?
1292                    .try_into()
1293                    .map_err(|_| {
1294                        ArrowError::ParseError(format!(
1295                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1296                            &amount.integer
1297                        ))
1298                    })?;
1299
1300                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1301            }
1302            IntervalUnit::Decade => {
1303                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1304
1305                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1306                let months = months_int
1307                    .add_checked(month_frac)?
1308                    .try_into()
1309                    .map_err(|_| {
1310                        ArrowError::ParseError(format!(
1311                            "Unable to represent {} decades as months in a signed 32-bit integer",
1312                            &amount.integer
1313                        ))
1314                    })?;
1315
1316                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1317            }
1318            IntervalUnit::Year => {
1319                let months_int = amount.integer.mul_checked(12)?;
1320                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1321                let months = months_int
1322                    .add_checked(month_frac)?
1323                    .try_into()
1324                    .map_err(|_| {
1325                        ArrowError::ParseError(format!(
1326                            "Unable to represent {} years as months in a signed 32-bit integer",
1327                            &amount.integer
1328                        ))
1329                    })?;
1330
1331                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1332            }
1333            IntervalUnit::Month => {
1334                let months = amount.integer.try_into().map_err(|_| {
1335                    ArrowError::ParseError(format!(
1336                        "Unable to represent {} months in a signed 32-bit integer",
1337                        &amount.integer
1338                    ))
1339                })?;
1340
1341                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1342                let days = days.try_into().map_err(|_| {
1343                    ArrowError::ParseError(format!(
1344                        "Unable to represent {} months as days in a signed 32-bit integer",
1345                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1346                    ))
1347                })?;
1348
1349                Self::new(
1350                    self.months.add_checked(months)?,
1351                    self.days.add_checked(days)?,
1352                    self.nanos,
1353                )
1354            }
1355            IntervalUnit::Week => {
1356                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1357                    ArrowError::ParseError(format!(
1358                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1359                        &amount.integer
1360                    ))
1361                })?;
1362
1363                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1364
1365                Self::new(
1366                    self.months,
1367                    self.days.add_checked(days)?,
1368                    self.nanos.add_checked(nanos)?,
1369                )
1370            }
1371            IntervalUnit::Day => {
1372                let days = amount.integer.try_into().map_err(|_| {
1373                    ArrowError::InvalidArgumentError(format!(
1374                        "Unable to represent {} days in a signed 32-bit integer",
1375                        amount.integer
1376                    ))
1377                })?;
1378
1379                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1380
1381                Self::new(
1382                    self.months,
1383                    self.days.add_checked(days)?,
1384                    self.nanos.add_checked(nanos)?,
1385                )
1386            }
1387            IntervalUnit::Hour => {
1388                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1389                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1390                let nanos = nanos_int.add_checked(nanos_frac)?;
1391
1392                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1393            }
1394            IntervalUnit::Minute => {
1395                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1396                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1397
1398                let nanos = nanos_int.add_checked(nanos_frac)?;
1399
1400                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1401            }
1402            IntervalUnit::Second => {
1403                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1404                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1405                let nanos = nanos_int.add_checked(nanos_frac)?;
1406
1407                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1408            }
1409            IntervalUnit::Millisecond => {
1410                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1411                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1412                let nanos = nanos_int.add_checked(nanos_frac)?;
1413
1414                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1415            }
1416            IntervalUnit::Microsecond => {
1417                let nanos_int = amount.integer.mul_checked(1_000)?;
1418                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1419                let nanos = nanos_int.add_checked(nanos_frac)?;
1420
1421                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1422            }
1423            IntervalUnit::Nanosecond => {
1424                let nanos_int = amount.integer;
1425                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1426                let nanos = nanos_int.add_checked(nanos_frac)?;
1427
1428                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1429            }
1430        };
1431
1432        Ok(result)
1433    }
1434}
1435
1436/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1437fn parse_interval_components(
1438    value: &str,
1439    config: &IntervalParseConfig,
1440) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1441    let raw_pairs = split_interval_components(value);
1442
1443    // parse amounts and units
1444    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1445        .iter()
1446        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1447        .collect()
1448    else {
1449        return Err(ArrowError::ParseError(format!(
1450            "Invalid input syntax for type interval: {value:?}"
1451        )));
1452    };
1453
1454    // collect parsed results
1455    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1456
1457    // duplicate units?
1458    let mut observed_interval_types = 0;
1459    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1460        if observed_interval_types & (*unit as u16) != 0 {
1461            return Err(ArrowError::ParseError(format!(
1462                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1463                value,
1464                raw_unit.unwrap_or_default(),
1465            )));
1466        }
1467
1468        observed_interval_types |= *unit as u16;
1469    }
1470
1471    let result = amounts.iter().copied().zip(units.iter().copied());
1472
1473    Ok(result.collect::<Vec<_>>())
1474}
1475
1476/// Split an interval into a vec of amounts and units.
1477///
1478/// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space.
1479///
1480/// This should match the behavior of PostgreSQL's interval parser.
1481fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1482    let mut result = vec![];
1483    let mut words = value.split(char::is_whitespace);
1484    while let Some(word) = words.next() {
1485        if let Some(split_word_at) = word.find(not_interval_amount) {
1486            let (amount, unit) = word.split_at(split_word_at);
1487            result.push((amount, Some(unit)));
1488        } else if let Some(unit) = words.next() {
1489            result.push((word, Some(unit)));
1490        } else {
1491            result.push((word, None));
1492            break;
1493        }
1494    }
1495    result
1496}
1497
/// Returns `true` when `c` cannot appear in an interval's numeric amount, i.e. it is
/// neither an ASCII digit, a decimal point nor a minus sign
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1502
1503#[cfg(test)]
1504mod tests {
1505    use super::*;
1506    use arrow_array::temporal_conversions::date32_to_datetime;
1507    use arrow_buffer::i256;
1508
1509    #[test]
1510    fn test_parse_nanos() {
1511        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1512        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1513        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1514    }
1515
1516    #[test]
1517    fn string_to_timestamp_timezone() {
1518        // Explicit timezone
1519        assert_eq!(
1520            1599572549190855000,
1521            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1522        );
1523        assert_eq!(
1524            1599572549190855000,
1525            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1526        );
1527        assert_eq!(
1528            1599572549000000000,
1529            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1530        ); // no fractional part
1531        assert_eq!(
1532            1599590549190855000,
1533            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1534        );
1535    }
1536
1537    #[test]
1538    fn string_to_timestamp_timezone_space() {
1539        // Ensure space rather than T between time and date is accepted
1540        assert_eq!(
1541            1599572549190855000,
1542            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1543        );
1544        assert_eq!(
1545            1599572549190855000,
1546            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1547        );
1548        assert_eq!(
1549            1599572549000000000,
1550            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1551        ); // no fractional part
1552        assert_eq!(
1553            1599590549190855000,
1554            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1555        );
1556    }
1557
1558    #[test]
1559    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1560    fn string_to_timestamp_no_timezone() {
1561        // This test is designed to succeed in regardless of the local
1562        // timezone the test machine is running. Thus it is still
1563        // somewhat susceptible to bugs in the use of chrono
1564        let naive_datetime = NaiveDateTime::new(
1565            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1566            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1567        );
1568
1569        // Ensure both T and ' ' variants work
1570        assert_eq!(
1571            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1572            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1573        );
1574
1575        assert_eq!(
1576            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1577            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1578        );
1579
1580        // Also ensure that parsing timestamps with no fractional
1581        // second part works as well
1582        let datetime_whole_secs = NaiveDateTime::new(
1583            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1584            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1585        )
1586        .and_utc();
1587
1588        // Ensure both T and ' ' variants work
1589        assert_eq!(
1590            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1591            parse_timestamp("2020-09-08T13:42:29").unwrap()
1592        );
1593
1594        assert_eq!(
1595            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1596            parse_timestamp("2020-09-08 13:42:29").unwrap()
1597        );
1598
1599        // ensure without time work
1600        // no time, should be the nano second at
1601        // 2020-09-08 0:0:0
1602        let datetime_no_time = NaiveDateTime::new(
1603            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1604            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1605        )
1606        .and_utc();
1607
1608        assert_eq!(
1609            datetime_no_time.timestamp_nanos_opt().unwrap(),
1610            parse_timestamp("2020-09-08").unwrap()
1611        )
1612    }
1613
1614    #[test]
1615    fn string_to_timestamp_chrono() {
1616        let cases = [
1617            "2020-09-08T13:42:29Z",
1618            "1969-01-01T00:00:00.1Z",
1619            "2020-09-08T12:00:12.12345678+00:00",
1620            "2020-09-08T12:00:12+00:00",
1621            "2020-09-08T12:00:12.1+00:00",
1622            "2020-09-08T12:00:12.12+00:00",
1623            "2020-09-08T12:00:12.123+00:00",
1624            "2020-09-08T12:00:12.1234+00:00",
1625            "2020-09-08T12:00:12.12345+00:00",
1626            "2020-09-08T12:00:12.123456+00:00",
1627            "2020-09-08T12:00:12.1234567+00:00",
1628            "2020-09-08T12:00:12.12345678+00:00",
1629            "2020-09-08T12:00:12.123456789+00:00",
1630            "2020-09-08T12:00:12.12345678912z",
1631            "2020-09-08T12:00:12.123456789123Z",
1632            "2020-09-08T12:00:12.123456789123+02:00",
1633            "2020-09-08T12:00:12.12345678912345Z",
1634            "2020-09-08T12:00:12.1234567891234567+02:00",
1635            "2020-09-08T12:00:60Z",
1636            "2020-09-08T12:00:60.123Z",
1637            "2020-09-08T12:00:60.123456+02:00",
1638            "2020-09-08T12:00:60.1234567891234567+02:00",
1639            "2020-09-08T12:00:60.999999999+02:00",
1640            "2020-09-08t12:00:12.12345678+00:00",
1641            "2020-09-08t12:00:12+00:00",
1642            "2020-09-08t12:00:12Z",
1643        ];
1644
1645        for case in cases {
1646            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1647            let chrono_utc = chrono.with_timezone(&Utc);
1648
1649            let custom = string_to_datetime(&Utc, case).unwrap();
1650            assert_eq!(chrono_utc, custom)
1651        }
1652    }
1653
1654    #[test]
1655    fn string_to_timestamp_naive() {
1656        let cases = [
1657            "2018-11-13T17:11:10.011375885995",
1658            "2030-12-04T17:11:10.123",
1659            "2030-12-04T17:11:10.1234",
1660            "2030-12-04T17:11:10.123456",
1661        ];
1662        for case in cases {
1663            let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1664            let custom = string_to_datetime(&Utc, case).unwrap();
1665            assert_eq!(chrono, custom.naive_utc())
1666        }
1667    }
1668
1669    #[test]
1670    fn string_to_timestamp_invalid() {
1671        // Test parsing invalid formats
1672        let cases = [
1673            ("", "timestamp must contain at least 10 characters"),
1674            ("SS", "timestamp must contain at least 10 characters"),
1675            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1676            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1677            ("1997-01-31  09:26:56.123Z", "error parsing time"),
1678            ("1997:01:31T09:26:56.123Z", "error parsing date"),
1679            ("1997:1:31T09:26:56.123Z", "error parsing date"),
1680            ("1997-01-32T09:26:56.123Z", "error parsing date"),
1681            ("1997-13-32T09:26:56.123Z", "error parsing date"),
1682            ("1997-02-29T09:26:56.123Z", "error parsing date"),
1683            ("2015-02-30T17:35:20-08:00", "error parsing date"),
1684            ("1997-01-10T9:26:56.123Z", "error parsing time"),
1685            ("2015-01-20T25:35:20-08:00", "error parsing time"),
1686            ("1997-01-10T09:61:56.123Z", "error parsing time"),
1687            ("1997-01-10T09:61:90.123Z", "error parsing time"),
1688            ("1997-01-10T12:00:6.123Z", "error parsing time"),
1689            ("1997-01-31T092656.123Z", "error parsing time"),
1690            ("1997-01-10T12:00:06.", "error parsing time"),
1691            ("1997-01-10T12:00:06. ", "error parsing time"),
1692        ];
1693
1694        for (s, ctx) in cases {
1695            let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1696            let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1697            assert_eq!(actual, expected)
1698        }
1699    }
1700
1701    // Parse a timestamp to timestamp int with a useful human readable error message
1702    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1703        let result = string_to_timestamp_nanos(s);
1704        if let Err(e) = &result {
1705            eprintln!("Error parsing timestamp '{s}': {e:?}");
1706        }
1707        result
1708    }
1709
1710    #[test]
1711    fn string_without_timezone_to_timestamp() {
1712        // string without timezone should always output the same regardless the local or session timezone
1713
1714        let naive_datetime = NaiveDateTime::new(
1715            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1716            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1717        );
1718
1719        // Ensure both T and ' ' variants work
1720        assert_eq!(
1721            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1722            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1723        );
1724
1725        assert_eq!(
1726            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1727            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1728        );
1729
1730        let naive_datetime = NaiveDateTime::new(
1731            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1732            NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1733        );
1734
1735        // Ensure both T and ' ' variants work
1736        assert_eq!(
1737            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1738            parse_timestamp("2020-09-08T13:42:29").unwrap()
1739        );
1740
1741        assert_eq!(
1742            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1743            parse_timestamp("2020-09-08 13:42:29").unwrap()
1744        );
1745
1746        let tz: Tz = "+02:00".parse().unwrap();
1747        let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1748        let utc = date.naive_utc().to_string();
1749        assert_eq!(utc, "2020-09-08 11:42:29");
1750        let local = date.naive_local().to_string();
1751        assert_eq!(local, "2020-09-08 13:42:29");
1752
1753        let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1754        let utc = date.naive_utc().to_string();
1755        assert_eq!(utc, "2020-09-08 13:42:29");
1756        let local = date.naive_local().to_string();
1757        assert_eq!(local, "2020-09-08 15:42:29");
1758
1759        let dt =
1760            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1761        let local: Tz = "+08:00".parse().unwrap();
1762
1763        // Parsed as offset from UTC
1764        let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1765        assert_eq!(dt, date.naive_utc());
1766        assert_ne!(dt, date.naive_local());
1767
1768        // Parsed as offset from local
1769        let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1770        assert_eq!(dt, date.naive_local());
1771        assert_ne!(dt, date.naive_utc());
1772    }
1773
1774    #[test]
1775    fn parse_date32() {
1776        let cases = [
1777            "2020-09-08",
1778            "2020-9-8",
1779            "2020-09-8",
1780            "2020-9-08",
1781            "2020-12-1",
1782            "1690-2-5",
1783            "2020-09-08 01:02:03",
1784        ];
1785        for case in cases {
1786            let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1787            let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1788                .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1789                .unwrap();
1790            assert_eq!(v.date(), expected);
1791        }
1792
1793        let err_cases = [
1794            "",
1795            "80-01-01",
1796            "342",
1797            "Foo",
1798            "2020-09-08-03",
1799            "2020--04-03",
1800            "2020--",
1801            "2020-09-08 01",
1802            "2020-09-08 01:02",
1803            "2020-09-08 01-02-03",
1804            "2020-9-8 01:02:03",
1805            "2020-09-08 1:2:3",
1806        ];
1807        for case in err_cases {
1808            assert_eq!(Date32Type::parse(case), None);
1809        }
1810    }
1811
1812    #[test]
1813    fn parse_time64_nanos() {
1814        assert_eq!(
1815            Time64NanosecondType::parse("02:10:01.1234567899999999"),
1816            Some(7_801_123_456_789)
1817        );
1818        assert_eq!(
1819            Time64NanosecondType::parse("02:10:01.1234567"),
1820            Some(7_801_123_456_700)
1821        );
1822        assert_eq!(
1823            Time64NanosecondType::parse("2:10:01.1234567"),
1824            Some(7_801_123_456_700)
1825        );
1826        assert_eq!(
1827            Time64NanosecondType::parse("12:10:01.123456789 AM"),
1828            Some(601_123_456_789)
1829        );
1830        assert_eq!(
1831            Time64NanosecondType::parse("12:10:01.123456789 am"),
1832            Some(601_123_456_789)
1833        );
1834        assert_eq!(
1835            Time64NanosecondType::parse("2:10:01.12345678 PM"),
1836            Some(51_001_123_456_780)
1837        );
1838        assert_eq!(
1839            Time64NanosecondType::parse("2:10:01.12345678 pm"),
1840            Some(51_001_123_456_780)
1841        );
1842        assert_eq!(
1843            Time64NanosecondType::parse("02:10:01"),
1844            Some(7_801_000_000_000)
1845        );
1846        assert_eq!(
1847            Time64NanosecondType::parse("2:10:01"),
1848            Some(7_801_000_000_000)
1849        );
1850        assert_eq!(
1851            Time64NanosecondType::parse("12:10:01 AM"),
1852            Some(601_000_000_000)
1853        );
1854        assert_eq!(
1855            Time64NanosecondType::parse("12:10:01 am"),
1856            Some(601_000_000_000)
1857        );
1858        assert_eq!(
1859            Time64NanosecondType::parse("2:10:01 PM"),
1860            Some(51_001_000_000_000)
1861        );
1862        assert_eq!(
1863            Time64NanosecondType::parse("2:10:01 pm"),
1864            Some(51_001_000_000_000)
1865        );
1866        assert_eq!(
1867            Time64NanosecondType::parse("02:10"),
1868            Some(7_800_000_000_000)
1869        );
1870        assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1871        assert_eq!(
1872            Time64NanosecondType::parse("12:10 AM"),
1873            Some(600_000_000_000)
1874        );
1875        assert_eq!(
1876            Time64NanosecondType::parse("12:10 am"),
1877            Some(600_000_000_000)
1878        );
1879        assert_eq!(
1880            Time64NanosecondType::parse("2:10 PM"),
1881            Some(51_000_000_000_000)
1882        );
1883        assert_eq!(
1884            Time64NanosecondType::parse("2:10 pm"),
1885            Some(51_000_000_000_000)
1886        );
1887
1888        // parse directly as nanoseconds
1889        assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1890
1891        // leap second
1892        assert_eq!(
1893            Time64NanosecondType::parse("23:59:60"),
1894            Some(86_400_000_000_000)
1895        );
1896
1897        // custom format
1898        assert_eq!(
1899            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1900            Some(7_801_123_456_700)
1901        );
1902    }
1903
1904    #[test]
1905    fn parse_time64_micros() {
1906        // expected formats
1907        assert_eq!(
1908            Time64MicrosecondType::parse("02:10:01.1234"),
1909            Some(7_801_123_400)
1910        );
1911        assert_eq!(
1912            Time64MicrosecondType::parse("2:10:01.1234"),
1913            Some(7_801_123_400)
1914        );
1915        assert_eq!(
1916            Time64MicrosecondType::parse("12:10:01.123456 AM"),
1917            Some(601_123_456)
1918        );
1919        assert_eq!(
1920            Time64MicrosecondType::parse("12:10:01.123456 am"),
1921            Some(601_123_456)
1922        );
1923        assert_eq!(
1924            Time64MicrosecondType::parse("2:10:01.12345 PM"),
1925            Some(51_001_123_450)
1926        );
1927        assert_eq!(
1928            Time64MicrosecondType::parse("2:10:01.12345 pm"),
1929            Some(51_001_123_450)
1930        );
1931        assert_eq!(
1932            Time64MicrosecondType::parse("02:10:01"),
1933            Some(7_801_000_000)
1934        );
1935        assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1936        assert_eq!(
1937            Time64MicrosecondType::parse("12:10:01 AM"),
1938            Some(601_000_000)
1939        );
1940        assert_eq!(
1941            Time64MicrosecondType::parse("12:10:01 am"),
1942            Some(601_000_000)
1943        );
1944        assert_eq!(
1945            Time64MicrosecondType::parse("2:10:01 PM"),
1946            Some(51_001_000_000)
1947        );
1948        assert_eq!(
1949            Time64MicrosecondType::parse("2:10:01 pm"),
1950            Some(51_001_000_000)
1951        );
1952        assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1953        assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1954        assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1955        assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1956        assert_eq!(
1957            Time64MicrosecondType::parse("2:10 PM"),
1958            Some(51_000_000_000)
1959        );
1960        assert_eq!(
1961            Time64MicrosecondType::parse("2:10 pm"),
1962            Some(51_000_000_000)
1963        );
1964
1965        // parse directly as microseconds
1966        assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1967
1968        // leap second
1969        assert_eq!(
1970            Time64MicrosecondType::parse("23:59:60"),
1971            Some(86_400_000_000)
1972        );
1973
1974        // custom format
1975        assert_eq!(
1976            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1977            Some(7_801_123_400)
1978        );
1979    }
1980
1981    #[test]
1982    fn parse_time32_millis() {
1983        // expected formats
1984        assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1985        assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1986        assert_eq!(
1987            Time32MillisecondType::parse("12:10:01.123 AM"),
1988            Some(601_123)
1989        );
1990        assert_eq!(
1991            Time32MillisecondType::parse("12:10:01.123 am"),
1992            Some(601_123)
1993        );
1994        assert_eq!(
1995            Time32MillisecondType::parse("2:10:01.12 PM"),
1996            Some(51_001_120)
1997        );
1998        assert_eq!(
1999            Time32MillisecondType::parse("2:10:01.12 pm"),
2000            Some(51_001_120)
2001        );
2002        assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2003        assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2004        assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2005        assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2006        assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2007        assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2008        assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2009        assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2010        assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2011        assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2012        assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2013        assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2014
2015        // parse directly as milliseconds
2016        assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2017
2018        // leap second
2019        assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2020
2021        // custom format
2022        assert_eq!(
2023            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2024            Some(7_801_100)
2025        );
2026    }
2027
2028    #[test]
2029    fn parse_time32_secs() {
2030        // expected formats
2031        assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2032        assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2033        assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2034        assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2035        assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2036        assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2037        assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2038        assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2039        assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2040        assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2041        assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2042        assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2043        assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2044
2045        // parse directly as seconds
2046        assert_eq!(Time32SecondType::parse("1"), Some(1));
2047
2048        // leap second
2049        assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2050
2051        // custom format
2052        assert_eq!(
2053            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2054            Some(7_801)
2055        );
2056    }
2057
2058    #[test]
2059    fn test_string_to_time_invalid() {
2060        let cases = [
2061            "25:00",
2062            "9:00:",
2063            "009:00",
2064            "09:0:00",
2065            "25:00:00",
2066            "13:00 AM",
2067            "13:00 PM",
2068            "12:00. AM",
2069            "09:0:00",
2070            "09:01:0",
2071            "09:01:1",
2072            "9:1:0",
2073            "09:01:0",
2074            "1:00.123",
2075            "1:00:00.123f",
2076            " 9:00:00",
2077            ":09:00",
2078            "T9:00:00",
2079            "AM",
2080        ];
2081        for case in cases {
2082            assert!(string_to_time(case).is_none(), "{case}");
2083        }
2084    }
2085
2086    #[test]
2087    fn test_string_to_time_chrono() {
2088        let cases = [
2089            ("1:00", "%H:%M"),
2090            ("12:00", "%H:%M"),
2091            ("13:00", "%H:%M"),
2092            ("24:00", "%H:%M"),
2093            ("1:00:00", "%H:%M:%S"),
2094            ("12:00:30", "%H:%M:%S"),
2095            ("13:00:59", "%H:%M:%S"),
2096            ("24:00:60", "%H:%M:%S"),
2097            ("09:00:00", "%H:%M:%S%.f"),
2098            ("0:00:30.123456", "%H:%M:%S%.f"),
2099            ("0:00 AM", "%I:%M %P"),
2100            ("1:00 AM", "%I:%M %P"),
2101            ("12:00 AM", "%I:%M %P"),
2102            ("13:00 AM", "%I:%M %P"),
2103            ("0:00 PM", "%I:%M %P"),
2104            ("1:00 PM", "%I:%M %P"),
2105            ("12:00 PM", "%I:%M %P"),
2106            ("13:00 PM", "%I:%M %P"),
2107            ("1:00 pM", "%I:%M %P"),
2108            ("1:00 Pm", "%I:%M %P"),
2109            ("1:00 aM", "%I:%M %P"),
2110            ("1:00 Am", "%I:%M %P"),
2111            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2112            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2113            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2114            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2115            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2116            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2117            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2118        ];
2119        for (s, format) in cases {
2120            let chrono = NaiveTime::parse_from_str(s, format).ok();
2121            let custom = string_to_time(s);
2122            assert_eq!(chrono, custom, "{s}");
2123        }
2124    }
2125
2126    #[test]
2127    fn test_parse_interval() {
2128        let config = IntervalParseConfig::new(IntervalUnit::Month);
2129
2130        assert_eq!(
2131            Interval::new(1i32, 0i32, 0i64),
2132            Interval::parse("1 month", &config).unwrap(),
2133        );
2134
2135        assert_eq!(
2136            Interval::new(2i32, 0i32, 0i64),
2137            Interval::parse("2 month", &config).unwrap(),
2138        );
2139
2140        assert_eq!(
2141            Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2142            Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2143        );
2144
2145        assert_eq!(
2146            Interval::new(0i32, 15i32, 0),
2147            Interval::parse("0.5 months", &config).unwrap(),
2148        );
2149
2150        assert_eq!(
2151            Interval::new(0i32, 15i32, 0),
2152            Interval::parse(".5 months", &config).unwrap(),
2153        );
2154
2155        assert_eq!(
2156            Interval::new(0i32, -15i32, 0),
2157            Interval::parse("-0.5 months", &config).unwrap(),
2158        );
2159
2160        assert_eq!(
2161            Interval::new(0i32, -15i32, 0),
2162            Interval::parse("-.5 months", &config).unwrap(),
2163        );
2164
2165        assert_eq!(
2166            Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2167            Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2168        );
2169
2170        assert_eq!(
2171            Interval::parse("1 centurys 1 month", &config)
2172                .unwrap_err()
2173                .to_string(),
2174            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2175        );
2176
2177        assert_eq!(
2178            Interval::new(37i32, 0i32, 0i64),
2179            Interval::parse("3 year 1 month", &config).unwrap(),
2180        );
2181
2182        assert_eq!(
2183            Interval::new(35i32, 0i32, 0i64),
2184            Interval::parse("3 year -1 month", &config).unwrap(),
2185        );
2186
2187        assert_eq!(
2188            Interval::new(-37i32, 0i32, 0i64),
2189            Interval::parse("-3 year -1 month", &config).unwrap(),
2190        );
2191
2192        assert_eq!(
2193            Interval::new(-35i32, 0i32, 0i64),
2194            Interval::parse("-3 year 1 month", &config).unwrap(),
2195        );
2196
2197        assert_eq!(
2198            Interval::new(0i32, 5i32, 0i64),
2199            Interval::parse("5 days", &config).unwrap(),
2200        );
2201
2202        assert_eq!(
2203            Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2204            Interval::parse("7 days 3 hours", &config).unwrap(),
2205        );
2206
2207        assert_eq!(
2208            Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2209            Interval::parse("7 days 5 minutes", &config).unwrap(),
2210        );
2211
2212        assert_eq!(
2213            Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2214            Interval::parse("7 days -5 minutes", &config).unwrap(),
2215        );
2216
2217        assert_eq!(
2218            Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2219            Interval::parse("-7 days 5 hours", &config).unwrap(),
2220        );
2221
2222        assert_eq!(
2223            Interval::new(
2224                0i32,
2225                -7i32,
2226                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2227            ),
2228            Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2229        );
2230
2231        assert_eq!(
2232            Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2233            Interval::parse("1 year 25 millisecond", &config).unwrap(),
2234        );
2235
2236        assert_eq!(
2237            Interval::new(
2238                12i32,
2239                1i32,
2240                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2241            ),
2242            Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2243        );
2244
2245        assert_eq!(
2246            Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2247            Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2248        );
2249
2250        assert_eq!(
2251            Interval::new(12i32, 1i32, 1000i64),
2252            Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2253        );
2254
2255        assert_eq!(
2256            Interval::new(12i32, 1i32, 1i64),
2257            Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2258        );
2259
2260        assert_eq!(
2261            Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2262            Interval::parse("1 month -1 second", &config).unwrap(),
2263        );
2264
2265        assert_eq!(
2266            Interval::new(
2267                -13i32,
2268                -8i32,
2269                -NANOS_PER_HOUR
2270                    - NANOS_PER_MINUTE
2271                    - NANOS_PER_SECOND
2272                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2273            ),
2274            Interval::parse(
2275                "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2276                &config
2277            )
2278            .unwrap(),
2279        );
2280
2281        // no units
2282        assert_eq!(
2283            Interval::new(1, 0, 0),
2284            Interval::parse("1", &config).unwrap()
2285        );
2286        assert_eq!(
2287            Interval::new(42, 0, 0),
2288            Interval::parse("42", &config).unwrap()
2289        );
2290        assert_eq!(
2291            Interval::new(0, 0, 42_000_000_000),
2292            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2293        );
2294
2295        // shorter units
2296        assert_eq!(
2297            Interval::new(1, 0, 0),
2298            Interval::parse("1 mon", &config).unwrap()
2299        );
2300        assert_eq!(
2301            Interval::new(1, 0, 0),
2302            Interval::parse("1 mons", &config).unwrap()
2303        );
2304        assert_eq!(
2305            Interval::new(0, 0, 1_000_000),
2306            Interval::parse("1 ms", &config).unwrap()
2307        );
2308        assert_eq!(
2309            Interval::new(0, 0, 1_000),
2310            Interval::parse("1 us", &config).unwrap()
2311        );
2312
2313        // no space
2314        assert_eq!(
2315            Interval::new(0, 0, 1_000),
2316            Interval::parse("1us", &config).unwrap()
2317        );
2318        assert_eq!(
2319            Interval::new(0, 0, NANOS_PER_SECOND),
2320            Interval::parse("1s", &config).unwrap()
2321        );
2322        assert_eq!(
2323            Interval::new(1, 2, 10_864_000_000_000),
2324            Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2325        );
2326
2327        assert_eq!(
2328            Interval::new(
2329                -13i32,
2330                -8i32,
2331                -NANOS_PER_HOUR
2332                    - NANOS_PER_MINUTE
2333                    - NANOS_PER_SECOND
2334                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2335            ),
2336            Interval::parse(
2337                "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2338                &config
2339            )
2340            .unwrap(),
2341        );
2342
2343        assert_eq!(
2344            Interval::parse("1h s", &config).unwrap_err().to_string(),
2345            r#"Parser error: Invalid input syntax for type interval: "1h s""#
2346        );
2347
2348        assert_eq!(
2349            Interval::parse("1XX", &config).unwrap_err().to_string(),
2350            r#"Parser error: Invalid input syntax for type interval: "1XX""#
2351        );
2352    }
2353
2354    #[test]
2355    fn test_duplicate_interval_type() {
2356        let config = IntervalParseConfig::new(IntervalUnit::Month);
2357
2358        let err = Interval::parse("1 month 1 second 1 second", &config)
2359            .expect_err("parsing interval should have failed");
2360        assert_eq!(
2361            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2362            format!("{err:?}")
2363        );
2364
2365        // test with singular and plural forms
2366        let err = Interval::parse("1 century 2 centuries", &config)
2367            .expect_err("parsing interval should have failed");
2368        assert_eq!(
2369            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2370            format!("{err:?}")
2371        );
2372    }
2373
2374    #[test]
2375    fn test_interval_amount_parsing() {
2376        // integer
2377        let result = IntervalAmount::from_str("123").unwrap();
2378        let expected = IntervalAmount::new(123, 0);
2379
2380        assert_eq!(result, expected);
2381
2382        // positive w/ fractional
2383        let result = IntervalAmount::from_str("0.3").unwrap();
2384        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2385
2386        assert_eq!(result, expected);
2387
2388        // negative w/ fractional
2389        let result = IntervalAmount::from_str("-3.5").unwrap();
2390        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2391
2392        assert_eq!(result, expected);
2393
2394        // invalid: missing fractional
2395        let result = IntervalAmount::from_str("3.");
2396        assert!(result.is_err());
2397
2398        // invalid: sign in fractional
2399        let result = IntervalAmount::from_str("3.-5");
2400        assert!(result.is_err());
2401    }
2402
2403    #[test]
2404    fn test_interval_precision() {
2405        let config = IntervalParseConfig::new(IntervalUnit::Month);
2406
2407        let result = Interval::parse("100000.1 days", &config).unwrap();
2408        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2409
2410        assert_eq!(result, expected);
2411    }
2412
2413    #[test]
2414    fn test_interval_addition() {
2415        // add 4.1 centuries
2416        let start = Interval::new(1, 2, 3);
2417        let expected = Interval::new(4921, 2, 3);
2418
2419        let result = start
2420            .add(
2421                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2422                IntervalUnit::Century,
2423            )
2424            .unwrap();
2425
2426        assert_eq!(result, expected);
2427
2428        // add 10.25 decades
2429        let start = Interval::new(1, 2, 3);
2430        let expected = Interval::new(1231, 2, 3);
2431
2432        let result = start
2433            .add(
2434                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2435                IntervalUnit::Decade,
2436            )
2437            .unwrap();
2438
2439        assert_eq!(result, expected);
2440
2441        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2442        let start = Interval::new(1, 2, 3);
2443        let expected = Interval::new(364, 2, 3);
2444
2445        let result = start
2446            .add(
2447                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2448                IntervalUnit::Year,
2449            )
2450            .unwrap();
2451
2452        assert_eq!(result, expected);
2453
2454        // add 1.5 months
2455        let start = Interval::new(1, 2, 3);
2456        let expected = Interval::new(2, 17, 3);
2457
2458        let result = start
2459            .add(
2460                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2461                IntervalUnit::Month,
2462            )
2463            .unwrap();
2464
2465        assert_eq!(result, expected);
2466
2467        // add -2 weeks
2468        let start = Interval::new(1, 25, 3);
2469        let expected = Interval::new(1, 11, 3);
2470
2471        let result = start
2472            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2473            .unwrap();
2474
2475        assert_eq!(result, expected);
2476
2477        // add 2.2 days
2478        let start = Interval::new(12, 15, 3);
2479        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2480
2481        let result = start
2482            .add(
2483                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2484                IntervalUnit::Day,
2485            )
2486            .unwrap();
2487
2488        assert_eq!(result, expected);
2489
2490        // add 12.5 hours
2491        let start = Interval::new(1, 2, 3);
2492        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2493
2494        let result = start
2495            .add(
2496                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2497                IntervalUnit::Hour,
2498            )
2499            .unwrap();
2500
2501        assert_eq!(result, expected);
2502
2503        // add -1.5 minutes
2504        let start = Interval::new(0, 0, -3);
2505        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2506
2507        let result = start
2508            .add(
2509                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2510                IntervalUnit::Minute,
2511            )
2512            .unwrap();
2513
2514        assert_eq!(result, expected);
2515    }
2516
2517    #[test]
2518    fn string_to_timestamp_old() {
2519        parse_timestamp("1677-06-14T07:29:01.256")
2520            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2521            .unwrap_err();
2522    }
2523
2524    #[test]
2525    fn test_parse_decimal_with_parameter() {
2526        let tests = [
2527            ("0", 0i128),
2528            ("123.123", 123123i128),
2529            ("123.1234", 123123i128),
2530            ("123.1", 123100i128),
2531            ("123", 123000i128),
2532            ("-123.123", -123123i128),
2533            ("-123.1234", -123123i128),
2534            ("-123.1", -123100i128),
2535            ("-123", -123000i128),
2536            ("0.0000123", 0i128),
2537            ("12.", 12000i128),
2538            ("-12.", -12000i128),
2539            ("00.1", 100i128),
2540            ("-00.1", -100i128),
2541            ("12345678912345678.1234", 12345678912345678123i128),
2542            ("-12345678912345678.1234", -12345678912345678123i128),
2543            ("99999999999999999.999", 99999999999999999999i128),
2544            ("-99999999999999999.999", -99999999999999999999i128),
2545            (".123", 123i128),
2546            ("-.123", -123i128),
2547            ("123.", 123000i128),
2548            ("-123.", -123000i128),
2549        ];
2550        for (s, i) in tests {
2551            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2552            assert_eq!(i, result_128.unwrap());
2553            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2554            assert_eq!(i256::from_i128(i), result_256.unwrap());
2555        }
2556
2557        let e_notation_tests = [
2558            ("1.23e3", "1230.0", 2),
2559            ("5.6714e+2", "567.14", 4),
2560            ("5.6714e-2", "0.056714", 4),
2561            ("5.6714e-2", "0.056714", 3),
2562            ("5.6741214125e2", "567.41214125", 4),
2563            ("8.91E4", "89100.0", 2),
2564            ("3.14E+5", "314000.0", 2),
2565            ("2.718e0", "2.718", 2),
2566            ("9.999999e-1", "0.9999999", 4),
2567            ("1.23e+3", "1230", 2),
2568            ("1.234559e+3", "1234.559", 2),
2569            ("1.00E-10", "0.0000000001", 11),
2570            ("1.23e-4", "0.000123", 2),
2571            ("9.876e7", "98760000.0", 2),
2572            ("5.432E+8", "543200000.0", 10),
2573            ("1.234567e9", "1234567000.0", 2),
2574            ("1.234567e2", "123.45670000", 2),
2575            ("4749.3e-5", "0.047493", 10),
2576            ("4749.3e+5", "474930000", 10),
2577            ("4749.3e-5", "0.047493", 1),
2578            ("4749.3e+5", "474930000", 1),
2579            ("0E-8", "0", 10),
2580            ("0E+6", "0", 10),
2581            ("1E-8", "0.00000001", 10),
2582            ("12E+6", "12000000", 10),
2583            ("12E-6", "0.000012", 10),
2584            ("0.1e-6", "0.0000001", 10),
2585            ("0.1e+6", "100000", 10),
2586            ("0.12e-6", "0.00000012", 10),
2587            ("0.12e+6", "120000", 10),
2588            ("000000000001e0", "000000000001", 3),
2589            ("000001.1034567002e0", "000001.1034567002", 3),
2590            ("1.234e16", "12340000000000000", 0),
2591            ("123.4e16", "1234000000000000000", 0),
2592        ];
2593        for (e, d, scale) in e_notation_tests {
2594            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2595            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2596            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2597            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2598            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2599            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2600        }
2601        let can_not_parse_tests = [
2602            "123,123",
2603            ".",
2604            "123.123.123",
2605            "",
2606            "+",
2607            "-",
2608            "e",
2609            "1.3e+e3",
2610            "5.6714ee-2",
2611            "4.11ee-+4",
2612            "4.11e++4",
2613            "1.1e.12",
2614            "1.23e+3.",
2615            "1.23e+3.1",
2616        ];
2617        for s in can_not_parse_tests {
2618            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2619            assert_eq!(
2620                format!("Parser error: can't parse the string value {s} to decimal"),
2621                result_128.unwrap_err().to_string()
2622            );
2623            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2624            assert_eq!(
2625                format!("Parser error: can't parse the string value {s} to decimal"),
2626                result_256.unwrap_err().to_string()
2627            );
2628        }
2629        let overflow_parse_tests = [
2630            ("12345678", 3),
2631            ("1.2345678e7", 3),
2632            ("12345678.9", 3),
2633            ("1.23456789e+7", 3),
2634            ("99999999.99", 3),
2635            ("9.999999999e7", 3),
2636            ("12345678908765.123456", 3),
2637            ("123456789087651234.56e-4", 3),
2638            ("1234560000000", 0),
2639            ("1.23456e12", 0),
2640        ];
2641        for (s, scale) in overflow_parse_tests {
2642            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2643            let expected_128 = "Parser error: parse decimal overflow";
2644            let actual_128 = result_128.unwrap_err().to_string();
2645
2646            assert!(
2647                actual_128.contains(expected_128),
2648                "actual: '{actual_128}', expected: '{expected_128}'"
2649            );
2650
2651            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2652            let expected_256 = "Parser error: parse decimal overflow";
2653            let actual_256 = result_256.unwrap_err().to_string();
2654
2655            assert!(
2656                actual_256.contains(expected_256),
2657                "actual: '{actual_256}', expected: '{expected_256}'"
2658            );
2659        }
2660
2661        let edge_tests_128 = [
2662            (
2663                "99999999999999999999999999999999999999",
2664                99999999999999999999999999999999999999i128,
2665                0,
2666            ),
2667            (
2668                "999999999999999999999999999999999999.99",
2669                99999999999999999999999999999999999999i128,
2670                2,
2671            ),
2672            (
2673                "9999999999999999999999999.9999999999999",
2674                99999999999999999999999999999999999999i128,
2675                13,
2676            ),
2677            (
2678                "9999999999999999999999999",
2679                99999999999999999999999990000000000000i128,
2680                13,
2681            ),
2682            (
2683                "0.99999999999999999999999999999999999999",
2684                99999999999999999999999999999999999999i128,
2685                38,
2686            ),
2687            (
2688                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2689                0i128,
2690                15,
2691            ),
2692            ("1.016744e-320", 0i128, 15),
2693            ("-1e3", -1000000000i128, 6),
2694            ("+1e3", 1000000000i128, 6),
2695            ("-1e31", -10000000000000000000000000000000000000i128, 6),
2696        ];
2697        for (s, i, scale) in edge_tests_128 {
2698            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2699            assert_eq!(i, result_128.unwrap());
2700        }
2701        let edge_tests_256 = [
2702            (
2703                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2704                i256::from_string(
2705                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2706                )
2707                .unwrap(),
2708                0,
2709            ),
2710            (
2711                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2712                i256::from_string(
2713                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2714                )
2715                .unwrap(),
2716                4,
2717            ),
2718            (
2719                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2720                i256::from_string(
2721                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2722                )
2723                .unwrap(),
2724                26,
2725            ),
2726            (
2727                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2728                i256::from_string(
2729                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2730                )
2731                .unwrap(),
2732                26,
2733            ),
2734            (
2735                "99999999999999999999999999999999999999999999999999",
2736                i256::from_string(
2737                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2738                )
2739                .unwrap(),
2740                26,
2741            ),
2742            (
2743                "9.9999999999999999999999999999999999999999999999999e+49",
2744                i256::from_string(
2745                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2746                )
2747                .unwrap(),
2748                26,
2749            ),
2750        ];
2751        for (s, i, scale) in edge_tests_256 {
2752            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2753            assert_eq!(i, result.unwrap());
2754        }
2755    }
2756
2757    #[test]
2758    fn test_parse_empty() {
2759        assert_eq!(Int32Type::parse(""), None);
2760        assert_eq!(Int64Type::parse(""), None);
2761        assert_eq!(UInt32Type::parse(""), None);
2762        assert_eq!(UInt64Type::parse(""), None);
2763        assert_eq!(Float32Type::parse(""), None);
2764        assert_eq!(Float64Type::parse(""), None);
2765        assert_eq!(Int32Type::parse("+"), None);
2766        assert_eq!(Int64Type::parse("+"), None);
2767        assert_eq!(UInt32Type::parse("+"), None);
2768        assert_eq!(UInt64Type::parse("+"), None);
2769        assert_eq!(Float32Type::parse("+"), None);
2770        assert_eq!(Float64Type::parse("+"), None);
2771        assert_eq!(TimestampNanosecondType::parse(""), None);
2772        assert_eq!(Date32Type::parse(""), None);
2773    }
2774
2775    #[test]
2776    fn test_parse_interval_month_day_nano_config() {
2777        let interval = parse_interval_month_day_nano_config(
2778            "1",
2779            IntervalParseConfig::new(IntervalUnit::Second),
2780        )
2781        .unwrap();
2782        assert_eq!(interval.months, 0);
2783        assert_eq!(interval.days, 0);
2784        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2785    }
2786}
arrow_cast/parse.rs

arrow_cast/
parse.rs