mz_expr/scalar/func/
format.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Data type formatting functions.
//!
//! <https://www.postgresql.org/docs/current/functions-formatting.html>
13
14#![allow(non_camel_case_types)]
15
16use std::fmt;
17
18use aho_corasick::AhoCorasickBuilder;
19use enum_iterator::Sequence;
20use mz_lowertest::MzReflect;
21use mz_ore::cast::CastFrom;
22use num_enum::{IntoPrimitive, TryFromPrimitive};
23use serde::{Deserialize, Serialize};
24
25use crate::scalar::func::TimestampLike;
26
27/// The raw tokens that can appear in a format string. Many of these tokens
28/// overlap, in which case the longest matching token should be selected.
29#[repr(u8)]
30#[derive(Eq, PartialEq, TryFromPrimitive, IntoPrimitive, Sequence)]
31enum DateTimeToken {
32    a_d,
33    A_D,
34    a_m,
35    A_M,
36    ad,
37    AD,
38    am,
39    AM,
40    b_c,
41    B_C,
42    bc,
43    BC,
44    cc,
45    CC,
46    d,
47    D,
48    day,
49    Day,
50    DAY,
51    dd,
52    DD,
53    ddd,
54    DDD,
55    dy,
56    Dy,
57    DY,
58    fm,
59    FM,
60    fx,
61    FX,
62    hh,
63    HH,
64    hh12,
65    HH12,
66    hh24,
67    HH24,
68    i,
69    I,
70    id,
71    ID,
72    iddd,
73    IDDD,
74    iw,
75    IW,
76    iy,
77    IY,
78    iyy,
79    IYY,
80    iyyy,
81    IYYY,
82    j,
83    J,
84    mi,
85    MI,
86    mm,
87    MM,
88    mon,
89    Mon,
90    MON,
91    month,
92    Month,
93    MONTH,
94    ms,
95    MS,
96    OF,
97    p_m,
98    P_M,
99    pm,
100    PM,
101    q,
102    Q,
103    rm,
104    RM,
105    ss,
106    SS,
107    ssss,
108    SSSS,
109    sssss,
110    SSSSS,
111    tz,
112    TZ,
113    TZH,
114    TZM,
115    us,
116    US,
117    w,
118    W,
119    ww,
120    WW,
121    y_yyy,
122    Y_YYY,
123    y,
124    Y,
125    yy,
126    YY,
127    yyy,
128    YYY,
129    yyyy,
130    YYYY,
131    th,
132    TH,
133    EscQuote,
134    Quote,
135}
136
137impl DateTimeToken {
138    /// Returns the literal sequence of characters that this `DateTimeToken`
139    /// matches.
140    const fn pattern(&self) -> &'static str {
141        match self {
142            DateTimeToken::AD => "AD",
143            DateTimeToken::ad => "ad",
144            DateTimeToken::A_D => "A.D.",
145            DateTimeToken::a_d => "a.d.",
146            DateTimeToken::AM => "AM",
147            DateTimeToken::am => "am",
148            DateTimeToken::A_M => "A.M.",
149            DateTimeToken::a_m => "a.m.",
150            DateTimeToken::BC => "BC",
151            DateTimeToken::bc => "bc",
152            DateTimeToken::B_C => "B.C.",
153            DateTimeToken::b_c => "b.c.",
154            DateTimeToken::CC => "CC",
155            DateTimeToken::cc => "cc",
156            DateTimeToken::D => "D",
157            DateTimeToken::d => "d",
158            DateTimeToken::DAY => "DAY",
159            DateTimeToken::Day => "Day",
160            DateTimeToken::day => "day",
161            DateTimeToken::DD => "DD",
162            DateTimeToken::dd => "dd",
163            DateTimeToken::DDD => "DDD",
164            DateTimeToken::ddd => "ddd",
165            DateTimeToken::DY => "DY",
166            DateTimeToken::Dy => "Dy",
167            DateTimeToken::dy => "dy",
168            DateTimeToken::FM => "FM",
169            DateTimeToken::fm => "fm",
170            DateTimeToken::FX => "FX",
171            DateTimeToken::fx => "fx",
172            DateTimeToken::HH => "HH",
173            DateTimeToken::hh => "hh",
174            DateTimeToken::HH12 => "HH12",
175            DateTimeToken::hh12 => "hh12",
176            DateTimeToken::HH24 => "HH24",
177            DateTimeToken::hh24 => "hh24",
178            DateTimeToken::I => "I",
179            DateTimeToken::i => "i",
180            DateTimeToken::ID => "ID",
181            DateTimeToken::id => "id",
182            DateTimeToken::IDDD => "IDDD",
183            DateTimeToken::iddd => "iddd",
184            DateTimeToken::IW => "IW",
185            DateTimeToken::iw => "iw",
186            DateTimeToken::IY => "IY",
187            DateTimeToken::iy => "iy",
188            DateTimeToken::IYY => "IYY",
189            DateTimeToken::iyy => "iyy",
190            DateTimeToken::IYYY => "IYYY",
191            DateTimeToken::iyyy => "iyyy",
192            DateTimeToken::J => "J",
193            DateTimeToken::j => "j",
194            DateTimeToken::MI => "MI",
195            DateTimeToken::mi => "mi",
196            DateTimeToken::MM => "MM",
197            DateTimeToken::mm => "mm",
198            DateTimeToken::MON => "MON",
199            DateTimeToken::Mon => "Mon",
200            DateTimeToken::mon => "mon",
201            DateTimeToken::MONTH => "MONTH",
202            DateTimeToken::Month => "Month",
203            DateTimeToken::month => "month",
204            DateTimeToken::MS => "MS",
205            DateTimeToken::ms => "ms",
206            DateTimeToken::OF => "OF",
207            DateTimeToken::PM => "PM",
208            DateTimeToken::pm => "pm",
209            DateTimeToken::P_M => "P.M.",
210            DateTimeToken::p_m => "p.m.",
211            DateTimeToken::Q => "Q",
212            DateTimeToken::q => "q",
213            DateTimeToken::rm => "rm",
214            DateTimeToken::RM => "RM",
215            DateTimeToken::SS => "ss",
216            DateTimeToken::ss => "SS",
217            DateTimeToken::SSSS => "SSSS",
218            DateTimeToken::ssss => "ssss",
219            DateTimeToken::SSSSS => "SSSSS",
220            DateTimeToken::sssss => "sssss",
221            DateTimeToken::TZ => "TZ",
222            DateTimeToken::tz => "tz",
223            DateTimeToken::TZH => "TZH",
224            DateTimeToken::TZM => "TZM",
225            DateTimeToken::US => "US",
226            DateTimeToken::us => "us",
227            DateTimeToken::W => "W",
228            DateTimeToken::w => "w",
229            DateTimeToken::WW => "ww",
230            DateTimeToken::ww => "WW",
231            DateTimeToken::Y => "Y",
232            DateTimeToken::y => "y",
233            DateTimeToken::Y_YYY => "Y,YYY",
234            DateTimeToken::y_yyy => "y,yyy",
235            DateTimeToken::YY => "YY",
236            DateTimeToken::yy => "yy",
237            DateTimeToken::YYY => "YYY",
238            DateTimeToken::yyy => "yyy",
239            DateTimeToken::YYYY => "YYYY",
240            DateTimeToken::yyyy => "yyyy",
241            DateTimeToken::Quote => "\"",
242            DateTimeToken::EscQuote => "\\\"",
243            DateTimeToken::TH => "TH",
244            DateTimeToken::th => "th",
245        }
246    }
247
248    /// Returns the list of all known patterns, in the same order as the enum
249    /// variants.
250    fn patterns() -> Vec<&'static str> {
251        enum_iterator::all::<Self>().map(|v| v.pattern()).collect()
252    }
253
254    /// Returns the `DateTimeField` associated with this token, if any.
255    ///
256    /// Some tokens do not correspond directly to a field, but instead modify
257    /// other fields.
258    fn field(&self) -> Option<DateTimeField> {
259        use DateTimeToken::*;
260        use WordCaps::*;
261        match self {
262            AD | BC => Some(DateTimeField::Era {
263                dots: false,
264                caps: true,
265            }),
266            ad | bc => Some(DateTimeField::Era {
267                dots: false,
268                caps: false,
269            }),
270            A_D | B_C => Some(DateTimeField::Era {
271                dots: true,
272                caps: true,
273            }),
274            a_d | b_c => Some(DateTimeField::Era {
275                dots: true,
276                caps: false,
277            }),
278            AM | PM => Some(DateTimeField::Meridiem {
279                dots: false,
280                caps: true,
281            }),
282            am | pm => Some(DateTimeField::Meridiem {
283                dots: false,
284                caps: false,
285            }),
286            A_M | P_M => Some(DateTimeField::Meridiem {
287                dots: true,
288                caps: true,
289            }),
290            a_m | p_m => Some(DateTimeField::Meridiem {
291                dots: true,
292                caps: false,
293            }),
294            cc | CC => Some(DateTimeField::Century),
295            d | D => Some(DateTimeField::DayOfWeek),
296            day => Some(DateTimeField::DayName {
297                abbrev: false,
298                caps: NoCaps,
299            }),
300            Day => Some(DateTimeField::DayName {
301                abbrev: false,
302                caps: FirstCaps,
303            }),
304            DAY => Some(DateTimeField::DayName {
305                abbrev: false,
306                caps: AllCaps,
307            }),
308            dy => Some(DateTimeField::DayName {
309                abbrev: true,
310                caps: NoCaps,
311            }),
312            Dy => Some(DateTimeField::DayName {
313                abbrev: true,
314                caps: FirstCaps,
315            }),
316            DY => Some(DateTimeField::DayName {
317                abbrev: true,
318                caps: AllCaps,
319            }),
320            dd | DD => Some(DateTimeField::DayOfMonth),
321            ddd | DDD => Some(DateTimeField::DayOfYear),
322            fm | FM | fx | FX | th | TH | Quote | EscQuote => None,
323            hh | HH | hh12 | HH12 => Some(DateTimeField::Hour12),
324            hh24 | HH24 => Some(DateTimeField::Hour24),
325            id | ID => Some(DateTimeField::IsoDayOfWeek),
326            iddd | IDDD => Some(DateTimeField::IsoDayOfYear),
327            iw | IW => Some(DateTimeField::IsoWeekOfYear),
328            j | J => Some(DateTimeField::JulianDay),
329            mi | MI => Some(DateTimeField::Minute),
330            mm | MM => Some(DateTimeField::MonthOfYear),
331            mon => Some(DateTimeField::MonthName {
332                abbrev: true,
333                caps: NoCaps,
334            }),
335            Mon => Some(DateTimeField::MonthName {
336                abbrev: true,
337                caps: FirstCaps,
338            }),
339            MON => Some(DateTimeField::MonthName {
340                abbrev: true,
341                caps: AllCaps,
342            }),
343            month => Some(DateTimeField::MonthName {
344                abbrev: false,
345                caps: NoCaps,
346            }),
347            Month => Some(DateTimeField::MonthName {
348                abbrev: false,
349                caps: FirstCaps,
350            }),
351            MONTH => Some(DateTimeField::MonthName {
352                abbrev: false,
353                caps: AllCaps,
354            }),
355            ms | MS => Some(DateTimeField::Millisecond),
356            OF => Some(DateTimeField::TimezoneOffset),
357            q | Q => Some(DateTimeField::Quarter),
358            rm => Some(DateTimeField::MonthInRomanNumerals { caps: false }),
359            RM => Some(DateTimeField::MonthInRomanNumerals { caps: true }),
360            ss | SS => Some(DateTimeField::Second),
361            ssss | SSSS | sssss | SSSSS => Some(DateTimeField::SecondsPastMidnight),
362            tz => Some(DateTimeField::Timezone { caps: false }),
363            TZ => Some(DateTimeField::Timezone { caps: true }),
364            TZH => Some(DateTimeField::TimezoneHours),
365            TZM => Some(DateTimeField::TimezoneMinutes),
366            us | US => Some(DateTimeField::Microsecond),
367            w | W => Some(DateTimeField::WeekOfMonth),
368            ww | WW => Some(DateTimeField::WeekOfYear),
369            y | Y => Some(DateTimeField::Year1),
370            yy | YY => Some(DateTimeField::Year2),
371            yyy | YYY => Some(DateTimeField::Year3),
372            yyyy | YYYY => Some(DateTimeField::Year4 { separator: false }),
373            y_yyy | Y_YYY => Some(DateTimeField::Year4 { separator: true }),
374            i | I => Some(DateTimeField::IsoYear1),
375            iy | IY => Some(DateTimeField::IsoYear2),
376            iyy | IYY => Some(DateTimeField::IsoYear3),
377            iyyy | IYYY => Some(DateTimeField::IsoYear4),
378        }
379    }
380
381    /// Returns how this token should be rendered if it appears within quotes.
382    /// This is usually the same string as the `pattern` method returns, but
383    /// not always.
384    fn as_literal(&self) -> &'static str {
385        match self {
386            DateTimeToken::Quote => "",
387            DateTimeToken::EscQuote => "\"",
388            _ => self.pattern(),
389        }
390    }
391
392    /// Returns whether this token is a fill mode toggle.
393    fn is_fill_mode_toggle(&self) -> bool {
394        matches!(self, DateTimeToken::fm | DateTimeToken::FM)
395    }
396
397    /// Returns how this token affects the ordinal mode, if at all.
398    fn ordinal_mode(&self) -> OrdinalMode {
399        match self {
400            DateTimeToken::th => OrdinalMode::Lower,
401            DateTimeToken::TH => OrdinalMode::Upper,
402            _ => OrdinalMode::None,
403        }
404    }
405}
406
407/// Specifies the ordinal suffix that should be attached to numeric fields.
408#[derive(
409    Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash, Serialize, Deserialize, MzReflect,
410)]
411enum OrdinalMode {
412    /// No ordinal suffix.
413    None,
414    /// A lowercase ordinal suffix.
415    Lower,
416    /// An uppercase ordinal suffix.
417    Upper,
418}
419
420impl OrdinalMode {
421    fn render(self, out: &mut impl fmt::Write, n: impl Into<i64>) -> Result<(), fmt::Error> {
422        let n = n.into();
423        // Numbers that end in teen always use "th" as their ordinal suffix.
424        // Otherwise the last digit determines the ordinal suffix.
425        let n = match n % 100 {
426            10..=19 => 0,
427            _ => n % 10,
428        };
429        match self {
430            OrdinalMode::None => Ok(()),
431            OrdinalMode::Lower => match n {
432                1 => out.write_str("st"),
433                2 => out.write_str("nd"),
434                3 => out.write_str("rd"),
435                _ => out.write_str("th"),
436            },
437            OrdinalMode::Upper => match n {
438                1 => out.write_str("ST"),
439                2 => out.write_str("ND"),
440                3 => out.write_str("RD"),
441                _ => out.write_str("TH"),
442            },
443        }
444    }
445}
446
447/// Specifies the capitalization of a word.
448#[allow(clippy::enum_variant_names)] // Having "Caps" in the variant names is clarifying.
449#[derive(
450    Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash, Serialize, Deserialize, MzReflect,
451)]
452enum WordCaps {
453    /// All of the letters should be capitalized.
454    AllCaps,
455    /// Only the first letter should be capitalized.
456    FirstCaps,
457    /// None of the letters should be capitalized.
458    NoCaps,
459}
460
461/// A date-time field.
462///
463/// The variants are largely self-evident, but are described in detail in the
464/// PostgreSQL documentation if necessary.
465#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Serialize, Deserialize, MzReflect)]
466enum DateTimeField {
467    Hour12,
468    Hour24,
469    Minute,
470    Second,
471    Millisecond,
472    Microsecond,
473    SecondsPastMidnight,
474    Meridiem { dots: bool, caps: bool },
475    Year1,
476    Year2,
477    Year3,
478    Year4 { separator: bool },
479    IsoYear1,
480    IsoYear2,
481    IsoYear3,
482    IsoYear4,
483    Era { dots: bool, caps: bool },
484    MonthName { abbrev: bool, caps: WordCaps },
485    MonthOfYear,
486    DayName { abbrev: bool, caps: WordCaps },
487    DayOfWeek,
488    IsoDayOfWeek,
489    DayOfMonth,
490    DayOfYear,
491    IsoDayOfYear,
492    WeekOfMonth,
493    WeekOfYear,
494    IsoWeekOfYear,
495    Century,
496    JulianDay,
497    Quarter,
498    MonthInRomanNumerals { caps: bool },
499    Timezone { caps: bool },
500    TimezoneHours,
501    TimezoneMinutes,
502    TimezoneOffset,
503}
504
505/// An element of a date-time format string.
506#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, MzReflect)]
507enum DateTimeFormatNode {
508    /// A field whose value will be computed from the input timestamp.
509    Field {
510        /// The inner field.
511        field: DateTimeField,
512        /// Whether the field should be padded with spaces to its maximum width.
513        /// Does not have an effect for all fields, as the width of some fields
514        /// is unknowable.
515        fill: bool,
516        /// Whether the field should be followed with an ordinal suffix, like
517        /// "th." Only meaningful for numeric fields.
518        ordinal: OrdinalMode,
519    },
520    /// A literal character.
521    Literal(char),
522}
523
// Name tables used when rendering textual fields. Weekday tables start at
// Sunday (index 0); month tables start at January (index 0). Each family has
// one table per capitalization so rendering is a plain lookup.

/// Full weekday names in upper case, Sunday first.
const WEEKDAYS_ALL_CAPS: [&str; 7] = [
    "SUNDAY",
    "MONDAY",
    "TUESDAY",
    "WEDNESDAY",
    "THURSDAY",
    "FRIDAY",
    "SATURDAY",
];

/// Full weekday names with an initial capital, Sunday first.
const WEEKDAYS_FIRST_CAPS: [&str; 7] = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
];

/// Full weekday names in lower case, Sunday first.
const WEEKDAYS_NO_CAPS: [&str; 7] = [
    "sunday",
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
];

/// Three-letter weekday abbreviations in upper case, Sunday first.
const WEEKDAYS_ABBREV_ALL_CAPS: [&str; 7] = ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"];

/// Three-letter weekday abbreviations with an initial capital, Sunday first.
const WEEKDAYS_ABBREV_FIRST_CAPS: [&str; 7] = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

/// Three-letter weekday abbreviations in lower case, Sunday first.
const WEEKDAYS_ABBREV_NO_CAPS: [&str; 7] = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"];

/// Full month names in upper case, January first.
const MONTHS_ALL_CAPS: [&str; 12] = [
    "JANUARY",
    "FEBRUARY",
    "MARCH",
    "APRIL",
    "MAY",
    "JUNE",
    "JULY",
    "AUGUST",
    "SEPTEMBER",
    "OCTOBER",
    "NOVEMBER",
    "DECEMBER",
];

/// Full month names with an initial capital, January first.
const MONTHS_FIRST_CAPS: [&str; 12] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];

/// Full month names in lower case, January first.
const MONTHS_NO_CAPS: [&str; 12] = [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
];

/// Three-letter month abbreviations in upper case, January first.
const MONTHS_ABBREV_ALL_CAPS: [&str; 12] = [
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
];

/// Three-letter month abbreviations with an initial capital, January first.
const MONTHS_ABBREV_FIRST_CAPS: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];

/// Three-letter month abbreviations in lower case, January first.
const MONTHS_ABBREV_NO_CAPS: [&str; 12] = [
    "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
];

/// Month numbers 1 through 12 as lower-case Roman numerals.
const MONTHS_ROMAN_NO_CAPS: [&str; 12] = [
    "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii",
];

/// Month numbers 1 through 12 as upper-case Roman numerals.
const MONTHS_ROMAN_CAPS: [&str; 12] = [
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII",
];
624
625impl DateTimeFormatNode {
626    fn render(&self, buf: &mut impl fmt::Write, ts: &impl TimestampLike) -> Result<(), fmt::Error> {
627        use WordCaps::*;
628        match self {
629            DateTimeFormatNode::Literal(ch) => buf.write_char(*ch),
630            DateTimeFormatNode::Field {
631                field,
632                fill,
633                ordinal,
634            } => {
635                macro_rules! write_num {
636                    ($n:expr, $width:expr) => {{
637                        write!(
638                            buf,
639                            "{:0width$}",
640                            $n,
641                            width = if *fill { $width } else { 0 }
642                        )?;
643                        ordinal.render(buf, $n)
644                    }};
645                    ($n:expr) => {
646                        write_num!($n, 0)
647                    };
648                }
649
650                macro_rules! write_str {
651                    ($s:expr, $width:expr) => {{ write!(buf, "{:width$}", $s, width = if *fill { $width } else { 0 }) }};
652                    ($s:expr) => {
653                        write_str!($s, 0)
654                    };
655                }
656
657                match field {
658                    DateTimeField::Era {
659                        dots: false,
660                        caps: true,
661                    } => write_str!(if ts.year_ce().0 { "AD" } else { "BC" }),
662                    DateTimeField::Era {
663                        dots: false,
664                        caps: false,
665                    } => write_str!(if ts.year_ce().0 { "ad" } else { "bc" }),
666                    DateTimeField::Era {
667                        dots: true,
668                        caps: true,
669                    } => write_str!(if ts.year_ce().0 { "A.D." } else { "B.C." }),
670                    DateTimeField::Era {
671                        dots: true,
672                        caps: false,
673                    } => write_str!(if ts.year_ce().0 { "a.d." } else { "b.c." }),
674                    DateTimeField::Meridiem {
675                        dots: false,
676                        caps: true,
677                    } => write_str!(if ts.hour12().0 { "PM" } else { "AM" }),
678                    DateTimeField::Meridiem {
679                        dots: false,
680                        caps: false,
681                    } => write_str!(if ts.hour12().0 { "pm" } else { "am" }),
682                    DateTimeField::Meridiem {
683                        dots: true,
684                        caps: true,
685                    } => write_str!(if ts.hour12().0 { "P.M." } else { "A.M." }),
686                    DateTimeField::Meridiem {
687                        dots: true,
688                        caps: false,
689                    } => write_str!(if ts.hour12().0 { "p.m." } else { "a.m." }),
690                    DateTimeField::Century => {
691                        let n = if ts.year() > 0 {
692                            (ts.year() - 1) / 100 + 1
693                        } else {
694                            ts.year() / 100 - 1
695                        };
696                        write_num!(n, if n >= 0 { 2 } else { 3 })
697                    }
698                    DateTimeField::DayOfWeek => write_num!(ts.weekday().number_from_sunday(), 1),
699                    DateTimeField::IsoDayOfWeek => write_num!(ts.weekday().number_from_monday(), 1),
700                    DateTimeField::DayName {
701                        abbrev: false,
702                        caps: AllCaps,
703                    } => write_str!(WEEKDAYS_ALL_CAPS[ts.weekday0()], 9),
704                    DateTimeField::DayName {
705                        abbrev: false,
706                        caps: FirstCaps,
707                    } => write_str!(WEEKDAYS_FIRST_CAPS[ts.weekday0()], 9),
708                    DateTimeField::DayName {
709                        abbrev: false,
710                        caps: NoCaps,
711                    } => write_str!(WEEKDAYS_NO_CAPS[ts.weekday0()], 9),
712                    DateTimeField::DayName {
713                        abbrev: true,
714                        caps: AllCaps,
715                    } => write_str!(WEEKDAYS_ABBREV_ALL_CAPS[ts.weekday0()]),
716                    DateTimeField::DayName {
717                        abbrev: true,
718                        caps: FirstCaps,
719                    } => write_str!(WEEKDAYS_ABBREV_FIRST_CAPS[ts.weekday0()]),
720                    DateTimeField::DayName {
721                        abbrev: true,
722                        caps: NoCaps,
723                    } => write_str!(WEEKDAYS_ABBREV_NO_CAPS[ts.weekday0()]),
724                    DateTimeField::DayOfMonth => write_num!(ts.day(), 2),
725                    DateTimeField::DayOfYear => write_num!(ts.ordinal(), 3),
726                    DateTimeField::Hour12 => write_num!(ts.hour12().1, 2),
727                    DateTimeField::Hour24 => write_num!(ts.hour(), 2),
728                    DateTimeField::IsoYear1 => write_num!(ts.iso_year_ce() % 10, 1),
729                    DateTimeField::IsoYear2 => write_num!(ts.iso_year_ce() % 100, 2),
730                    DateTimeField::IsoYear3 => write_num!(ts.iso_year_ce() % 1000, 3),
731                    DateTimeField::IsoYear4 => write_num!(ts.iso_year_ce(), 4),
732                    DateTimeField::IsoDayOfYear => write_num!(
733                        ts.iso_week().week0() * 7 + ts.weekday().number_from_monday(),
734                        3
735                    ),
736                    DateTimeField::IsoWeekOfYear => write_num!(ts.iso_week().week(), 2),
737                    DateTimeField::JulianDay => write_num!(ts.num_days_from_ce() + 1_721_425),
738                    DateTimeField::Minute => write_num!(ts.minute(), 2),
739                    DateTimeField::MonthOfYear => write_num!(ts.month(), 2),
740                    DateTimeField::MonthName {
741                        abbrev: true,
742                        caps: AllCaps,
743                    } => write_str!(MONTHS_ABBREV_ALL_CAPS[usize::cast_from(ts.month0())]),
744                    DateTimeField::MonthName {
745                        abbrev: true,
746                        caps: FirstCaps,
747                    } => write_str!(MONTHS_ABBREV_FIRST_CAPS[usize::cast_from(ts.month0())]),
748                    DateTimeField::MonthName {
749                        abbrev: true,
750                        caps: NoCaps,
751                    } => write_str!(MONTHS_ABBREV_NO_CAPS[usize::cast_from(ts.month0())]),
752                    DateTimeField::MonthName {
753                        abbrev: false,
754                        caps: AllCaps,
755                    } => write_str!(MONTHS_ALL_CAPS[usize::cast_from(ts.month0())], 9),
756                    DateTimeField::MonthName {
757                        abbrev: false,
758                        caps: FirstCaps,
759                    } => write_str!(MONTHS_FIRST_CAPS[usize::cast_from(ts.month0())], 9),
760                    DateTimeField::MonthName {
761                        abbrev: false,
762                        caps: NoCaps,
763                    } => write_str!(MONTHS_NO_CAPS[usize::cast_from(ts.month0())], 9),
764                    DateTimeField::Millisecond => write_num!(ts.nanosecond() / 1_000_000, 3),
765                    DateTimeField::Quarter => write_num!(ts.month0() / 3 + 1),
766                    DateTimeField::MonthInRomanNumerals { caps: true } => {
767                        write_str!(MONTHS_ROMAN_CAPS[usize::cast_from(ts.month0())], 4)
768                    }
769                    DateTimeField::MonthInRomanNumerals { caps: false } => {
770                        write_str!(MONTHS_ROMAN_NO_CAPS[usize::cast_from(ts.month0())], 4)
771                    }
772                    DateTimeField::Second => write_num!(ts.second(), 2),
773                    DateTimeField::SecondsPastMidnight => {
774                        write_num!(ts.num_seconds_from_midnight())
775                    }
776                    DateTimeField::Timezone { caps } => write_str!(ts.timezone_name(*caps)),
777                    DateTimeField::TimezoneOffset => write_str!(ts.timezone_offset()),
778                    DateTimeField::TimezoneHours => write_str!(ts.timezone_hours()),
779                    DateTimeField::TimezoneMinutes => write_str!(ts.timezone_minutes()),
780                    DateTimeField::Microsecond => write_num!(ts.nanosecond() / 1_000, 6),
781                    DateTimeField::WeekOfMonth => write_num!(ts.day0() / 7 + 1, 1),
782                    DateTimeField::WeekOfYear => write_num!(ts.ordinal0() / 7 + 1, 2),
783                    DateTimeField::Year1 => write_num!(ts.year_ce().1 % 10, 1),
784                    DateTimeField::Year2 => write_num!(ts.year_ce().1 % 100, 2),
785                    DateTimeField::Year3 => write_num!(ts.year_ce().1 % 1000, 3),
786                    DateTimeField::Year4 { separator: false } => write_num!(ts.year_ce().1, 4),
787                    DateTimeField::Year4 { separator: true } => {
788                        let n = ts.year_ce().1;
789                        write!(buf, "{},{:03}", n / 1000, n % 1000)?;
790                        ordinal.render(buf, n)
791                    }
792                }
793            }
794        }
795    }
796}
797
798/// A compiled date-time format string.
799#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, MzReflect)]
800pub struct DateTimeFormat(Vec<DateTimeFormatNode>);
801
802impl DateTimeFormat {
803    /// Compiles a new `DateTimeFormat` from the input string `s`.
804    pub fn compile(s: &str) -> DateTimeFormat {
805        // The approach here uses the Aho-Corasick string searching algorithm to
806        // repeatedly and efficiently find the next token of interest. Tokens of
807        // interest are typically field specifiers, like "DDDD", or field
808        // modifiers, like "FM". Characters in between tokens of interest are
809        // recorded as literals. We also consider a double quote a token of
810        // interest, as a double quote disables matching of field
811        // specifiers/modifiers until the next double quote.
812
813        struct Match {
814            start: usize,
815            end: usize,
816            token: DateTimeToken,
817        }
818
819        let matcher = AhoCorasickBuilder::new()
820            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
821            .build(DateTimeToken::patterns())
822            .unwrap_or_else(|e| panic!("automaton build error: {e}"));
823
824        let matches: Vec<_> = matcher
825            .find_iter(&s)
826            .map(|m| Match {
827                start: m.start(),
828                end: m.end(),
829                token: DateTimeToken::try_from(
830                    u8::try_from(m.pattern().as_u32()).expect("match index fits in a u8"),
831                )
832                .expect("match pattern missing"),
833            })
834            .collect();
835
836        let mut out = Vec::new();
837        let mut pos = 0;
838        let mut in_quotes = false;
839        for i in 0..matches.len() {
840            // Any characters since the last match are to be taken literally.
841            for c in s[pos..matches[i].start].chars() {
842                if !(in_quotes && c == '\\') {
843                    // Backslash is an escape character inside of quotes.
844                    out.push(DateTimeFormatNode::Literal(c));
845                }
846            }
847
848            if in_quotes {
849                // If we see a format specifier inside of a quoted block, it
850                // is taken literally.
851                for c in matches[i].token.as_literal().chars() {
852                    out.push(DateTimeFormatNode::Literal(c))
853                }
854            } else if let Some(field) = matches[i].token.field() {
855                // We found a format specifier. Look backwards for a fill mode
856                // toggle (fill mode is on by default), and forwards for an
857                // ordinal suffix specifier (default is no ordinal suffix).
858                let fill = i == 0
859                    || matches[i - 1].end != matches[i].start
860                    || !matches[i - 1].token.is_fill_mode_toggle();
861                let ordinal = match matches.get(i + 1) {
862                    Some(m) if m.start == matches[i].end => m.token.ordinal_mode(),
863                    _ => OrdinalMode::None,
864                };
865                out.push(DateTimeFormatNode::Field {
866                    field,
867                    fill,
868                    ordinal,
869                });
870            }
871
872            if matches[i].token == DateTimeToken::Quote {
873                in_quotes = !in_quotes;
874            }
875            pos = matches[i].end;
876        }
877        for c in s[pos..].chars() {
878            out.push(DateTimeFormatNode::Literal(c));
879        }
880        DateTimeFormat(out)
881    }
882
883    /// Renders the format string using the timestamp `ts` as the input. The
884    /// placeholders in the format string will be filled in appropriately
885    /// according to the value of `ts`.
886    pub fn render(&self, ts: &impl TimestampLike) -> String {
887        let mut out = String::new();
888        for node in &self.0 {
889            node.render(&mut out, ts)
890                .expect("rendering to string cannot fail");
891        }
892        out
893    }
894}