Skip to main content

mz_expr/scalar/func/
format.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Data type formatting functions.
11//!
12//! <https://www.postgresql.org/docs/current/functions-formatting.html>
13
14#![allow(non_camel_case_types)]
15
16use std::fmt;
17
18use aho_corasick::AhoCorasickBuilder;
19use enum_iterator::Sequence;
20use mz_lowertest::MzReflect;
21use mz_ore::cast::CastFrom;
22use num_enum::{IntoPrimitive, TryFromPrimitive};
23use serde::{Deserialize, Serialize};
24
25use crate::scalar::func::TimestampLike;
26
/// The raw tokens that can appear in a format string. Many of these tokens
/// overlap, in which case the longest matching token should be selected.
///
/// Variant names generally follow the spelling of the format-string text they
/// stand for, with punctuation written as `_`. The authoritative mapping from
/// a variant to the text it matches is `DateTimeToken::pattern`.
///
/// The declaration order is significant. `DateTimeToken::patterns` lists the
/// patterns in variant order, and `DateTimeFormat::compile` turns the index of
/// a matched pattern back into a variant through the `u8` discriminant, so the
/// two must stay in lockstep.
#[repr(u8)]
#[derive(Eq, PartialEq, TryFromPrimitive, IntoPrimitive, Sequence)]
enum DateTimeToken {
    a_d,
    A_D,
    a_m,
    A_M,
    ad,
    AD,
    am,
    AM,
    b_c,
    B_C,
    bc,
    BC,
    cc,
    CC,
    d,
    D,
    day,
    Day,
    DAY,
    dd,
    DD,
    ddd,
    DDD,
    dy,
    Dy,
    DY,
    // `fm`/`FM` and `fx`/`FX` are modifiers: `field()` returns `None` for them.
    fm,
    FM,
    fx,
    FX,
    hh,
    HH,
    hh12,
    HH12,
    hh24,
    HH24,
    i,
    I,
    id,
    ID,
    iddd,
    IDDD,
    iw,
    IW,
    iy,
    IY,
    iyy,
    IYY,
    iyyy,
    IYYY,
    j,
    J,
    mi,
    MI,
    mm,
    MM,
    mon,
    Mon,
    MON,
    month,
    Month,
    MONTH,
    ms,
    MS,
    OF,
    p_m,
    P_M,
    pm,
    PM,
    q,
    Q,
    rm,
    RM,
    ss,
    SS,
    ssss,
    SSSS,
    sssss,
    SSSSS,
    tz,
    TZ,
    TZH,
    TZM,
    us,
    US,
    w,
    W,
    ww,
    WW,
    y_yyy,
    Y_YYY,
    y,
    Y,
    yy,
    YY,
    yyy,
    YYY,
    yyyy,
    YYYY,
    // Ordinal suffix modifiers; see `ordinal_mode()`.
    th,
    TH,
    // A backslash followed by a double quote.
    EscQuote,
    // A bare double quote; toggles quoted mode in `DateTimeFormat::compile`.
    Quote,
}
136
137impl DateTimeToken {
138    /// Returns the literal sequence of characters that this `DateTimeToken`
139    /// matches.
140    const fn pattern(&self) -> &'static str {
141        match self {
142            DateTimeToken::AD => "AD",
143            DateTimeToken::ad => "ad",
144            DateTimeToken::A_D => "A.D.",
145            DateTimeToken::a_d => "a.d.",
146            DateTimeToken::AM => "AM",
147            DateTimeToken::am => "am",
148            DateTimeToken::A_M => "A.M.",
149            DateTimeToken::a_m => "a.m.",
150            DateTimeToken::BC => "BC",
151            DateTimeToken::bc => "bc",
152            DateTimeToken::B_C => "B.C.",
153            DateTimeToken::b_c => "b.c.",
154            DateTimeToken::CC => "CC",
155            DateTimeToken::cc => "cc",
156            DateTimeToken::D => "D",
157            DateTimeToken::d => "d",
158            DateTimeToken::DAY => "DAY",
159            DateTimeToken::Day => "Day",
160            DateTimeToken::day => "day",
161            DateTimeToken::DD => "DD",
162            DateTimeToken::dd => "dd",
163            DateTimeToken::DDD => "DDD",
164            DateTimeToken::ddd => "ddd",
165            DateTimeToken::DY => "DY",
166            DateTimeToken::Dy => "Dy",
167            DateTimeToken::dy => "dy",
168            DateTimeToken::FM => "FM",
169            DateTimeToken::fm => "fm",
170            DateTimeToken::FX => "FX",
171            DateTimeToken::fx => "fx",
172            DateTimeToken::HH => "HH",
173            DateTimeToken::hh => "hh",
174            DateTimeToken::HH12 => "HH12",
175            DateTimeToken::hh12 => "hh12",
176            DateTimeToken::HH24 => "HH24",
177            DateTimeToken::hh24 => "hh24",
178            DateTimeToken::I => "I",
179            DateTimeToken::i => "i",
180            DateTimeToken::ID => "ID",
181            DateTimeToken::id => "id",
182            DateTimeToken::IDDD => "IDDD",
183            DateTimeToken::iddd => "iddd",
184            DateTimeToken::IW => "IW",
185            DateTimeToken::iw => "iw",
186            DateTimeToken::IY => "IY",
187            DateTimeToken::iy => "iy",
188            DateTimeToken::IYY => "IYY",
189            DateTimeToken::iyy => "iyy",
190            DateTimeToken::IYYY => "IYYY",
191            DateTimeToken::iyyy => "iyyy",
192            DateTimeToken::J => "J",
193            DateTimeToken::j => "j",
194            DateTimeToken::MI => "MI",
195            DateTimeToken::mi => "mi",
196            DateTimeToken::MM => "MM",
197            DateTimeToken::mm => "mm",
198            DateTimeToken::MON => "MON",
199            DateTimeToken::Mon => "Mon",
200            DateTimeToken::mon => "mon",
201            DateTimeToken::MONTH => "MONTH",
202            DateTimeToken::Month => "Month",
203            DateTimeToken::month => "month",
204            DateTimeToken::MS => "MS",
205            DateTimeToken::ms => "ms",
206            DateTimeToken::OF => "OF",
207            DateTimeToken::PM => "PM",
208            DateTimeToken::pm => "pm",
209            DateTimeToken::P_M => "P.M.",
210            DateTimeToken::p_m => "p.m.",
211            DateTimeToken::Q => "Q",
212            DateTimeToken::q => "q",
213            DateTimeToken::rm => "rm",
214            DateTimeToken::RM => "RM",
215            DateTimeToken::SS => "ss",
216            DateTimeToken::ss => "SS",
217            DateTimeToken::SSSS => "SSSS",
218            DateTimeToken::ssss => "ssss",
219            DateTimeToken::SSSSS => "SSSSS",
220            DateTimeToken::sssss => "sssss",
221            DateTimeToken::TZ => "TZ",
222            DateTimeToken::tz => "tz",
223            DateTimeToken::TZH => "TZH",
224            DateTimeToken::TZM => "TZM",
225            DateTimeToken::US => "US",
226            DateTimeToken::us => "us",
227            DateTimeToken::W => "W",
228            DateTimeToken::w => "w",
229            DateTimeToken::WW => "ww",
230            DateTimeToken::ww => "WW",
231            DateTimeToken::Y => "Y",
232            DateTimeToken::y => "y",
233            DateTimeToken::Y_YYY => "Y,YYY",
234            DateTimeToken::y_yyy => "y,yyy",
235            DateTimeToken::YY => "YY",
236            DateTimeToken::yy => "yy",
237            DateTimeToken::YYY => "YYY",
238            DateTimeToken::yyy => "yyy",
239            DateTimeToken::YYYY => "YYYY",
240            DateTimeToken::yyyy => "yyyy",
241            DateTimeToken::Quote => "\"",
242            DateTimeToken::EscQuote => "\\\"",
243            DateTimeToken::TH => "TH",
244            DateTimeToken::th => "th",
245        }
246    }
247
248    /// Returns the list of all known patterns, in the same order as the enum
249    /// variants.
250    fn patterns() -> Vec<&'static str> {
251        enum_iterator::all::<Self>().map(|v| v.pattern()).collect()
252    }
253
254    /// Returns the `DateTimeField` associated with this token, if any.
255    ///
256    /// Some tokens do not correspond directly to a field, but instead modify
257    /// other fields.
258    fn field(&self) -> Option<DateTimeField> {
259        use DateTimeToken::*;
260        use WordCaps::*;
261        match self {
262            AD | BC => Some(DateTimeField::Era {
263                dots: false,
264                caps: true,
265            }),
266            ad | bc => Some(DateTimeField::Era {
267                dots: false,
268                caps: false,
269            }),
270            A_D | B_C => Some(DateTimeField::Era {
271                dots: true,
272                caps: true,
273            }),
274            a_d | b_c => Some(DateTimeField::Era {
275                dots: true,
276                caps: false,
277            }),
278            AM | PM => Some(DateTimeField::Meridiem {
279                dots: false,
280                caps: true,
281            }),
282            am | pm => Some(DateTimeField::Meridiem {
283                dots: false,
284                caps: false,
285            }),
286            A_M | P_M => Some(DateTimeField::Meridiem {
287                dots: true,
288                caps: true,
289            }),
290            a_m | p_m => Some(DateTimeField::Meridiem {
291                dots: true,
292                caps: false,
293            }),
294            cc | CC => Some(DateTimeField::Century),
295            d | D => Some(DateTimeField::DayOfWeek),
296            day => Some(DateTimeField::DayName {
297                abbrev: false,
298                caps: NoCaps,
299            }),
300            Day => Some(DateTimeField::DayName {
301                abbrev: false,
302                caps: FirstCaps,
303            }),
304            DAY => Some(DateTimeField::DayName {
305                abbrev: false,
306                caps: AllCaps,
307            }),
308            dy => Some(DateTimeField::DayName {
309                abbrev: true,
310                caps: NoCaps,
311            }),
312            Dy => Some(DateTimeField::DayName {
313                abbrev: true,
314                caps: FirstCaps,
315            }),
316            DY => Some(DateTimeField::DayName {
317                abbrev: true,
318                caps: AllCaps,
319            }),
320            dd | DD => Some(DateTimeField::DayOfMonth),
321            ddd | DDD => Some(DateTimeField::DayOfYear),
322            fm | FM | fx | FX | th | TH | Quote | EscQuote => None,
323            hh | HH | hh12 | HH12 => Some(DateTimeField::Hour12),
324            hh24 | HH24 => Some(DateTimeField::Hour24),
325            id | ID => Some(DateTimeField::IsoDayOfWeek),
326            iddd | IDDD => Some(DateTimeField::IsoDayOfYear),
327            iw | IW => Some(DateTimeField::IsoWeekOfYear),
328            j | J => Some(DateTimeField::JulianDay),
329            mi | MI => Some(DateTimeField::Minute),
330            mm | MM => Some(DateTimeField::MonthOfYear),
331            mon => Some(DateTimeField::MonthName {
332                abbrev: true,
333                caps: NoCaps,
334            }),
335            Mon => Some(DateTimeField::MonthName {
336                abbrev: true,
337                caps: FirstCaps,
338            }),
339            MON => Some(DateTimeField::MonthName {
340                abbrev: true,
341                caps: AllCaps,
342            }),
343            month => Some(DateTimeField::MonthName {
344                abbrev: false,
345                caps: NoCaps,
346            }),
347            Month => Some(DateTimeField::MonthName {
348                abbrev: false,
349                caps: FirstCaps,
350            }),
351            MONTH => Some(DateTimeField::MonthName {
352                abbrev: false,
353                caps: AllCaps,
354            }),
355            ms | MS => Some(DateTimeField::Millisecond),
356            OF => Some(DateTimeField::TimezoneOffset),
357            q | Q => Some(DateTimeField::Quarter),
358            rm => Some(DateTimeField::MonthInRomanNumerals { caps: false }),
359            RM => Some(DateTimeField::MonthInRomanNumerals { caps: true }),
360            ss | SS => Some(DateTimeField::Second),
361            ssss | SSSS | sssss | SSSSS => Some(DateTimeField::SecondsPastMidnight),
362            tz => Some(DateTimeField::Timezone { caps: false }),
363            TZ => Some(DateTimeField::Timezone { caps: true }),
364            TZH => Some(DateTimeField::TimezoneHours),
365            TZM => Some(DateTimeField::TimezoneMinutes),
366            us | US => Some(DateTimeField::Microsecond),
367            w | W => Some(DateTimeField::WeekOfMonth),
368            ww | WW => Some(DateTimeField::WeekOfYear),
369            y | Y => Some(DateTimeField::Year1),
370            yy | YY => Some(DateTimeField::Year2),
371            yyy | YYY => Some(DateTimeField::Year3),
372            yyyy | YYYY => Some(DateTimeField::Year4 { separator: false }),
373            y_yyy | Y_YYY => Some(DateTimeField::Year4 { separator: true }),
374            i | I => Some(DateTimeField::IsoYear1),
375            iy | IY => Some(DateTimeField::IsoYear2),
376            iyy | IYY => Some(DateTimeField::IsoYear3),
377            iyyy | IYYY => Some(DateTimeField::IsoYear4),
378        }
379    }
380
381    /// Returns how this token should be rendered if it appears within quotes.
382    /// This is usually the same string as the `pattern` method returns, but
383    /// not always.
384    fn as_literal(&self) -> &'static str {
385        match self {
386            DateTimeToken::Quote => "",
387            DateTimeToken::EscQuote => "\"",
388            _ => self.pattern(),
389        }
390    }
391
392    /// Returns whether this token is a fill mode toggle.
393    fn is_fill_mode_toggle(&self) -> bool {
394        matches!(self, DateTimeToken::fm | DateTimeToken::FM)
395    }
396
397    /// Returns how this token affects the ordinal mode, if at all.
398    fn ordinal_mode(&self) -> OrdinalMode {
399        match self {
400            DateTimeToken::th => OrdinalMode::Lower,
401            DateTimeToken::TH => OrdinalMode::Upper,
402            _ => OrdinalMode::None,
403        }
404    }
405}
406
/// Specifies the ordinal suffix that should be attached to numeric fields.
///
/// The mode is chosen by a `th` or `TH` token that directly follows a field
/// in the format string; see `DateTimeToken::ordinal_mode`.
#[derive(
    Debug,
    Eq,
    PartialEq,
    PartialOrd,
    Ord,
    Copy,
    Clone,
    Hash,
    Serialize,
    Deserialize,
    MzReflect
)]
enum OrdinalMode {
    /// No ordinal suffix.
    None,
    /// A lowercase ordinal suffix.
    Lower,
    /// An uppercase ordinal suffix.
    Upper,
}
429
430impl OrdinalMode {
431    fn render(self, out: &mut impl fmt::Write, n: impl Into<i64>) -> Result<(), fmt::Error> {
432        let n = n.into();
433        // Numbers that end in teen always use "th" as their ordinal suffix.
434        // Otherwise the last digit determines the ordinal suffix.
435        let n = match n % 100 {
436            10..=19 => 0,
437            _ => n % 10,
438        };
439        match self {
440            OrdinalMode::None => Ok(()),
441            OrdinalMode::Lower => match n {
442                1 => out.write_str("st"),
443                2 => out.write_str("nd"),
444                3 => out.write_str("rd"),
445                _ => out.write_str("th"),
446            },
447            OrdinalMode::Upper => match n {
448                1 => out.write_str("ST"),
449                2 => out.write_str("ND"),
450                3 => out.write_str("RD"),
451                _ => out.write_str("TH"),
452            },
453        }
454    }
455}
456
/// Specifies the capitalization of a word.
///
/// Used by the `DayName` and `MonthName` fields to pick which spelling of a
/// day or month name is rendered.
#[allow(clippy::enum_variant_names)] // Having "Caps" in the variant names is clarifying.
#[derive(
    Debug,
    Eq,
    PartialEq,
    PartialOrd,
    Ord,
    Copy,
    Clone,
    Hash,
    Serialize,
    Deserialize,
    MzReflect
)]
enum WordCaps {
    /// All of the letters should be capitalized.
    AllCaps,
    /// Only the first letter should be capitalized.
    FirstCaps,
    /// None of the letters should be capitalized.
    NoCaps,
}
480
/// A date-time field.
///
/// The variants are largely self-evident, but are described in detail in the
/// PostgreSQL documentation if necessary. The format-string tokens named
/// below are the ones `DateTimeToken::field` maps to each variant; where only
/// the uppercase spelling is shown, the lowercase spelling maps to the same
/// variant.
#[derive(
    Debug,
    Eq,
    PartialEq,
    PartialOrd,
    Ord,
    Clone,
    Hash,
    Serialize,
    Deserialize,
    MzReflect
)]
enum DateTimeField {
    /// Hour on the 12-hour clock (`HH`, `HH12`).
    Hour12,
    /// Hour on the 24-hour clock (`HH24`).
    Hour24,
    /// Minute (`MI`).
    Minute,
    /// Second (`SS`).
    Second,
    /// Millisecond (`MS`).
    Millisecond,
    /// Microsecond (`US`).
    Microsecond,
    /// Seconds past midnight (`SSSS`, `SSSSS`).
    SecondsPastMidnight,
    /// AM/PM indicator. `dots` selects the dotted spelling (`A.M.`) and
    /// `caps` selects the uppercase spelling.
    Meridiem { dots: bool, caps: bool },
    /// Last digit of the year (`Y`).
    Year1,
    /// Last two digits of the year (`YY`).
    Year2,
    /// Last three digits of the year (`YYY`).
    Year3,
    /// The full year (`YYYY`), or with a comma before the last three digits
    /// when `separator` is set (`Y,YYY`).
    Year4 { separator: bool },
    /// Last digit of the ISO year (`I`).
    IsoYear1,
    /// Last two digits of the ISO year (`IY`).
    IsoYear2,
    /// Last three digits of the ISO year (`IYY`).
    IsoYear3,
    /// The full ISO year (`IYYY`).
    IsoYear4,
    /// Era indicator (AD/BC). `dots` selects the dotted spelling (`A.D.`)
    /// and `caps` selects the uppercase spelling.
    Era { dots: bool, caps: bool },
    /// Month name (`MONTH`, `MON`), abbreviated when `abbrev` is set.
    MonthName { abbrev: bool, caps: WordCaps },
    /// Month number (`MM`).
    MonthOfYear,
    /// Day name (`DAY`, `DY`), abbreviated when `abbrev` is set.
    DayName { abbrev: bool, caps: WordCaps },
    /// Day of the week as a number counted from Sunday (`D`).
    DayOfWeek,
    /// Day of the week as a number counted from Monday (`ID`).
    IsoDayOfWeek,
    /// Day of the month (`DD`).
    DayOfMonth,
    /// Day of the year (`DDD`).
    DayOfYear,
    /// Day of the ISO week-numbering year (`IDDD`).
    IsoDayOfYear,
    /// Week of the month (`W`).
    WeekOfMonth,
    /// Week of the year (`WW`).
    WeekOfYear,
    /// ISO week of the year (`IW`).
    IsoWeekOfYear,
    /// Century (`CC`).
    Century,
    /// Julian day (`J`).
    JulianDay,
    /// Quarter (`Q`).
    Quarter,
    /// Month as a Roman numeral (`RM`), uppercase when `caps` is set.
    MonthInRomanNumerals { caps: bool },
    /// Time zone name (`TZ`, `tz`). `caps` is forwarded to the timestamp's
    /// `timezone_name` method.
    Timezone { caps: bool },
    /// Time zone hours (`TZH`).
    TimezoneHours,
    /// Time zone minutes (`TZM`).
    TimezoneMinutes,
    /// Time zone offset (`OF`).
    TimezoneOffset,
}
535
/// An element of a date-time format string.
///
/// A compiled format is a sequence of these nodes, rendered in order.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    MzReflect
)]
enum DateTimeFormatNode {
    /// A field whose value will be computed from the input timestamp.
    Field {
        /// The inner field.
        field: DateTimeField,
        /// Whether the field should be padded to its maximum width. When
        /// rendered, numeric fields are padded with leading zeros and textual
        /// fields with spaces. Does not have an effect for all fields, as the
        /// width of some fields is unknowable.
        fill: bool,
        /// Whether the field should be followed with an ordinal suffix, like
        /// "th". Only meaningful for numeric fields.
        ordinal: OrdinalMode,
    },
    /// A literal character.
    Literal(char),
}
565
// Name tables used when rendering textual fields.
//
// The weekday tables are indexed by the value of `TimestampLike::weekday0`
// and start with Sunday. The month tables are indexed by the value of
// `TimestampLike::month0` and start with January.

/// Full weekday names, all uppercase.
const WEEKDAYS_ALL_CAPS: [&str; 7] = [
    "SUNDAY",
    "MONDAY",
    "TUESDAY",
    "WEDNESDAY",
    "THURSDAY",
    "FRIDAY",
    "SATURDAY",
];

/// Full weekday names, first letter uppercase.
const WEEKDAYS_FIRST_CAPS: [&str; 7] = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
];

/// Full weekday names, all lowercase.
const WEEKDAYS_NO_CAPS: [&str; 7] = [
    "sunday",
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
];

/// Three-letter weekday abbreviations, all uppercase.
const WEEKDAYS_ABBREV_ALL_CAPS: [&str; 7] = ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"];

/// Three-letter weekday abbreviations, first letter uppercase.
const WEEKDAYS_ABBREV_FIRST_CAPS: [&str; 7] = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

/// Three-letter weekday abbreviations, all lowercase.
const WEEKDAYS_ABBREV_NO_CAPS: [&str; 7] = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"];

/// Full month names, all uppercase.
const MONTHS_ALL_CAPS: [&str; 12] = [
    "JANUARY",
    "FEBRUARY",
    "MARCH",
    "APRIL",
    "MAY",
    "JUNE",
    "JULY",
    "AUGUST",
    "SEPTEMBER",
    "OCTOBER",
    "NOVEMBER",
    "DECEMBER",
];

/// Full month names, first letter uppercase.
const MONTHS_FIRST_CAPS: [&str; 12] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];

/// Full month names, all lowercase.
const MONTHS_NO_CAPS: [&str; 12] = [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
];

/// Three-letter month abbreviations, all uppercase.
const MONTHS_ABBREV_ALL_CAPS: [&str; 12] = [
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
];

/// Three-letter month abbreviations, first letter uppercase.
const MONTHS_ABBREV_FIRST_CAPS: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];

/// Three-letter month abbreviations, all lowercase.
const MONTHS_ABBREV_NO_CAPS: [&str; 12] = [
    "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
];

/// Months as lowercase Roman numerals.
const MONTHS_ROMAN_NO_CAPS: [&str; 12] = [
    "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii",
];

/// Months as uppercase Roman numerals.
const MONTHS_ROMAN_CAPS: [&str; 12] = [
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII",
];
666
667impl DateTimeFormatNode {
668    fn render(&self, buf: &mut impl fmt::Write, ts: &impl TimestampLike) -> Result<(), fmt::Error> {
669        use WordCaps::*;
670        match self {
671            DateTimeFormatNode::Literal(ch) => buf.write_char(*ch),
672            DateTimeFormatNode::Field {
673                field,
674                fill,
675                ordinal,
676            } => {
677                macro_rules! write_num {
678                    ($n:expr, $width:expr) => {{
679                        write!(
680                            buf,
681                            "{:0width$}",
682                            $n,
683                            width = if *fill { $width } else { 0 }
684                        )?;
685                        ordinal.render(buf, $n)
686                    }};
687                    ($n:expr) => {
688                        write_num!($n, 0)
689                    };
690                }
691
692                macro_rules! write_str {
693                    ($s:expr, $width:expr) => {{ write!(buf, "{:width$}", $s, width = if *fill { $width } else { 0 }) }};
694                    ($s:expr) => {
695                        write_str!($s, 0)
696                    };
697                }
698
699                match field {
700                    DateTimeField::Era {
701                        dots: false,
702                        caps: true,
703                    } => write_str!(if ts.year_ce().0 { "AD" } else { "BC" }),
704                    DateTimeField::Era {
705                        dots: false,
706                        caps: false,
707                    } => write_str!(if ts.year_ce().0 { "ad" } else { "bc" }),
708                    DateTimeField::Era {
709                        dots: true,
710                        caps: true,
711                    } => write_str!(if ts.year_ce().0 { "A.D." } else { "B.C." }),
712                    DateTimeField::Era {
713                        dots: true,
714                        caps: false,
715                    } => write_str!(if ts.year_ce().0 { "a.d." } else { "b.c." }),
716                    DateTimeField::Meridiem {
717                        dots: false,
718                        caps: true,
719                    } => write_str!(if ts.hour12().0 { "PM" } else { "AM" }),
720                    DateTimeField::Meridiem {
721                        dots: false,
722                        caps: false,
723                    } => write_str!(if ts.hour12().0 { "pm" } else { "am" }),
724                    DateTimeField::Meridiem {
725                        dots: true,
726                        caps: true,
727                    } => write_str!(if ts.hour12().0 { "P.M." } else { "A.M." }),
728                    DateTimeField::Meridiem {
729                        dots: true,
730                        caps: false,
731                    } => write_str!(if ts.hour12().0 { "p.m." } else { "a.m." }),
732                    DateTimeField::Century => {
733                        let n = if ts.year() > 0 {
734                            (ts.year() - 1) / 100 + 1
735                        } else {
736                            ts.year() / 100 - 1
737                        };
738                        write_num!(n, if n >= 0 { 2 } else { 3 })
739                    }
740                    DateTimeField::DayOfWeek => write_num!(ts.weekday().number_from_sunday(), 1),
741                    DateTimeField::IsoDayOfWeek => write_num!(ts.weekday().number_from_monday(), 1),
742                    DateTimeField::DayName {
743                        abbrev: false,
744                        caps: AllCaps,
745                    } => write_str!(WEEKDAYS_ALL_CAPS[ts.weekday0()], 9),
746                    DateTimeField::DayName {
747                        abbrev: false,
748                        caps: FirstCaps,
749                    } => write_str!(WEEKDAYS_FIRST_CAPS[ts.weekday0()], 9),
750                    DateTimeField::DayName {
751                        abbrev: false,
752                        caps: NoCaps,
753                    } => write_str!(WEEKDAYS_NO_CAPS[ts.weekday0()], 9),
754                    DateTimeField::DayName {
755                        abbrev: true,
756                        caps: AllCaps,
757                    } => write_str!(WEEKDAYS_ABBREV_ALL_CAPS[ts.weekday0()]),
758                    DateTimeField::DayName {
759                        abbrev: true,
760                        caps: FirstCaps,
761                    } => write_str!(WEEKDAYS_ABBREV_FIRST_CAPS[ts.weekday0()]),
762                    DateTimeField::DayName {
763                        abbrev: true,
764                        caps: NoCaps,
765                    } => write_str!(WEEKDAYS_ABBREV_NO_CAPS[ts.weekday0()]),
766                    DateTimeField::DayOfMonth => write_num!(ts.day(), 2),
767                    DateTimeField::DayOfYear => write_num!(ts.ordinal(), 3),
768                    DateTimeField::Hour12 => write_num!(ts.hour12().1, 2),
769                    DateTimeField::Hour24 => write_num!(ts.hour(), 2),
770                    DateTimeField::IsoYear1 => write_num!(ts.iso_year_ce() % 10, 1),
771                    DateTimeField::IsoYear2 => write_num!(ts.iso_year_ce() % 100, 2),
772                    DateTimeField::IsoYear3 => write_num!(ts.iso_year_ce() % 1000, 3),
773                    DateTimeField::IsoYear4 => write_num!(ts.iso_year_ce(), 4),
774                    DateTimeField::IsoDayOfYear => write_num!(
775                        ts.iso_week().week0() * 7 + ts.weekday().number_from_monday(),
776                        3
777                    ),
778                    DateTimeField::IsoWeekOfYear => write_num!(ts.iso_week().week(), 2),
779                    DateTimeField::JulianDay => write_num!(ts.num_days_from_ce() + 1_721_425),
780                    DateTimeField::Minute => write_num!(ts.minute(), 2),
781                    DateTimeField::MonthOfYear => write_num!(ts.month(), 2),
782                    DateTimeField::MonthName {
783                        abbrev: true,
784                        caps: AllCaps,
785                    } => write_str!(MONTHS_ABBREV_ALL_CAPS[usize::cast_from(ts.month0())]),
786                    DateTimeField::MonthName {
787                        abbrev: true,
788                        caps: FirstCaps,
789                    } => write_str!(MONTHS_ABBREV_FIRST_CAPS[usize::cast_from(ts.month0())]),
790                    DateTimeField::MonthName {
791                        abbrev: true,
792                        caps: NoCaps,
793                    } => write_str!(MONTHS_ABBREV_NO_CAPS[usize::cast_from(ts.month0())]),
794                    DateTimeField::MonthName {
795                        abbrev: false,
796                        caps: AllCaps,
797                    } => write_str!(MONTHS_ALL_CAPS[usize::cast_from(ts.month0())], 9),
798                    DateTimeField::MonthName {
799                        abbrev: false,
800                        caps: FirstCaps,
801                    } => write_str!(MONTHS_FIRST_CAPS[usize::cast_from(ts.month0())], 9),
802                    DateTimeField::MonthName {
803                        abbrev: false,
804                        caps: NoCaps,
805                    } => write_str!(MONTHS_NO_CAPS[usize::cast_from(ts.month0())], 9),
806                    DateTimeField::Millisecond => write_num!(ts.nanosecond() / 1_000_000, 3),
807                    DateTimeField::Quarter => write_num!(ts.month0() / 3 + 1),
808                    DateTimeField::MonthInRomanNumerals { caps: true } => {
809                        write_str!(MONTHS_ROMAN_CAPS[usize::cast_from(ts.month0())], 4)
810                    }
811                    DateTimeField::MonthInRomanNumerals { caps: false } => {
812                        write_str!(MONTHS_ROMAN_NO_CAPS[usize::cast_from(ts.month0())], 4)
813                    }
814                    DateTimeField::Second => write_num!(ts.second(), 2),
815                    DateTimeField::SecondsPastMidnight => {
816                        write_num!(ts.num_seconds_from_midnight())
817                    }
818                    DateTimeField::Timezone { caps } => write_str!(ts.timezone_name(*caps)),
819                    DateTimeField::TimezoneOffset => write_str!(ts.timezone_offset()),
820                    DateTimeField::TimezoneHours => write_str!(ts.timezone_hours()),
821                    DateTimeField::TimezoneMinutes => write_str!(ts.timezone_minutes()),
822                    DateTimeField::Microsecond => write_num!(ts.nanosecond() / 1_000, 6),
823                    DateTimeField::WeekOfMonth => write_num!(ts.day0() / 7 + 1, 1),
824                    DateTimeField::WeekOfYear => write_num!(ts.ordinal0() / 7 + 1, 2),
825                    DateTimeField::Year1 => write_num!(ts.year_ce().1 % 10, 1),
826                    DateTimeField::Year2 => write_num!(ts.year_ce().1 % 100, 2),
827                    DateTimeField::Year3 => write_num!(ts.year_ce().1 % 1000, 3),
828                    DateTimeField::Year4 { separator: false } => write_num!(ts.year_ce().1, 4),
829                    DateTimeField::Year4 { separator: true } => {
830                        let n = ts.year_ce().1;
831                        write!(buf, "{},{:03}", n / 1000, n % 1000)?;
832                        ordinal.render(buf, n)
833                    }
834                }
835            }
836        }
837    }
838}
839
/// A compiled date-time format string.
///
/// Holds the format as an ordered list of nodes, each either a field to be
/// filled in from a timestamp or a literal character. Build one with
/// `DateTimeFormat::compile` and apply it with `DateTimeFormat::render`.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    MzReflect
)]
pub struct DateTimeFormat(Vec<DateTimeFormatNode>);
854
855impl DateTimeFormat {
856    /// Compiles a new `DateTimeFormat` from the input string `s`.
857    pub fn compile(s: &str) -> DateTimeFormat {
858        // The approach here uses the Aho-Corasick string searching algorithm to
859        // repeatedly and efficiently find the next token of interest. Tokens of
860        // interest are typically field specifiers, like "DDDD", or field
861        // modifiers, like "FM". Characters in between tokens of interest are
862        // recorded as literals. We also consider a double quote a token of
863        // interest, as a double quote disables matching of field
864        // specifiers/modifiers until the next double quote.
865
866        struct Match {
867            start: usize,
868            end: usize,
869            token: DateTimeToken,
870        }
871
872        let matcher = AhoCorasickBuilder::new()
873            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
874            .build(DateTimeToken::patterns())
875            .unwrap_or_else(|e| panic!("automaton build error: {e}"));
876
877        let matches: Vec<_> = matcher
878            .find_iter(&s)
879            .map(|m| Match {
880                start: m.start(),
881                end: m.end(),
882                token: DateTimeToken::try_from(
883                    u8::try_from(m.pattern().as_u32()).expect("match index fits in a u8"),
884                )
885                .expect("match pattern missing"),
886            })
887            .collect();
888
889        let mut out = Vec::new();
890        let mut pos = 0;
891        let mut in_quotes = false;
892        for i in 0..matches.len() {
893            // Any characters since the last match are to be taken literally.
894            for c in s[pos..matches[i].start].chars() {
895                if !(in_quotes && c == '\\') {
896                    // Backslash is an escape character inside of quotes.
897                    out.push(DateTimeFormatNode::Literal(c));
898                }
899            }
900
901            if in_quotes {
902                // If we see a format specifier inside of a quoted block, it
903                // is taken literally.
904                for c in matches[i].token.as_literal().chars() {
905                    out.push(DateTimeFormatNode::Literal(c))
906                }
907            } else if let Some(field) = matches[i].token.field() {
908                // We found a format specifier. Look backwards for a fill mode
909                // toggle (fill mode is on by default), and forwards for an
910                // ordinal suffix specifier (default is no ordinal suffix).
911                let fill = i == 0
912                    || matches[i - 1].end != matches[i].start
913                    || !matches[i - 1].token.is_fill_mode_toggle();
914                let ordinal = match matches.get(i + 1) {
915                    Some(m) if m.start == matches[i].end => m.token.ordinal_mode(),
916                    _ => OrdinalMode::None,
917                };
918                out.push(DateTimeFormatNode::Field {
919                    field,
920                    fill,
921                    ordinal,
922                });
923            }
924
925            if matches[i].token == DateTimeToken::Quote {
926                in_quotes = !in_quotes;
927            }
928            pos = matches[i].end;
929        }
930        for c in s[pos..].chars() {
931            out.push(DateTimeFormatNode::Literal(c));
932        }
933        DateTimeFormat(out)
934    }
935
936    /// Renders the format string using the timestamp `ts` as the input. The
937    /// placeholders in the format string will be filled in appropriately
938    /// according to the value of `ts`.
939    pub fn render(&self, ts: &impl TimestampLike) -> String {
940        let mut out = String::new();
941        for node in &self.0 {
942            node.render(&mut out, ts)
943                .expect("rendering to string cannot fail");
944        }
945        out
946    }
947}