mz_expr/scalar/func/
format.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Data type formatting functions.
11//!
12//! <https://www.postgresql.org/docs/current/functions-formatting.html>
13
14#![allow(non_camel_case_types)]
15
16use std::fmt;
17
18use aho_corasick::AhoCorasickBuilder;
19use enum_iterator::Sequence;
20use mz_lowertest::MzReflect;
21use mz_ore::cast::CastFrom;
22use mz_proto::{IntoRustIfSome, ProtoType, RustType, TryFromProtoError};
23use num_enum::{IntoPrimitive, TryFromPrimitive};
24use serde::{Deserialize, Serialize};
25
26use crate::scalar::func::TimestampLike;
27
28include!(concat!(env!("OUT_DIR"), "/mz_expr.scalar.func.format.rs"));
29
/// The raw tokens that can appear in a format string. Many of these tokens
/// overlap, in which case the longest matching token should be selected.
///
/// Variant names are spelled like the token text they stand for, with `_`
/// replacing punctuation that cannot appear in an identifier (for example
/// `A_D` is the token `A.D.` and `y_yyy` is the token `y,yyy`). That is why
/// the file opts out of the `non_camel_case_types` lint.
///
/// The declaration order is significant: the enum is `repr(u8)` with
/// primitive conversions derived, and `DateTimeToken::patterns` emits the
/// patterns in variant order.
// NOTE(review): presumably the matcher's pattern index is converted back into
// a variant through `TryFromPrimitive`, so inserting or reordering variants
// would change that mapping — confirm against the format-string compiler.
#[repr(u8)]
#[derive(Eq, PartialEq, TryFromPrimitive, IntoPrimitive, Sequence)]
enum DateTimeToken {
    a_d,
    A_D,
    a_m,
    A_M,
    ad,
    AD,
    am,
    AM,
    b_c,
    B_C,
    bc,
    BC,
    cc,
    CC,
    d,
    D,
    day,
    Day,
    DAY,
    dd,
    DD,
    ddd,
    DDD,
    dy,
    Dy,
    DY,
    // `fm`/`FM` toggle fill mode; together with `fx`/`FX` they carry no field
    // of their own.
    fm,
    FM,
    fx,
    FX,
    hh,
    HH,
    hh12,
    HH12,
    hh24,
    HH24,
    i,
    I,
    id,
    ID,
    iddd,
    IDDD,
    iw,
    IW,
    iy,
    IY,
    iyy,
    IYY,
    iyyy,
    IYYY,
    j,
    J,
    mi,
    MI,
    mm,
    MM,
    mon,
    Mon,
    MON,
    month,
    Month,
    MONTH,
    ms,
    MS,
    OF,
    p_m,
    P_M,
    pm,
    PM,
    q,
    Q,
    rm,
    RM,
    ss,
    SS,
    ssss,
    SSSS,
    sssss,
    SSSSS,
    tz,
    TZ,
    TZH,
    TZM,
    us,
    US,
    w,
    W,
    ww,
    WW,
    y_yyy,
    Y_YYY,
    y,
    Y,
    yy,
    YY,
    yyy,
    YYY,
    yyyy,
    YYYY,
    // `th`/`TH` select a lowercase/uppercase ordinal suffix; they carry no
    // field of their own.
    th,
    TH,
    /// A backslash-escaped double quote (`\"`).
    EscQuote,
    /// A bare double quote (`"`).
    Quote,
}
139
140impl DateTimeToken {
141    /// Returns the literal sequence of characters that this `DateTimeToken`
142    /// matches.
143    const fn pattern(&self) -> &'static str {
144        match self {
145            DateTimeToken::AD => "AD",
146            DateTimeToken::ad => "ad",
147            DateTimeToken::A_D => "A.D.",
148            DateTimeToken::a_d => "a.d.",
149            DateTimeToken::AM => "AM",
150            DateTimeToken::am => "am",
151            DateTimeToken::A_M => "A.M.",
152            DateTimeToken::a_m => "a.m.",
153            DateTimeToken::BC => "BC",
154            DateTimeToken::bc => "bc",
155            DateTimeToken::B_C => "B.C.",
156            DateTimeToken::b_c => "b.c.",
157            DateTimeToken::CC => "CC",
158            DateTimeToken::cc => "cc",
159            DateTimeToken::D => "D",
160            DateTimeToken::d => "d",
161            DateTimeToken::DAY => "DAY",
162            DateTimeToken::Day => "Day",
163            DateTimeToken::day => "day",
164            DateTimeToken::DD => "DD",
165            DateTimeToken::dd => "dd",
166            DateTimeToken::DDD => "DDD",
167            DateTimeToken::ddd => "ddd",
168            DateTimeToken::DY => "DY",
169            DateTimeToken::Dy => "Dy",
170            DateTimeToken::dy => "dy",
171            DateTimeToken::FM => "FM",
172            DateTimeToken::fm => "fm",
173            DateTimeToken::FX => "FX",
174            DateTimeToken::fx => "fx",
175            DateTimeToken::HH => "HH",
176            DateTimeToken::hh => "hh",
177            DateTimeToken::HH12 => "HH12",
178            DateTimeToken::hh12 => "hh12",
179            DateTimeToken::HH24 => "HH24",
180            DateTimeToken::hh24 => "hh24",
181            DateTimeToken::I => "I",
182            DateTimeToken::i => "i",
183            DateTimeToken::ID => "ID",
184            DateTimeToken::id => "id",
185            DateTimeToken::IDDD => "IDDD",
186            DateTimeToken::iddd => "iddd",
187            DateTimeToken::IW => "IW",
188            DateTimeToken::iw => "iw",
189            DateTimeToken::IY => "IY",
190            DateTimeToken::iy => "iy",
191            DateTimeToken::IYY => "IYY",
192            DateTimeToken::iyy => "iyy",
193            DateTimeToken::IYYY => "IYYY",
194            DateTimeToken::iyyy => "iyyy",
195            DateTimeToken::J => "J",
196            DateTimeToken::j => "j",
197            DateTimeToken::MI => "MI",
198            DateTimeToken::mi => "mi",
199            DateTimeToken::MM => "MM",
200            DateTimeToken::mm => "mm",
201            DateTimeToken::MON => "MON",
202            DateTimeToken::Mon => "Mon",
203            DateTimeToken::mon => "mon",
204            DateTimeToken::MONTH => "MONTH",
205            DateTimeToken::Month => "Month",
206            DateTimeToken::month => "month",
207            DateTimeToken::MS => "MS",
208            DateTimeToken::ms => "ms",
209            DateTimeToken::OF => "OF",
210            DateTimeToken::PM => "PM",
211            DateTimeToken::pm => "pm",
212            DateTimeToken::P_M => "P.M.",
213            DateTimeToken::p_m => "p.m.",
214            DateTimeToken::Q => "Q",
215            DateTimeToken::q => "q",
216            DateTimeToken::rm => "rm",
217            DateTimeToken::RM => "RM",
218            DateTimeToken::SS => "ss",
219            DateTimeToken::ss => "SS",
220            DateTimeToken::SSSS => "SSSS",
221            DateTimeToken::ssss => "ssss",
222            DateTimeToken::SSSSS => "SSSSS",
223            DateTimeToken::sssss => "sssss",
224            DateTimeToken::TZ => "TZ",
225            DateTimeToken::tz => "tz",
226            DateTimeToken::TZH => "TZH",
227            DateTimeToken::TZM => "TZM",
228            DateTimeToken::US => "US",
229            DateTimeToken::us => "us",
230            DateTimeToken::W => "W",
231            DateTimeToken::w => "w",
232            DateTimeToken::WW => "ww",
233            DateTimeToken::ww => "WW",
234            DateTimeToken::Y => "Y",
235            DateTimeToken::y => "y",
236            DateTimeToken::Y_YYY => "Y,YYY",
237            DateTimeToken::y_yyy => "y,yyy",
238            DateTimeToken::YY => "YY",
239            DateTimeToken::yy => "yy",
240            DateTimeToken::YYY => "YYY",
241            DateTimeToken::yyy => "yyy",
242            DateTimeToken::YYYY => "YYYY",
243            DateTimeToken::yyyy => "yyyy",
244            DateTimeToken::Quote => "\"",
245            DateTimeToken::EscQuote => "\\\"",
246            DateTimeToken::TH => "TH",
247            DateTimeToken::th => "th",
248        }
249    }
250
251    /// Returns the list of all known patterns, in the same order as the enum
252    /// variants.
253    fn patterns() -> Vec<&'static str> {
254        enum_iterator::all::<Self>().map(|v| v.pattern()).collect()
255    }
256
257    /// Returns the `DateTimeField` associated with this token, if any.
258    ///
259    /// Some tokens do not correspond directly to a field, but instead modify
260    /// other fields.
261    fn field(&self) -> Option<DateTimeField> {
262        use DateTimeToken::*;
263        use WordCaps::*;
264        match self {
265            AD | BC => Some(DateTimeField::Era {
266                dots: false,
267                caps: true,
268            }),
269            ad | bc => Some(DateTimeField::Era {
270                dots: false,
271                caps: false,
272            }),
273            A_D | B_C => Some(DateTimeField::Era {
274                dots: true,
275                caps: true,
276            }),
277            a_d | b_c => Some(DateTimeField::Era {
278                dots: true,
279                caps: false,
280            }),
281            AM | PM => Some(DateTimeField::Meridiem {
282                dots: false,
283                caps: true,
284            }),
285            am | pm => Some(DateTimeField::Meridiem {
286                dots: false,
287                caps: false,
288            }),
289            A_M | P_M => Some(DateTimeField::Meridiem {
290                dots: true,
291                caps: true,
292            }),
293            a_m | p_m => Some(DateTimeField::Meridiem {
294                dots: true,
295                caps: false,
296            }),
297            cc | CC => Some(DateTimeField::Century),
298            d | D => Some(DateTimeField::DayOfWeek),
299            day => Some(DateTimeField::DayName {
300                abbrev: false,
301                caps: NoCaps,
302            }),
303            Day => Some(DateTimeField::DayName {
304                abbrev: false,
305                caps: FirstCaps,
306            }),
307            DAY => Some(DateTimeField::DayName {
308                abbrev: false,
309                caps: AllCaps,
310            }),
311            dy => Some(DateTimeField::DayName {
312                abbrev: true,
313                caps: NoCaps,
314            }),
315            Dy => Some(DateTimeField::DayName {
316                abbrev: true,
317                caps: FirstCaps,
318            }),
319            DY => Some(DateTimeField::DayName {
320                abbrev: true,
321                caps: AllCaps,
322            }),
323            dd | DD => Some(DateTimeField::DayOfMonth),
324            ddd | DDD => Some(DateTimeField::DayOfYear),
325            fm | FM | fx | FX | th | TH | Quote | EscQuote => None,
326            hh | HH | hh12 | HH12 => Some(DateTimeField::Hour12),
327            hh24 | HH24 => Some(DateTimeField::Hour24),
328            id | ID => Some(DateTimeField::IsoDayOfWeek),
329            iddd | IDDD => Some(DateTimeField::IsoDayOfYear),
330            iw | IW => Some(DateTimeField::IsoWeekOfYear),
331            j | J => Some(DateTimeField::JulianDay),
332            mi | MI => Some(DateTimeField::Minute),
333            mm | MM => Some(DateTimeField::MonthOfYear),
334            mon => Some(DateTimeField::MonthName {
335                abbrev: true,
336                caps: NoCaps,
337            }),
338            Mon => Some(DateTimeField::MonthName {
339                abbrev: true,
340                caps: FirstCaps,
341            }),
342            MON => Some(DateTimeField::MonthName {
343                abbrev: true,
344                caps: AllCaps,
345            }),
346            month => Some(DateTimeField::MonthName {
347                abbrev: false,
348                caps: NoCaps,
349            }),
350            Month => Some(DateTimeField::MonthName {
351                abbrev: false,
352                caps: FirstCaps,
353            }),
354            MONTH => Some(DateTimeField::MonthName {
355                abbrev: false,
356                caps: AllCaps,
357            }),
358            ms | MS => Some(DateTimeField::Millisecond),
359            OF => Some(DateTimeField::TimezoneOffset),
360            q | Q => Some(DateTimeField::Quarter),
361            rm => Some(DateTimeField::MonthInRomanNumerals { caps: false }),
362            RM => Some(DateTimeField::MonthInRomanNumerals { caps: true }),
363            ss | SS => Some(DateTimeField::Second),
364            ssss | SSSS | sssss | SSSSS => Some(DateTimeField::SecondsPastMidnight),
365            tz => Some(DateTimeField::Timezone { caps: false }),
366            TZ => Some(DateTimeField::Timezone { caps: true }),
367            TZH => Some(DateTimeField::TimezoneHours),
368            TZM => Some(DateTimeField::TimezoneMinutes),
369            us | US => Some(DateTimeField::Microsecond),
370            w | W => Some(DateTimeField::WeekOfMonth),
371            ww | WW => Some(DateTimeField::WeekOfYear),
372            y | Y => Some(DateTimeField::Year1),
373            yy | YY => Some(DateTimeField::Year2),
374            yyy | YYY => Some(DateTimeField::Year3),
375            yyyy | YYYY => Some(DateTimeField::Year4 { separator: false }),
376            y_yyy | Y_YYY => Some(DateTimeField::Year4 { separator: true }),
377            i | I => Some(DateTimeField::IsoYear1),
378            iy | IY => Some(DateTimeField::IsoYear2),
379            iyy | IYY => Some(DateTimeField::IsoYear3),
380            iyyy | IYYY => Some(DateTimeField::IsoYear4),
381        }
382    }
383
384    /// Returns how this token should be rendered if it appears within quotes.
385    /// This is usually the same string as the `pattern` method returns, but
386    /// not always.
387    fn as_literal(&self) -> &'static str {
388        match self {
389            DateTimeToken::Quote => "",
390            DateTimeToken::EscQuote => "\"",
391            _ => self.pattern(),
392        }
393    }
394
395    /// Returns whether this token is a fill mode toggle.
396    fn is_fill_mode_toggle(&self) -> bool {
397        matches!(self, DateTimeToken::fm | DateTimeToken::FM)
398    }
399
400    /// Returns how this token affects the ordinal mode, if at all.
401    fn ordinal_mode(&self) -> OrdinalMode {
402        match self {
403            DateTimeToken::th => OrdinalMode::Lower,
404            DateTimeToken::TH => OrdinalMode::Upper,
405            _ => OrdinalMode::None,
406        }
407    }
408}
409
/// Specifies the ordinal suffix that should be attached to numeric fields.
///
/// The mode is selected by the `th`/`TH` format tokens (see
/// `DateTimeToken::ordinal_mode`) and applied by `OrdinalMode::render`.
#[derive(
    Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash, Serialize, Deserialize, MzReflect,
)]
enum OrdinalMode {
    /// No ordinal suffix.
    None,
    /// A lowercase ordinal suffix ("st", "nd", "rd", or "th").
    Lower,
    /// An uppercase ordinal suffix ("ST", "ND", "RD", or "TH").
    Upper,
}
422
423impl OrdinalMode {
424    fn render(self, out: &mut impl fmt::Write, n: impl Into<i64>) -> Result<(), fmt::Error> {
425        let n = n.into();
426        // Numbers that end in teen always use "th" as their ordinal suffix.
427        // Otherwise the last digit determines the ordinal suffix.
428        let n = match n % 100 {
429            10..=19 => 0,
430            _ => n % 10,
431        };
432        match self {
433            OrdinalMode::None => Ok(()),
434            OrdinalMode::Lower => match n {
435                1 => out.write_str("st"),
436                2 => out.write_str("nd"),
437                3 => out.write_str("rd"),
438                _ => out.write_str("th"),
439            },
440            OrdinalMode::Upper => match n {
441                1 => out.write_str("ST"),
442                2 => out.write_str("ND"),
443                3 => out.write_str("RD"),
444                _ => out.write_str("TH"),
445            },
446        }
447    }
448}
449
450impl RustType<ProtoOrdinalMode> for OrdinalMode {
451    fn into_proto(&self) -> ProtoOrdinalMode {
452        use proto_ordinal_mode::*;
453
454        let kind = match self {
455            Self::None => Kind::None(()),
456            Self::Lower => Kind::Lower(()),
457            Self::Upper => Kind::Upper(()),
458        };
459        ProtoOrdinalMode { kind: Some(kind) }
460    }
461
462    fn from_proto(proto: ProtoOrdinalMode) -> Result<Self, TryFromProtoError> {
463        use proto_ordinal_mode::*;
464
465        let kind = proto
466            .kind
467            .ok_or_else(|| TryFromProtoError::missing_field("ProtoOrdinalMode::kind"))?;
468        let x = match kind {
469            Kind::None(()) => Self::None,
470            Kind::Lower(()) => Self::Lower,
471            Kind::Upper(()) => Self::Upper,
472        };
473        Ok(x)
474    }
475}
476
/// Specifies the capitalization of a word.
///
/// Used by the day-name and month-name fields of `DateTimeField` to choose
/// between spellings such as "MONDAY", "Monday", and "monday".
#[allow(clippy::enum_variant_names)] // Having "Caps" in the variant names is clarifying.
#[derive(
    Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash, Serialize, Deserialize, MzReflect,
)]
enum WordCaps {
    /// All of the letters should be capitalized.
    AllCaps,
    /// Only the first letter should be capitalized.
    FirstCaps,
    /// None of the letters should be capitalized.
    NoCaps,
}
490
491impl RustType<ProtoWordCaps> for WordCaps {
492    fn into_proto(&self) -> ProtoWordCaps {
493        use proto_word_caps::*;
494
495        let kind = match self {
496            Self::AllCaps => Kind::AllCaps(()),
497            Self::FirstCaps => Kind::FirstCaps(()),
498            Self::NoCaps => Kind::NoCaps(()),
499        };
500        ProtoWordCaps { kind: Some(kind) }
501    }
502
503    fn from_proto(proto: ProtoWordCaps) -> Result<Self, TryFromProtoError> {
504        use proto_word_caps::*;
505
506        let kind = proto
507            .kind
508            .ok_or_else(|| TryFromProtoError::missing_field("ProtoWordCaps::kind"))?;
509        let x = match kind {
510            Kind::AllCaps(()) => Self::AllCaps,
511            Kind::FirstCaps(()) => Self::FirstCaps,
512            Kind::NoCaps(()) => Self::NoCaps,
513        };
514        Ok(x)
515    }
516}
517
/// A date-time field.
///
/// The variants are largely self-evident, but are described in detail in the
/// PostgreSQL documentation if necessary.
///
/// Each variant notes the format tokens that `DateTimeToken::field` maps to
/// it. Unless stated otherwise, the uppercase spelling of a token maps to the
/// same variant as the lowercase one.
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Serialize, Deserialize, MzReflect)]
enum DateTimeField {
    /// `hh` / `hh12`.
    Hour12,
    /// `hh24`.
    Hour24,
    /// `mi`.
    Minute,
    /// `ss`.
    Second,
    /// `ms`.
    Millisecond,
    /// `us`.
    Microsecond,
    /// `ssss` / `sssss`.
    SecondsPastMidnight,
    /// `am` / `pm` and their dotted forms; `dots` selects the "A.M." style
    /// and `caps` the uppercase spelling.
    Meridiem { dots: bool, caps: bool },
    /// `y`.
    Year1,
    /// `yy`.
    Year2,
    /// `yyy`.
    Year3,
    /// `yyyy` (`separator: false`) or `y,yyy` (`separator: true`).
    Year4 { separator: bool },
    /// `i`.
    IsoYear1,
    /// `iy`.
    IsoYear2,
    /// `iyy`.
    IsoYear3,
    /// `iyyy`.
    IsoYear4,
    /// `ad` / `bc` and their dotted forms; `dots` selects the "A.D." style
    /// and `caps` the uppercase spelling.
    Era { dots: bool, caps: bool },
    /// `mon` (abbreviated) or `month` (full), in the capitalization of the
    /// token.
    MonthName { abbrev: bool, caps: WordCaps },
    /// `mm`.
    MonthOfYear,
    /// `dy` (abbreviated) or `day` (full), in the capitalization of the
    /// token.
    DayName { abbrev: bool, caps: WordCaps },
    /// `d`.
    DayOfWeek,
    /// `id`.
    IsoDayOfWeek,
    /// `dd`.
    DayOfMonth,
    /// `ddd`.
    DayOfYear,
    /// `iddd`.
    IsoDayOfYear,
    /// `w`.
    WeekOfMonth,
    /// `ww`.
    WeekOfYear,
    /// `iw`.
    IsoWeekOfYear,
    /// `cc`.
    Century,
    /// `j`.
    JulianDay,
    /// `q`.
    Quarter,
    /// `rm` (`caps: false`) or `RM` (`caps: true`).
    MonthInRomanNumerals { caps: bool },
    /// `tz` (`caps: false`) or `TZ` (`caps: true`).
    Timezone { caps: bool },
    /// `TZH`.
    TimezoneHours,
    /// `TZM`.
    TimezoneMinutes,
    /// `OF`.
    TimezoneOffset,
}
561
562impl RustType<ProtoDateTimeField> for DateTimeField {
563    fn into_proto(&self) -> ProtoDateTimeField {
564        use proto_date_time_field::*;
565
566        let kind = match self {
567            Self::Hour12 => Kind::Hour12(()),
568            Self::Hour24 => Kind::Hour24(()),
569            Self::Minute => Kind::Minute(()),
570            Self::Second => Kind::Second(()),
571            Self::Millisecond => Kind::Millisecond(()),
572            Self::Microsecond => Kind::Microsecond(()),
573            Self::SecondsPastMidnight => Kind::SecondsPastMidnight(()),
574            Self::Meridiem { dots, caps } => Kind::Meridiem(ProtoIndicator {
575                dots: dots.into_proto(),
576                caps: caps.into_proto(),
577            }),
578            Self::Year1 => Kind::Year1(()),
579            Self::Year2 => Kind::Year2(()),
580            Self::Year3 => Kind::Year3(()),
581            Self::Year4 { separator } => Kind::Year4(separator.into_proto()),
582            Self::IsoYear1 => Kind::IsoYear1(()),
583            Self::IsoYear2 => Kind::IsoYear2(()),
584            Self::IsoYear3 => Kind::IsoYear3(()),
585            Self::IsoYear4 => Kind::IsoYear4(()),
586            Self::Era { dots, caps } => Kind::Era(ProtoIndicator {
587                dots: dots.into_proto(),
588                caps: caps.into_proto(),
589            }),
590            Self::MonthName { abbrev, caps } => Kind::MonthName(ProtoName {
591                abbrev: abbrev.into_proto(),
592                caps: Some(caps.into_proto()),
593            }),
594            Self::MonthOfYear => Kind::MonthOfYear(()),
595            Self::DayName { abbrev, caps } => Kind::DayName(ProtoName {
596                abbrev: abbrev.into_proto(),
597                caps: Some(caps.into_proto()),
598            }),
599            Self::DayOfWeek => Kind::DayOfWeek(()),
600            Self::IsoDayOfWeek => Kind::IsoDayOfWeek(()),
601            Self::DayOfMonth => Kind::DayOfMonth(()),
602            Self::DayOfYear => Kind::DayOfYear(()),
603            Self::IsoDayOfYear => Kind::IsoDayOfYear(()),
604            Self::WeekOfMonth => Kind::WeekOfMonth(()),
605            Self::WeekOfYear => Kind::WeekOfYear(()),
606            Self::IsoWeekOfYear => Kind::IsoWeekOfYear(()),
607            Self::Century => Kind::Century(()),
608            Self::JulianDay => Kind::JulianDay(()),
609            Self::Quarter => Kind::Quarter(()),
610            Self::MonthInRomanNumerals { caps } => Kind::MonthInRomanNumerals(caps.into_proto()),
611            Self::Timezone { caps } => Kind::Timezone(caps.into_proto()),
612            Self::TimezoneHours => Kind::TimezoneHours(()),
613            Self::TimezoneMinutes => Kind::TimezoneMinutes(()),
614            Self::TimezoneOffset => Kind::TimezoneOffset(()),
615        };
616        ProtoDateTimeField { kind: Some(kind) }
617    }
618
619    fn from_proto(proto: ProtoDateTimeField) -> Result<Self, TryFromProtoError> {
620        use proto_date_time_field::*;
621
622        let kind = proto
623            .kind
624            .ok_or_else(|| TryFromProtoError::missing_field("ProtoDateTimeField::kind"))?;
625        let x = match kind {
626            Kind::Hour12(()) => Self::Hour12,
627            Kind::Hour24(()) => Self::Hour24,
628            Kind::Minute(()) => Self::Minute,
629            Kind::Second(()) => Self::Second,
630            Kind::Millisecond(()) => Self::Millisecond,
631            Kind::Microsecond(()) => Self::Microsecond,
632            Kind::SecondsPastMidnight(()) => Self::SecondsPastMidnight,
633            Kind::Meridiem(x) => Self::Meridiem {
634                dots: x.dots.into_rust()?,
635                caps: x.caps.into_rust()?,
636            },
637            Kind::Year1(()) => Self::Year1,
638            Kind::Year2(()) => Self::Year2,
639            Kind::Year3(()) => Self::Year3,
640            Kind::Year4(x) => Self::Year4 {
641                separator: x.into_rust()?,
642            },
643            Kind::IsoYear1(()) => Self::IsoYear1,
644            Kind::IsoYear2(()) => Self::IsoYear2,
645            Kind::IsoYear3(()) => Self::IsoYear3,
646            Kind::IsoYear4(()) => Self::IsoYear4,
647            Kind::Era(x) => Self::Era {
648                dots: x.dots.into_rust()?,
649                caps: x.caps.into_rust()?,
650            },
651            Kind::MonthName(x) => Self::MonthName {
652                abbrev: x.abbrev.into_rust()?,
653                caps: x.caps.into_rust_if_some("ProtoName::caps")?,
654            },
655            Kind::MonthOfYear(()) => Self::MonthOfYear,
656            Kind::DayName(x) => Self::DayName {
657                abbrev: x.abbrev.into_rust()?,
658                caps: x.caps.into_rust_if_some("ProtoName::caps")?,
659            },
660            Kind::DayOfWeek(()) => Self::DayOfWeek,
661            Kind::IsoDayOfWeek(()) => Self::IsoDayOfWeek,
662            Kind::DayOfMonth(()) => Self::DayOfMonth,
663            Kind::DayOfYear(()) => Self::DayOfYear,
664            Kind::IsoDayOfYear(()) => Self::IsoDayOfYear,
665            Kind::WeekOfMonth(()) => Self::WeekOfMonth,
666            Kind::WeekOfYear(()) => Self::WeekOfYear,
667            Kind::IsoWeekOfYear(()) => Self::IsoWeekOfYear,
668            Kind::Century(()) => Self::Century,
669            Kind::JulianDay(()) => Self::JulianDay,
670            Kind::Quarter(()) => Self::Quarter,
671            Kind::MonthInRomanNumerals(x) => Self::MonthInRomanNumerals {
672                caps: x.into_rust()?,
673            },
674            Kind::Timezone(x) => Self::Timezone {
675                caps: x.into_rust()?,
676            },
677            Kind::TimezoneHours(()) => Self::TimezoneHours,
678            Kind::TimezoneMinutes(()) => Self::TimezoneMinutes,
679            Kind::TimezoneOffset(()) => Self::TimezoneOffset,
680        };
681        Ok(x)
682    }
683}
684
/// An element of a date-time format string.
///
/// A node is either a field computed from the timestamp being formatted or a
/// single literal character that is emitted as-is.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, MzReflect)]
enum DateTimeFormatNode {
    /// A field whose value will be computed from the input timestamp.
    Field {
        /// The inner field.
        field: DateTimeField,
        /// Whether the field should be padded with spaces to its maximum width.
        /// Does not have an effect for all fields, as the width of some fields
        /// is unknowable.
        fill: bool,
        /// Whether the field should be followed with an ordinal suffix, like
        /// "th." Only meaningful for numeric fields.
        ordinal: OrdinalMode,
    },
    /// A literal character, written to the output unchanged.
    Literal(char),
}
703
704impl RustType<ProtoDateTimeFormatNode> for DateTimeFormatNode {
705    fn into_proto(&self) -> ProtoDateTimeFormatNode {
706        use proto_date_time_format_node::*;
707
708        let kind = match self {
709            Self::Field {
710                field,
711                fill,
712                ordinal,
713            } => Kind::Field(ProtoField {
714                field: Some(field.into_proto()),
715                fill: fill.into_proto(),
716                ordinal: Some(ordinal.into_proto()),
717            }),
718            Self::Literal(c) => Kind::Literal(c.into_proto()),
719        };
720        ProtoDateTimeFormatNode { kind: Some(kind) }
721    }
722
723    fn from_proto(proto: ProtoDateTimeFormatNode) -> Result<Self, TryFromProtoError> {
724        use proto_date_time_format_node::*;
725
726        let kind = proto
727            .kind
728            .ok_or_else(|| TryFromProtoError::missing_field("ProtoDateTimeFormatNode::kind"))?;
729        let x = match kind {
730            Kind::Field(x) => Self::Field {
731                field: x.field.into_rust_if_some("ProtoField::field")?,
732                fill: x.fill.into_rust()?,
733                ordinal: x.ordinal.into_rust_if_some("ProtoField::ordinal")?,
734            },
735            Kind::Literal(x) => Self::Literal(x.into_rust()?),
736        };
737        Ok(x)
738    }
739}
740
// Lookup tables for rendering day and month names.
//
// The weekday tables start at Sunday and are indexed with `ts.weekday0()` by
// the renderer. The month tables start at January.
// NOTE(review): the month tables are presumably indexed by a zero-based month
// number; the code that indexes them is not in the reviewed chunk — confirm.

/// Full weekday names in upper case, starting at Sunday.
const WEEKDAYS_ALL_CAPS: [&str; 7] = [
    "SUNDAY",
    "MONDAY",
    "TUESDAY",
    "WEDNESDAY",
    "THURSDAY",
    "FRIDAY",
    "SATURDAY",
];

/// Full weekday names with only the first letter capitalized, starting at
/// Sunday.
const WEEKDAYS_FIRST_CAPS: [&str; 7] = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
];

/// Full weekday names in lower case, starting at Sunday.
const WEEKDAYS_NO_CAPS: [&str; 7] = [
    "sunday",
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
];

/// Three-letter weekday abbreviations in upper case, starting at Sunday.
const WEEKDAYS_ABBREV_ALL_CAPS: [&str; 7] = ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"];

/// Three-letter weekday abbreviations with only the first letter
/// capitalized, starting at Sunday.
const WEEKDAYS_ABBREV_FIRST_CAPS: [&str; 7] = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

/// Three-letter weekday abbreviations in lower case, starting at Sunday.
const WEEKDAYS_ABBREV_NO_CAPS: [&str; 7] = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"];

/// Full month names in upper case, starting at January.
const MONTHS_ALL_CAPS: [&str; 12] = [
    "JANUARY",
    "FEBRUARY",
    "MARCH",
    "APRIL",
    "MAY",
    "JUNE",
    "JULY",
    "AUGUST",
    "SEPTEMBER",
    "OCTOBER",
    "NOVEMBER",
    "DECEMBER",
];

/// Full month names with only the first letter capitalized, starting at
/// January.
const MONTHS_FIRST_CAPS: [&str; 12] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];

/// Full month names in lower case, starting at January.
const MONTHS_NO_CAPS: [&str; 12] = [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
];

/// Three-letter month abbreviations in upper case, starting at January.
const MONTHS_ABBREV_ALL_CAPS: [&str; 12] = [
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
];

/// Three-letter month abbreviations with only the first letter capitalized,
/// starting at January.
const MONTHS_ABBREV_FIRST_CAPS: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];

/// Three-letter month abbreviations in lower case, starting at January.
const MONTHS_ABBREV_NO_CAPS: [&str; 12] = [
    "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
];

/// Month numbers 1 through 12 as lowercase Roman numerals.
const MONTHS_ROMAN_NO_CAPS: [&str; 12] = [
    "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii",
];

/// Month numbers 1 through 12 as uppercase Roman numerals.
const MONTHS_ROMAN_CAPS: [&str; 12] = [
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII",
];
841
842impl DateTimeFormatNode {
843    fn render(&self, buf: &mut impl fmt::Write, ts: &impl TimestampLike) -> Result<(), fmt::Error> {
844        use WordCaps::*;
845        match self {
846            DateTimeFormatNode::Literal(ch) => buf.write_char(*ch),
847            DateTimeFormatNode::Field {
848                field,
849                fill,
850                ordinal,
851            } => {
852                macro_rules! write_num {
853                    ($n:expr, $width:expr) => {{
854                        write!(
855                            buf,
856                            "{:0width$}",
857                            $n,
858                            width = if *fill { $width } else { 0 }
859                        )?;
860                        ordinal.render(buf, $n)
861                    }};
862                    ($n:expr) => {
863                        write_num!($n, 0)
864                    };
865                }
866
867                macro_rules! write_str {
868                    ($s:expr, $width:expr) => {{ write!(buf, "{:width$}", $s, width = if *fill { $width } else { 0 }) }};
869                    ($s:expr) => {
870                        write_str!($s, 0)
871                    };
872                }
873
874                match field {
875                    DateTimeField::Era {
876                        dots: false,
877                        caps: true,
878                    } => write_str!(if ts.year_ce().0 { "AD" } else { "BC" }),
879                    DateTimeField::Era {
880                        dots: false,
881                        caps: false,
882                    } => write_str!(if ts.year_ce().0 { "ad" } else { "bc" }),
883                    DateTimeField::Era {
884                        dots: true,
885                        caps: true,
886                    } => write_str!(if ts.year_ce().0 { "A.D." } else { "B.C." }),
887                    DateTimeField::Era {
888                        dots: true,
889                        caps: false,
890                    } => write_str!(if ts.year_ce().0 { "a.d." } else { "b.c." }),
891                    DateTimeField::Meridiem {
892                        dots: false,
893                        caps: true,
894                    } => write_str!(if ts.hour12().0 { "PM" } else { "AM" }),
895                    DateTimeField::Meridiem {
896                        dots: false,
897                        caps: false,
898                    } => write_str!(if ts.hour12().0 { "pm" } else { "am" }),
899                    DateTimeField::Meridiem {
900                        dots: true,
901                        caps: true,
902                    } => write_str!(if ts.hour12().0 { "P.M." } else { "A.M." }),
903                    DateTimeField::Meridiem {
904                        dots: true,
905                        caps: false,
906                    } => write_str!(if ts.hour12().0 { "p.m." } else { "a.m." }),
907                    DateTimeField::Century => {
908                        let n = if ts.year() > 0 {
909                            (ts.year() - 1) / 100 + 1
910                        } else {
911                            ts.year() / 100 - 1
912                        };
913                        write_num!(n, if n >= 0 { 2 } else { 3 })
914                    }
915                    DateTimeField::DayOfWeek => write_num!(ts.weekday().number_from_sunday(), 1),
916                    DateTimeField::IsoDayOfWeek => write_num!(ts.weekday().number_from_monday(), 1),
917                    DateTimeField::DayName {
918                        abbrev: false,
919                        caps: AllCaps,
920                    } => write_str!(WEEKDAYS_ALL_CAPS[ts.weekday0()], 9),
921                    DateTimeField::DayName {
922                        abbrev: false,
923                        caps: FirstCaps,
924                    } => write_str!(WEEKDAYS_FIRST_CAPS[ts.weekday0()], 9),
925                    DateTimeField::DayName {
926                        abbrev: false,
927                        caps: NoCaps,
928                    } => write_str!(WEEKDAYS_NO_CAPS[ts.weekday0()], 9),
929                    DateTimeField::DayName {
930                        abbrev: true,
931                        caps: AllCaps,
932                    } => write_str!(WEEKDAYS_ABBREV_ALL_CAPS[ts.weekday0()]),
933                    DateTimeField::DayName {
934                        abbrev: true,
935                        caps: FirstCaps,
936                    } => write_str!(WEEKDAYS_ABBREV_FIRST_CAPS[ts.weekday0()]),
937                    DateTimeField::DayName {
938                        abbrev: true,
939                        caps: NoCaps,
940                    } => write_str!(WEEKDAYS_ABBREV_NO_CAPS[ts.weekday0()]),
941                    DateTimeField::DayOfMonth => write_num!(ts.day(), 2),
942                    DateTimeField::DayOfYear => write_num!(ts.ordinal(), 3),
943                    DateTimeField::Hour12 => write_num!(ts.hour12().1, 2),
944                    DateTimeField::Hour24 => write_num!(ts.hour(), 2),
945                    DateTimeField::IsoYear1 => write_num!(ts.iso_year_ce() % 10, 1),
946                    DateTimeField::IsoYear2 => write_num!(ts.iso_year_ce() % 100, 2),
947                    DateTimeField::IsoYear3 => write_num!(ts.iso_year_ce() % 1000, 3),
948                    DateTimeField::IsoYear4 => write_num!(ts.iso_year_ce(), 4),
949                    DateTimeField::IsoDayOfYear => write_num!(
950                        ts.iso_week().week0() * 7 + ts.weekday().number_from_monday(),
951                        3
952                    ),
953                    DateTimeField::IsoWeekOfYear => write_num!(ts.iso_week().week(), 2),
954                    DateTimeField::JulianDay => write_num!(ts.num_days_from_ce() + 1_721_425),
955                    DateTimeField::Minute => write_num!(ts.minute(), 2),
956                    DateTimeField::MonthOfYear => write_num!(ts.month(), 2),
957                    DateTimeField::MonthName {
958                        abbrev: true,
959                        caps: AllCaps,
960                    } => write_str!(MONTHS_ABBREV_ALL_CAPS[usize::cast_from(ts.month0())]),
961                    DateTimeField::MonthName {
962                        abbrev: true,
963                        caps: FirstCaps,
964                    } => write_str!(MONTHS_ABBREV_FIRST_CAPS[usize::cast_from(ts.month0())]),
965                    DateTimeField::MonthName {
966                        abbrev: true,
967                        caps: NoCaps,
968                    } => write_str!(MONTHS_ABBREV_NO_CAPS[usize::cast_from(ts.month0())]),
969                    DateTimeField::MonthName {
970                        abbrev: false,
971                        caps: AllCaps,
972                    } => write_str!(MONTHS_ALL_CAPS[usize::cast_from(ts.month0())], 9),
973                    DateTimeField::MonthName {
974                        abbrev: false,
975                        caps: FirstCaps,
976                    } => write_str!(MONTHS_FIRST_CAPS[usize::cast_from(ts.month0())], 9),
977                    DateTimeField::MonthName {
978                        abbrev: false,
979                        caps: NoCaps,
980                    } => write_str!(MONTHS_NO_CAPS[usize::cast_from(ts.month0())], 9),
981                    DateTimeField::Millisecond => write_num!(ts.nanosecond() / 1_000_000, 3),
982                    DateTimeField::Quarter => write_num!(ts.month0() / 3 + 1),
983                    DateTimeField::MonthInRomanNumerals { caps: true } => {
984                        write_str!(MONTHS_ROMAN_CAPS[usize::cast_from(ts.month0())], 4)
985                    }
986                    DateTimeField::MonthInRomanNumerals { caps: false } => {
987                        write_str!(MONTHS_ROMAN_NO_CAPS[usize::cast_from(ts.month0())], 4)
988                    }
989                    DateTimeField::Second => write_num!(ts.second(), 2),
990                    DateTimeField::SecondsPastMidnight => {
991                        write_num!(ts.num_seconds_from_midnight())
992                    }
993                    DateTimeField::Timezone { caps } => write_str!(ts.timezone_name(*caps)),
994                    DateTimeField::TimezoneOffset => write_str!(ts.timezone_offset()),
995                    DateTimeField::TimezoneHours => write_str!(ts.timezone_hours()),
996                    DateTimeField::TimezoneMinutes => write_str!(ts.timezone_minutes()),
997                    DateTimeField::Microsecond => write_num!(ts.nanosecond() / 1_000, 6),
998                    DateTimeField::WeekOfMonth => write_num!(ts.day0() / 7 + 1, 1),
999                    DateTimeField::WeekOfYear => write_num!(ts.ordinal0() / 7 + 1, 2),
1000                    DateTimeField::Year1 => write_num!(ts.year_ce().1 % 10, 1),
1001                    DateTimeField::Year2 => write_num!(ts.year_ce().1 % 100, 2),
1002                    DateTimeField::Year3 => write_num!(ts.year_ce().1 % 1000, 3),
1003                    DateTimeField::Year4 { separator: false } => write_num!(ts.year_ce().1, 4),
1004                    DateTimeField::Year4 { separator: true } => {
1005                        let n = ts.year_ce().1;
1006                        write!(buf, "{},{:03}", n / 1000, n % 1000)?;
1007                        ordinal.render(buf, n)
1008                    }
1009                }
1010            }
1011        }
1012    }
1013}
1014
1015/// A compiled date-time format string.
1016#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, MzReflect)]
1017pub struct DateTimeFormat(Vec<DateTimeFormatNode>);
1018
1019impl DateTimeFormat {
1020    /// Compiles a new `DateTimeFormat` from the input string `s`.
1021    pub fn compile(s: &str) -> DateTimeFormat {
1022        // The approach here uses the Aho-Corasick string searching algorithm to
1023        // repeatedly and efficiently find the next token of interest. Tokens of
1024        // interest are typically field specifiers, like "DDDD", or field
1025        // modifiers, like "FM". Characters in between tokens of interest are
1026        // recorded as literals. We also consider a double quote a token of
1027        // interest, as a double quote disables matching of field
1028        // specifiers/modifiers until the next double quote.
1029
1030        struct Match {
1031            start: usize,
1032            end: usize,
1033            token: DateTimeToken,
1034        }
1035
1036        let matcher = AhoCorasickBuilder::new()
1037            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
1038            .build(DateTimeToken::patterns())
1039            .unwrap_or_else(|e| panic!("automaton build error: {e}"));
1040
1041        let matches: Vec<_> = matcher
1042            .find_iter(&s)
1043            .map(|m| Match {
1044                start: m.start(),
1045                end: m.end(),
1046                token: DateTimeToken::try_from(
1047                    u8::try_from(m.pattern().as_u32()).expect("match index fits in a u8"),
1048                )
1049                .expect("match pattern missing"),
1050            })
1051            .collect();
1052
1053        let mut out = Vec::new();
1054        let mut pos = 0;
1055        let mut in_quotes = false;
1056        for i in 0..matches.len() {
1057            // Any characters since the last match are to be taken literally.
1058            for c in s[pos..matches[i].start].chars() {
1059                if !(in_quotes && c == '\\') {
1060                    // Backslash is an escape character inside of quotes.
1061                    out.push(DateTimeFormatNode::Literal(c));
1062                }
1063            }
1064
1065            if in_quotes {
1066                // If we see a format specifier inside of a quoted block, it
1067                // is taken literally.
1068                for c in matches[i].token.as_literal().chars() {
1069                    out.push(DateTimeFormatNode::Literal(c))
1070                }
1071            } else if let Some(field) = matches[i].token.field() {
1072                // We found a format specifier. Look backwards for a fill mode
1073                // toggle (fill mode is on by default), and forwards for an
1074                // ordinal suffix specifier (default is no ordinal suffix).
1075                let fill = i == 0
1076                    || matches[i - 1].end != matches[i].start
1077                    || !matches[i - 1].token.is_fill_mode_toggle();
1078                let ordinal = match matches.get(i + 1) {
1079                    Some(m) if m.start == matches[i].end => m.token.ordinal_mode(),
1080                    _ => OrdinalMode::None,
1081                };
1082                out.push(DateTimeFormatNode::Field {
1083                    field,
1084                    fill,
1085                    ordinal,
1086                });
1087            }
1088
1089            if matches[i].token == DateTimeToken::Quote {
1090                in_quotes = !in_quotes;
1091            }
1092            pos = matches[i].end;
1093        }
1094        for c in s[pos..].chars() {
1095            out.push(DateTimeFormatNode::Literal(c));
1096        }
1097        DateTimeFormat(out)
1098    }
1099
1100    /// Renders the format string using the timestamp `ts` as the input. The
1101    /// placeholders in the format string will be filled in appropriately
1102    /// according to the value of `ts`.
1103    pub fn render(&self, ts: &impl TimestampLike) -> String {
1104        let mut out = String::new();
1105        for node in &self.0 {
1106            node.render(&mut out, ts)
1107                .expect("rendering to string cannot fail");
1108        }
1109        out
1110    }
1111}
1112
1113impl RustType<ProtoDateTimeFormat> for DateTimeFormat {
1114    fn into_proto(&self) -> ProtoDateTimeFormat {
1115        ProtoDateTimeFormat {
1116            nodes: self.0.iter().map(RustType::into_proto).collect(),
1117        }
1118    }
1119
1120    fn from_proto(proto: ProtoDateTimeFormat) -> Result<Self, TryFromProtoError> {
1121        let nodes = proto
1122            .nodes
1123            .into_iter()
1124            .map(RustType::from_proto)
1125            .collect::<Result<_, _>>()?;
1126        Ok(Self(nodes))
1127    }
1128}