mz_pgtz/
timezone.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::cmp::Ordering;
11use std::fmt;
12
13use chrono::FixedOffset;
14use chrono_tz::Tz;
15use itertools::Itertools;
16use mz_lowertest::MzReflect;
17use mz_proto::chrono::{any_fixed_offset, any_timezone};
18use mz_proto::{RustType, TryFromProtoError};
19use proptest_derive::Arbitrary;
20use serde::{Deserialize, Serialize};
21use uncased::UncasedStr;
22
23use crate::abbrev::TIMEZONE_ABBREVS;
24
// Splices in the file `mz_pgtz.timezone.rs` that is generated into `OUT_DIR` at build time.
// NOTE(review): presumably this is where `ProtoTimezone` and `proto_timezone` are
// defined -- confirm against the build script.
include!(concat!(env!("OUT_DIR"), "/mz_pgtz.timezone.rs"));

/// The SQL definition of the contents of the `mz_timezone_names` view.
///
/// The text is embedded at compile time from `timezone.gen.sql` in `OUT_DIR`.
pub const MZ_CATALOG_TIMEZONE_NAMES_SQL: &str =
    include_str!(concat!(env!("OUT_DIR"), "/timezone.gen.sql"));
30
/// Parsed timezone.
///
/// A value is either a constant offset from UTC or a named `chrono_tz` zone. Values are
/// obtained from text with [`Timezone::parse`].
#[derive(Arbitrary, Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, MzReflect)]
pub enum Timezone {
    /// A constant offset from UTC.
    ///
    /// Serialized through `fixed_offset_serde`, i.e. as the `i32` returned by
    /// `FixedOffset::local_minus_utc`.
    #[serde(with = "fixed_offset_serde")]
    FixedOffset(#[proptest(strategy = "any_fixed_offset()")] FixedOffset),
    /// A named timezone from `chrono_tz`.
    Tz(#[proptest(strategy = "any_timezone()")] Tz),
}
38
39impl Timezone {
40    pub fn parse(tz: &str, spec: TimezoneSpec) -> Result<Self, String> {
41        build_timezone_offset_second(&tokenize_timezone(tz)?, tz, spec)
42    }
43}
44
45impl RustType<ProtoTimezone> for Timezone {
46    fn into_proto(&self) -> ProtoTimezone {
47        use proto_timezone::Kind;
48        ProtoTimezone {
49            kind: Some(match self {
50                Timezone::FixedOffset(fo) => Kind::FixedOffset(fo.into_proto()),
51                Timezone::Tz(tz) => Kind::Tz(tz.into_proto()),
52            }),
53        }
54    }
55
56    fn from_proto(proto: ProtoTimezone) -> Result<Self, TryFromProtoError> {
57        use proto_timezone::Kind;
58        let kind = proto
59            .kind
60            .ok_or_else(|| TryFromProtoError::missing_field("ProtoTimezone::kind"))?;
61        Ok(match kind {
62            Kind::FixedOffset(pof) => Timezone::FixedOffset(FixedOffset::from_proto(pof)?),
63            Kind::Tz(ptz) => Timezone::Tz(Tz::from_proto(ptz)?),
64        })
65    }
66}
67
68// We need to implement Serialize and Deserialize traits to include Timezone in the UnaryFunc enum.
69// FixedOffset doesn't implement these, even with the "serde" feature enabled.
70mod fixed_offset_serde {
71    use serde::de::Error;
72    use serde::{Deserializer, Serializer};
73
74    use super::*;
75
76    pub fn deserialize<'de, D: Deserializer<'de>>(
77        deserializer: D,
78    ) -> Result<FixedOffset, D::Error> {
79        let offset = i32::deserialize(deserializer)?;
80        FixedOffset::east_opt(offset).ok_or_else(|| {
81            Error::custom(format!("Invalid timezone offset: |{}| >= 86_400", offset))
82        })
83    }
84
85    pub fn serialize<S: Serializer>(
86        offset: &FixedOffset,
87        serializer: S,
88    ) -> Result<S::Ok, S::Error> {
89        serializer.serialize_i32(offset.local_minus_utc())
90    }
91}
92
// Delegates to the total order from the `Ord` impl, so the result is always `Some` and
// the two orderings cannot disagree.
impl PartialOrd for Timezone {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
98
99// We need to implement Ord and PartialOrd to include Timezone in the UnaryFunc enum. Neither FixedOffset nor Tz
100// implement these so we do a simple ordinal comparison (FixedOffset variant < Tz variant), and break ties using
101// i32/str comparisons respectively.
102impl Ord for Timezone {
103    fn cmp(&self, other: &Self) -> Ordering {
104        use Timezone::*;
105        match (self, other) {
106            (FixedOffset(a), FixedOffset(b)) => a.local_minus_utc().cmp(&b.local_minus_utc()),
107            (Tz(a), Tz(b)) => a.name().cmp(b.name()),
108            (FixedOffset(_), Tz(_)) => Ordering::Less,
109            (Tz(_), FixedOffset(_)) => Ordering::Greater,
110        }
111    }
112}
113
114impl Default for Timezone {
115    fn default() -> Self {
116        Self::FixedOffset(FixedOffset::east_opt(0).unwrap())
117    }
118}
119
120impl fmt::Display for Timezone {
121    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122        match self {
123            Timezone::FixedOffset(offset) => offset.fmt(f),
124            Timezone::Tz(tz) => tz.fmt(f),
125        }
126    }
127}
128
/// TimeStrToken represents valid tokens in time-like strings,
/// i.e those used in INTERVAL, TIMESTAMP/TZ, DATE, and TIME.
#[derive(Debug, Clone, PartialEq, Eq)]
enum TimeStrToken {
    /// A `-` sign.
    Dash,
    /// A `:` separator.
    Colon,
    /// A `+` sign.
    Plus,
    /// A `z`/`Z` in the final position of the input.
    Zulu,
    /// A number, stored as its value and the count of digits it was written with.
    Num(u64, usize),
    /// The alphabetic remainder of the input, to be resolved as a name or abbreviation.
    TzName(String),
    /// Whitespace or any punctuation other than `:`, `-` and `+`.
    Delim,
}

impl fmt::Display for TimeStrToken {
    /// Renders the token as text.
    ///
    /// A `Num` is zero-padded to the digit count it carries so that leading zeros are
    /// reproduced (`Num(5, 2)` prints `05`). The count is only a minimum width: a value
    /// that needs more digits is printed in full, and a count of zero is harmless.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Dash => f.write_str("-"),
            Self::Colon => f.write_str(":"),
            Self::Plus => f.write_str("+"),
            Self::Zulu => f.write_str("Z"),
            Self::Num(value, digits) => write!(f, "{:0width$}", value, width = *digits),
            Self::TzName(name) => f.write_str(name),
            Self::Delim => f.write_str(" "),
        }
    }
}
156
157fn tokenize_timezone(value: &str) -> Result<Vec<TimeStrToken>, String> {
158    let mut toks: Vec<TimeStrToken> = vec![];
159    let mut num_buf = String::with_capacity(4);
160    // If the timezone string has a colon, we need to parse all numbers naively.
161    // Otherwise we need to parse long sequences of digits as [..hhhhmm]
162    let split_nums: bool = !value.contains(':');
163
164    let value = value.trim_matches(|c: char| {
165        (c.is_ascii_whitespace() || c.is_ascii_punctuation()) && (c != '+' && c != '-')
166    });
167
168    // Takes a string and tries to parse it as a number token and insert it into
169    // the token list
170    fn parse_num(
171        toks: &mut Vec<TimeStrToken>,
172        n: &str,
173        split_nums: bool,
174        idx: usize,
175    ) -> Result<(), String> {
176        if n.is_empty() {
177            return Ok(());
178        }
179
180        let (first, second) = if n.len() > 2 && split_nums {
181            let (first, second) = n.split_at(n.len() - 2);
182            (first, Some(second))
183        } else {
184            (n, None)
185        };
186
187        toks.push(TimeStrToken::Num(
188            first.parse().map_err(|e| {
189                format!(
190                    "Unable to tokenize value {} as a number at index {}: {}",
191                    first, idx, e
192                )
193            })?,
194            first.len(),
195        ));
196
197        if let Some(second) = second {
198            toks.push(TimeStrToken::Num(
199                second.parse().map_err(|e| {
200                    format!(
201                        "Unable to tokenize value {} as a number at index {}: {}",
202                        second, idx, e
203                    )
204                })?,
205                second.len(),
206            ));
207        }
208
209        Ok(())
210    }
211
212    // Toggles whether or not we should skip whitespace. This would be nicer to
213    // do inline but ownership makes that annoying.
214    let mut space_skip_mode = false;
215    for (i, chr) in value.char_indices() {
216        // Stay in space skip mode iff already in it and element is space.
217        if space_skip_mode && chr.is_ascii_whitespace() {
218            continue;
219        } else {
220            space_skip_mode = false;
221        }
222
223        match chr {
224            ':' => {
225                parse_num(&mut toks, &num_buf, split_nums, i)?;
226                num_buf.clear();
227                toks.push(TimeStrToken::Colon);
228            }
229            '-' => {
230                parse_num(&mut toks, &num_buf, split_nums, i)?;
231                num_buf.clear();
232                toks.push(TimeStrToken::Dash);
233                space_skip_mode = true;
234            }
235            '+' => {
236                parse_num(&mut toks, &num_buf, split_nums, i)?;
237                num_buf.clear();
238                toks.push(TimeStrToken::Plus);
239                space_skip_mode = true;
240            }
241            chr if (chr == 'z' || chr == 'Z') && (i == value.len() - 1) => {
242                parse_num(&mut toks, &num_buf, split_nums, i)?;
243                num_buf.clear();
244                toks.push(TimeStrToken::Zulu);
245            }
246            chr if chr.is_digit(10) => num_buf.push(chr),
247            chr if chr.is_ascii_alphabetic() => {
248                parse_num(&mut toks, &num_buf, split_nums, i)?;
249                let substring = &value[i..];
250                toks.push(TimeStrToken::TzName(substring.to_string()));
251                return Ok(toks);
252            }
253            // PG allows arbitrary punctuation marks, which represent delim
254            chr if chr.is_ascii_whitespace() || chr.is_ascii_punctuation() => {
255                parse_num(&mut toks, &num_buf, split_nums, i)?;
256                num_buf.clear();
257                toks.push(TimeStrToken::Delim);
258            }
259            chr => {
260                return Err(format!(
261                    "Error tokenizing timezone string ('{}'): invalid character {:?} at offset {}",
262                    value, chr, i
263                ));
264            }
265        }
266    }
267    parse_num(&mut toks, &num_buf, split_nums, 0)?;
268    Ok(toks)
269}
270
/// Selects how the sign of a numeric timezone offset is interpreted when parsing.
///
/// In `build_timezone_offset_second`, a `+` offset under `Iso` (or a `-` offset under
/// `Posix`) produces an offset east of UTC; the opposite combinations produce an offset
/// west of UTC.
#[derive(Debug, Clone, Copy)]
pub enum TimezoneSpec {
    /// Offsets should be treated as an ISO 8601 time zone specification.
    Iso,
    /// Offsets should be treated as a POSIX-style time zone specification.
    Posix,
}
278
279fn build_timezone_offset_second(
280    tokens: &[TimeStrToken],
281    value: &str,
282    spec: TimezoneSpec,
283) -> Result<Timezone, String> {
284    use TimeStrToken::*;
285    static ALL_FORMATS: [&[TimeStrToken]; 12] = [
286        &[Plus, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
287        &[Dash, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
288        &[Plus, Num(0, 1), Colon, Num(0, 1)],
289        &[Dash, Num(0, 1), Colon, Num(0, 1)],
290        &[Plus, Num(0, 1), Num(0, 1), Num(0, 1)],
291        &[Dash, Num(0, 1), Num(0, 1), Num(0, 1)],
292        &[Plus, Num(0, 1), Num(0, 1)],
293        &[Dash, Num(0, 1), Num(0, 1)],
294        &[Plus, Num(0, 1)],
295        &[Dash, Num(0, 1)],
296        &[TzName(String::new())],
297        &[Zulu],
298    ];
299
300    let mut is_positive = true;
301    let mut hour_offset: Option<i32> = None;
302    let mut minute_offset: Option<i32> = None;
303    let mut second_offset: Option<i32> = None;
304
305    for format in ALL_FORMATS {
306        let actual = tokens.iter();
307
308        if actual.len() != format.len() {
309            continue;
310        }
311
312        for (i, (atok, etok)) in actual.zip_eq(format).enumerate() {
313            match (atok, etok) {
314                (Colon, Colon) | (Plus, Plus) => { /* Matching punctuation */ }
315                (Dash, Dash) => {
316                    is_positive = false;
317                }
318                (Num(val, _), Num(_, _)) => {
319                    let val = *val;
320                    match (hour_offset, minute_offset, second_offset) {
321                        (None, None, None) => {
322                            // Postgres allows timezones in the range -15:59:59..15:59:59
323                            if val <= 15 {
324                                hour_offset = Some(i32::try_from(val).expect(
325                                    "number between 0 and 15 should fit in signed 32-bit integer",
326                                ));
327                            } else {
328                                return Err(format!(
329                                    "Invalid timezone string ({}): timezone hour invalid {}",
330                                    value, val
331                                ));
332                            }
333                        }
334                        (Some(_), None, None) => {
335                            if val < 60 {
336                                minute_offset = Some(i32::try_from(val).expect(
337                                    "number between 0 and 59 should fit in signed 32-bit integer",
338                                ));
339                            } else {
340                                return Err(format!(
341                                    "Invalid timezone string ({}): timezone minute invalid {}",
342                                    value, val
343                                ));
344                            }
345                        }
346                        (Some(_), Some(_), None) => {
347                            if val < 60 {
348                                second_offset = Some(i32::try_from(val).expect(
349                                    "number between 0 and 59 should fit in signed 32-bit integer",
350                                ));
351                            } else {
352                                return Err(format!(
353                                    "Invalid timezone string ({}): timezone second invalid {}",
354                                    value, val
355                                ));
356                            }
357                        }
358                        // We've already seen an hour a minute and a second so we should
359                        // never see another number
360                        (Some(_), Some(_), Some(_)) => {
361                            return Err(format!(
362                                "Invalid timezone string ({}): invalid value {} at token index {}",
363                                value, val, i
364                            ));
365                        }
366                        _ => unreachable!("parsed a minute before an hour!"),
367                    }
368                }
369                (Zulu, Zulu) => return Ok(Default::default()),
370                (TzName(val), TzName(_)) => {
371                    if let Some(abbrev) = TIMEZONE_ABBREVS.get(UncasedStr::new(val)) {
372                        return Ok(abbrev.timezone());
373                    }
374
375                    return match Tz::from_str_insensitive(val) {
376                        Ok(tz) => Ok(Timezone::Tz(tz)),
377                        Err(err) => Err(format!(
378                            "Invalid timezone string ({}): {}. \
379                            Failed to parse {} at token index {}",
380                            value, err, val, i
381                        )),
382                    };
383                }
384                (_, _) => {
385                    // Theres a mismatch between this format and the actual
386                    // token stream Stop trying to parse in this format and go
387                    // to the next one
388                    is_positive = true;
389                    hour_offset = None;
390                    minute_offset = None;
391                    second_offset = None;
392                    break;
393                }
394            }
395        }
396
397        // Return the first valid parsed result
398        if let Some(hour_offset) = hour_offset {
399            let mut tz_offset_second = hour_offset * 60 * 60;
400
401            if let Some(minute_offset) = minute_offset {
402                tz_offset_second += minute_offset * 60;
403            }
404
405            if let Some(second_offset) = second_offset {
406                tz_offset_second += second_offset;
407            }
408
409            let offset = match (is_positive, spec) {
410                (true, TimezoneSpec::Iso) | (false, TimezoneSpec::Posix) => {
411                    FixedOffset::east_opt(tz_offset_second).unwrap()
412                }
413                (false, TimezoneSpec::Iso) | (true, TimezoneSpec::Posix) => {
414                    FixedOffset::west_opt(tz_offset_second).unwrap()
415                }
416            };
417
418            return Ok(Timezone::FixedOffset(offset));
419        }
420    }
421
422    Err(format!("Cannot parse timezone offset {}", value))
423}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `Timezone::parse` under ISO sign handling against a table of inputs that
    /// must parse to a specific timezone and a list of inputs that must be rejected.
    #[mz_ore::test]
    fn test_parse_timezone_offset_second() {
        use Timezone::{FixedOffset as F, Tz as T};

        // Shorthands for the expected fixed offsets, in seconds east/west of UTC.
        let east = |secs: i32| F(FixedOffset::east_opt(secs).unwrap());
        let west = |secs: i32| F(FixedOffset::west_opt(secs).unwrap());

        let test_cases = [
            ("+0:00", east(0)),
            ("-0:00", east(0)),
            ("+0:000000", east(0)),
            ("+000000:00", east(0)),
            ("+000000:000000", east(0)),
            ("+0", east(0)),
            ("+00", east(0)),
            ("+000", east(0)),
            ("+0000", east(0)),
            ("+00000000", east(0)),
            ("+0000001:000000", east(3600)),
            ("+0000000:000001", east(60)),
            ("+0000001:000001", east(3660)),
            ("+0000001:000001:000001", east(3661)),
            ("+4:00", east(14400)),
            ("-4:00", west(14400)),
            ("+2:30", east(9000)),
            ("-5:15", west(18900)),
            ("+0:20", east(1200)),
            ("-0:20", west(1200)),
            ("+0:0:20", east(20)),
            ("+5", east(18000)),
            ("-5", west(18000)),
            ("+05", east(18000)),
            ("-05", west(18000)),
            ("+500", east(18000)),
            ("-500", west(18000)),
            ("+530", east(19800)),
            ("-530", west(19800)),
            ("+050", east(3000)),
            ("-050", west(3000)),
            ("+15", east(54000)),
            ("-15", west(54000)),
            ("+1515", east(54900)),
            ("+15:15:15", east(54915)),
            ("+015", east(900)),
            ("-015", west(900)),
            ("+0015", east(900)),
            ("-0015", west(900)),
            ("+00015", east(900)),
            ("-00015", west(900)),
            ("+005", east(300)),
            ("-005", west(300)),
            ("+0000005", east(300)),
            ("+00000100", east(3600)),
            ("Z", east(0)),
            ("z", east(0)),
            ("UTC", east(0)),
            ("Pacific/Auckland", T(Tz::Pacific__Auckland)),
            ("America/New_York", T(Tz::America__New_York)),
            ("America/Los_Angeles", T(Tz::America__Los_Angeles)),
            ("utc", east(0)),
            ("pAcIfIc/AUcKlAnD", T(Tz::Pacific__Auckland)),
            ("AMERICA/NEW_YORK", T(Tz::America__New_York)),
            ("america/los_angeles", T(Tz::America__Los_Angeles)),
            // Formatting test cases
            ("+5:", east(18000)),
            ("-5:15:", west(18900)),
            ("-   5:15:", west(18900)),
            (" ! ? ! - 5:15 ? ! ? ", west(18900)),
            (" UTC", east(0)),
            (" UTC ", east(0)),
            (" ? UTC ! ", east(0)),
        ];

        for (timezone, expected) in &test_cases {
            let parsed = Timezone::parse(timezone, TimezoneSpec::Iso).unwrap_or_else(|e| {
                panic!(
                    "Test failed when expected to pass test case: {} error: {}",
                    timezone, e
                )
            });
            assert_eq!(&parsed, expected);
        }

        let failure_test_cases = [
            "+25:00", "+120:00", "+0:61", "+0:500", " 12:30", "+-12:30", "+2525", "+2561",
            "+255900", "+25", "+5::30", "++5:00", "--5:00", "a", "zzz", "ZZZ", "ZZ Top", " +",
            " -", " ", "1", "12", "1234", "+16", "-17", "-14:60", "1:30:60",
        ];

        for test in &failure_test_cases {
            match Timezone::parse(test, TimezoneSpec::Iso) {
                // Rejection is the expected outcome; the message is printed for inspection.
                Err(e) => println!("{}", e),
                Ok(t) => panic!(
                    "Test passed when expected to fail test case: {} parsed tz offset (seconds): {}",
                    test, t
                ),
            }
        }
    }
}