mz_pgtz/
timezone.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.
9
10use std::cmp::Ordering;
11use std::fmt;
12
13use chrono::FixedOffset;
14use chrono_tz::Tz;
15use mz_lowertest::MzReflect;
16use mz_proto::chrono::{any_fixed_offset, any_timezone};
17use mz_proto::{RustType, TryFromProtoError};
18use proptest_derive::Arbitrary;
19use serde::{Deserialize, Serialize};
20use uncased::UncasedStr;
21
22use crate::abbrev::TIMEZONE_ABBREVS;
23
// Textually includes `mz_pgtz.timezone.rs` from the build output directory
// (`OUT_DIR`) into this module at compile time.
// NOTE(review): presumably this is where `ProtoTimezone` and `proto_timezone`,
// used by the `RustType` impl in this file, come from; the included file is
// not visible here, so confirm.
include!(concat!(env!("OUT_DIR"), "/mz_pgtz.timezone.rs"));

/// The SQL definition of the contents of the `mz_timezone_names` view.
///
/// The text is read at compile time from `timezone.gen.sql` in the build
/// output directory (`OUT_DIR`).
pub const MZ_CATALOG_TIMEZONE_NAMES_SQL: &str =
    include_str!(concat!(env!("OUT_DIR"), "/timezone.gen.sql"));
29
/// Parsed timezone.
///
/// A timezone is either a fixed offset from UTC or a named `chrono_tz` zone.
/// Use [`Timezone::parse`] to build one from text.
#[derive(Arbitrary, Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, MzReflect)]
pub enum Timezone {
    /// A fixed offset from UTC. Serialized through the `fixed_offset_serde`
    /// module, i.e. as the offset's `local_minus_utc()` value.
    #[serde(with = "fixed_offset_serde")]
    FixedOffset(#[proptest(strategy = "any_fixed_offset()")] FixedOffset),
    /// A named time zone from `chrono_tz`.
    Tz(#[proptest(strategy = "any_timezone()")] Tz),
}
37
38impl Timezone {
39    pub fn parse(tz: &str, spec: TimezoneSpec) -> Result<Self, String> {
40        build_timezone_offset_second(&tokenize_timezone(tz)?, tz, spec)
41    }
42}
43
44impl RustType<ProtoTimezone> for Timezone {
45    fn into_proto(&self) -> ProtoTimezone {
46        use proto_timezone::Kind;
47        ProtoTimezone {
48            kind: Some(match self {
49                Timezone::FixedOffset(fo) => Kind::FixedOffset(fo.into_proto()),
50                Timezone::Tz(tz) => Kind::Tz(tz.into_proto()),
51            }),
52        }
53    }
54
55    fn from_proto(proto: ProtoTimezone) -> Result<Self, TryFromProtoError> {
56        use proto_timezone::Kind;
57        let kind = proto
58            .kind
59            .ok_or_else(|| TryFromProtoError::missing_field("ProtoTimezone::kind"))?;
60        Ok(match kind {
61            Kind::FixedOffset(pof) => Timezone::FixedOffset(FixedOffset::from_proto(pof)?),
62            Kind::Tz(ptz) => Timezone::Tz(Tz::from_proto(ptz)?),
63        })
64    }
65}
66
67// We need to implement Serialize and Deserialize traits to include Timezone in the UnaryFunc enum.
68// FixedOffset doesn't implement these, even with the "serde" feature enabled.
69mod fixed_offset_serde {
70    use serde::de::Error;
71    use serde::{Deserializer, Serializer};
72
73    use super::*;
74
75    pub fn deserialize<'de, D: Deserializer<'de>>(
76        deserializer: D,
77    ) -> Result<FixedOffset, D::Error> {
78        let offset = i32::deserialize(deserializer)?;
79        FixedOffset::east_opt(offset).ok_or_else(|| {
80            Error::custom(format!("Invalid timezone offset: |{}| >= 86_400", offset))
81        })
82    }
83
84    pub fn serialize<S: Serializer>(
85        offset: &FixedOffset,
86        serializer: S,
87    ) -> Result<S::Ok, S::Error> {
88        serializer.serialize_i32(offset.local_minus_utc())
89    }
90}
91
92impl PartialOrd for Timezone {
93    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
94        Some(self.cmp(other))
95    }
96}
97
98// We need to implement Ord and PartialOrd to include Timezone in the UnaryFunc enum. Neither FixedOffset nor Tz
99// implement these so we do a simple ordinal comparison (FixedOffset variant < Tz variant), and break ties using
100// i32/str comparisons respectively.
101impl Ord for Timezone {
102    fn cmp(&self, other: &Self) -> Ordering {
103        use Timezone::*;
104        match (self, other) {
105            (FixedOffset(a), FixedOffset(b)) => a.local_minus_utc().cmp(&b.local_minus_utc()),
106            (Tz(a), Tz(b)) => a.name().cmp(b.name()),
107            (FixedOffset(_), Tz(_)) => Ordering::Less,
108            (Tz(_), FixedOffset(_)) => Ordering::Greater,
109        }
110    }
111}
112
113impl Default for Timezone {
114    fn default() -> Self {
115        Self::FixedOffset(FixedOffset::east_opt(0).unwrap())
116    }
117}
118
119impl fmt::Display for Timezone {
120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121        match self {
122            Timezone::FixedOffset(offset) => offset.fmt(f),
123            Timezone::Tz(tz) => tz.fmt(f),
124        }
125    }
126}
127
/// TimeStrToken represents valid tokens in time-like strings,
/// i.e those used in INTERVAL, TIMESTAMP/TZ, DATE, and TIME.
#[derive(Debug, Clone, PartialEq, Eq)]
enum TimeStrToken {
    /// A `-` sign.
    Dash,
    /// A `:` separator.
    Colon,
    /// A `+` sign.
    Plus,
    /// A `z`/`Z` found in the final position of the string.
    Zulu,
    /// A run of digits: the parsed value, and the number of digits the value
    /// was written with in the source string.
    Num(u64, usize),
    /// The remainder of the string, starting at its first letter.
    TzName(String),
    /// Whitespace or punctuation other than `-`, `+` and `:`.
    Delim,
}

impl fmt::Display for TimeStrToken {
    /// Renders the token as text.
    ///
    /// Numbers are zero-padded to their recorded digit count so that leading
    /// zeros are kept (`Num(5, 2)` prints as `05`). A value that needs more
    /// digits than recorded is printed in full.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            TimeStrToken::Dash => f.write_str("-"),
            TimeStrToken::Colon => f.write_str(":"),
            TimeStrToken::Plus => f.write_str("+"),
            TimeStrToken::Zulu => f.write_str("Z"),
            // Pad to `digits`, not `digits - 1`: the latter drops one leading
            // zero and underflows when `digits` is zero.
            TimeStrToken::Num(value, digits) => write!(f, "{:0width$}", value, width = *digits),
            TimeStrToken::TzName(name) => f.write_str(name),
            TimeStrToken::Delim => f.write_str(" "),
        }
    }
}
155
156fn tokenize_timezone(value: &str) -> Result<Vec<TimeStrToken>, String> {
157    let mut toks: Vec<TimeStrToken> = vec![];
158    let mut num_buf = String::with_capacity(4);
159    // If the timezone string has a colon, we need to parse all numbers naively.
160    // Otherwise we need to parse long sequences of digits as [..hhhhmm]
161    let split_nums: bool = !value.contains(':');
162
163    let value = value.trim_matches(|c: char| {
164        (c.is_ascii_whitespace() || c.is_ascii_punctuation()) && (c != '+' && c != '-')
165    });
166
167    // Takes a string and tries to parse it as a number token and insert it into
168    // the token list
169    fn parse_num(
170        toks: &mut Vec<TimeStrToken>,
171        n: &str,
172        split_nums: bool,
173        idx: usize,
174    ) -> Result<(), String> {
175        if n.is_empty() {
176            return Ok(());
177        }
178
179        let (first, second) = if n.len() > 2 && split_nums {
180            let (first, second) = n.split_at(n.len() - 2);
181            (first, Some(second))
182        } else {
183            (n, None)
184        };
185
186        toks.push(TimeStrToken::Num(
187            first.parse().map_err(|e| {
188                format!(
189                    "Unable to tokenize value {} as a number at index {}: {}",
190                    first, idx, e
191                )
192            })?,
193            first.len(),
194        ));
195
196        if let Some(second) = second {
197            toks.push(TimeStrToken::Num(
198                second.parse().map_err(|e| {
199                    format!(
200                        "Unable to tokenize value {} as a number at index {}: {}",
201                        second, idx, e
202                    )
203                })?,
204                second.len(),
205            ));
206        }
207
208        Ok(())
209    }
210
211    // Toggles whether or not we should skip whitespace. This would be nicer to
212    // do inline but ownership makes that annoying.
213    let mut space_skip_mode = false;
214    for (i, chr) in value.chars().enumerate() {
215        // Stay in space skip mode iff already in it and element is space.
216        if space_skip_mode && chr.is_ascii_whitespace() {
217            continue;
218        } else {
219            space_skip_mode = false;
220        }
221
222        match chr {
223            ':' => {
224                parse_num(&mut toks, &num_buf, split_nums, i)?;
225                num_buf.clear();
226                toks.push(TimeStrToken::Colon);
227            }
228            '-' => {
229                parse_num(&mut toks, &num_buf, split_nums, i)?;
230                num_buf.clear();
231                toks.push(TimeStrToken::Dash);
232                space_skip_mode = true;
233            }
234            '+' => {
235                parse_num(&mut toks, &num_buf, split_nums, i)?;
236                num_buf.clear();
237                toks.push(TimeStrToken::Plus);
238                space_skip_mode = true;
239            }
240            chr if (chr == 'z' || chr == 'Z') && (i == value.len() - 1) => {
241                parse_num(&mut toks, &num_buf, split_nums, i)?;
242                num_buf.clear();
243                toks.push(TimeStrToken::Zulu);
244            }
245            chr if chr.is_digit(10) => num_buf.push(chr),
246            chr if chr.is_ascii_alphabetic() => {
247                parse_num(&mut toks, &num_buf, split_nums, i)?;
248                let substring = &value[i..];
249                toks.push(TimeStrToken::TzName(substring.to_string()));
250                return Ok(toks);
251            }
252            // PG allows arbitrary punctuation marks, which represent delim
253            chr if chr.is_ascii_whitespace() || chr.is_ascii_punctuation() => {
254                parse_num(&mut toks, &num_buf, split_nums, i)?;
255                num_buf.clear();
256                toks.push(TimeStrToken::Delim);
257            }
258            chr => {
259                return Err(format!(
260                    "Error tokenizing timezone string ('{}'): invalid character {:?} at offset {}",
261                    value, chr, i
262                ));
263            }
264        }
265    }
266    parse_num(&mut toks, &num_buf, split_nums, 0)?;
267    Ok(toks)
268}
269
/// Selects how the sign of a numeric offset is interpreted when a timezone
/// string is parsed.
#[derive(Debug, Clone, Copy)]
pub enum TimezoneSpec {
    /// Offsets should be treated as an ISO 8601 time zone specification.
    ///
    /// A `+` offset is east of UTC and a `-` offset is west of UTC.
    Iso,
    /// Offsets should be treated as a POSIX-style time zone specification.
    ///
    /// The sign is inverted relative to ISO: a `+` offset is west of UTC and
    /// a `-` offset is east of UTC.
    Posix,
}
277
278fn build_timezone_offset_second(
279    tokens: &[TimeStrToken],
280    value: &str,
281    spec: TimezoneSpec,
282) -> Result<Timezone, String> {
283    use TimeStrToken::*;
284    let all_formats = [
285        vec![Plus, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
286        vec![Dash, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
287        vec![Plus, Num(0, 1), Colon, Num(0, 1)],
288        vec![Dash, Num(0, 1), Colon, Num(0, 1)],
289        vec![Plus, Num(0, 1), Num(0, 1), Num(0, 1)],
290        vec![Dash, Num(0, 1), Num(0, 1), Num(0, 1)],
291        vec![Plus, Num(0, 1), Num(0, 1)],
292        vec![Dash, Num(0, 1), Num(0, 1)],
293        vec![Plus, Num(0, 1)],
294        vec![Dash, Num(0, 1)],
295        vec![TzName("".to_string())],
296        vec![Zulu],
297    ];
298
299    let mut is_positive = true;
300    let mut hour_offset: Option<i32> = None;
301    let mut minute_offset: Option<i32> = None;
302    let mut second_offset: Option<i32> = None;
303
304    for format in all_formats.iter() {
305        let actual = tokens.iter();
306
307        if actual.len() != format.len() {
308            continue;
309        }
310
311        for (i, (atok, etok)) in actual.zip(format).enumerate() {
312            match (atok, etok) {
313                (Colon, Colon) | (Plus, Plus) => { /* Matching punctuation */ }
314                (Dash, Dash) => {
315                    is_positive = false;
316                }
317                (Num(val, _), Num(_, _)) => {
318                    let val = *val;
319                    match (hour_offset, minute_offset, second_offset) {
320                        (None, None, None) => {
321                            // Postgres allows timezones in the range -15:59:59..15:59:59
322                            if val <= 15 {
323                                hour_offset = Some(i32::try_from(val).expect(
324                                    "number between 0 and 15 should fit in signed 32-bit integer",
325                                ));
326                            } else {
327                                return Err(format!(
328                                    "Invalid timezone string ({}): timezone hour invalid {}",
329                                    value, val
330                                ));
331                            }
332                        }
333                        (Some(_), None, None) => {
334                            if val < 60 {
335                                minute_offset = Some(i32::try_from(val).expect(
336                                    "number between 0 and 59 should fit in signed 32-bit integer",
337                                ));
338                            } else {
339                                return Err(format!(
340                                    "Invalid timezone string ({}): timezone minute invalid {}",
341                                    value, val
342                                ));
343                            }
344                        }
345                        (Some(_), Some(_), None) => {
346                            if val < 60 {
347                                second_offset = Some(i32::try_from(val).expect(
348                                    "number between 0 and 59 should fit in signed 32-bit integer",
349                                ));
350                            } else {
351                                return Err(format!(
352                                    "Invalid timezone string ({}): timezone second invalid {}",
353                                    value, val
354                                ));
355                            }
356                        }
357                        // We've already seen an hour a minute and a second so we should
358                        // never see another number
359                        (Some(_), Some(_), Some(_)) => {
360                            return Err(format!(
361                                "Invalid timezone string ({}): invalid value {} at token index {}",
362                                value, val, i
363                            ));
364                        }
365                        _ => unreachable!("parsed a minute before an hour!"),
366                    }
367                }
368                (Zulu, Zulu) => return Ok(Default::default()),
369                (TzName(val), TzName(_)) => {
370                    if let Some(abbrev) = TIMEZONE_ABBREVS.get(UncasedStr::new(val)) {
371                        return Ok(abbrev.timezone());
372                    }
373
374                    return match Tz::from_str_insensitive(val) {
375                        Ok(tz) => Ok(Timezone::Tz(tz)),
376                        Err(err) => Err(format!(
377                            "Invalid timezone string ({}): {}. \
378                            Failed to parse {} at token index {}",
379                            value, err, val, i
380                        )),
381                    };
382                }
383                (_, _) => {
384                    // Theres a mismatch between this format and the actual
385                    // token stream Stop trying to parse in this format and go
386                    // to the next one
387                    is_positive = true;
388                    hour_offset = None;
389                    minute_offset = None;
390                    second_offset = None;
391                    break;
392                }
393            }
394        }
395
396        // Return the first valid parsed result
397        if let Some(hour_offset) = hour_offset {
398            let mut tz_offset_second = hour_offset * 60 * 60;
399
400            if let Some(minute_offset) = minute_offset {
401                tz_offset_second += minute_offset * 60;
402            }
403
404            if let Some(second_offset) = second_offset {
405                tz_offset_second += second_offset;
406            }
407
408            let offset = match (is_positive, spec) {
409                (true, TimezoneSpec::Iso) | (false, TimezoneSpec::Posix) => {
410                    FixedOffset::east_opt(tz_offset_second).unwrap()
411                }
412                (false, TimezoneSpec::Iso) | (true, TimezoneSpec::Posix) => {
413                    FixedOffset::west_opt(tz_offset_second).unwrap()
414                }
415            };
416
417            return Ok(Timezone::FixedOffset(offset));
418        }
419    }
420
421    Err(format!("Cannot parse timezone offset {}", value))
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `Timezone::parse` under the ISO interpretation against inputs
    /// that must parse to a specific timezone and inputs that must be rejected.
    #[mz_ore::test]
    fn test_parse_timezone_offset_second() {
        // Shorthands for the expected values: a fixed offset east or west of
        // UTC by the given number of seconds.
        let east = |secs: i32| Timezone::FixedOffset(FixedOffset::east_opt(secs).unwrap());
        let west = |secs: i32| Timezone::FixedOffset(FixedOffset::west_opt(secs).unwrap());

        let test_cases = [
            ("+0:00", east(0)),
            ("-0:00", east(0)),
            ("+0:000000", east(0)),
            ("+000000:00", east(0)),
            ("+000000:000000", east(0)),
            ("+0", east(0)),
            ("+00", east(0)),
            ("+000", east(0)),
            ("+0000", east(0)),
            ("+00000000", east(0)),
            ("+0000001:000000", east(3600)),
            ("+0000000:000001", east(60)),
            ("+0000001:000001", east(3660)),
            ("+0000001:000001:000001", east(3661)),
            ("+4:00", east(14400)),
            ("-4:00", west(14400)),
            ("+2:30", east(9000)),
            ("-5:15", west(18900)),
            ("+0:20", east(1200)),
            ("-0:20", west(1200)),
            ("+0:0:20", east(20)),
            ("+5", east(18000)),
            ("-5", west(18000)),
            ("+05", east(18000)),
            ("-05", west(18000)),
            ("+500", east(18000)),
            ("-500", west(18000)),
            ("+530", east(19800)),
            ("-530", west(19800)),
            ("+050", east(3000)),
            ("-050", west(3000)),
            ("+15", east(54000)),
            ("-15", west(54000)),
            ("+1515", east(54900)),
            ("+15:15:15", east(54915)),
            ("+015", east(900)),
            ("-015", west(900)),
            ("+0015", east(900)),
            ("-0015", west(900)),
            ("+00015", east(900)),
            ("-00015", west(900)),
            ("+005", east(300)),
            ("-005", west(300)),
            ("+0000005", east(300)),
            ("+00000100", east(3600)),
            ("Z", east(0)),
            ("z", east(0)),
            ("UTC", east(0)),
            ("Pacific/Auckland", Timezone::Tz(Tz::Pacific__Auckland)),
            ("America/New_York", Timezone::Tz(Tz::America__New_York)),
            ("America/Los_Angeles", Timezone::Tz(Tz::America__Los_Angeles)),
            ("utc", east(0)),
            ("pAcIfIc/AUcKlAnD", Timezone::Tz(Tz::Pacific__Auckland)),
            ("AMERICA/NEW_YORK", Timezone::Tz(Tz::America__New_York)),
            ("america/los_angeles", Timezone::Tz(Tz::America__Los_Angeles)),
            // Formatting test cases
            ("+5:", east(18000)),
            ("-5:15:", west(18900)),
            ("-   5:15:", west(18900)),
            (" ! ? ! - 5:15 ? ! ? ", west(18900)),
            (" UTC", east(0)),
            (" UTC ", east(0)),
            (" ? UTC ! ", east(0)),
        ];

        for (input, expected) in &test_cases {
            let parsed = Timezone::parse(input, TimezoneSpec::Iso).unwrap_or_else(|e| {
                panic!(
                    "Test failed when expected to pass test case: {} error: {}",
                    input, e
                )
            });
            assert_eq!(&parsed, expected);
        }

        let failure_test_cases = [
            "+25:00", "+120:00", "+0:61", "+0:500", " 12:30", "+-12:30", "+2525", "+2561",
            "+255900", "+25", "+5::30", "++5:00", "--5:00", "a", "zzz", "ZZZ", "ZZ Top", " +",
            " -", " ", "1", "12", "1234", "+16", "-17", "-14:60", "1:30:60",
        ];

        for input in &failure_test_cases {
            let error = match Timezone::parse(input, TimezoneSpec::Iso) {
                Ok(parsed) => panic!(
                    "Test passed when expected to fail test case: {} parsed tz offset (seconds): {}",
                    input, parsed
                ),
                Err(e) => e,
            };
            // Printed for inspection only; the message text is not asserted.
            println!("{}", error);
        }
    }
}