mz_pgtz/
timezone.rs

// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.
9
10use std::cmp::Ordering;
11use std::fmt;
12
13use chrono::FixedOffset;
14use chrono_tz::Tz;
15use itertools::Itertools;
16use mz_lowertest::MzReflect;
17use serde::{Deserialize, Serialize};
18use uncased::UncasedStr;
19
20use crate::abbrev::TIMEZONE_ABBREVS;
21
22/// The SQL definition of the contents of the `mz_timezone_names` view.
23pub const MZ_CATALOG_TIMEZONE_NAMES_SQL: &str =
24    include_str!(concat!(env!("OUT_DIR"), "/timezone.gen.sql"));
25
26/// Parsed timezone.
27#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, MzReflect)]
28pub enum Timezone {
29    #[serde(with = "fixed_offset_serde")]
30    FixedOffset(FixedOffset),
31    Tz(Tz),
32}
33
34impl Timezone {
35    pub fn parse(tz: &str, spec: TimezoneSpec) -> Result<Self, String> {
36        build_timezone_offset_second(&tokenize_timezone(tz)?, tz, spec)
37    }
38}
39
40// We need to implement Serialize and Deserialize traits to include Timezone in the UnaryFunc enum.
41// FixedOffset doesn't implement these, even with the "serde" feature enabled.
42mod fixed_offset_serde {
43    use serde::de::Error;
44    use serde::{Deserializer, Serializer};
45
46    use super::*;
47
48    pub fn deserialize<'de, D: Deserializer<'de>>(
49        deserializer: D,
50    ) -> Result<FixedOffset, D::Error> {
51        let offset = i32::deserialize(deserializer)?;
52        FixedOffset::east_opt(offset).ok_or_else(|| {
53            Error::custom(format!("Invalid timezone offset: |{}| >= 86_400", offset))
54        })
55    }
56
57    pub fn serialize<S: Serializer>(
58        offset: &FixedOffset,
59        serializer: S,
60    ) -> Result<S::Ok, S::Error> {
61        serializer.serialize_i32(offset.local_minus_utc())
62    }
63}
64
65impl PartialOrd for Timezone {
66    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
67        Some(self.cmp(other))
68    }
69}
70
71// We need to implement Ord and PartialOrd to include Timezone in the UnaryFunc enum. Neither FixedOffset nor Tz
72// implement these so we do a simple ordinal comparison (FixedOffset variant < Tz variant), and break ties using
73// i32/str comparisons respectively.
74impl Ord for Timezone {
75    fn cmp(&self, other: &Self) -> Ordering {
76        use Timezone::*;
77        match (self, other) {
78            (FixedOffset(a), FixedOffset(b)) => a.local_minus_utc().cmp(&b.local_minus_utc()),
79            (Tz(a), Tz(b)) => a.name().cmp(b.name()),
80            (FixedOffset(_), Tz(_)) => Ordering::Less,
81            (Tz(_), FixedOffset(_)) => Ordering::Greater,
82        }
83    }
84}
85
86impl Default for Timezone {
87    fn default() -> Self {
88        Self::FixedOffset(FixedOffset::east_opt(0).unwrap())
89    }
90}
91
92impl fmt::Display for Timezone {
93    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94        match self {
95            Timezone::FixedOffset(offset) => offset.fmt(f),
96            Timezone::Tz(tz) => tz.fmt(f),
97        }
98    }
99}
100
/// A lexical token of a time-like string, i.e. one of those used in
/// INTERVAL, TIMESTAMP/TZ, DATE, and TIME values.
#[derive(Clone, Debug, Eq, PartialEq)]
enum TimeStrToken {
    /// The `-` character.
    Dash,
    /// The `:` character.
    Colon,
    /// The `+` character.
    Plus,
    /// A `z`/`Z` in the final position of the input, which parses as UTC.
    Zulu,
    /// A run of digits: the parsed value and the number of digits it was
    /// written with.
    Num(u64, usize),
    /// The remainder of the input starting at an alphabetic character; a
    /// candidate time zone name or abbreviation.
    TzName(String),
    /// Whitespace or any other punctuation acting as a separator.
    Delim,
}
113
114impl std::fmt::Display for TimeStrToken {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        use TimeStrToken::*;
117        match self {
118            Dash => write!(f, "-"),
119            Colon => write!(f, ":"),
120            Plus => write!(f, "+"),
121            Zulu => write!(f, "Z"),
122            Num(i, digits) => write!(f, "{:01$}", i, digits - 1),
123            TzName(n) => write!(f, "{}", n),
124            Delim => write!(f, " "),
125        }
126    }
127}
128
129fn tokenize_timezone(value: &str) -> Result<Vec<TimeStrToken>, String> {
130    let mut toks: Vec<TimeStrToken> = vec![];
131    let mut num_buf = String::with_capacity(4);
132    // If the timezone string has a colon, we need to parse all numbers naively.
133    // Otherwise we need to parse long sequences of digits as [..hhhhmm]
134    let split_nums: bool = !value.contains(':');
135
136    let value = value.trim_matches(|c: char| {
137        (c.is_ascii_whitespace() || c.is_ascii_punctuation()) && (c != '+' && c != '-')
138    });
139
140    // Takes a string and tries to parse it as a number token and insert it into
141    // the token list
142    fn parse_num(
143        toks: &mut Vec<TimeStrToken>,
144        n: &str,
145        split_nums: bool,
146        idx: usize,
147    ) -> Result<(), String> {
148        if n.is_empty() {
149            return Ok(());
150        }
151
152        let (first, second) = if n.len() > 2 && split_nums {
153            let (first, second) = n.split_at(n.len() - 2);
154            (first, Some(second))
155        } else {
156            (n, None)
157        };
158
159        toks.push(TimeStrToken::Num(
160            first.parse().map_err(|e| {
161                format!(
162                    "Unable to tokenize value {} as a number at index {}: {}",
163                    first, idx, e
164                )
165            })?,
166            first.len(),
167        ));
168
169        if let Some(second) = second {
170            toks.push(TimeStrToken::Num(
171                second.parse().map_err(|e| {
172                    format!(
173                        "Unable to tokenize value {} as a number at index {}: {}",
174                        second, idx, e
175                    )
176                })?,
177                second.len(),
178            ));
179        }
180
181        Ok(())
182    }
183
184    // Toggles whether or not we should skip whitespace. This would be nicer to
185    // do inline but ownership makes that annoying.
186    let mut space_skip_mode = false;
187    for (i, chr) in value.char_indices() {
188        // Stay in space skip mode iff already in it and element is space.
189        if space_skip_mode && chr.is_ascii_whitespace() {
190            continue;
191        } else {
192            space_skip_mode = false;
193        }
194
195        match chr {
196            ':' => {
197                parse_num(&mut toks, &num_buf, split_nums, i)?;
198                num_buf.clear();
199                toks.push(TimeStrToken::Colon);
200            }
201            '-' => {
202                parse_num(&mut toks, &num_buf, split_nums, i)?;
203                num_buf.clear();
204                toks.push(TimeStrToken::Dash);
205                space_skip_mode = true;
206            }
207            '+' => {
208                parse_num(&mut toks, &num_buf, split_nums, i)?;
209                num_buf.clear();
210                toks.push(TimeStrToken::Plus);
211                space_skip_mode = true;
212            }
213            chr if (chr == 'z' || chr == 'Z') && (i == value.len() - 1) => {
214                parse_num(&mut toks, &num_buf, split_nums, i)?;
215                num_buf.clear();
216                toks.push(TimeStrToken::Zulu);
217            }
218            chr if chr.is_digit(10) => num_buf.push(chr),
219            chr if chr.is_ascii_alphabetic() => {
220                parse_num(&mut toks, &num_buf, split_nums, i)?;
221                let substring = &value[i..];
222                toks.push(TimeStrToken::TzName(substring.to_string()));
223                return Ok(toks);
224            }
225            // PG allows arbitrary punctuation marks, which represent delim
226            chr if chr.is_ascii_whitespace() || chr.is_ascii_punctuation() => {
227                parse_num(&mut toks, &num_buf, split_nums, i)?;
228                num_buf.clear();
229                toks.push(TimeStrToken::Delim);
230            }
231            chr => {
232                return Err(format!(
233                    "Error tokenizing timezone string ('{}'): invalid character {:?} at offset {}",
234                    value, chr, i
235                ));
236            }
237        }
238    }
239    parse_num(&mut toks, &num_buf, split_nums, 0)?;
240    Ok(toks)
241}
242
/// Selects the convention used to interpret the sign of a numeric offset.
#[derive(Clone, Copy, Debug)]
pub enum TimezoneSpec {
    /// Treat offsets as an ISO 8601 time zone specification: a positive
    /// offset lies east of UTC.
    Iso,
    /// Treat offsets as a POSIX-style time zone specification: a positive
    /// offset lies west of UTC.
    Posix,
}
250
251fn build_timezone_offset_second(
252    tokens: &[TimeStrToken],
253    value: &str,
254    spec: TimezoneSpec,
255) -> Result<Timezone, String> {
256    use TimeStrToken::*;
257    static ALL_FORMATS: [&[TimeStrToken]; 12] = [
258        &[Plus, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
259        &[Dash, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
260        &[Plus, Num(0, 1), Colon, Num(0, 1)],
261        &[Dash, Num(0, 1), Colon, Num(0, 1)],
262        &[Plus, Num(0, 1), Num(0, 1), Num(0, 1)],
263        &[Dash, Num(0, 1), Num(0, 1), Num(0, 1)],
264        &[Plus, Num(0, 1), Num(0, 1)],
265        &[Dash, Num(0, 1), Num(0, 1)],
266        &[Plus, Num(0, 1)],
267        &[Dash, Num(0, 1)],
268        &[TzName(String::new())],
269        &[Zulu],
270    ];
271
272    let mut is_positive = true;
273    let mut hour_offset: Option<i32> = None;
274    let mut minute_offset: Option<i32> = None;
275    let mut second_offset: Option<i32> = None;
276
277    for format in ALL_FORMATS {
278        let actual = tokens.iter();
279
280        if actual.len() != format.len() {
281            continue;
282        }
283
284        for (i, (atok, etok)) in actual.zip_eq(format).enumerate() {
285            match (atok, etok) {
286                (Colon, Colon) | (Plus, Plus) => { /* Matching punctuation */ }
287                (Dash, Dash) => {
288                    is_positive = false;
289                }
290                (Num(val, _), Num(_, _)) => {
291                    let val = *val;
292                    match (hour_offset, minute_offset, second_offset) {
293                        (None, None, None) => {
294                            // Postgres allows timezones in the range -15:59:59..15:59:59
295                            if val <= 15 {
296                                hour_offset = Some(i32::try_from(val).expect(
297                                    "number between 0 and 15 should fit in signed 32-bit integer",
298                                ));
299                            } else {
300                                return Err(format!(
301                                    "Invalid timezone string ({}): timezone hour invalid {}",
302                                    value, val
303                                ));
304                            }
305                        }
306                        (Some(_), None, None) => {
307                            if val < 60 {
308                                minute_offset = Some(i32::try_from(val).expect(
309                                    "number between 0 and 59 should fit in signed 32-bit integer",
310                                ));
311                            } else {
312                                return Err(format!(
313                                    "Invalid timezone string ({}): timezone minute invalid {}",
314                                    value, val
315                                ));
316                            }
317                        }
318                        (Some(_), Some(_), None) => {
319                            if val < 60 {
320                                second_offset = Some(i32::try_from(val).expect(
321                                    "number between 0 and 59 should fit in signed 32-bit integer",
322                                ));
323                            } else {
324                                return Err(format!(
325                                    "Invalid timezone string ({}): timezone second invalid {}",
326                                    value, val
327                                ));
328                            }
329                        }
330                        // We've already seen an hour a minute and a second so we should
331                        // never see another number
332                        (Some(_), Some(_), Some(_)) => {
333                            return Err(format!(
334                                "Invalid timezone string ({}): invalid value {} at token index {}",
335                                value, val, i
336                            ));
337                        }
338                        _ => unreachable!("parsed a minute before an hour!"),
339                    }
340                }
341                (Zulu, Zulu) => return Ok(Default::default()),
342                (TzName(val), TzName(_)) => {
343                    if let Some(abbrev) = TIMEZONE_ABBREVS.get(UncasedStr::new(val)) {
344                        return Ok(abbrev.timezone());
345                    }
346
347                    return match Tz::from_str_insensitive(val) {
348                        Ok(tz) => Ok(Timezone::Tz(tz)),
349                        Err(err) => Err(format!(
350                            "Invalid timezone string ({}): {}. \
351                            Failed to parse {} at token index {}",
352                            value, err, val, i
353                        )),
354                    };
355                }
356                (_, _) => {
357                    // Theres a mismatch between this format and the actual
358                    // token stream Stop trying to parse in this format and go
359                    // to the next one
360                    is_positive = true;
361                    hour_offset = None;
362                    minute_offset = None;
363                    second_offset = None;
364                    break;
365                }
366            }
367        }
368
369        // Return the first valid parsed result
370        if let Some(hour_offset) = hour_offset {
371            let mut tz_offset_second = hour_offset * 60 * 60;
372
373            if let Some(minute_offset) = minute_offset {
374                tz_offset_second += minute_offset * 60;
375            }
376
377            if let Some(second_offset) = second_offset {
378                tz_offset_second += second_offset;
379            }
380
381            let offset = match (is_positive, spec) {
382                (true, TimezoneSpec::Iso) | (false, TimezoneSpec::Posix) => {
383                    FixedOffset::east_opt(tz_offset_second).unwrap()
384                }
385                (false, TimezoneSpec::Iso) | (true, TimezoneSpec::Posix) => {
386                    FixedOffset::west_opt(tz_offset_second).unwrap()
387                }
388            };
389
390            return Ok(Timezone::FixedOffset(offset));
391        }
392    }
393
394    Err(format!("Cannot parse timezone offset {}", value))
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks ISO-style parsing against a table of accepted inputs with their
    /// expected time zones, then against a list of inputs that must be rejected.
    #[mz_ore::test]
    fn test_parse_timezone_offset_second() {
        // Shorthands for the expected values.
        let east = |secs: i32| Timezone::FixedOffset(FixedOffset::east_opt(secs).unwrap());
        let west = |secs: i32| Timezone::FixedOffset(FixedOffset::west_opt(secs).unwrap());
        let named = Timezone::Tz;

        let test_cases = [
            ("+0:00", east(0)),
            ("-0:00", east(0)),
            ("+0:000000", east(0)),
            ("+000000:00", east(0)),
            ("+000000:000000", east(0)),
            ("+0", east(0)),
            ("+00", east(0)),
            ("+000", east(0)),
            ("+0000", east(0)),
            ("+00000000", east(0)),
            ("+0000001:000000", east(3600)),
            ("+0000000:000001", east(60)),
            ("+0000001:000001", east(3660)),
            ("+0000001:000001:000001", east(3661)),
            ("+4:00", east(14400)),
            ("-4:00", west(14400)),
            ("+2:30", east(9000)),
            ("-5:15", west(18900)),
            ("+0:20", east(1200)),
            ("-0:20", west(1200)),
            ("+0:0:20", east(20)),
            ("+5", east(18000)),
            ("-5", west(18000)),
            ("+05", east(18000)),
            ("-05", west(18000)),
            ("+500", east(18000)),
            ("-500", west(18000)),
            ("+530", east(19800)),
            ("-530", west(19800)),
            ("+050", east(3000)),
            ("-050", west(3000)),
            ("+15", east(54000)),
            ("-15", west(54000)),
            ("+1515", east(54900)),
            ("+15:15:15", east(54915)),
            ("+015", east(900)),
            ("-015", west(900)),
            ("+0015", east(900)),
            ("-0015", west(900)),
            ("+00015", east(900)),
            ("-00015", west(900)),
            ("+005", east(300)),
            ("-005", west(300)),
            ("+0000005", east(300)),
            ("+00000100", east(3600)),
            ("Z", east(0)),
            ("z", east(0)),
            ("UTC", east(0)),
            ("Pacific/Auckland", named(Tz::Pacific__Auckland)),
            ("America/New_York", named(Tz::America__New_York)),
            ("America/Los_Angeles", named(Tz::America__Los_Angeles)),
            ("utc", east(0)),
            ("pAcIfIc/AUcKlAnD", named(Tz::Pacific__Auckland)),
            ("AMERICA/NEW_YORK", named(Tz::America__New_York)),
            ("america/los_angeles", named(Tz::America__Los_Angeles)),
            // Formatting test cases
            ("+5:", east(18000)),
            ("-5:15:", west(18900)),
            ("-   5:15:", west(18900)),
            (" ! ? ! - 5:15 ? ! ? ", west(18900)),
            (" UTC", east(0)),
            (" UTC ", east(0)),
            (" ? UTC ! ", east(0)),
        ];

        for (input, expected) in test_cases {
            let parsed = Timezone::parse(input, TimezoneSpec::Iso).unwrap_or_else(|e| {
                panic!(
                    "Test failed when expected to pass test case: {} error: {}",
                    input, e
                )
            });
            assert_eq!(parsed, expected);
        }

        let failure_test_cases = [
            "+25:00", "+120:00", "+0:61", "+0:500", " 12:30", "+-12:30", "+2525", "+2561",
            "+255900", "+25", "+5::30", "++5:00", "--5:00", "a", "zzz", "ZZZ", "ZZ Top", " +",
            " -", " ", "1", "12", "1234", "+16", "-17", "-14:60", "1:30:60",
        ];

        for input in failure_test_cases {
            match Timezone::parse(input, TimezoneSpec::Iso) {
                Err(e) => println!("{}", e),
                Ok(t) => panic!(
                    "Test passed when expected to fail test case: {} parsed tz offset (seconds): {}",
                    input, t
                ),
            }
        }
    }
}