Skip to main content

mz_pgtz/
timezone.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::cmp::Ordering;
11use std::fmt;
12
13use chrono::FixedOffset;
14use chrono_tz::Tz;
15use itertools::Itertools;
16use mz_lowertest::MzReflect;
17use serde::{Deserialize, Serialize};
18use uncased::UncasedStr;
19
20use crate::abbrev::TIMEZONE_ABBREVS;
21
/// The SQL definition of the contents of the `mz_timezone_names` view.
///
/// The text is embedded at compile time from the `timezone.gen.sql` file found
/// in the build's `OUT_DIR` (presumably emitted by this crate's build script;
/// confirm against the build configuration).
pub const MZ_CATALOG_TIMEZONE_NAMES_SQL: &str =
    include_str!(concat!(env!("OUT_DIR"), "/timezone.gen.sql"));
25
26/// Parsed timezone.
27#[derive(
28    Debug,
29    Copy,
30    Clone,
31    PartialEq,
32    Eq,
33    Hash,
34    Serialize,
35    Deserialize,
36    MzReflect
37)]
38pub enum Timezone {
39    #[serde(with = "fixed_offset_serde")]
40    FixedOffset(FixedOffset),
41    Tz(Tz),
42}
43
44impl Timezone {
45    pub fn parse(tz: &str, spec: TimezoneSpec) -> Result<Self, String> {
46        build_timezone_offset_second(&tokenize_timezone(tz)?, tz, spec)
47    }
48}
49
50// We need to implement Serialize and Deserialize traits to include Timezone in the UnaryFunc enum.
51// FixedOffset doesn't implement these, even with the "serde" feature enabled.
52mod fixed_offset_serde {
53    use serde::de::Error;
54    use serde::{Deserializer, Serializer};
55
56    use super::*;
57
58    pub fn deserialize<'de, D: Deserializer<'de>>(
59        deserializer: D,
60    ) -> Result<FixedOffset, D::Error> {
61        let offset = i32::deserialize(deserializer)?;
62        FixedOffset::east_opt(offset).ok_or_else(|| {
63            Error::custom(format!("Invalid timezone offset: |{}| >= 86_400", offset))
64        })
65    }
66
67    pub fn serialize<S: Serializer>(
68        offset: &FixedOffset,
69        serializer: S,
70    ) -> Result<S::Ok, S::Error> {
71        serializer.serialize_i32(offset.local_minus_utc())
72    }
73}
74
75impl PartialOrd for Timezone {
76    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
77        Some(self.cmp(other))
78    }
79}
80
81// We need to implement Ord and PartialOrd to include Timezone in the UnaryFunc enum. Neither FixedOffset nor Tz
82// implement these so we do a simple ordinal comparison (FixedOffset variant < Tz variant), and break ties using
83// i32/str comparisons respectively.
84impl Ord for Timezone {
85    fn cmp(&self, other: &Self) -> Ordering {
86        use Timezone::*;
87        match (self, other) {
88            (FixedOffset(a), FixedOffset(b)) => a.local_minus_utc().cmp(&b.local_minus_utc()),
89            (Tz(a), Tz(b)) => a.name().cmp(b.name()),
90            (FixedOffset(_), Tz(_)) => Ordering::Less,
91            (Tz(_), FixedOffset(_)) => Ordering::Greater,
92        }
93    }
94}
95
96impl Default for Timezone {
97    fn default() -> Self {
98        Self::FixedOffset(FixedOffset::east_opt(0).unwrap())
99    }
100}
101
102impl fmt::Display for Timezone {
103    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104        match self {
105            Timezone::FixedOffset(offset) => offset.fmt(f),
106            Timezone::Tz(tz) => tz.fmt(f),
107        }
108    }
109}
110
/// TimeStrToken represents valid tokens in time-like strings,
/// i.e those used in INTERVAL, TIMESTAMP/TZ, DATE, and TIME.
#[derive(Debug, Clone, PartialEq, Eq)]
enum TimeStrToken {
    /// A `-` sign.
    Dash,
    /// A `:` separator.
    Colon,
    /// A `+` sign.
    Plus,
    /// A trailing `z`/`Z`.
    Zulu,
    /// A run of digits: the parsed value and the number of digits it was
    /// written with.
    Num(u64, usize),
    /// Text that starts at the first alphabetic character and runs to the end
    /// of the input.
    TzName(String),
    /// Whitespace or punctuation acting as a delimiter.
    Delim,
}

impl fmt::Display for TimeStrToken {
    /// Renders the token as text.
    ///
    /// Numbers are zero-padded to their recorded digit count, so `Num(5, 2)`
    /// prints as `05`. A recorded count of zero simply prints the value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use TimeStrToken::*;
        match self {
            Dash => f.write_str("-"),
            Colon => f.write_str(":"),
            Plus => f.write_str("+"),
            Zulu => f.write_str("Z"),
            // Pad to the full digit count. Padding to `digits - 1` dropped a
            // leading zero and underflowed when the count was zero.
            Num(value, digits) => write!(f, "{:0width$}", value, width = *digits),
            TzName(name) => f.write_str(name),
            Delim => f.write_str(" "),
        }
    }
}
138
139fn tokenize_timezone(value: &str) -> Result<Vec<TimeStrToken>, String> {
140    let mut toks: Vec<TimeStrToken> = vec![];
141    let mut num_buf = String::with_capacity(4);
142    // If the timezone string has a colon, we need to parse all numbers naively.
143    // Otherwise we need to parse long sequences of digits as [..hhhhmm]
144    let split_nums: bool = !value.contains(':');
145
146    let value = value.trim_matches(|c: char| {
147        (c.is_ascii_whitespace() || c.is_ascii_punctuation()) && (c != '+' && c != '-')
148    });
149
150    // Takes a string and tries to parse it as a number token and insert it into
151    // the token list
152    fn parse_num(
153        toks: &mut Vec<TimeStrToken>,
154        n: &str,
155        split_nums: bool,
156        idx: usize,
157    ) -> Result<(), String> {
158        if n.is_empty() {
159            return Ok(());
160        }
161
162        let (first, second) = if n.len() > 2 && split_nums {
163            let (first, second) = n.split_at(n.len() - 2);
164            (first, Some(second))
165        } else {
166            (n, None)
167        };
168
169        toks.push(TimeStrToken::Num(
170            first.parse().map_err(|e| {
171                format!(
172                    "Unable to tokenize value {} as a number at index {}: {}",
173                    first, idx, e
174                )
175            })?,
176            first.len(),
177        ));
178
179        if let Some(second) = second {
180            toks.push(TimeStrToken::Num(
181                second.parse().map_err(|e| {
182                    format!(
183                        "Unable to tokenize value {} as a number at index {}: {}",
184                        second, idx, e
185                    )
186                })?,
187                second.len(),
188            ));
189        }
190
191        Ok(())
192    }
193
194    // Toggles whether or not we should skip whitespace. This would be nicer to
195    // do inline but ownership makes that annoying.
196    let mut space_skip_mode = false;
197    for (i, chr) in value.char_indices() {
198        // Stay in space skip mode iff already in it and element is space.
199        if space_skip_mode && chr.is_ascii_whitespace() {
200            continue;
201        } else {
202            space_skip_mode = false;
203        }
204
205        match chr {
206            ':' => {
207                parse_num(&mut toks, &num_buf, split_nums, i)?;
208                num_buf.clear();
209                toks.push(TimeStrToken::Colon);
210            }
211            '-' => {
212                parse_num(&mut toks, &num_buf, split_nums, i)?;
213                num_buf.clear();
214                toks.push(TimeStrToken::Dash);
215                space_skip_mode = true;
216            }
217            '+' => {
218                parse_num(&mut toks, &num_buf, split_nums, i)?;
219                num_buf.clear();
220                toks.push(TimeStrToken::Plus);
221                space_skip_mode = true;
222            }
223            chr if (chr == 'z' || chr == 'Z') && (i == value.len() - 1) => {
224                parse_num(&mut toks, &num_buf, split_nums, i)?;
225                num_buf.clear();
226                toks.push(TimeStrToken::Zulu);
227            }
228            chr if chr.is_digit(10) => num_buf.push(chr),
229            chr if chr.is_ascii_alphabetic() => {
230                parse_num(&mut toks, &num_buf, split_nums, i)?;
231                let substring = &value[i..];
232                toks.push(TimeStrToken::TzName(substring.to_string()));
233                return Ok(toks);
234            }
235            // PG allows arbitrary punctuation marks, which represent delim
236            chr if chr.is_ascii_whitespace() || chr.is_ascii_punctuation() => {
237                parse_num(&mut toks, &num_buf, split_nums, i)?;
238                num_buf.clear();
239                toks.push(TimeStrToken::Delim);
240            }
241            chr => {
242                return Err(format!(
243                    "Error tokenizing timezone string ('{}'): invalid character {:?} at offset {}",
244                    value, chr, i
245                ));
246            }
247        }
248    }
249    parse_num(&mut toks, &num_buf, split_nums, 0)?;
250    Ok(toks)
251}
252
/// Selects the convention used to interpret numeric time zone offsets.
#[derive(Clone, Copy, Debug)]
pub enum TimezoneSpec {
    /// Treat offsets as an ISO 8601 time zone specification.
    Iso,
    /// Treat offsets as a POSIX-style time zone specification.
    Posix,
}
260
261fn build_timezone_offset_second(
262    tokens: &[TimeStrToken],
263    value: &str,
264    spec: TimezoneSpec,
265) -> Result<Timezone, String> {
266    use TimeStrToken::*;
267    static ALL_FORMATS: [&[TimeStrToken]; 12] = [
268        &[Plus, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
269        &[Dash, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
270        &[Plus, Num(0, 1), Colon, Num(0, 1)],
271        &[Dash, Num(0, 1), Colon, Num(0, 1)],
272        &[Plus, Num(0, 1), Num(0, 1), Num(0, 1)],
273        &[Dash, Num(0, 1), Num(0, 1), Num(0, 1)],
274        &[Plus, Num(0, 1), Num(0, 1)],
275        &[Dash, Num(0, 1), Num(0, 1)],
276        &[Plus, Num(0, 1)],
277        &[Dash, Num(0, 1)],
278        &[TzName(String::new())],
279        &[Zulu],
280    ];
281
282    let mut is_positive = true;
283    let mut hour_offset: Option<i32> = None;
284    let mut minute_offset: Option<i32> = None;
285    let mut second_offset: Option<i32> = None;
286
287    for format in ALL_FORMATS {
288        let actual = tokens.iter();
289
290        if actual.len() != format.len() {
291            continue;
292        }
293
294        for (i, (atok, etok)) in actual.zip_eq(format).enumerate() {
295            match (atok, etok) {
296                (Colon, Colon) | (Plus, Plus) => { /* Matching punctuation */ }
297                (Dash, Dash) => {
298                    is_positive = false;
299                }
300                (Num(val, _), Num(_, _)) => {
301                    let val = *val;
302                    match (hour_offset, minute_offset, second_offset) {
303                        (None, None, None) => {
304                            // Postgres allows timezones in the range -15:59:59..15:59:59
305                            if val <= 15 {
306                                hour_offset = Some(i32::try_from(val).expect(
307                                    "number between 0 and 15 should fit in signed 32-bit integer",
308                                ));
309                            } else {
310                                return Err(format!(
311                                    "Invalid timezone string ({}): timezone hour invalid {}",
312                                    value, val
313                                ));
314                            }
315                        }
316                        (Some(_), None, None) => {
317                            if val < 60 {
318                                minute_offset = Some(i32::try_from(val).expect(
319                                    "number between 0 and 59 should fit in signed 32-bit integer",
320                                ));
321                            } else {
322                                return Err(format!(
323                                    "Invalid timezone string ({}): timezone minute invalid {}",
324                                    value, val
325                                ));
326                            }
327                        }
328                        (Some(_), Some(_), None) => {
329                            if val < 60 {
330                                second_offset = Some(i32::try_from(val).expect(
331                                    "number between 0 and 59 should fit in signed 32-bit integer",
332                                ));
333                            } else {
334                                return Err(format!(
335                                    "Invalid timezone string ({}): timezone second invalid {}",
336                                    value, val
337                                ));
338                            }
339                        }
340                        // We've already seen an hour a minute and a second so we should
341                        // never see another number
342                        (Some(_), Some(_), Some(_)) => {
343                            return Err(format!(
344                                "Invalid timezone string ({}): invalid value {} at token index {}",
345                                value, val, i
346                            ));
347                        }
348                        _ => unreachable!("parsed a minute before an hour!"),
349                    }
350                }
351                (Zulu, Zulu) => return Ok(Default::default()),
352                (TzName(val), TzName(_)) => {
353                    if let Some(abbrev) = TIMEZONE_ABBREVS.get(UncasedStr::new(val)) {
354                        return Ok(abbrev.timezone());
355                    }
356
357                    return match Tz::from_str_insensitive(val) {
358                        Ok(tz) => Ok(Timezone::Tz(tz)),
359                        Err(err) => Err(format!(
360                            "Invalid timezone string ({}): {}. \
361                            Failed to parse {} at token index {}",
362                            value, err, val, i
363                        )),
364                    };
365                }
366                (_, _) => {
367                    // Theres a mismatch between this format and the actual
368                    // token stream Stop trying to parse in this format and go
369                    // to the next one
370                    is_positive = true;
371                    hour_offset = None;
372                    minute_offset = None;
373                    second_offset = None;
374                    break;
375                }
376            }
377        }
378
379        // Return the first valid parsed result
380        if let Some(hour_offset) = hour_offset {
381            let mut tz_offset_second = hour_offset * 60 * 60;
382
383            if let Some(minute_offset) = minute_offset {
384                tz_offset_second += minute_offset * 60;
385            }
386
387            if let Some(second_offset) = second_offset {
388                tz_offset_second += second_offset;
389            }
390
391            let offset = match (is_positive, spec) {
392                (true, TimezoneSpec::Iso) | (false, TimezoneSpec::Posix) => {
393                    FixedOffset::east_opt(tz_offset_second).unwrap()
394                }
395                (false, TimezoneSpec::Iso) | (true, TimezoneSpec::Posix) => {
396                    FixedOffset::west_opt(tz_offset_second).unwrap()
397                }
398            };
399
400            return Ok(Timezone::FixedOffset(offset));
401        }
402    }
403
404    Err(format!("Cannot parse timezone offset {}", value))
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `Timezone::parse` under the ISO convention against a table of
    /// inputs that must parse to a known value and a list of inputs that must
    /// be rejected.
    #[mz_ore::test]
    fn test_parse_timezone_offset_second() {
        // Shorthand for the expected values.
        fn east(secs: i32) -> Timezone {
            Timezone::FixedOffset(FixedOffset::east_opt(secs).unwrap())
        }
        fn west(secs: i32) -> Timezone {
            Timezone::FixedOffset(FixedOffset::west_opt(secs).unwrap())
        }
        fn named(tz: Tz) -> Timezone {
            Timezone::Tz(tz)
        }

        let accepted = [
            ("+0:00", east(0)),
            ("-0:00", east(0)),
            ("+0:000000", east(0)),
            ("+000000:00", east(0)),
            ("+000000:000000", east(0)),
            ("+0", east(0)),
            ("+00", east(0)),
            ("+000", east(0)),
            ("+0000", east(0)),
            ("+00000000", east(0)),
            ("+0000001:000000", east(3600)),
            ("+0000000:000001", east(60)),
            ("+0000001:000001", east(3660)),
            ("+0000001:000001:000001", east(3661)),
            ("+4:00", east(14400)),
            ("-4:00", west(14400)),
            ("+2:30", east(9000)),
            ("-5:15", west(18900)),
            ("+0:20", east(1200)),
            ("-0:20", west(1200)),
            ("+0:0:20", east(20)),
            ("+5", east(18000)),
            ("-5", west(18000)),
            ("+05", east(18000)),
            ("-05", west(18000)),
            ("+500", east(18000)),
            ("-500", west(18000)),
            ("+530", east(19800)),
            ("-530", west(19800)),
            ("+050", east(3000)),
            ("-050", west(3000)),
            ("+15", east(54000)),
            ("-15", west(54000)),
            ("+1515", east(54900)),
            ("+15:15:15", east(54915)),
            ("+015", east(900)),
            ("-015", west(900)),
            ("+0015", east(900)),
            ("-0015", west(900)),
            ("+00015", east(900)),
            ("-00015", west(900)),
            ("+005", east(300)),
            ("-005", west(300)),
            ("+0000005", east(300)),
            ("+00000100", east(3600)),
            ("Z", east(0)),
            ("z", east(0)),
            ("UTC", east(0)),
            ("Pacific/Auckland", named(Tz::Pacific__Auckland)),
            ("America/New_York", named(Tz::America__New_York)),
            ("America/Los_Angeles", named(Tz::America__Los_Angeles)),
            ("utc", east(0)),
            ("pAcIfIc/AUcKlAnD", named(Tz::Pacific__Auckland)),
            ("AMERICA/NEW_YORK", named(Tz::America__New_York)),
            ("america/los_angeles", named(Tz::America__Los_Angeles)),
            // Formatting test cases
            ("+5:", east(18000)),
            ("-5:15:", west(18900)),
            ("-   5:15:", west(18900)),
            (" ! ? ! - 5:15 ? ! ? ", west(18900)),
            (" UTC", east(0)),
            (" UTC ", east(0)),
            (" ? UTC ! ", east(0)),
        ];

        for (input, expected) in &accepted {
            let parsed = match Timezone::parse(input, TimezoneSpec::Iso) {
                Ok(tz) => tz,
                Err(e) => panic!(
                    "Test failed when expected to pass test case: {} error: {}",
                    input, e
                ),
            };
            assert_eq!(&parsed, expected);
        }

        let rejected = [
            "+25:00", "+120:00", "+0:61", "+0:500", " 12:30", "+-12:30", "+2525", "+2561",
            "+255900", "+25", "+5::30", "++5:00", "--5:00", "a", "zzz", "ZZZ", "ZZ Top", " +",
            " -", " ", "1", "12", "1234", "+16", "-17", "-14:60", "1:30:60",
        ];

        for input in &rejected {
            let err = match Timezone::parse(input, TimezoneSpec::Iso) {
                Ok(t) => panic!(
                    "Test passed when expected to fail test case: {} parsed tz offset (seconds): {}",
                    input, t
                ),
                Err(e) => e,
            };
            println!("{}", err);
        }
    }
}