1use std::cmp::Ordering;
11use std::fmt;
12
13use chrono::FixedOffset;
14use chrono_tz::Tz;
15use itertools::Itertools;
16use mz_lowertest::MzReflect;
17use serde::{Deserialize, Serialize};
18use uncased::UncasedStr;
19
20use crate::abbrev::TIMEZONE_ABBREVS;
21
/// SQL text embedded at compile time from the file `timezone.gen.sql` in the
/// build's `OUT_DIR`.
///
/// NOTE(review): the file is presumably produced by this crate's build script
/// and lists time zone names for the catalog — confirm against the build
/// script, which is not visible from here.
pub const MZ_CATALOG_TIMEZONE_NAMES_SQL: &str =
    include_str!(concat!(env!("OUT_DIR"), "/timezone.gen.sql"));
25
/// A time zone, held either as a fixed offset from UTC or as a `chrono_tz`
/// zone.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, MzReflect)]
pub enum Timezone {
    /// A constant offset from UTC. It is (de)serialized through the
    /// `fixed_offset_serde` module, i.e. as a single `i32`.
    #[serde(with = "fixed_offset_serde")]
    FixedOffset(FixedOffset),
    /// A time zone represented by a `chrono_tz::Tz` value.
    Tz(Tz),
}
33
34impl Timezone {
35 pub fn parse(tz: &str, spec: TimezoneSpec) -> Result<Self, String> {
36 build_timezone_offset_second(&tokenize_timezone(tz)?, tz, spec)
37 }
38}
39
40mod fixed_offset_serde {
43 use serde::de::Error;
44 use serde::{Deserializer, Serializer};
45
46 use super::*;
47
48 pub fn deserialize<'de, D: Deserializer<'de>>(
49 deserializer: D,
50 ) -> Result<FixedOffset, D::Error> {
51 let offset = i32::deserialize(deserializer)?;
52 FixedOffset::east_opt(offset).ok_or_else(|| {
53 Error::custom(format!("Invalid timezone offset: |{}| >= 86_400", offset))
54 })
55 }
56
57 pub fn serialize<S: Serializer>(
58 offset: &FixedOffset,
59 serializer: S,
60 ) -> Result<S::Ok, S::Error> {
61 serializer.serialize_i32(offset.local_minus_utc())
62 }
63}
64
// Defers to the `Ord` implementation, so the partial order is always defined
// and agrees with `Ord::cmp`.
impl PartialOrd for Timezone {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
70
71impl Ord for Timezone {
75 fn cmp(&self, other: &Self) -> Ordering {
76 use Timezone::*;
77 match (self, other) {
78 (FixedOffset(a), FixedOffset(b)) => a.local_minus_utc().cmp(&b.local_minus_utc()),
79 (Tz(a), Tz(b)) => a.name().cmp(b.name()),
80 (FixedOffset(_), Tz(_)) => Ordering::Less,
81 (Tz(_), FixedOffset(_)) => Ordering::Greater,
82 }
83 }
84}
85
86impl Default for Timezone {
87 fn default() -> Self {
88 Self::FixedOffset(FixedOffset::east_opt(0).unwrap())
89 }
90}
91
92impl fmt::Display for Timezone {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94 match self {
95 Timezone::FixedOffset(offset) => offset.fmt(f),
96 Timezone::Tz(tz) => tz.fmt(f),
97 }
98 }
99}
100
/// A lexical token produced while scanning a time zone string.
#[derive(Debug, Clone, PartialEq, Eq)]
enum TimeStrToken {
    /// A `-` character.
    Dash,
    /// A `:` character.
    Colon,
    /// A `+` character.
    Plus,
    /// A `z`/`Z` that is the last character of the input.
    Zulu,
    /// A run of digits: the parsed value and the number of digits it was
    /// written with (so leading zeros are not lost).
    Num(u64, usize),
    /// The remainder of the input, starting at its first alphabetic character.
    TzName(String),
    /// Any other whitespace or punctuation character.
    Delim,
}

// Renders a token back to text. `Num` is zero-padded to the number of digits
// it was written with, so `Num(5, 2)` prints as `05`.
impl fmt::Display for TimeStrToken {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use TimeStrToken::*;
        match self {
            Dash => write!(f, "-"),
            Colon => write!(f, ":"),
            Plus => write!(f, "+"),
            Zulu => write!(f, "Z"),
            // Pad to the full recorded digit count. The previous width of
            // `digits - 1` dropped one leading zero and underflowed when
            // `digits` was 0.
            Num(i, digits) => write!(f, "{:0width$}", i, width = *digits),
            TzName(n) => write!(f, "{}", n),
            Delim => write!(f, " "),
        }
    }
}
128
129fn tokenize_timezone(value: &str) -> Result<Vec<TimeStrToken>, String> {
130 let mut toks: Vec<TimeStrToken> = vec![];
131 let mut num_buf = String::with_capacity(4);
132 let split_nums: bool = !value.contains(':');
135
136 let value = value.trim_matches(|c: char| {
137 (c.is_ascii_whitespace() || c.is_ascii_punctuation()) && (c != '+' && c != '-')
138 });
139
140 fn parse_num(
143 toks: &mut Vec<TimeStrToken>,
144 n: &str,
145 split_nums: bool,
146 idx: usize,
147 ) -> Result<(), String> {
148 if n.is_empty() {
149 return Ok(());
150 }
151
152 let (first, second) = if n.len() > 2 && split_nums {
153 let (first, second) = n.split_at(n.len() - 2);
154 (first, Some(second))
155 } else {
156 (n, None)
157 };
158
159 toks.push(TimeStrToken::Num(
160 first.parse().map_err(|e| {
161 format!(
162 "Unable to tokenize value {} as a number at index {}: {}",
163 first, idx, e
164 )
165 })?,
166 first.len(),
167 ));
168
169 if let Some(second) = second {
170 toks.push(TimeStrToken::Num(
171 second.parse().map_err(|e| {
172 format!(
173 "Unable to tokenize value {} as a number at index {}: {}",
174 second, idx, e
175 )
176 })?,
177 second.len(),
178 ));
179 }
180
181 Ok(())
182 }
183
184 let mut space_skip_mode = false;
187 for (i, chr) in value.char_indices() {
188 if space_skip_mode && chr.is_ascii_whitespace() {
190 continue;
191 } else {
192 space_skip_mode = false;
193 }
194
195 match chr {
196 ':' => {
197 parse_num(&mut toks, &num_buf, split_nums, i)?;
198 num_buf.clear();
199 toks.push(TimeStrToken::Colon);
200 }
201 '-' => {
202 parse_num(&mut toks, &num_buf, split_nums, i)?;
203 num_buf.clear();
204 toks.push(TimeStrToken::Dash);
205 space_skip_mode = true;
206 }
207 '+' => {
208 parse_num(&mut toks, &num_buf, split_nums, i)?;
209 num_buf.clear();
210 toks.push(TimeStrToken::Plus);
211 space_skip_mode = true;
212 }
213 chr if (chr == 'z' || chr == 'Z') && (i == value.len() - 1) => {
214 parse_num(&mut toks, &num_buf, split_nums, i)?;
215 num_buf.clear();
216 toks.push(TimeStrToken::Zulu);
217 }
218 chr if chr.is_digit(10) => num_buf.push(chr),
219 chr if chr.is_ascii_alphabetic() => {
220 parse_num(&mut toks, &num_buf, split_nums, i)?;
221 let substring = &value[i..];
222 toks.push(TimeStrToken::TzName(substring.to_string()));
223 return Ok(toks);
224 }
225 chr if chr.is_ascii_whitespace() || chr.is_ascii_punctuation() => {
227 parse_num(&mut toks, &num_buf, split_nums, i)?;
228 num_buf.clear();
229 toks.push(TimeStrToken::Delim);
230 }
231 chr => {
232 return Err(format!(
233 "Error tokenizing timezone string ('{}'): invalid character {:?} at offset {}",
234 value, chr, i
235 ));
236 }
237 }
238 }
239 parse_num(&mut toks, &num_buf, split_nums, 0)?;
240 Ok(toks)
241}
242
/// Selects how the sign of a numeric offset is interpreted when a time zone
/// string is parsed.
#[derive(Debug, Clone, Copy)]
pub enum TimezoneSpec {
    /// A leading `+` produces an offset east of UTC and a leading `-` an
    /// offset west of UTC.
    Iso,
    /// The reverse of `Iso`: a leading `+` produces an offset west of UTC and
    /// a leading `-` an offset east of UTC.
    Posix,
}
250
251fn build_timezone_offset_second(
252 tokens: &[TimeStrToken],
253 value: &str,
254 spec: TimezoneSpec,
255) -> Result<Timezone, String> {
256 use TimeStrToken::*;
257 static ALL_FORMATS: [&[TimeStrToken]; 12] = [
258 &[Plus, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
259 &[Dash, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
260 &[Plus, Num(0, 1), Colon, Num(0, 1)],
261 &[Dash, Num(0, 1), Colon, Num(0, 1)],
262 &[Plus, Num(0, 1), Num(0, 1), Num(0, 1)],
263 &[Dash, Num(0, 1), Num(0, 1), Num(0, 1)],
264 &[Plus, Num(0, 1), Num(0, 1)],
265 &[Dash, Num(0, 1), Num(0, 1)],
266 &[Plus, Num(0, 1)],
267 &[Dash, Num(0, 1)],
268 &[TzName(String::new())],
269 &[Zulu],
270 ];
271
272 let mut is_positive = true;
273 let mut hour_offset: Option<i32> = None;
274 let mut minute_offset: Option<i32> = None;
275 let mut second_offset: Option<i32> = None;
276
277 for format in ALL_FORMATS {
278 let actual = tokens.iter();
279
280 if actual.len() != format.len() {
281 continue;
282 }
283
284 for (i, (atok, etok)) in actual.zip_eq(format).enumerate() {
285 match (atok, etok) {
286 (Colon, Colon) | (Plus, Plus) => { }
287 (Dash, Dash) => {
288 is_positive = false;
289 }
290 (Num(val, _), Num(_, _)) => {
291 let val = *val;
292 match (hour_offset, minute_offset, second_offset) {
293 (None, None, None) => {
294 if val <= 15 {
296 hour_offset = Some(i32::try_from(val).expect(
297 "number between 0 and 15 should fit in signed 32-bit integer",
298 ));
299 } else {
300 return Err(format!(
301 "Invalid timezone string ({}): timezone hour invalid {}",
302 value, val
303 ));
304 }
305 }
306 (Some(_), None, None) => {
307 if val < 60 {
308 minute_offset = Some(i32::try_from(val).expect(
309 "number between 0 and 59 should fit in signed 32-bit integer",
310 ));
311 } else {
312 return Err(format!(
313 "Invalid timezone string ({}): timezone minute invalid {}",
314 value, val
315 ));
316 }
317 }
318 (Some(_), Some(_), None) => {
319 if val < 60 {
320 second_offset = Some(i32::try_from(val).expect(
321 "number between 0 and 59 should fit in signed 32-bit integer",
322 ));
323 } else {
324 return Err(format!(
325 "Invalid timezone string ({}): timezone second invalid {}",
326 value, val
327 ));
328 }
329 }
330 (Some(_), Some(_), Some(_)) => {
333 return Err(format!(
334 "Invalid timezone string ({}): invalid value {} at token index {}",
335 value, val, i
336 ));
337 }
338 _ => unreachable!("parsed a minute before an hour!"),
339 }
340 }
341 (Zulu, Zulu) => return Ok(Default::default()),
342 (TzName(val), TzName(_)) => {
343 if let Some(abbrev) = TIMEZONE_ABBREVS.get(UncasedStr::new(val)) {
344 return Ok(abbrev.timezone());
345 }
346
347 return match Tz::from_str_insensitive(val) {
348 Ok(tz) => Ok(Timezone::Tz(tz)),
349 Err(err) => Err(format!(
350 "Invalid timezone string ({}): {}. \
351 Failed to parse {} at token index {}",
352 value, err, val, i
353 )),
354 };
355 }
356 (_, _) => {
357 is_positive = true;
361 hour_offset = None;
362 minute_offset = None;
363 second_offset = None;
364 break;
365 }
366 }
367 }
368
369 if let Some(hour_offset) = hour_offset {
371 let mut tz_offset_second = hour_offset * 60 * 60;
372
373 if let Some(minute_offset) = minute_offset {
374 tz_offset_second += minute_offset * 60;
375 }
376
377 if let Some(second_offset) = second_offset {
378 tz_offset_second += second_offset;
379 }
380
381 let offset = match (is_positive, spec) {
382 (true, TimezoneSpec::Iso) | (false, TimezoneSpec::Posix) => {
383 FixedOffset::east_opt(tz_offset_second).unwrap()
384 }
385 (false, TimezoneSpec::Iso) | (true, TimezoneSpec::Posix) => {
386 FixedOffset::west_opt(tz_offset_second).unwrap()
387 }
388 };
389
390 return Ok(Timezone::FixedOffset(offset));
391 }
392 }
393
394 Err(format!("Cannot parse timezone offset {}", value))
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises `Timezone::parse` with `TimezoneSpec::Iso`: every entry in
    // `test_cases` must parse to the paired value, and every entry in
    // `failure_test_cases` must be rejected.
    #[mz_ore::test]
    fn test_parse_timezone_offset_second() {
        use Timezone::{FixedOffset as F, Tz as T};
        let test_cases = [
            // Zero offsets, including heavily zero-padded spellings.
            ("+0:00", F(FixedOffset::east_opt(0).unwrap())),
            ("-0:00", F(FixedOffset::east_opt(0).unwrap())),
            ("+0:000000", F(FixedOffset::east_opt(0).unwrap())),
            ("+000000:00", F(FixedOffset::east_opt(0).unwrap())),
            ("+000000:000000", F(FixedOffset::east_opt(0).unwrap())),
            ("+0", F(FixedOffset::east_opt(0).unwrap())),
            ("+00", F(FixedOffset::east_opt(0).unwrap())),
            ("+000", F(FixedOffset::east_opt(0).unwrap())),
            ("+0000", F(FixedOffset::east_opt(0).unwrap())),
            ("+00000000", F(FixedOffset::east_opt(0).unwrap())),
            ("+0000001:000000", F(FixedOffset::east_opt(3600).unwrap())),
            ("+0000000:000001", F(FixedOffset::east_opt(60).unwrap())),
            ("+0000001:000001", F(FixedOffset::east_opt(3660).unwrap())),
            (
                "+0000001:000001:000001",
                F(FixedOffset::east_opt(3661).unwrap()),
            ),
            // Colon-separated offsets.
            ("+4:00", F(FixedOffset::east_opt(14400).unwrap())),
            ("-4:00", F(FixedOffset::west_opt(14400).unwrap())),
            ("+2:30", F(FixedOffset::east_opt(9000).unwrap())),
            ("-5:15", F(FixedOffset::west_opt(18900).unwrap())),
            ("+0:20", F(FixedOffset::east_opt(1200).unwrap())),
            ("-0:20", F(FixedOffset::west_opt(1200).unwrap())),
            ("+0:0:20", F(FixedOffset::east_opt(20).unwrap())),
            // Offsets without colons; runs longer than two digits are split
            // so that the last two digits form their own field.
            ("+5", F(FixedOffset::east_opt(18000).unwrap())),
            ("-5", F(FixedOffset::west_opt(18000).unwrap())),
            ("+05", F(FixedOffset::east_opt(18000).unwrap())),
            ("-05", F(FixedOffset::west_opt(18000).unwrap())),
            ("+500", F(FixedOffset::east_opt(18000).unwrap())),
            ("-500", F(FixedOffset::west_opt(18000).unwrap())),
            ("+530", F(FixedOffset::east_opt(19800).unwrap())),
            ("-530", F(FixedOffset::west_opt(19800).unwrap())),
            ("+050", F(FixedOffset::east_opt(3000).unwrap())),
            ("-050", F(FixedOffset::west_opt(3000).unwrap())),
            ("+15", F(FixedOffset::east_opt(54000).unwrap())),
            ("-15", F(FixedOffset::west_opt(54000).unwrap())),
            ("+1515", F(FixedOffset::east_opt(54900).unwrap())),
            ("+15:15:15", F(FixedOffset::east_opt(54915).unwrap())),
            ("+015", F(FixedOffset::east_opt(900).unwrap())),
            ("-015", F(FixedOffset::west_opt(900).unwrap())),
            ("+0015", F(FixedOffset::east_opt(900).unwrap())),
            ("-0015", F(FixedOffset::west_opt(900).unwrap())),
            ("+00015", F(FixedOffset::east_opt(900).unwrap())),
            ("-00015", F(FixedOffset::west_opt(900).unwrap())),
            ("+005", F(FixedOffset::east_opt(300).unwrap())),
            ("-005", F(FixedOffset::west_opt(300).unwrap())),
            ("+0000005", F(FixedOffset::east_opt(300).unwrap())),
            ("+00000100", F(FixedOffset::east_opt(3600).unwrap())),
            // Zulu, abbreviations and zone names, in any letter case.
            ("Z", F(FixedOffset::east_opt(0).unwrap())),
            ("z", F(FixedOffset::east_opt(0).unwrap())),
            ("UTC", F(FixedOffset::east_opt(0).unwrap())),
            ("Pacific/Auckland", T(Tz::Pacific__Auckland)),
            ("America/New_York", T(Tz::America__New_York)),
            ("America/Los_Angeles", T(Tz::America__Los_Angeles)),
            ("utc", F(FixedOffset::east_opt(0).unwrap())),
            ("pAcIfIc/AUcKlAnD", T(Tz::Pacific__Auckland)),
            ("AMERICA/NEW_YORK", T(Tz::America__New_York)),
            ("america/los_angeles", T(Tz::America__Los_Angeles)),
            // Surrounding whitespace and punctuation are trimmed, and spaces
            // directly after a sign are skipped.
            ("+5:", F(FixedOffset::east_opt(18000).unwrap())),
            ("-5:15:", F(FixedOffset::west_opt(18900).unwrap())),
            ("- 5:15:", F(FixedOffset::west_opt(18900).unwrap())),
            (
                " ! ? ! - 5:15 ? ! ? ",
                F(FixedOffset::west_opt(18900).unwrap()),
            ),
            (" UTC", F(FixedOffset::east_opt(0).unwrap())),
            (" UTC ", F(FixedOffset::east_opt(0).unwrap())),
            (" ? UTC ! ", F(FixedOffset::east_opt(0).unwrap())),
        ];

        for (timezone, expected) in test_cases.iter() {
            match Timezone::parse(timezone, TimezoneSpec::Iso) {
                Ok(tz) => assert_eq!(&tz, expected),
                Err(e) => panic!(
                    "Test failed when expected to pass test case: {} error: {}",
                    timezone, e
                ),
            }
        }

        // Out-of-range fields, missing or doubled signs, unknown names and
        // empty input must all be rejected.
        let failure_test_cases = [
            "+25:00", "+120:00", "+0:61", "+0:500", " 12:30", "+-12:30", "+2525", "+2561",
            "+255900", "+25", "+5::30", "++5:00", "--5:00", "a", "zzz", "ZZZ", "ZZ Top", " +",
            " -", " ", "1", "12", "1234", "+16", "-17", "-14:60", "1:30:60",
        ];

        for test in failure_test_cases.iter() {
            match Timezone::parse(test, TimezoneSpec::Iso) {
                Ok(t) => panic!(
                    "Test passed when expected to fail test case: {} parsed tz offset (seconds): {}",
                    test, t
                ),
                // The error text is only printed, not asserted on.
                Err(e) => println!("{}", e),
            }
        }
    }
}
502}