1use std::cmp::Ordering;
11use std::fmt;
12
13use chrono::FixedOffset;
14use chrono_tz::Tz;
15use itertools::Itertools;
16use mz_lowertest::MzReflect;
17use serde::{Deserialize, Serialize};
18use uncased::UncasedStr;
19
20use crate::abbrev::TIMEZONE_ABBREVS;
21
/// SQL text embedded at compile time from the file `timezone.gen.sql` located
/// in the build's `OUT_DIR`.
///
/// NOTE(review): the file is presumably generated by this crate's build script
/// and, going by the constant's name, describes the catalog's time zone names
/// -- confirm against the generator.
pub const MZ_CATALOG_TIMEZONE_NAMES_SQL: &str =
    include_str!(concat!(env!("OUT_DIR"), "/timezone.gen.sql"));
25
/// A parsed time zone: either a fixed offset from UTC or a named zone.
#[derive(
    Debug,
    Copy,
    Clone,
    PartialEq,
    Eq,
    Hash,
    Serialize,
    Deserialize,
    MzReflect
)]
pub enum Timezone {
    /// A constant offset from UTC. (De)serialized through the
    /// `fixed_offset_serde` module, i.e. as an `i32` number of seconds.
    #[serde(with = "fixed_offset_serde")]
    FixedOffset(FixedOffset),
    /// A named time zone from `chrono_tz`.
    Tz(Tz),
}
43
44impl Timezone {
45 pub fn parse(tz: &str, spec: TimezoneSpec) -> Result<Self, String> {
46 build_timezone_offset_second(&tokenize_timezone(tz)?, tz, spec)
47 }
48}
49
50mod fixed_offset_serde {
53 use serde::de::Error;
54 use serde::{Deserializer, Serializer};
55
56 use super::*;
57
58 pub fn deserialize<'de, D: Deserializer<'de>>(
59 deserializer: D,
60 ) -> Result<FixedOffset, D::Error> {
61 let offset = i32::deserialize(deserializer)?;
62 FixedOffset::east_opt(offset).ok_or_else(|| {
63 Error::custom(format!("Invalid timezone offset: |{}| >= 86_400", offset))
64 })
65 }
66
67 pub fn serialize<S: Serializer>(
68 offset: &FixedOffset,
69 serializer: S,
70 ) -> Result<S::Ok, S::Error> {
71 serializer.serialize_i32(offset.local_minus_utc())
72 }
73}
74
75impl PartialOrd for Timezone {
76 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
77 Some(self.cmp(other))
78 }
79}
80
81impl Ord for Timezone {
85 fn cmp(&self, other: &Self) -> Ordering {
86 use Timezone::*;
87 match (self, other) {
88 (FixedOffset(a), FixedOffset(b)) => a.local_minus_utc().cmp(&b.local_minus_utc()),
89 (Tz(a), Tz(b)) => a.name().cmp(b.name()),
90 (FixedOffset(_), Tz(_)) => Ordering::Less,
91 (Tz(_), FixedOffset(_)) => Ordering::Greater,
92 }
93 }
94}
95
96impl Default for Timezone {
97 fn default() -> Self {
98 Self::FixedOffset(FixedOffset::east_opt(0).unwrap())
99 }
100}
101
102impl fmt::Display for Timezone {
103 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104 match self {
105 Timezone::FixedOffset(offset) => offset.fmt(f),
106 Timezone::Tz(tz) => tz.fmt(f),
107 }
108 }
109}
110
/// A lexical token of a time zone string, as produced by `tokenize_timezone`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum TimeStrToken {
    /// A `-` character.
    Dash,
    /// A `:` character.
    Colon,
    /// A `+` character.
    Plus,
    /// A `z` / `Z` that is the last character of the input.
    Zulu,
    /// A run of digits: the parsed value and the number of digits it was
    /// written with (so that leading zeros are not forgotten).
    Num(u64, usize),
    /// The remainder of the input starting at the first alphabetic character.
    TzName(String),
    /// Any other ASCII whitespace or punctuation character.
    Delim,
}

impl std::fmt::Display for TimeStrToken {
    /// Renders the token as the text it stands for.
    ///
    /// `Num` is zero-padded to the digit count recorded at tokenization time,
    /// so a token read from `"05"` prints as `"05"` again. (Padding to
    /// `digits - 1`, as was done previously, dropped one leading zero and
    /// underflowed for a digit count of zero.)
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Dash => write!(f, "-"),
            Self::Colon => write!(f, ":"),
            Self::Plus => write!(f, "+"),
            Self::Zulu => write!(f, "Z"),
            // The width is a minimum: values wider than `digits` print in full.
            Self::Num(value, digits) => write!(f, "{:0width$}", value, width = *digits),
            Self::TzName(name) => write!(f, "{}", name),
            Self::Delim => write!(f, " "),
        }
    }
}
138
139fn tokenize_timezone(value: &str) -> Result<Vec<TimeStrToken>, String> {
140 let mut toks: Vec<TimeStrToken> = vec![];
141 let mut num_buf = String::with_capacity(4);
142 let split_nums: bool = !value.contains(':');
145
146 let value = value.trim_matches(|c: char| {
147 (c.is_ascii_whitespace() || c.is_ascii_punctuation()) && (c != '+' && c != '-')
148 });
149
150 fn parse_num(
153 toks: &mut Vec<TimeStrToken>,
154 n: &str,
155 split_nums: bool,
156 idx: usize,
157 ) -> Result<(), String> {
158 if n.is_empty() {
159 return Ok(());
160 }
161
162 let (first, second) = if n.len() > 2 && split_nums {
163 let (first, second) = n.split_at(n.len() - 2);
164 (first, Some(second))
165 } else {
166 (n, None)
167 };
168
169 toks.push(TimeStrToken::Num(
170 first.parse().map_err(|e| {
171 format!(
172 "Unable to tokenize value {} as a number at index {}: {}",
173 first, idx, e
174 )
175 })?,
176 first.len(),
177 ));
178
179 if let Some(second) = second {
180 toks.push(TimeStrToken::Num(
181 second.parse().map_err(|e| {
182 format!(
183 "Unable to tokenize value {} as a number at index {}: {}",
184 second, idx, e
185 )
186 })?,
187 second.len(),
188 ));
189 }
190
191 Ok(())
192 }
193
194 let mut space_skip_mode = false;
197 for (i, chr) in value.char_indices() {
198 if space_skip_mode && chr.is_ascii_whitespace() {
200 continue;
201 } else {
202 space_skip_mode = false;
203 }
204
205 match chr {
206 ':' => {
207 parse_num(&mut toks, &num_buf, split_nums, i)?;
208 num_buf.clear();
209 toks.push(TimeStrToken::Colon);
210 }
211 '-' => {
212 parse_num(&mut toks, &num_buf, split_nums, i)?;
213 num_buf.clear();
214 toks.push(TimeStrToken::Dash);
215 space_skip_mode = true;
216 }
217 '+' => {
218 parse_num(&mut toks, &num_buf, split_nums, i)?;
219 num_buf.clear();
220 toks.push(TimeStrToken::Plus);
221 space_skip_mode = true;
222 }
223 chr if (chr == 'z' || chr == 'Z') && (i == value.len() - 1) => {
224 parse_num(&mut toks, &num_buf, split_nums, i)?;
225 num_buf.clear();
226 toks.push(TimeStrToken::Zulu);
227 }
228 chr if chr.is_digit(10) => num_buf.push(chr),
229 chr if chr.is_ascii_alphabetic() => {
230 parse_num(&mut toks, &num_buf, split_nums, i)?;
231 let substring = &value[i..];
232 toks.push(TimeStrToken::TzName(substring.to_string()));
233 return Ok(toks);
234 }
235 chr if chr.is_ascii_whitespace() || chr.is_ascii_punctuation() => {
237 parse_num(&mut toks, &num_buf, split_nums, i)?;
238 num_buf.clear();
239 toks.push(TimeStrToken::Delim);
240 }
241 chr => {
242 return Err(format!(
243 "Error tokenizing timezone string ('{}'): invalid character {:?} at offset {}",
244 value, chr, i
245 ));
246 }
247 }
248 }
249 parse_num(&mut toks, &num_buf, split_nums, 0)?;
250 Ok(toks)
251}
252
/// Selects how the sign of a numeric UTC offset is interpreted when parsing.
#[derive(Debug, Clone, Copy)]
pub enum TimezoneSpec {
    /// A `+` offset is east of UTC and a `-` offset is west of UTC.
    Iso,
    /// The reverse convention: a `+` offset is west of UTC and a `-` offset
    /// is east of UTC.
    Posix,
}
260
261fn build_timezone_offset_second(
262 tokens: &[TimeStrToken],
263 value: &str,
264 spec: TimezoneSpec,
265) -> Result<Timezone, String> {
266 use TimeStrToken::*;
267 static ALL_FORMATS: [&[TimeStrToken]; 12] = [
268 &[Plus, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
269 &[Dash, Num(0, 1), Colon, Num(0, 1), Colon, Num(0, 1)],
270 &[Plus, Num(0, 1), Colon, Num(0, 1)],
271 &[Dash, Num(0, 1), Colon, Num(0, 1)],
272 &[Plus, Num(0, 1), Num(0, 1), Num(0, 1)],
273 &[Dash, Num(0, 1), Num(0, 1), Num(0, 1)],
274 &[Plus, Num(0, 1), Num(0, 1)],
275 &[Dash, Num(0, 1), Num(0, 1)],
276 &[Plus, Num(0, 1)],
277 &[Dash, Num(0, 1)],
278 &[TzName(String::new())],
279 &[Zulu],
280 ];
281
282 let mut is_positive = true;
283 let mut hour_offset: Option<i32> = None;
284 let mut minute_offset: Option<i32> = None;
285 let mut second_offset: Option<i32> = None;
286
287 for format in ALL_FORMATS {
288 let actual = tokens.iter();
289
290 if actual.len() != format.len() {
291 continue;
292 }
293
294 for (i, (atok, etok)) in actual.zip_eq(format).enumerate() {
295 match (atok, etok) {
296 (Colon, Colon) | (Plus, Plus) => { }
297 (Dash, Dash) => {
298 is_positive = false;
299 }
300 (Num(val, _), Num(_, _)) => {
301 let val = *val;
302 match (hour_offset, minute_offset, second_offset) {
303 (None, None, None) => {
304 if val <= 15 {
306 hour_offset = Some(i32::try_from(val).expect(
307 "number between 0 and 15 should fit in signed 32-bit integer",
308 ));
309 } else {
310 return Err(format!(
311 "Invalid timezone string ({}): timezone hour invalid {}",
312 value, val
313 ));
314 }
315 }
316 (Some(_), None, None) => {
317 if val < 60 {
318 minute_offset = Some(i32::try_from(val).expect(
319 "number between 0 and 59 should fit in signed 32-bit integer",
320 ));
321 } else {
322 return Err(format!(
323 "Invalid timezone string ({}): timezone minute invalid {}",
324 value, val
325 ));
326 }
327 }
328 (Some(_), Some(_), None) => {
329 if val < 60 {
330 second_offset = Some(i32::try_from(val).expect(
331 "number between 0 and 59 should fit in signed 32-bit integer",
332 ));
333 } else {
334 return Err(format!(
335 "Invalid timezone string ({}): timezone second invalid {}",
336 value, val
337 ));
338 }
339 }
340 (Some(_), Some(_), Some(_)) => {
343 return Err(format!(
344 "Invalid timezone string ({}): invalid value {} at token index {}",
345 value, val, i
346 ));
347 }
348 _ => unreachable!("parsed a minute before an hour!"),
349 }
350 }
351 (Zulu, Zulu) => return Ok(Default::default()),
352 (TzName(val), TzName(_)) => {
353 if let Some(abbrev) = TIMEZONE_ABBREVS.get(UncasedStr::new(val)) {
354 return Ok(abbrev.timezone());
355 }
356
357 return match Tz::from_str_insensitive(val) {
358 Ok(tz) => Ok(Timezone::Tz(tz)),
359 Err(err) => Err(format!(
360 "Invalid timezone string ({}): {}. \
361 Failed to parse {} at token index {}",
362 value, err, val, i
363 )),
364 };
365 }
366 (_, _) => {
367 is_positive = true;
371 hour_offset = None;
372 minute_offset = None;
373 second_offset = None;
374 break;
375 }
376 }
377 }
378
379 if let Some(hour_offset) = hour_offset {
381 let mut tz_offset_second = hour_offset * 60 * 60;
382
383 if let Some(minute_offset) = minute_offset {
384 tz_offset_second += minute_offset * 60;
385 }
386
387 if let Some(second_offset) = second_offset {
388 tz_offset_second += second_offset;
389 }
390
391 let offset = match (is_positive, spec) {
392 (true, TimezoneSpec::Iso) | (false, TimezoneSpec::Posix) => {
393 FixedOffset::east_opt(tz_offset_second).unwrap()
394 }
395 (false, TimezoneSpec::Iso) | (true, TimezoneSpec::Posix) => {
396 FixedOffset::west_opt(tz_offset_second).unwrap()
397 }
398 };
399
400 return Ok(Timezone::FixedOffset(offset));
401 }
402 }
403
404 Err(format!("Cannot parse timezone offset {}", value))
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `Timezone::parse` with `TimezoneSpec::Iso`: first a table of
    /// inputs with their expected time zones, then a list of inputs that must
    /// be rejected.
    #[mz_ore::test]
    fn test_parse_timezone_offset_second() {
        use Timezone::{FixedOffset as F, Tz as T};
        let test_cases = [
            // Zero offsets in various spellings, including redundant zeros.
            ("+0:00", F(FixedOffset::east_opt(0).unwrap())),
            ("-0:00", F(FixedOffset::east_opt(0).unwrap())),
            ("+0:000000", F(FixedOffset::east_opt(0).unwrap())),
            ("+000000:00", F(FixedOffset::east_opt(0).unwrap())),
            ("+000000:000000", F(FixedOffset::east_opt(0).unwrap())),
            ("+0", F(FixedOffset::east_opt(0).unwrap())),
            ("+00", F(FixedOffset::east_opt(0).unwrap())),
            ("+000", F(FixedOffset::east_opt(0).unwrap())),
            ("+0000", F(FixedOffset::east_opt(0).unwrap())),
            ("+00000000", F(FixedOffset::east_opt(0).unwrap())),
            // Leading zeros do not change the value of a component.
            ("+0000001:000000", F(FixedOffset::east_opt(3600).unwrap())),
            ("+0000000:000001", F(FixedOffset::east_opt(60).unwrap())),
            ("+0000001:000001", F(FixedOffset::east_opt(3660).unwrap())),
            (
                "+0000001:000001:000001",
                F(FixedOffset::east_opt(3661).unwrap()),
            ),
            // Colon-separated hours, minutes and seconds.
            ("+4:00", F(FixedOffset::east_opt(14400).unwrap())),
            ("-4:00", F(FixedOffset::west_opt(14400).unwrap())),
            ("+2:30", F(FixedOffset::east_opt(9000).unwrap())),
            ("-5:15", F(FixedOffset::west_opt(18900).unwrap())),
            ("+0:20", F(FixedOffset::east_opt(1200).unwrap())),
            ("-0:20", F(FixedOffset::west_opt(1200).unwrap())),
            ("+0:0:20", F(FixedOffset::east_opt(20).unwrap())),
            // Colon-free digit runs: the last two digits are the minutes once
            // the run is longer than two digits.
            ("+5", F(FixedOffset::east_opt(18000).unwrap())),
            ("-5", F(FixedOffset::west_opt(18000).unwrap())),
            ("+05", F(FixedOffset::east_opt(18000).unwrap())),
            ("-05", F(FixedOffset::west_opt(18000).unwrap())),
            ("+500", F(FixedOffset::east_opt(18000).unwrap())),
            ("-500", F(FixedOffset::west_opt(18000).unwrap())),
            ("+530", F(FixedOffset::east_opt(19800).unwrap())),
            ("-530", F(FixedOffset::west_opt(19800).unwrap())),
            ("+050", F(FixedOffset::east_opt(3000).unwrap())),
            ("-050", F(FixedOffset::west_opt(3000).unwrap())),
            ("+15", F(FixedOffset::east_opt(54000).unwrap())),
            ("-15", F(FixedOffset::west_opt(54000).unwrap())),
            ("+1515", F(FixedOffset::east_opt(54900).unwrap())),
            ("+15:15:15", F(FixedOffset::east_opt(54915).unwrap())),
            ("+015", F(FixedOffset::east_opt(900).unwrap())),
            ("-015", F(FixedOffset::west_opt(900).unwrap())),
            ("+0015", F(FixedOffset::east_opt(900).unwrap())),
            ("-0015", F(FixedOffset::west_opt(900).unwrap())),
            ("+00015", F(FixedOffset::east_opt(900).unwrap())),
            ("-00015", F(FixedOffset::west_opt(900).unwrap())),
            ("+005", F(FixedOffset::east_opt(300).unwrap())),
            ("-005", F(FixedOffset::west_opt(300).unwrap())),
            ("+0000005", F(FixedOffset::east_opt(300).unwrap())),
            ("+00000100", F(FixedOffset::east_opt(3600).unwrap())),
            // Zulu, abbreviations and named zones, matched case-insensitively.
            ("Z", F(FixedOffset::east_opt(0).unwrap())),
            ("z", F(FixedOffset::east_opt(0).unwrap())),
            ("UTC", F(FixedOffset::east_opt(0).unwrap())),
            ("Pacific/Auckland", T(Tz::Pacific__Auckland)),
            ("America/New_York", T(Tz::America__New_York)),
            ("America/Los_Angeles", T(Tz::America__Los_Angeles)),
            ("utc", F(FixedOffset::east_opt(0).unwrap())),
            ("pAcIfIc/AUcKlAnD", T(Tz::Pacific__Auckland)),
            ("AMERICA/NEW_YORK", T(Tz::America__New_York)),
            ("america/los_angeles", T(Tz::America__Los_Angeles)),
            // Surrounding whitespace / punctuation (including a trailing
            // colon) is trimmed, and whitespace after a sign is skipped.
            ("+5:", F(FixedOffset::east_opt(18000).unwrap())),
            ("-5:15:", F(FixedOffset::west_opt(18900).unwrap())),
            ("- 5:15:", F(FixedOffset::west_opt(18900).unwrap())),
            (
                " ! ? ! - 5:15 ? ! ? ",
                F(FixedOffset::west_opt(18900).unwrap()),
            ),
            (" UTC", F(FixedOffset::east_opt(0).unwrap())),
            (" UTC ", F(FixedOffset::east_opt(0).unwrap())),
            (" ? UTC ! ", F(FixedOffset::east_opt(0).unwrap())),
        ];

        for (timezone, expected) in test_cases.iter() {
            match Timezone::parse(timezone, TimezoneSpec::Iso) {
                Ok(tz) => assert_eq!(&tz, expected),
                Err(e) => panic!(
                    "Test failed when expected to pass test case: {} error: {}",
                    timezone, e
                ),
            }
        }

        // Inputs that must be rejected: out-of-range components, missing or
        // doubled signs, malformed separators and unknown names.
        let failure_test_cases = [
            "+25:00", "+120:00", "+0:61", "+0:500", " 12:30", "+-12:30", "+2525", "+2561",
            "+255900", "+25", "+5::30", "++5:00", "--5:00", "a", "zzz", "ZZZ", "ZZ Top", " +",
            " -", " ", "1", "12", "1234", "+16", "-17", "-14:60", "1:30:60",
        ];

        for test in failure_test_cases.iter() {
            match Timezone::parse(test, TimezoneSpec::Iso) {
                Ok(t) => panic!(
                    "Test passed when expected to fail test case: {} parsed tz offset (seconds): {}",
                    test, t
                ),
                // The error text itself is not asserted, only printed.
                Err(e) => println!("{}", e),
            }
        }
    }
}