tz/parse/
tz_file.rs

1//! Functions used for parsing a TZif file.
2
3use super::tz_string::parse_posix_tz;
4use crate::error::{TzError, TzFileError};
5use crate::timezone::*;
6use crate::utils::*;
7
8use std::convert::TryInto;
9use std::fs::File;
10use std::io;
11use std::iter;
12use std::str;
13
/// TZif version
///
/// Decoded by `parse_header` from the single version byte that follows the
/// `TZif` magic number: `0x00` is version 1, `0x32` (ASCII `'2'`) is version 2
/// and `0x33` (ASCII `'3'`) is version 3.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Version {
    /// Version 1 (time values are read as 4 bytes)
    V1,
    /// Version 2 (time values are read as 8 bytes)
    V2,
    /// Version 3 (time values are read as 8 bytes; footer parsing is asked to use string extensions)
    V3,
}
24
/// TZif header
///
/// Every count is stored in the file as a big-endian `u32` and widened to
/// `usize` by `parse_header`, which also guarantees that `type_count` and
/// `char_count` are non-zero and that each indicator count is either zero or
/// equal to `type_count`.
#[derive(Debug)]
struct Header {
    /// TZif version
    version: Version,
    /// Number of UT/local indicators (zero or equal to `type_count`)
    ut_local_count: usize,
    /// Number of standard/wall indicators (zero or equal to `type_count`)
    std_wall_count: usize,
    /// Number of leap-second records
    leap_count: usize,
    /// Number of transition times
    transition_count: usize,
    /// Number of local time type records (never zero)
    type_count: usize,
    /// Number of time zone designations bytes (never zero)
    char_count: usize,
}
43
44/// Parse TZif header
45fn parse_header(cursor: &mut Cursor) -> Result<Header, TzFileError> {
46    let magic = cursor.read_exact(4)?;
47    if magic != *b"TZif" {
48        return Err(TzFileError::InvalidTzFile("invalid magic number"));
49    }
50
51    let version = match cursor.read_exact(1)? {
52        [0x00] => Version::V1,
53        [0x32] => Version::V2,
54        [0x33] => Version::V3,
55        _ => return Err(TzFileError::UnsupportedTzFile("unsupported TZif version")),
56    };
57
58    cursor.read_exact(15)?;
59
60    let ut_local_count = u32::from_be_bytes(cursor.read_exact(4)?.try_into()?);
61    let std_wall_count = u32::from_be_bytes(cursor.read_exact(4)?.try_into()?);
62    let leap_count = u32::from_be_bytes(cursor.read_exact(4)?.try_into()?);
63    let transition_count = u32::from_be_bytes(cursor.read_exact(4)?.try_into()?);
64    let type_count = u32::from_be_bytes(cursor.read_exact(4)?.try_into()?);
65    let char_count = u32::from_be_bytes(cursor.read_exact(4)?.try_into()?);
66
67    if !(type_count != 0 && char_count != 0 && (ut_local_count == 0 || ut_local_count == type_count) && (std_wall_count == 0 || std_wall_count == type_count)) {
68        return Err(TzFileError::InvalidTzFile("invalid header"));
69    }
70
71    Ok(Header {
72        version,
73        ut_local_count: ut_local_count as usize,
74        std_wall_count: std_wall_count as usize,
75        leap_count: leap_count as usize,
76        transition_count: transition_count as usize,
77        type_count: type_count as usize,
78        char_count: char_count as usize,
79    })
80}
81
82/// Parse TZif footer
83fn parse_footer(footer: &[u8], use_string_extensions: bool) -> Result<Option<TransitionRule>, TzError> {
84    let footer = str::from_utf8(footer)?;
85    if !(footer.starts_with('\n') && footer.ends_with('\n')) {
86        return Err(TzFileError::InvalidTzFile("invalid footer").into());
87    }
88
89    let tz_string = footer.trim_matches(|c: char| c.is_ascii_whitespace());
90    if tz_string.starts_with(':') || tz_string.contains('\0') {
91        return Err(TzFileError::InvalidTzFile("invalid footer").into());
92    }
93
94    if !tz_string.is_empty() {
95        Ok(Some(parse_posix_tz(tz_string.as_bytes(), use_string_extensions)).transpose()?)
96    } else {
97        Ok(None)
98    }
99}
100
/// TZif data blocks
///
/// Each field borrows one contiguous section of the input, in the order the
/// sections are read by `DataBlock::new`.
struct DataBlock<'a> {
    /// Time size in bytes
    time_size: usize,
    /// Transition times data block
    transition_times: &'a [u8],
    /// Transition types data block
    transition_types: &'a [u8],
    /// Local time types data block
    local_time_types: &'a [u8],
    /// Time zone designations data block
    time_zone_designations: &'a [u8],
    /// Leap seconds data block
    leap_seconds: &'a [u8],
    /// Standard/wall indicators data block
    std_walls: &'a [u8],
    /// UT/local indicators data block
    ut_locals: &'a [u8],
}
120
121impl<'a> DataBlock<'a> {
122    /// Read TZif data blocks
123    fn new(cursor: &mut Cursor<'a>, header: &Header, version: Version) -> Result<Self, TzFileError> {
124        let time_size = match version {
125            Version::V1 => 4,
126            Version::V2 | Version::V3 => 8,
127        };
128
129        Ok(Self {
130            time_size,
131            transition_times: cursor.read_exact(header.transition_count * time_size)?,
132            transition_types: cursor.read_exact(header.transition_count)?,
133            local_time_types: cursor.read_exact(header.type_count * 6)?,
134            time_zone_designations: cursor.read_exact(header.char_count)?,
135            leap_seconds: cursor.read_exact(header.leap_count * (time_size + 4))?,
136            std_walls: cursor.read_exact(header.std_wall_count)?,
137            ut_locals: cursor.read_exact(header.ut_local_count)?,
138        })
139    }
140
141    /// Parse time values
142    fn parse_time(&self, arr: &[u8], version: Version) -> Result<i64, TzFileError> {
143        Ok(match version {
144            Version::V1 => i32::from_be_bytes(arr.try_into()?).into(),
145            Version::V2 | Version::V3 => i64::from_be_bytes(arr.try_into()?),
146        })
147    }
148
149    /// Parse time zone data
150    fn parse(&self, header: &Header, footer: Option<&[u8]>) -> Result<TimeZone, TzError> {
151        let mut transitions = Vec::with_capacity(header.transition_count);
152        for (arr_time, &local_time_type_index) in self.transition_times.chunks_exact(self.time_size).zip(self.transition_types) {
153            let unix_leap_time = self.parse_time(&arr_time[0..self.time_size], header.version)?;
154            let local_time_type_index = local_time_type_index as usize;
155            transitions.push(Transition::new(unix_leap_time, local_time_type_index));
156        }
157
158        let mut local_time_types = Vec::with_capacity(header.type_count);
159        for arr in self.local_time_types.chunks_exact(6) {
160            let ut_offset = i32::from_be_bytes(arr[0..4].try_into()?);
161
162            let is_dst = match arr[4] {
163                0 => false,
164                1 => true,
165                _ => return Err(TzFileError::InvalidTzFile("invalid DST indicator").into()),
166            };
167
168            let char_index = arr[5] as usize;
169            if char_index >= header.char_count {
170                return Err(TzFileError::InvalidTzFile("invalid time zone designation char index").into());
171            }
172
173            let time_zone_designation = match self.time_zone_designations[char_index..].iter().position(|&c| c == b'\0') {
174                None => return Err(TzFileError::InvalidTzFile("invalid time zone designation char index").into()),
175                Some(position) => {
176                    let time_zone_designation = &self.time_zone_designations[char_index..char_index + position];
177
178                    if !time_zone_designation.is_empty() {
179                        Some(time_zone_designation)
180                    } else {
181                        None
182                    }
183                }
184            };
185
186            local_time_types.push(LocalTimeType::new(ut_offset, is_dst, time_zone_designation)?);
187        }
188
189        let mut leap_seconds = Vec::with_capacity(header.leap_count);
190        for arr in self.leap_seconds.chunks_exact(self.time_size + 4) {
191            let unix_leap_time = self.parse_time(&arr[0..self.time_size], header.version)?;
192            let correction = i32::from_be_bytes(arr[self.time_size..self.time_size + 4].try_into()?);
193            leap_seconds.push(LeapSecond::new(unix_leap_time, correction));
194        }
195
196        let std_walls_iter = self.std_walls.iter().copied().chain(iter::repeat(0));
197        let ut_locals_iter = self.ut_locals.iter().copied().chain(iter::repeat(0));
198        for (std_wall, ut_local) in std_walls_iter.zip(ut_locals_iter).take(header.type_count) {
199            if !matches!((std_wall, ut_local), (0, 0) | (1, 0) | (1, 1)) {
200                return Err(TzFileError::InvalidTzFile("invalid couple of standard/wall and UT/local indicators").into());
201            }
202        }
203
204        let extra_rule = footer.and_then(|footer| parse_footer(footer, header.version == Version::V3).transpose()).transpose()?;
205
206        Ok(TimeZone::new(transitions, local_time_types, leap_seconds, extra_rule)?)
207    }
208}
209
210/// Parse TZif file as described in [RFC 8536](https://datatracker.ietf.org/doc/html/rfc8536)
211pub(crate) fn parse_tz_file(bytes: &[u8]) -> Result<TimeZone, TzError> {
212    let mut cursor = Cursor::new(bytes);
213
214    let header = parse_header(&mut cursor)?;
215
216    match header.version {
217        Version::V1 => {
218            let data_block = DataBlock::new(&mut cursor, &header, header.version)?;
219
220            if !cursor.is_empty() {
221                return Err(TzFileError::InvalidTzFile("remaining data after end of TZif v1 data block").into());
222            }
223
224            Ok(data_block.parse(&header, None)?)
225        }
226        Version::V2 | Version::V3 => {
227            // Skip v1 data block
228            DataBlock::new(&mut cursor, &header, Version::V1)?;
229
230            let header = parse_header(&mut cursor)?;
231            let data_block = DataBlock::new(&mut cursor, &header, header.version)?;
232            let footer = cursor.remaining();
233
234            Ok(data_block.parse(&header, Some(footer))?)
235        }
236    }
237}
238
239/// Open the TZif file corresponding to a TZ string
240pub(crate) fn get_tz_file(tz_string: &str) -> Result<File, TzFileError> {
241    // Don't check system timezone directories on non-UNIX platforms
242    #[cfg(not(unix))]
243    return Ok(File::open(tz_string)?);
244
245    #[cfg(unix)]
246    {
247        // Possible system timezone directories
248        const ZONE_INFO_DIRECTORIES: [&str; 3] = ["/usr/share/zoneinfo", "/share/zoneinfo", "/etc/zoneinfo"];
249
250        if tz_string.starts_with('/') {
251            Ok(File::open(tz_string)?)
252        } else {
253            for folder in &ZONE_INFO_DIRECTORIES {
254                if let Ok(file) = File::open(format!("{}/{}", folder, tz_string)) {
255                    return Ok(file);
256                }
257            }
258            Err(TzFileError::IoError(io::ErrorKind::NotFound.into()))
259        }
260    }
261}
262
/// Tests parsing hand-written TZif fixtures of each supported version.
#[cfg(test)]
mod test {
    use super::*;

    /// A version 1 fixture (version byte `\0`) with no transitions, a single
    /// `UTC` local time type and 27 leap-second records.
    #[test]
    fn test_v1_file_with_leap_seconds() -> Result<(), TzError> {
        let bytes = b"TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\x1b\0\0\0\0\0\0\0\x01\0\0\0\x04\0\0\0\0\0\0UTC\0\x04\xb2\x58\0\0\0\0\x01\x05\xa4\xec\x01\0\0\0\x02\x07\x86\x1f\x82\0\0\0\x03\x09\x67\x53\x03\0\0\0\x04\x0b\x48\x86\x84\0\0\0\x05\x0d\x2b\x0b\x85\0\0\0\x06\x0f\x0c\x3f\x06\0\0\0\x07\x10\xed\x72\x87\0\0\0\x08\x12\xce\xa6\x08\0\0\0\x09\x15\x9f\xca\x89\0\0\0\x0a\x17\x80\xfe\x0a\0\0\0\x0b\x19\x62\x31\x8b\0\0\0\x0c\x1d\x25\xea\x0c\0\0\0\x0d\x21\xda\xe5\x0d\0\0\0\x0e\x25\x9e\x9d\x8e\0\0\0\x0f\x27\x7f\xd1\x0f\0\0\0\x10\x2a\x50\xf5\x90\0\0\0\x11\x2c\x32\x29\x11\0\0\0\x12\x2e\x13\x5c\x92\0\0\0\x13\x30\xe7\x24\x13\0\0\0\x14\x33\xb8\x48\x94\0\0\0\x15\x36\x8c\x10\x15\0\0\0\x16\x43\xb7\x1b\x96\0\0\0\x17\x49\x5c\x07\x97\0\0\0\x18\x4f\xef\x93\x18\0\0\0\x19\x55\x93\x2d\x99\0\0\0\x1a\x58\x68\x46\x9a\0\0\0\x1b\0\0";

        let time_zone = parse_tz_file(bytes)?;

        // A v1 file has no footer, so no extra rule is expected.
        let time_zone_result = TimeZone::new(
            vec![],
            vec![LocalTimeType::new(0, false, Some(b"UTC"))?],
            vec![
                LeapSecond::new(78796800, 1),
                LeapSecond::new(94694401, 2),
                LeapSecond::new(126230402, 3),
                LeapSecond::new(157766403, 4),
                LeapSecond::new(189302404, 5),
                LeapSecond::new(220924805, 6),
                LeapSecond::new(252460806, 7),
                LeapSecond::new(283996807, 8),
                LeapSecond::new(315532808, 9),
                LeapSecond::new(362793609, 10),
                LeapSecond::new(394329610, 11),
                LeapSecond::new(425865611, 12),
                LeapSecond::new(489024012, 13),
                LeapSecond::new(567993613, 14),
                LeapSecond::new(631152014, 15),
                LeapSecond::new(662688015, 16),
                LeapSecond::new(709948816, 17),
                LeapSecond::new(741484817, 18),
                LeapSecond::new(773020818, 19),
                LeapSecond::new(820454419, 20),
                LeapSecond::new(867715220, 21),
                LeapSecond::new(915148821, 22),
                LeapSecond::new(1136073622, 23),
                LeapSecond::new(1230768023, 24),
                LeapSecond::new(1341100824, 25),
                LeapSecond::new(1435708825, 26),
                LeapSecond::new(1483228826, 27),
            ],
            None,
        )?;

        assert_eq!(time_zone, time_zone_result);

        Ok(())
    }

    /// A version 2 fixture: a v1 header and data block, followed by a v2 header
    /// and data block and the footer `\nHST10\n`.
    #[test]
    fn test_v2_file() -> Result<(), TzError> {
        let bytes = b"TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\0\0\0\x06\0\0\0\0\0\0\0\x07\0\0\0\x06\0\0\0\x14\x80\0\0\0\xbb\x05\x43\x48\xbb\x21\x71\x58\xcb\x89\x3d\xc8\xd2\x23\xf4\x70\xd2\x61\x49\x38\xd5\x8d\x73\x48\x01\x02\x01\x03\x04\x01\x05\xff\xff\x6c\x02\0\0\xff\xff\x6c\x58\0\x04\xff\xff\x7a\x68\x01\x08\xff\xff\x7a\x68\x01\x0c\xff\xff\x7a\x68\x01\x10\xff\xff\x73\x60\0\x04LMT\0HST\0HDT\0HWT\0HPT\0\0\0\0\0\x01\0\0\0\0\0\x01\0TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\0\0\0\x06\0\0\0\0\0\0\0\x07\0\0\0\x06\0\0\0\x14\xff\xff\xff\xff\x74\xe0\x70\xbe\xff\xff\xff\xff\xbb\x05\x43\x48\xff\xff\xff\xff\xbb\x21\x71\x58\xff\xff\xff\xff\xcb\x89\x3d\xc8\xff\xff\xff\xff\xd2\x23\xf4\x70\xff\xff\xff\xff\xd2\x61\x49\x38\xff\xff\xff\xff\xd5\x8d\x73\x48\x01\x02\x01\x03\x04\x01\x05\xff\xff\x6c\x02\0\0\xff\xff\x6c\x58\0\x04\xff\xff\x7a\x68\x01\x08\xff\xff\x7a\x68\x01\x0c\xff\xff\x7a\x68\x01\x10\xff\xff\x73\x60\0\x04LMT\0HST\0HDT\0HWT\0HPT\0\0\0\0\0\x01\0\0\0\0\0\x01\0\x0aHST10\x0a";

        let time_zone = parse_tz_file(bytes)?;

        // The expected transitions are the 8-byte time values of the second data block.
        let time_zone_result = TimeZone::new(
            vec![
                Transition::new(-2334101314, 1),
                Transition::new(-1157283000, 2),
                Transition::new(-1155436200, 1),
                Transition::new(-880198200, 3),
                Transition::new(-769395600, 4),
                Transition::new(-765376200, 1),
                Transition::new(-712150200, 5),
            ],
            vec![
                LocalTimeType::new(-37886, false, Some(b"LMT"))?,
                LocalTimeType::new(-37800, false, Some(b"HST"))?,
                LocalTimeType::new(-34200, true, Some(b"HDT"))?,
                LocalTimeType::new(-34200, true, Some(b"HWT"))?,
                LocalTimeType::new(-34200, true, Some(b"HPT"))?,
                LocalTimeType::new(-36000, false, Some(b"HST"))?,
            ],
            vec![],
            Some(TransitionRule::Fixed(LocalTimeType::new(-36000, false, Some(b"HST"))?)),
        )?;

        assert_eq!(time_zone, time_zone_result);

        // One lookup between two listed transitions and one after the last listed transition.
        assert_eq!(*time_zone.find_local_time_type(-1156939200)?, LocalTimeType::new(-34200, true, Some(b"HDT"))?);
        assert_eq!(*time_zone.find_local_time_type(1546300800)?, LocalTimeType::new(-36000, false, Some(b"HST"))?);

        Ok(())
    }

    /// A version 3 fixture whose footer `IST-2IDT,M3.4.4/26,M10.5.0` uses a
    /// transition hour of 26, expected to parse as 93600 seconds.
    #[test]
    fn test_v3_file() -> Result<(), TzError> {
        let bytes = b"TZif3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x04\0\0\x1c\x20\0\0IST\0TZif3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\x04\0\0\0\0\x7f\xe8\x17\x80\0\0\0\x1c\x20\0\0IST\0\x01\x01\x0aIST-2IDT,M3.4.4/26,M10.5.0\x0a";

        let time_zone = parse_tz_file(bytes)?;

        let time_zone_result = TimeZone::new(
            vec![Transition::new(2145916800, 0)],
            vec![LocalTimeType::new(7200, false, Some(b"IST"))?],
            vec![],
            Some(TransitionRule::Alternate(AlternateTime::new(
                LocalTimeType::new(7200, false, Some(b"IST"))?,
                LocalTimeType::new(10800, true, Some(b"IDT"))?,
                RuleDay::MonthWeekDay(MonthWeekDay::new(3, 4, 4)?),
                93600,
                RuleDay::MonthWeekDay(MonthWeekDay::new(10, 5, 0)?),
                7200,
            )?)),
        )?;

        assert_eq!(time_zone, time_zone_result);

        Ok(())
    }
}
373}