domain/base/
scan.rs

1//! Parsing of data from its presentation format.
2//!
3//! This module provides the basic machinery to parse DNS data from its
4//! standard textual representation, known as the presentation format or,
5//! perhaps more commonly, zonefile format. To distinguish this process from
6//! parsing data from its binary wire format, we call this process
7//! _scanning._
8//!
9//! The module provides two important traits which should sound familiar to
10//! anyone who has used Serde before: [`Scan`] and [`Scanner`]. A type that
11//! knows how to create a value from its presentation format implements
12//! [`Scan`]. It uses an implementation of the [`Scanner`] trait as the source
13//! of data in presentation format.
14//!
15//! This module provides a simple scanner that uses a sequence of strings as
16//! its source and can be used to, for instance, read record data from
17//! command line arguments. A “proper” scanner is included in the
18#![cfg_attr(feature = "zonefile", doc = "[zonefile][crate::zonefile]")]
19#![cfg_attr(not(feature = "zonefile"), doc = "zonefile")]
20//! module.
21#![allow(clippy::manual_range_contains)] // Hard disagree.
22#![allow(unused_imports)] // XXX
23
24use crate::base::charstr::{CharStr, CharStrBuilder};
25use crate::base::name::{Name, ToName};
26use crate::base::wire::{Compose, Composer};
27use core::convert::{TryFrom, TryInto};
28use core::iter::Peekable;
29use core::marker::PhantomData;
30use core::{fmt, str};
31use octseq::str::Str;
32use octseq::{
33    EmptyBuilder, FreezeBuilder, FromBuilder, OctetsBuilder, ShortBuf,
34    Truncate,
35};
36#[cfg(feature = "std")]
37use std::error;
38
39use super::Ttl;
40
41//============ Scanning Traits ===============================================
42
43//------------ Scan ---------------------------------------------------------
44
45/// An extension trait to add scanning to foreign types.
46///
47/// This trait is generic over the specific scanner, allowing types to limit
48/// their implementation to a scanners with certain properties.
49pub trait Scan<S: Scanner>: Sized {
50    /// Reads a value from the provided scanner.
51    ///
52    /// An implementation should read as many tokens as it needs from the
53    /// scanner. It can assume that they are all available – the scanner will
54    /// produce an error if it runs out of tokens prematurely.
55    ///
56    /// The implementation does not need to keep reading until the end of
57    /// tokens. It is the responsibility of the user to make sure there are
58    /// no stray tokens at the end of an entry.
59    ///
60    /// Finally, if an implementation needs to read tokens until the end of
61    /// the entry, it can use [`Scanner::continues`] to check if there are
62    /// still tokens left.
63    ///
64    /// If an implementation encounters an error in the presentation data,
65    /// it should report it using [`ScannerError::custom`] unless any of the
66    /// other methods of [`ScannerError`] seem more appropriate.
67    fn scan(scanner: &mut S) -> Result<Self, S::Error>;
68}
69
70macro_rules! impl_scan_unsigned {
71    ( $type:ident) => {
72        impl<S: Scanner> Scan<S> for $type {
73            fn scan(scanner: &mut S) -> Result<Self, S::Error> {
74                let mut res: $type = 0;
75                scanner.scan_symbols(|ch| {
76                    res = res.checked_mul(10).ok_or_else(|| {
77                        S::Error::custom("decimal number overflow")
78                    })?;
79                    res += ch.into_digit(10).map_err(|_| {
80                        S::Error::custom("expected decimal number")
81                    })? as $type;
82                    Ok(())
83                })?;
84                Ok(res)
85            }
86        }
87    };
88}
89
90impl_scan_unsigned!(u8);
91impl_scan_unsigned!(u16);
92impl_scan_unsigned!(u32);
93impl_scan_unsigned!(u64);
94impl_scan_unsigned!(u128);
95
96impl<S: Scanner> Scan<S> for Ttl {
97    fn scan(scanner: &mut S) -> Result<Self, <S as Scanner>::Error> {
98        let mut res: u32 = 0;
99        scanner.scan_symbols(|ch| {
100            res = res
101                .checked_mul(10)
102                .ok_or_else(|| S::Error::custom("decimal number overflow"))?;
103            res += ch
104                .into_digit(10)
105                .map_err(|_| S::Error::custom("expected decimal number"))?;
106            Ok(())
107        })?;
108        Ok(Ttl::from_secs(res))
109    }
110}
111
112//------------ Scanner -------------------------------------------------------
113
114/// A type that can produce tokens of data in presentation format.
115///
116/// The presentation format is a relatively simple text format that provides
117/// a sequence of _entries_ each consisting of a sequence of _tokens._ An
118/// implementation of the `Scanner` trait provides access to the tokens of a
119/// single entry.
120///
121/// Most methods of the trait process a single token to the caller. Exceptions
122/// are those methods suffixed with `_entry`, which process all the remaining
123/// tokens of the entry. In addition, [`has_space`][Scanner::has_space]
124/// reports whether the token was prefixed with white space (which is relevant
125/// in some cases), and [`continues`][Scanner::continues] reports whether
126/// there are more tokens in the entry. It it returns `false, all the other
127/// token and entry methods will return an error. That is, calling these
128/// methods assumes that the caller requires at least one more token.
129///
130/// Because an implementation may be able to optimize the process of
131/// converting tokens into output data types, there are a number of methods
132/// for different output. Each of these methods assumes that the next token
133/// (or the remaining tokens in the entry) is required to contain the
134/// presentation format of the given type and is should produce an error
135/// if that is not the case.
136///
137/// This allows for instance to optimize the creation of domain names and
138/// avoid copying around data in the most usual cases.
139///
140/// As a consequence, an implementation gets to choose how to return tokens.
141/// This mostly concerns the octets types to be used, but also allows it to
142/// creatively employing the [name::Chain](crate::base::name::Chain) type to
143/// deal with a zone’s changing origin.
144pub trait Scanner {
145    /// The type of octet sequences returned by the scanner.
146    type Octets: AsRef<[u8]>;
147
148    /// The octets builder used internally and returned upon request.
149    type OctetsBuilder: OctetsBuilder
150        + AsRef<[u8]>
151        + AsMut<[u8]>
152        + Truncate
153        + FreezeBuilder<Octets = Self::Octets>;
154
155    /// The type of a domain name returned by the scanner.
156    type Name: ToName;
157
158    /// The error type of the scanner.
159    type Error: ScannerError;
160
161    /// Returns whether the next token is preceded by white space.
162    fn has_space(&self) -> bool;
163
164    /// Returns whether there are more tokens in the entry.
165    ///
166    /// This method takes a `&mut self` to allow implementations to peek on
167    /// request.
168    fn continues(&mut self) -> bool;
169
170    /// Scans a token into a sequence of symbols.
171    ///
172    /// Each symbol is passed to the caller via the closure and can be
173    /// processed there.
174    fn scan_symbols<F>(&mut self, op: F) -> Result<(), Self::Error>
175    where
176        F: FnMut(Symbol) -> Result<(), Self::Error>;
177
178    /// Scans the remainder of the entry as symbols.
179    ///
180    /// Each symbol is passed to the caller via the closure and can be
181    /// processed there.
182    fn scan_entry_symbols<F>(&mut self, op: F) -> Result<(), Self::Error>
183    where
184        F: FnMut(EntrySymbol) -> Result<(), Self::Error>;
185
186    /// Converts the symbols of a token into an octets sequence.
187    ///
188    /// Each symbol is passed to the provided converter which can return
189    /// octet slices to be used to construct the returned value. When the
190    /// token is complete, the converter is called again to ask for any
191    /// remaining data to be added.
192    fn convert_token<C: ConvertSymbols<Symbol, Self::Error>>(
193        &mut self,
194        convert: C,
195    ) -> Result<Self::Octets, Self::Error>;
196
197    /// Converts the symbols of a token into an octets sequence.
198    ///
199    /// Each symbol is passed to the provided converter which can return
200    /// octet slices to be used to construct the returned value. When the
201    /// token is complete, the converter is called again to ask for any
202    /// remaining data to be added.
203    fn convert_entry<C: ConvertSymbols<EntrySymbol, Self::Error>>(
204        &mut self,
205        convert: C,
206    ) -> Result<Self::Octets, Self::Error>;
207
208    /// Scans a token into an octets sequence.
209    ///
210    /// The returned sequence has all symbols converted into their octets.
211    /// It can be of any length.
212    fn scan_octets(&mut self) -> Result<Self::Octets, Self::Error>;
213
214    /// Scans a token as a borrowed ASCII string.
215    ///
216    /// If the next token contains non-ascii characters, returns an error.
217    /// The string is given to the caller via the provided closure.
218    fn scan_ascii_str<F, T>(&mut self, op: F) -> Result<T, Self::Error>
219    where
220        F: FnOnce(&str) -> Result<T, Self::Error>;
221
222    /// Scans a token into a domain name.
223    fn scan_name(&mut self) -> Result<Self::Name, Self::Error>;
224
225    /// Scans a token into a character string.
226    ///
227    /// Note that character strings have a length limit.  If you want a
228    /// sequence of indefinite length, use [`scan_octets`][Self::scan_octets]
229    /// instead.
230    fn scan_charstr(&mut self) -> Result<CharStr<Self::Octets>, Self::Error>;
231
232    /// Scans a token as a UTF-8 string.
233    fn scan_string(&mut self) -> Result<Str<Self::Octets>, Self::Error>;
234
235    /// Scans a sequence of character strings until the end of the entry.
236    ///
237    /// The returned octets will contain the sequence of character strings in
238    /// wire format.
239    fn scan_charstr_entry(&mut self) -> Result<Self::Octets, Self::Error>;
240
241    /// Scans an optional unknown rdata marker.
242    ///
243    /// If the next token is `\#`, i.e., an unquoted, escaped hash sign,
244    /// consumes the token and returns `Ok(true)`. If the next token is
245    /// anything else or if there is no next token, does nothing and returns
246    /// `Ok(false)`. If there is an error, returns an error.
247    fn scan_opt_unknown_marker(&mut self) -> Result<bool, Self::Error>;
248
249    /// Returns an empty octets builder.
250    ///
251    /// This builder can be used to create octets sequences in cases where
252    /// the other methods can’t be used.
253    fn octets_builder(&mut self) -> Result<Self::OctetsBuilder, Self::Error>;
254}
255
256//------------ ScannerError --------------------------------------------------
257
// Declares the `ScannerError` trait with the given list of supertraits.
//
// The macro exists so that the trait can require `std::error::Error` when
// the `std` feature is enabled and fall back to `Debug + Display` otherwise
// without having to repeat the trait body.
macro_rules! declare_error_trait {
    (ScannerError: Sized $(+ $($bound:ident)::+)*) => {
        /// A type providing error information for a scanner.
        pub trait ScannerError: Sized $(+ $($bound)::+)* {
            /// Creates a new error wrapping a supplied error message.
            fn custom(msg: &'static str) -> Self;

            /// Creates an error when more tokens were expected in the entry.
            fn end_of_entry() -> Self;

            /// Creates an error when an octets buffer is too short.
            fn short_buf() -> Self;

            /// Creates an error when there are trailing tokens.
            fn trailing_tokens() -> Self;
        }
    }
}

#[cfg(not(feature = "std"))]
declare_error_trait!(ScannerError: Sized + fmt::Debug + fmt::Display);

#[cfg(feature = "std")]
declare_error_trait!(ScannerError: Sized + error::Error);
282
#[cfg(feature = "std")]
impl ScannerError for std::io::Error {
    /// Wraps the message into an I/O error of kind `Other`.
    fn custom(msg: &'static str) -> Self {
        Self::other(msg)
    }

    /// Creates an `UnexpectedEof` I/O error.
    fn end_of_entry() -> Self {
        let kind = std::io::ErrorKind::UnexpectedEof;
        Self::new(kind, "unexpected end of entry")
    }

    /// Wraps a [`ShortBuf`] error into an I/O error of kind `Other`.
    fn short_buf() -> Self {
        Self::other(ShortBuf)
    }

    /// Creates an I/O error of kind `Other` for trailing data.
    fn trailing_tokens() -> Self {
        Self::other("trailing data")
    }
}
304
305//------------ ConvertSymbols ------------------------------------------------
306
/// A type that helps convert the symbols in presentation format.
///
/// The trait is used by [`Scanner::convert_token`] with [`Symbol`]s and by
/// [`Scanner::convert_entry`] with [`EntrySymbol`]s.
///
/// [`process_symbol`][ConvertSymbols::process_symbol] is called once for
/// every symbol. Once the end of the token or entry has been reached,
/// [`process_tail`][ConvertSymbols::process_tail] is called to give the
/// implementer a chance to return any remaining data.
pub trait ConvertSymbols<Sym, Error> {
    /// Processes the next symbol.
    ///
    /// Any data returned by the method is appended to the output octets
    /// sequence.
    fn process_symbol(
        &mut self,
        symbol: Sym,
    ) -> Result<Option<&[u8]>, Error>;

    /// Processes the end of the token or entry.
    ///
    /// Any data returned by the method is appended to the output octets
    /// sequence.
    fn process_tail(&mut self) -> Result<Option<&[u8]>, Error>;
}
330
331//============ Zone file symbol ==============================================
332
333//------------ Symbol --------------------------------------------------------
334
/// The zone file representation of a single character.
///
/// A symbol is either a plain character or one of the two kinds of escape
/// sequences. The variants describe each of them in detail.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Symbol {
    /// A Unicode character that was not escaped.
    Char(char),

    /// A character that was escaped by placing a backslash before it.
    ///
    /// Only printable ASCII characters may be escaped this way.
    SimpleEscape(u8),

    /// A raw octet given through a decimal escape sequence.
    ///
    /// Such a sequence is a backslash followed by exactly three decimal
    /// digits that spell out the value of the octet.
    DecimalEscape(u8),
}
355
356impl Symbol {
357    /// Reads a symbol from a character source.
358    ///
359    /// Returns the next symbol in the source, `Ok(None)` if the source has
360    /// been exhausted, or an error if there wasn’t a valid symbol.
361    pub fn from_chars<C: Iterator<Item = char>>(
362        chars: &mut C,
363    ) -> Result<Option<Self>, SymbolCharsError> {
364        #[inline]
365        fn bad_escape() -> SymbolCharsError {
366            SymbolCharsError(SymbolCharsEnum::BadEscape)
367        }
368
369        #[inline]
370        fn short_input() -> SymbolCharsError {
371            SymbolCharsError(SymbolCharsEnum::ShortInput)
372        }
373
374        let ch = match chars.next() {
375            Some(ch) => ch,
376            None => return Ok(None),
377        };
378        if ch != '\\' {
379            return Ok(Some(Symbol::Char(ch)));
380        }
381        match chars.next() {
382            Some(ch) if ch.is_ascii_digit() => {
383                let ch = ch.to_digit(10).unwrap() * 100;
384                let ch2 = match chars.next() {
385                    Some(ch) => match ch.to_digit(10) {
386                        Some(ch) => ch * 10,
387                        None => return Err(bad_escape()),
388                    },
389                    None => return Err(short_input()),
390                };
391                let ch3 = match chars.next() {
392                    Some(ch) => match ch.to_digit(10) {
393                        Some(ch) => ch,
394                        None => return Err(bad_escape()),
395                    },
396                    None => return Err(short_input()),
397                };
398                let res = ch + ch2 + ch3;
399                if res > 255 {
400                    return Err(bad_escape());
401                }
402                Ok(Some(Symbol::DecimalEscape(res as u8)))
403            }
404            Some(ch) => {
405                let ch = u8::try_from(ch).map_err(|_| bad_escape())?;
406                if ch < 0x20 || ch > 0x7e {
407                    Err(bad_escape())
408                } else {
409                    Ok(Some(Symbol::SimpleEscape(ch)))
410                }
411            }
412            None => Err(short_input()),
413        }
414    }
415
416    /// Reads a symbol from the given position in an octets slice.
417    ///
418    /// Returns the symbol and the index of the end of the symbol in the
419    /// slice.
420    pub fn from_slice_index(
421        octets: &[u8],
422        pos: usize,
423    ) -> Result<Option<(Symbol, usize)>, SymbolOctetsError> {
424        #[inline]
425        fn bad_utf8() -> SymbolOctetsError {
426            SymbolOctetsError(SymbolOctetsEnum::BadUtf8)
427        }
428
429        #[inline]
430        fn bad_escape() -> SymbolOctetsError {
431            SymbolOctetsError(SymbolOctetsEnum::BadEscape)
432        }
433
434        #[inline]
435        fn short_input() -> SymbolOctetsError {
436            SymbolOctetsError(SymbolOctetsEnum::ShortInput)
437        }
438
439        let c1 = match octets.get(pos) {
440            Some(c1) => *c1,
441            None => return Ok(None),
442        };
443        let pos = pos + 1;
444
445        if c1 == b'\\' {
446            // Escape sequence
447
448            // Get the next octet.
449            let c2 = match octets.get(pos) {
450                Some(c2) => *c2,
451                None => return Err(short_input()),
452            };
453            let pos = pos + 1;
454
455            if c2.is_ascii_control() {
456                // Only printable ASCII characters allowed.
457                return Err(bad_escape());
458            } else if !c2.is_ascii_digit() {
459                // Simple escape.
460                return Ok(Some((Symbol::SimpleEscape(c2), pos)));
461            }
462
463            // Get two more octets.
464            let c3 = match octets.get(pos) {
465                Some(c) if c.is_ascii_digit() => *c,
466                Some(_) => return Err(bad_escape()),
467                None => return Err(short_input()),
468            };
469            let pos = pos + 1;
470            let c4 = match octets.get(pos) {
471                Some(c) if c.is_ascii_digit() => *c,
472                Some(_) => return Err(bad_escape()),
473                None => return Err(short_input()),
474            };
475            let pos = pos + 1;
476
477            Ok(Some((
478                Symbol::DecimalEscape(
479                    u8::try_from(
480                        (u32::from(c2 - b'0') * 100)
481                            + (u32::from(c3 - b'0') * 10)
482                            + (u32::from(c4 - b'0')),
483                    )
484                    .map_err(|_| bad_escape())?,
485                ),
486                pos,
487            )))
488        } else {
489            // UTF-8 encoded character.
490            //
491            // Looks like there’s nothing in the standard library to help us
492            // do this.
493
494            // ASCII is single byte.
495            if c1 < 128 {
496                return Ok(Some((Symbol::Char(c1.into()), pos)));
497            }
498
499            // Second-to-left but must be 1.
500            if c1 & 0b0100_0000 == 0 {
501                return Err(bad_utf8());
502            }
503
504            // Get the next octet, check that it is valid.
505            let c2 = match octets.get(pos) {
506                Some(c2) => *c2,
507                None => return Err(short_input()),
508            };
509            let pos = pos + 1;
510            if c2 & 0b1100_0000 != 0b1000_0000 {
511                return Err(bad_utf8());
512            }
513
514            // If c1’s third-to-left bit is 0, we have the two octet case.
515            if c1 & 0b0010_0000 == 0 {
516                return Ok(Some((
517                    Symbol::Char(
518                        (u32::from(c2 & 0b0011_1111)
519                            | (u32::from(c1 & 0b0001_1111) << 6))
520                            .try_into()
521                            .map_err(|_| bad_utf8())?,
522                    ),
523                    pos,
524                )));
525            }
526
527            // Get the next octet, check that it is valid.
528            let c3 = match octets.get(pos) {
529                Some(c3) => *c3,
530                None => return Err(short_input()),
531            };
532            let pos = pos + 1;
533            if c3 & 0b1100_0000 != 0b1000_0000 {
534                return Err(bad_utf8());
535            }
536
537            // If c1’s fourth-to-left bit is 0, we have the three octet case.
538            if c1 & 0b0001_0000 == 0 {
539                return Ok(Some((
540                    Symbol::Char(
541                        (u32::from(c3 & 0b0011_1111)
542                            | (u32::from(c2 & 0b0011_1111) << 6)
543                            | (u32::from(c1 & 0b0001_1111) << 12))
544                            .try_into()
545                            .map_err(|_| bad_utf8())?,
546                    ),
547                    pos,
548                )));
549            }
550
551            // Get the next octet, check that it is valid.
552            let c4 = match octets.get(pos) {
553                Some(c4) => *c4,
554                None => return Err(short_input()),
555            };
556            let pos = pos + 1;
557            if c4 & 0b1100_0000 != 0b1000_0000 {
558                return Err(bad_utf8());
559            }
560
561            Ok(Some((
562                Symbol::Char(
563                    (u32::from(c4 & 0b0011_1111)
564                        | (u32::from(c3 & 0b0011_1111) << 6)
565                        | (u32::from(c2 & 0b0011_1111) << 12)
566                        | (u32::from(c1 & 0b0000_1111) << 18))
567                        .try_into()
568                        .map_err(|_| bad_utf8())?,
569                ),
570                pos,
571            )))
572        }
573    }
574
575    /// Provides the best symbol for an octet.
576    ///
577    /// The function will use the simple escape sequence for octet values that
578    /// represent ASCII spaces, quotes, backslashes, and semicolons and the
579    /// plain ASCII value for all other printable ASCII characters. Any other
580    /// value is escaped using the decimal escape sequence.
581    #[must_use]
582    pub fn from_octet(ch: u8) -> Self {
583        if ch == b' ' || ch == b'"' || ch == b'\\' || ch == b';' {
584            Symbol::SimpleEscape(ch)
585        } else if !(0x20..0x7F).contains(&ch) {
586            Symbol::DecimalEscape(ch)
587        } else {
588            Symbol::Char(ch as char)
589        }
590    }
591
592    /// Provides the best symbol for an octet inside a quoted string.
593    ///
594    /// The function will only escape a double quote and backslash using a
595    /// simple escape and all non-printable characters using decimal escapes.
596    #[must_use]
597    pub fn quoted_from_octet(ch: u8) -> Self {
598        if ch == b'"' || ch == b'\\' {
599            Symbol::SimpleEscape(ch)
600        } else if !(0x20..0x7F).contains(&ch) {
601            Symbol::DecimalEscape(ch)
602        } else {
603            Symbol::Char(ch as char)
604        }
605    }
606
607    /// Provides the best symbol for an octet inside a `Display` impl.
608    ///
609    /// The function will only escape a backslash using a simple escape and
610    /// all non-printable characters using decimal escapes.
611    #[must_use]
612    pub fn display_from_octet(ch: u8) -> Self {
613        if ch == b'\\' {
614            Symbol::SimpleEscape(ch)
615        } else if !(0x20..0x7F).contains(&ch) {
616            Symbol::DecimalEscape(ch)
617        } else {
618            Symbol::Char(ch as char)
619        }
620    }
621
622    /// Converts the symbol into an octet if it represents one.
623    ///
624    /// Both domain names and character strings operate on bytes instead of
625    /// (Unicode) characters. These bytes can be represented by printable
626    /// ASCII characters (that is, U+0020 to U+007E), both plain or through
627    /// a simple escape, or by a decimal escape.
628    ///
629    /// This method returns such an octet or an error if the symbol doesn’t
630    /// have value representing an octet. Note that it will succeed for an
631    /// ASCII space character U+0020 which may be used as a word separator
632    /// in some cases.
633    pub fn into_octet(self) -> Result<u8, BadSymbol> {
634        match self {
635            Symbol::Char(ch) => {
636                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
637                    Ok(ch as u8)
638                } else {
639                    Err(BadSymbol(BadSymbolEnum::NonAscii))
640                }
641            }
642            Symbol::SimpleEscape(ch) | Symbol::DecimalEscape(ch) => Ok(ch),
643        }
644    }
645
646    /// Converts the symbol into an octet if it is printable ASCII.
647    ///
648    /// This is similar to [`into_octet`][Self::into_octet] but returns an
649    /// error when the resulting octet is not a printable ASCII character,
650    /// i.e., an octet of value 0x20 up to and including 0x7E.
651    pub fn into_ascii(self) -> Result<u8, BadSymbol> {
652        match self {
653            Symbol::Char(ch) => {
654                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
655                    Ok(ch as u8)
656                } else {
657                    Err(BadSymbol(BadSymbolEnum::NonAscii))
658                }
659            }
660            Symbol::SimpleEscape(ch) | Symbol::DecimalEscape(ch) => {
661                if ch >= 0x20 && ch <= 0x7E {
662                    Ok(ch)
663                } else {
664                    Err(BadSymbol(BadSymbolEnum::NonAscii))
665                }
666            }
667        }
668    }
669
670    /// Converts the symbol into a `char`.
671    ///
672    /// This will fail for a decimal escape sequence which doesn’t actually
673    /// represent a character.
674    pub fn into_char(self) -> Result<char, BadSymbol> {
675        match self {
676            Symbol::Char(ch) => Ok(ch),
677            Symbol::SimpleEscape(ch) if ch >= 0x20 && ch < 0x7F => {
678                Ok(ch.into())
679            }
680            _ => Err(BadSymbol(BadSymbolEnum::NonUtf8)),
681        }
682    }
683
684    /// Converts the symbol representing a digit into its integer value.
685    pub fn into_digit(self, base: u32) -> Result<u32, BadSymbol> {
686        if let Symbol::Char(ch) = self {
687            match ch.to_digit(base) {
688                Some(ch) => Ok(ch),
689                None => Err(BadSymbol(BadSymbolEnum::NonDigit)),
690            }
691        } else {
692            Err(BadSymbol(BadSymbolEnum::Escape))
693        }
694    }
695
696    /// Returns whether the symbol can occur as part of a word.
697    ///
698    /// This is true for all symbols other than unescaped ASCII space and
699    /// horizontal tabs, opening and closing parentheses, semicolon, and
700    /// double quote.
701    #[must_use]
702    pub fn is_word_char(self) -> bool {
703        match self {
704            Symbol::Char(ch) => {
705                ch != ' '
706                    && ch != '\t'
707                    && ch != '\r'
708                    && ch != '\n'
709                    && ch != '('
710                    && ch != ')'
711                    && ch != ';'
712                    && ch != '"'
713            }
714            _ => true,
715        }
716    }
717}
718
719//--- From
720
721impl From<char> for Symbol {
722    fn from(ch: char) -> Symbol {
723        Symbol::Char(ch)
724    }
725}
726
727//--- Display
728
729impl fmt::Display for Symbol {
730    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
731        match *self {
732            Symbol::Char(ch) => write!(f, "{}", ch),
733            Symbol::SimpleEscape(ch) => write!(f, "\\{}", ch as char),
734            Symbol::DecimalEscape(ch) => write!(f, "\\{:03}", ch),
735        }
736    }
737}
738
739//------------ EntrySymbol ---------------------------------------------------
740
741/// The symbols encountered in the remainder of an entry.
742///
743/// This can either be a regular symbol or the end of a token.
744#[derive(Clone, Copy, Debug, Eq, PartialEq)]
745pub enum EntrySymbol {
746    /// A regular in-token symbol.
747    Symbol(Symbol),
748
749    /// The end of a token.
750    EndOfToken,
751}
752
753//--- From
754
755impl From<Symbol> for EntrySymbol {
756    fn from(symbol: Symbol) -> Self {
757        EntrySymbol::Symbol(symbol)
758    }
759}
760
761//------------ Symbols -------------------------------------------------------
762
763/// An iterator over the symbols in a char sequence.
764///
765/// The iterator stops if a character cannot be converted into symbols. You
766/// can check if that happened via the [`ok`][Self::ok] method.
767#[derive(Clone, Debug)]
768pub struct Symbols<Chars> {
769    /// The chars of the sequence.
770    ///
771    /// This is an option so we can fuse the iterator on error.
772    chars: Result<Chars, SymbolCharsError>,
773}
774
775impl<Chars> Symbols<Chars> {
776    /// Creates a new symbols iterator atop a char iterator.
777    pub fn new(chars: Chars) -> Self {
778        Symbols { chars: Ok(chars) }
779    }
780
781    /// Checks whether there was an error converting symbols.
782    pub fn ok(self) -> Result<(), SymbolCharsError> {
783        self.chars.map(|_| ())
784    }
785
786    pub fn with<F, T, E>(chars: Chars, op: F) -> Result<T, E>
787    where
788        F: FnOnce(&mut Self) -> Result<T, E>,
789        E: From<SymbolCharsError>,
790    {
791        let mut symbols = Self::new(chars);
792        let res = op(&mut symbols)?;
793        symbols.ok()?;
794        Ok(res)
795    }
796}
797
798impl<Chars: Iterator<Item = char>> Iterator for Symbols<Chars> {
799    type Item = Symbol;
800
801    fn next(&mut self) -> Option<Self::Item> {
802        self.chars = {
803            let chars = match self.chars.as_mut() {
804                Ok(chars) => chars,
805                Err(_) => return None,
806            };
807            match Symbol::from_chars(chars) {
808                Ok(res) => return res,
809                Err(err) => Err(err),
810            }
811        };
812        None
813    }
814}
815
816//------------ IterScanner ---------------------------------------------------
817
/// A simple scanner atop an iterator of strings.
///
/// The type is generic over both the iterator and the octets sequence used
/// for returned data. Values are created through the octets builder
/// associated with that octets type.
pub struct IterScanner<Iter, Octets>
where
    Iter: Iterator,
{
    /// The iterator providing the tokens of the scanner.
    iter: Peekable<Iter>,

    /// A marker for the type of the output octets sequence.
    marker: PhantomData<Octets>,
}
830
831impl<Iter: Iterator, Octets> IterScanner<Iter, Octets> {
832    /// Creates a new scanner from an iterator.
833    pub fn new<I: IntoIterator<IntoIter = Iter>>(iter: I) -> Self {
834        IterScanner {
835            iter: iter.into_iter().peekable(),
836            marker: PhantomData,
837        }
838    }
839
840    /// Returns whether the iterator is exhausted.
841    pub fn is_exhausted(&mut self) -> bool {
842        self.iter.peek().is_none()
843    }
844}
845
846impl<Iter, Item, Octets> Scanner for IterScanner<Iter, Octets>
847where
848    Item: AsRef<str>,
849    Iter: Iterator<Item = Item>,
850    Octets: FromBuilder,
851    <Octets as FromBuilder>::Builder: EmptyBuilder + Composer,
852{
853    type Octets = Octets;
854    type OctetsBuilder = <Octets as FromBuilder>::Builder;
855    type Name = Name<Octets>;
856    type Error = StrError;
857
858    fn has_space(&self) -> bool {
859        false
860    }
861
862    fn continues(&mut self) -> bool {
863        self.iter.peek().is_some()
864    }
865
866    fn scan_symbols<F>(&mut self, mut op: F) -> Result<(), Self::Error>
867    where
868        F: FnMut(Symbol) -> Result<(), Self::Error>,
869    {
870        let token = match self.iter.next() {
871            Some(token) => token,
872            None => return Err(StrError::end_of_entry()),
873        };
874        for sym in Symbols::new(token.as_ref().chars()) {
875            op(sym)?;
876        }
877        Ok(())
878    }
879
880    fn scan_entry_symbols<F>(&mut self, mut op: F) -> Result<(), Self::Error>
881    where
882        F: FnMut(EntrySymbol) -> Result<(), Self::Error>,
883    {
884        for token in &mut self.iter {
885            for sym in Symbols::new(token.as_ref().chars()) {
886                op(sym.into())?;
887            }
888            op(EntrySymbol::EndOfToken)?;
889        }
890        Ok(())
891    }
892
893    fn convert_token<C: ConvertSymbols<Symbol, Self::Error>>(
894        &mut self,
895        mut convert: C,
896    ) -> Result<Self::Octets, Self::Error> {
897        let token = match self.iter.next() {
898            Some(token) => token,
899            None => return Err(StrError::end_of_entry()),
900        };
901        let mut res = <Octets as FromBuilder>::Builder::empty();
902
903        for sym in Symbols::new(token.as_ref().chars()) {
904            if let Some(data) = convert.process_symbol(sym)? {
905                res.append_slice(data).map_err(Into::into)?;
906            }
907        }
908
909        if let Some(data) = convert.process_tail()? {
910            res.append_slice(data).map_err(Into::into)?;
911        }
912
913        Ok(<Octets as FromBuilder>::from_builder(res))
914    }
915
916    fn convert_entry<C: ConvertSymbols<EntrySymbol, Self::Error>>(
917        &mut self,
918        mut convert: C,
919    ) -> Result<Self::Octets, Self::Error> {
920        let mut res = <Octets as FromBuilder>::Builder::empty();
921        for token in &mut self.iter {
922            for sym in Symbols::new(token.as_ref().chars()) {
923                if let Some(data) = convert.process_symbol(sym.into())? {
924                    res.append_slice(data).map_err(Into::into)?;
925                }
926            }
927        }
928        if let Some(data) = convert.process_tail()? {
929            res.append_slice(data).map_err(Into::into)?;
930        }
931        Ok(<Octets as FromBuilder>::from_builder(res))
932    }
933
934    fn scan_octets(&mut self) -> Result<Self::Octets, Self::Error> {
935        let token = match self.iter.next() {
936            Some(token) => token,
937            None => return Err(StrError::end_of_entry()),
938        };
939        let mut res = <Octets as FromBuilder>::Builder::empty();
940        for sym in Symbols::new(token.as_ref().chars()) {
941            match sym.into_octet() {
942                Ok(ch) => res.append_slice(&[ch]).map_err(Into::into)?,
943                Err(_) => return Err(StrError::custom("bad symbol")),
944            }
945        }
946        Ok(<Octets as FromBuilder>::from_builder(res))
947    }
948
949    fn scan_ascii_str<F, T>(&mut self, op: F) -> Result<T, Self::Error>
950    where
951        F: FnOnce(&str) -> Result<T, Self::Error>,
952    {
953        let res = self.scan_string()?;
954        if res.is_ascii() {
955            op(&res)
956        } else {
957            Err(StrError::custom("non-ASCII characters"))
958        }
959    }
960
961    fn scan_name(&mut self) -> Result<Self::Name, Self::Error> {
962        let token = match self.iter.next() {
963            Some(token) => token,
964            None => return Err(StrError::end_of_entry()),
965        };
966        Name::from_symbols(Symbols::new(token.as_ref().chars()))
967            .map_err(|_| StrError::custom("invalid domain name"))
968    }
969
970    fn scan_charstr(&mut self) -> Result<CharStr<Self::Octets>, Self::Error> {
971        let token = match self.iter.next() {
972            Some(token) => token,
973            None => return Err(StrError::end_of_entry()),
974        };
975        let mut res =
976            CharStrBuilder::<<Octets as FromBuilder>::Builder>::new();
977        for sym in Symbols::new(token.as_ref().chars()) {
978            match sym.into_octet() {
979                Ok(ch) => res.append_slice(&[ch])?,
980                Err(_) => return Err(StrError::custom("bad symbol")),
981            }
982        }
983        Ok(res.finish())
984    }
985
986    fn scan_string(&mut self) -> Result<Str<Self::Octets>, Self::Error> {
987        let token = match self.iter.next() {
988            Some(token) => token,
989            None => return Err(StrError::end_of_entry()),
990        };
991        let mut res = <Octets as FromBuilder>::Builder::empty();
992        let mut buf = [0u8; 4];
993        for sym in Symbols::new(token.as_ref().chars()) {
994            match sym.into_char() {
995                Ok(ch) => res
996                    .append_slice(ch.encode_utf8(&mut buf).as_bytes())
997                    .map_err(Into::into)?,
998                Err(_) => return Err(StrError::custom("bad symbol")),
999            }
1000        }
1001        Ok(Str::from_utf8(<Octets as FromBuilder>::from_builder(res))
1002            .unwrap())
1003    }
1004
1005    fn scan_charstr_entry(&mut self) -> Result<Self::Octets, Self::Error> {
1006        // XXX This implementation is probably a bit too lazy.
1007        let mut res = <Octets as FromBuilder>::Builder::empty();
1008        while self.iter.peek().is_some() {
1009            self.scan_charstr()?.compose(&mut res).map_err(Into::into)?;
1010        }
1011        Ok(<Octets as FromBuilder>::from_builder(res))
1012    }
1013
1014    fn scan_opt_unknown_marker(&mut self) -> Result<bool, Self::Error> {
1015        match self.iter.peek() {
1016            Some(token) if token.as_ref() == "\\#" => Ok(true),
1017            _ => Ok(false),
1018        }
1019    }
1020
1021    fn octets_builder(&mut self) -> Result<Self::OctetsBuilder, Self::Error> {
1022        Ok(<Octets as FromBuilder>::Builder::empty())
1023    }
1024}
1025
1026//============ Error Types ===================================================
1027
1028//------------ SymbolCharsError ----------------------------------------------
1029
/// An error happened when reading a symbol.
///
/// The error wraps a private enum describing what went wrong. Use
/// `as_str` or the `Display` impl to get a description.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SymbolCharsError(SymbolCharsEnum);

/// The reasons for a `SymbolCharsError`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SymbolCharsEnum {
    /// An illegal escape sequence was encountered.
    BadEscape,

    /// Unexpected end of input.
    ///
    /// This can only happen in a decimal escape sequence.
    ShortInput,
}

impl SymbolCharsError {
    /// Creates the error for an illegal escape sequence.
    pub(crate) const fn bad_escape() -> Self {
        SymbolCharsError(SymbolCharsEnum::BadEscape)
    }

    /// Creates the error for input ending unexpectedly.
    pub(crate) const fn short_input() -> Self {
        SymbolCharsError(SymbolCharsEnum::ShortInput)
    }

    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        let Self(kind) = self;
        match kind {
            SymbolCharsEnum::BadEscape => "illegal escape sequence",
            SymbolCharsEnum::ShortInput => "unexpected end of input",
        }
    }
}

//--- Display and Error

impl fmt::Display for SymbolCharsError {
    /// Writes the static description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        f.write_str(text)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for SymbolCharsError {}
1076
1077//------------ SymbolOctetsError ---------------------------------------------
1078
/// An error happened when reading a symbol.
///
/// The error wraps a private enum describing what went wrong. Use
/// `as_str` or the `Display` impl to get a description.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SymbolOctetsError(SymbolOctetsEnum);

/// The reasons for a `SymbolOctetsError`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SymbolOctetsEnum {
    /// An illegal UTF-8 sequence was encountered.
    BadUtf8,

    /// An illegal escape sequence was encountered.
    BadEscape,

    /// Unexpected end of input.
    ///
    /// This can only happen in a decimal escape sequence.
    ShortInput,
}

impl SymbolOctetsError {
    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self(SymbolOctetsEnum::BadUtf8) => "illegal UTF-8 sequence",
            Self(SymbolOctetsEnum::BadEscape) => "illegal escape sequence",
            Self(SymbolOctetsEnum::ShortInput) => "unexpected end of data",
        }
    }
}

//--- Display and Error

impl fmt::Display for SymbolOctetsError {
    /// Writes the static description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        f.write_str(text)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for SymbolOctetsError {}
1118
1119//------------ BadSymbol -----------------------------------------------------
1120
/// A symbol with an unexpected value was encountered.
///
/// The error wraps a private enum describing what was wrong with the
/// symbol. Use `as_str` or the `Display` impl to get a description.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BadSymbol(BadSymbolEnum);

/// The reasons for a `BadSymbol` error.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum BadSymbolEnum {
    /// A non-ASCII character was encountered.
    NonAscii,

    /// A non-UTF8 character was encountered.
    NonUtf8,

    /// A non-digit character was encountered.
    NonDigit,

    /// An unexpected escape sequence was encountered.
    Escape,
}

impl BadSymbol {
    /// Creates a “non-ASCII” variant of the error.
    ///
    /// This is a `const fn` so the error can be created in constant
    /// contexts.
    pub(crate) const fn non_ascii() -> Self {
        Self(BadSymbolEnum::NonAscii)
    }

    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self.0 {
            BadSymbolEnum::NonAscii => "non-ASCII symbol",
            BadSymbolEnum::NonUtf8 => "invalid UTF-8 sequence",
            BadSymbolEnum::NonDigit => "expected digit",
            BadSymbolEnum::Escape => "unexpected escape sequence",
        }
    }
}

//--- Display and Error

impl fmt::Display for BadSymbol {
    /// Writes the static description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}

#[cfg(feature = "std")]
impl std::error::Error for BadSymbol {}

#[cfg(feature = "std")]
impl From<BadSymbol> for std::io::Error {
    /// Wraps the error into an I/O error of kind “other”.
    fn from(err: BadSymbol) -> Self {
        std::io::Error::other(err)
    }
}
1174
1175//------------ StrError ------------------------------------------------------
1176
1177/// A simple scanner error that just wraps a static str.
1178#[derive(Debug)]
1179pub struct StrError(&'static str);
1180
1181impl ScannerError for StrError {
1182    fn custom(msg: &'static str) -> Self {
1183        StrError(msg)
1184    }
1185
1186    fn end_of_entry() -> Self {
1187        Self::custom("unexpected end of entry")
1188    }
1189
1190    fn short_buf() -> Self {
1191        Self::custom("short buffer")
1192    }
1193
1194    fn trailing_tokens() -> Self {
1195        Self::custom("trailing data")
1196    }
1197}
1198
1199impl From<ShortBuf> for StrError {
1200    fn from(_: ShortBuf) -> Self {
1201        Self::short_buf()
1202    }
1203}
1204
1205impl fmt::Display for StrError {
1206    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1207        f.write_str(self.0)
1208    }
1209}
1210
1211#[cfg(feature = "std")]
1212impl std::error::Error for StrError {}
1213
1214//============ Testing =======================================================
1215
#[cfg(test)]
#[cfg(feature = "std")]
mod test {
    use super::*;

    /// Checks `Symbol::from_slice_index` for plain characters, simple
    /// escapes, and decimal escapes located at the start of a slice.
    #[test]
    fn symbol_from_slice_index() {
        // Every character but the backslash stands for itself.
        let mut utf8 = [0u8; 4];
        for ch in ('\0'..char::MAX).filter(|&ch| ch != '\\') {
            let encoded = ch.encode_utf8(&mut utf8).as_bytes();
            assert_eq!(
                Symbol::from_slice_index(encoded, 0),
                Ok(Some((Symbol::Char(ch), ch.len_utf8()))),
                "char '{}'",
                ch,
            );
        }

        // A backslash followed by a non-digit is a simple escape.
        for ch in ('0'..'\x7f').filter(|ch| !ch.is_ascii_digit()) {
            let sequence = format!("\\{}", ch);
            assert_eq!(
                Symbol::from_slice_index(sequence.as_bytes(), 0),
                Ok(Some((Symbol::SimpleEscape(ch as u8), 2))),
                "sequence \"\\{}\"",
                ch
            );
        }

        // A backslash followed by three digits is a decimal escape.
        for octet in 0..=u8::MAX {
            let sequence = format!("\\{:03}", octet);
            assert_eq!(
                Symbol::from_slice_index(sequence.as_bytes(), 0),
                Ok(Some((Symbol::DecimalEscape(octet), 4))),
                "sequence \"\\{:03}\"",
                octet
            );
        }
    }
}