domain/base/
scan.rs

1//! Parsing of data from its presentation format.
2//!
3//! This module provides the basic machinery to parse DNS data from its
4//! standard textual representation, known as the presentation format or,
5//! perhaps more commonly, zonefile format. To distinguish this process from
6//! parsing data from its binary wire format, we call this process
7//! _scanning._
8//!
9//! The module provides two important traits which should sound familiar to
10//! anyone who has used Serde before: [`Scan`] and [`Scanner`]. A type that
11//! knows how to create a value from its presentation format implements
12//! [`Scan`]. It uses an implementation of the [`Scanner`] trait as the source
13//! of data in presentation format.
14//!
15//! This module provides a simple scanner that uses a sequence of strings as
16//! its source and can be used to, for instance, read record data from
17//! command line arguments. A “proper” scanner is included in the
18#![cfg_attr(feature = "zonefile", doc = "[zonefile][crate::zonefile]")]
19#![cfg_attr(not(feature = "zonefile"), doc = "zonefile")]
20//! module.
21#![allow(clippy::manual_range_contains)] // Hard disagree.
22#![allow(unused_imports)] // XXX
23
24use crate::base::charstr::{CharStr, CharStrBuilder};
25use crate::base::name::{Dname, ToDname};
26use crate::base::wire::{Compose, Composer};
27use core::convert::{TryFrom, TryInto};
28use core::iter::Peekable;
29use core::marker::PhantomData;
30use core::{fmt, str};
31use octseq::str::Str;
32use octseq::{
33    EmptyBuilder, FreezeBuilder, FromBuilder, OctetsBuilder, ShortBuf,
34    Truncate,
35};
36#[cfg(feature = "std")]
37use std::error;
38
39use super::Ttl;
40
41//============ Scanning Traits ===============================================
42
43//------------ Scan ---------------------------------------------------------
44
45/// An extension trait to add scanning to foreign types.
46///
47/// This trait is generic over the specific scanner, allowing types to limit
48/// their implementation to a scanners with certain properties.
49pub trait Scan<S: Scanner>: Sized {
50    /// Reads a value from the provided scanner.
51    ///
52    /// An implementation should read as many tokens as it needs from the
53    /// scanner. It can assume that they are all available – the scanner will
54    /// produce an error if it runs out of tokens prematurely.
55    ///
56    /// The implementation does not need to keep reading until the end of
57    /// tokens. It is the responsibility of the user to make sure there are
58    /// no stray tokens at the end of an entry.
59    ///
60    /// Finally, if an implementation needs to read tokens until the end of
61    /// the entry, it can use [`Scanner::continues`] to check if there are
62    /// still tokens left.
63    ///
64    /// If an implementation encounters an error in the presentation data,
65    /// it should report it using [`ScannerError::custom`] unless any of the
66    /// other methods of [`ScannerError`] seem more appropriate.
67    fn scan(scanner: &mut S) -> Result<Self, S::Error>;
68}
69
70macro_rules! impl_scan_unsigned {
71    ( $type:ident) => {
72        impl<S: Scanner> Scan<S> for $type {
73            fn scan(scanner: &mut S) -> Result<Self, S::Error> {
74                let mut res: $type = 0;
75                scanner.scan_symbols(|ch| {
76                    res = res.checked_mul(10).ok_or_else(|| {
77                        S::Error::custom("decimal number overflow")
78                    })?;
79                    res += ch.into_digit(10).map_err(|_| {
80                        S::Error::custom("expected decimal number")
81                    })? as $type;
82                    Ok(())
83                })?;
84                Ok(res)
85            }
86        }
87    };
88}
89
90impl_scan_unsigned!(u8);
91impl_scan_unsigned!(u16);
92impl_scan_unsigned!(u32);
93impl_scan_unsigned!(u64);
94impl_scan_unsigned!(u128);
95
96impl<S: Scanner> Scan<S> for Ttl {
97    fn scan(scanner: &mut S) -> Result<Self, <S as Scanner>::Error> {
98        let mut res: u32 = 0;
99        scanner.scan_symbols(|ch| {
100            res = res
101                .checked_mul(10)
102                .ok_or_else(|| S::Error::custom("decimal number overflow"))?;
103            res += ch
104                .into_digit(10)
105                .map_err(|_| S::Error::custom("expected decimal number"))?;
106            Ok(())
107        })?;
108        Ok(Ttl::from_secs(res))
109    }
110}
111
112//------------ Scanner -------------------------------------------------------
113
114/// A type that can produce tokens of data in presentation format.
115///
116/// The presentation format is a relatively simple text format that provides
117/// a sequence of _entries_ each consisting of a sequence of _tokens._ An
118/// implementation of the `Scanner` trait provides access to the tokens of a
119/// single entry.
120///
121/// Most methods of the trait process a single token to the caller. Exceptions
122/// are those methods suffixed with `_entry`, which process all the remaining
123/// tokens of the entry. In addition, [`has_space`][Scanner::has_space]
124/// reports whether the token was prefixed with white space (which is relevant
125/// in some cases), and [`continues`][Scanner::continues] reports whether
126/// there are more tokens in the entry. It it returns `false, all the other
127/// token and entry methods will return an error. That is, calling these
128/// methods assumes that the caller requires at least one more token.
129///
130/// Because an implementation may be able to optimize the process of
131/// converting tokens into output data types, there are a number of methods
132/// for different output. Each of these methods assumes that the next token
133/// (or the remaining tokens in the entry) is required to contain the
134/// presentation format of the given type and is should produce an error
135/// if that is not the case.
136///
137/// This allows for instance to optimize the creation of domain names and
138/// avoid copying around data in the most usual cases.
139///
140/// As a consequence, an implementation gets to choose how to return tokens.
141/// This mostly concerns the octets types to be used, but also allows it to
142/// creatively employing the [name::Chain](crate::base::name::Chain) type to
143/// deal with a zone’s changing origin.
144pub trait Scanner {
145    /// The type of octet sequences returned by the scanner.
146    type Octets: AsRef<[u8]>;
147
148    /// The octets builder used internally and returned upon request.
149    type OctetsBuilder: OctetsBuilder
150        + AsRef<[u8]>
151        + AsMut<[u8]>
152        + Truncate
153        + FreezeBuilder<Octets = Self::Octets>;
154
155    /// The type of a domain name returned by the scanner.
156    type Dname: ToDname;
157
158    /// The error type of the scanner.
159    type Error: ScannerError;
160
161    /// Returns whether the next token is preceded by white space.
162    fn has_space(&self) -> bool;
163
164    /// Returns whether there are more tokens in the entry.
165    ///
166    /// This method takes a `&mut self` to allow implementations to peek on
167    /// request.
168    fn continues(&mut self) -> bool;
169
170    /// Scans a token into a sequence of symbols.
171    ///
172    /// Each symbol is passed to the caller via the closure and can be
173    /// processed there.
174    fn scan_symbols<F>(&mut self, op: F) -> Result<(), Self::Error>
175    where
176        F: FnMut(Symbol) -> Result<(), Self::Error>;
177
178    /// Scans the remainder of the entry as symbols.
179    ///
180    /// Each symbol is passed to the caller via the closure and can be
181    /// processed there.
182    fn scan_entry_symbols<F>(&mut self, op: F) -> Result<(), Self::Error>
183    where
184        F: FnMut(EntrySymbol) -> Result<(), Self::Error>;
185
186    /// Converts the symbols of a token into an octets sequence.
187    ///
188    /// Each symbol is passed to the provided converter which can return
189    /// octet slices to be used to construct the returned value. When the
190    /// token is complete, the converter is called again to ask for any
191    /// remaining data to be added.
192    fn convert_token<C: ConvertSymbols<Symbol, Self::Error>>(
193        &mut self,
194        convert: C,
195    ) -> Result<Self::Octets, Self::Error>;
196
197    /// Converts the symbols of a token into an octets sequence.
198    ///
199    /// Each symbol is passed to the provided converter which can return
200    /// octet slices to be used to construct the returned value. When the
201    /// token is complete, the converter is called again to ask for any
202    /// remaining data to be added.
203    fn convert_entry<C: ConvertSymbols<EntrySymbol, Self::Error>>(
204        &mut self,
205        convert: C,
206    ) -> Result<Self::Octets, Self::Error>;
207
208    /// Scans a token into an octets sequence.
209    ///
210    /// The returned sequence has all symbols converted into their octets.
211    /// It can be of any length.
212    fn scan_octets(&mut self) -> Result<Self::Octets, Self::Error>;
213
214    /// Scans a token as a borrowed ASCII string.
215    ///
216    /// If the next token contains non-ascii characters, returns an error.
217    /// The string is given to the caller via the provided closure.
218    fn scan_ascii_str<F, T>(&mut self, op: F) -> Result<T, Self::Error>
219    where
220        F: FnOnce(&str) -> Result<T, Self::Error>;
221
222    /// Scans a token into a domain name.
223    fn scan_dname(&mut self) -> Result<Self::Dname, Self::Error>;
224
225    /// Scans a token into a character string.
226    ///
227    /// Note that character strings have a length limit.  If you want a
228    /// sequence of indefinite length, use [`scan_octets`][Self::scan_octets]
229    /// instead.
230    fn scan_charstr(&mut self) -> Result<CharStr<Self::Octets>, Self::Error>;
231
232    /// Scans a token as a UTF-8 string.
233    fn scan_string(&mut self) -> Result<Str<Self::Octets>, Self::Error>;
234
235    /// Scans a sequence of character strings until the end of the entry.
236    ///
237    /// The returned octets will contain the sequence of character strings in
238    /// wire format.
239    fn scan_charstr_entry(&mut self) -> Result<Self::Octets, Self::Error>;
240
241    /// Scans an optional unknown rdata marker.
242    ///
243    /// If the next token is `\#`, i.e., an unquoted, escaped hash sign,
244    /// consumes the token and returns `Ok(true)`. If the next token is
245    /// anything else or if there is no next token, does nothing and returns
246    /// `Ok(false)`. If there is an error, returns an error.
247    fn scan_opt_unknown_marker(&mut self) -> Result<bool, Self::Error>;
248
249    /// Returns an empty octets builder.
250    ///
251    /// This builder can be used to create octets sequences in cases where
252    /// the other methods can’t be used.
253    fn octets_builder(&mut self) -> Result<Self::OctetsBuilder, Self::Error>;
254}
255
256//------------ ScannerError --------------------------------------------------
257
258macro_rules! declare_error_trait {
259    (ScannerError: Sized $(+ $($supertrait:ident)::+)*) => {
260        /// A type providing error information for a scanner.
261        pub trait ScannerError: Sized $(+ $($supertrait)::+)* {
262            /// Creates a new error wrapping a supplied error message.
263            fn custom(msg: &'static str) -> Self;
264
265            /// Creates an error when more tokens were expected in the entry.
266            fn end_of_entry() -> Self;
267
268            /// Creates an error when a octets buffer is too short.
269            fn short_buf() -> Self;
270
271            /// Creates an error when there are trailing tokens.
272            fn trailing_tokens() -> Self;
273        }
274    }
275}
276
277#[cfg(feature = "std")]
278declare_error_trait!(ScannerError: Sized + error::Error);
279
280#[cfg(not(feature = "std"))]
281declare_error_trait!(ScannerError: Sized + fmt::Debug + fmt::Display);
282
/// Allows IO errors to be used as the error type of a scanner.
#[cfg(feature = "std")]
impl ScannerError for std::io::Error {
    /// Wraps the message into an error of kind `Other`.
    fn custom(msg: &'static str) -> Self {
        Self::new(std::io::ErrorKind::Other, msg)
    }

    /// Creates an error of kind `UnexpectedEof`.
    fn end_of_entry() -> Self {
        Self::new(
            std::io::ErrorKind::UnexpectedEof,
            "unexpected end of entry",
        )
    }

    /// Wraps a [`ShortBuf`] value into an error of kind `Other`.
    fn short_buf() -> Self {
        Self::new(std::io::ErrorKind::Other, ShortBuf)
    }

    /// Creates an error of kind `Other` with a fixed message.
    fn trailing_tokens() -> Self {
        Self::new(std::io::ErrorKind::Other, "trailing data")
    }
}
304
305//------------ ConvertSymbols ------------------------------------------------
306
/// A type that helps convert the symbols in presentation format.
///
/// This trait is used by [`Scanner::convert_token`] with [`Symbol`]s and
/// by [`Scanner::convert_entry`] with [`EntrySymbol`]s.
///
/// For each symbol, [`process_symbol`][Self::process_symbol] is called. Once
/// the end of the token or entry is reached,
/// [`process_tail`][Self::process_tail] is called, giving the implementer a
/// chance to return any remaining data.
pub trait ConvertSymbols<Sym, Error> {
    /// Processes the next symbol.
    ///
    /// If the method returns some data, it will be appended to the output
    /// octets sequence.
    fn process_symbol(
        &mut self,
        symbol: Sym,
    ) -> Result<Option<&[u8]>, Error>;

    /// Processes the end of the token or entry.
    ///
    /// If the method returns some data, it will be appended to the output
    /// octets sequence.
    fn process_tail(&mut self) -> Result<Option<&[u8]>, Error>;
}
329
330//============ Zone file symbol ==============================================
331
332//------------ Symbol --------------------------------------------------------
333
/// The zone file representation of a single character.
///
/// A symbol is either a regular character or one of two kinds of escape
/// sequences. See the variants for the details.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Symbol {
    /// An unescaped Unicode character.
    Char(char),

    /// A character escaped via a preceding backslash.
    ///
    /// This escape sequence is only allowed for printable ASCII characters.
    SimpleEscape(u8),

    /// A raw octet escaped using the decimal escape sequence.
    ///
    /// This escape sequence consists of a backslash followed by exactly
    /// three decimal digits giving the value of the octet.
    DecimalEscape(u8),
}
354
355impl Symbol {
356    /// Reads a symbol from a character source.
357    ///
358    /// Returns the next symbol in the source, `Ok(None)` if the source has
359    /// been exhausted, or an error if there wasn’t a valid symbol.
360    pub fn from_chars<C: Iterator<Item = char>>(
361        chars: &mut C,
362    ) -> Result<Option<Self>, SymbolCharsError> {
363        #[inline]
364        fn bad_escape() -> SymbolCharsError {
365            SymbolCharsError(SymbolCharsEnum::BadEscape)
366        }
367
368        #[inline]
369        fn short_input() -> SymbolCharsError {
370            SymbolCharsError(SymbolCharsEnum::ShortInput)
371        }
372
373        let ch = match chars.next() {
374            Some(ch) => ch,
375            None => return Ok(None),
376        };
377        if ch != '\\' {
378            return Ok(Some(Symbol::Char(ch)));
379        }
380        match chars.next() {
381            Some(ch) if ch.is_ascii_digit() => {
382                let ch = ch.to_digit(10).unwrap() * 100;
383                let ch2 = match chars.next() {
384                    Some(ch) => match ch.to_digit(10) {
385                        Some(ch) => ch * 10,
386                        None => return Err(bad_escape()),
387                    },
388                    None => return Err(short_input()),
389                };
390                let ch3 = match chars.next() {
391                    Some(ch) => match ch.to_digit(10) {
392                        Some(ch) => ch,
393                        None => return Err(bad_escape()),
394                    },
395                    None => return Err(short_input()),
396                };
397                let res = ch + ch2 + ch3;
398                if res > 255 {
399                    return Err(bad_escape());
400                }
401                Ok(Some(Symbol::DecimalEscape(res as u8)))
402            }
403            Some(ch) => {
404                let ch = u8::try_from(ch).map_err(|_| bad_escape())?;
405                if ch < 0x20 || ch > 0x7e {
406                    Err(bad_escape())
407                } else {
408                    Ok(Some(Symbol::SimpleEscape(ch)))
409                }
410            }
411            None => Err(short_input()),
412        }
413    }
414
415    /// Reads a symbol from the given position in an octets slice.
416    ///
417    /// Returns the symbol and the index of the end of the symbol in the
418    /// slice.
419    pub fn from_slice_index(
420        octets: &[u8],
421        pos: usize,
422    ) -> Result<Option<(Symbol, usize)>, SymbolOctetsError> {
423        #[inline]
424        fn bad_utf8() -> SymbolOctetsError {
425            SymbolOctetsError(SymbolOctetsEnum::BadUtf8)
426        }
427
428        #[inline]
429        fn bad_escape() -> SymbolOctetsError {
430            SymbolOctetsError(SymbolOctetsEnum::BadEscape)
431        }
432
433        #[inline]
434        fn short_input() -> SymbolOctetsError {
435            SymbolOctetsError(SymbolOctetsEnum::ShortInput)
436        }
437
438        let c1 = match octets.get(pos) {
439            Some(c1) => *c1,
440            None => return Ok(None),
441        };
442        let pos = pos + 1;
443
444        if c1 == b'\\' {
445            // Escape sequence
446
447            // Get the next octet.
448            let c2 = match octets.get(pos) {
449                Some(c2) => *c2,
450                None => return Err(short_input()),
451            };
452            let pos = pos + 1;
453
454            if c2.is_ascii_control() {
455                // Only printable ASCII characters allowed.
456                return Err(bad_escape());
457            } else if !c2.is_ascii_digit() {
458                // Simple escape.
459                return Ok(Some((Symbol::SimpleEscape(c2), pos)));
460            }
461
462            // Get two more octets.
463            let c3 = match octets.get(pos) {
464                Some(c) if c.is_ascii_digit() => *c,
465                Some(_) => return Err(bad_escape()),
466                None => return Err(short_input()),
467            };
468            let pos = pos + 1;
469            let c4 = match octets.get(pos) {
470                Some(c) if c.is_ascii_digit() => *c,
471                Some(_) => return Err(bad_escape()),
472                None => return Err(short_input()),
473            };
474            let pos = pos + 1;
475
476            Ok(Some((
477                Symbol::DecimalEscape(
478                    u8::try_from(
479                        (u32::from(c2 - b'0') * 100)
480                            + (u32::from(c3 - b'0') * 10)
481                            + (u32::from(c4 - b'0')),
482                    )
483                    .map_err(|_| bad_escape())?,
484                ),
485                pos,
486            )))
487        } else {
488            // UTF-8 encoded character.
489            //
490            // Looks like there’s nothing in the standard library to help us
491            // do this.
492
493            // ASCII is single byte.
494            if c1 < 128 {
495                return Ok(Some((Symbol::Char(c1.into()), pos)));
496            }
497
498            // Second-to-left but must be 1.
499            if c1 & 0b0100_0000 == 0 {
500                return Err(bad_utf8());
501            }
502
503            // Get the next octet, check that it is valid.
504            let c2 = match octets.get(pos) {
505                Some(c2) => *c2,
506                None => return Err(short_input()),
507            };
508            let pos = pos + 1;
509            if c2 & 0b1100_0000 != 0b1000_0000 {
510                return Err(bad_utf8());
511            }
512
513            // If c1’s third-to-left bit is 0, we have the two octet case.
514            if c1 & 0b0010_0000 == 0 {
515                return Ok(Some((
516                    Symbol::Char(
517                        (u32::from(c2 & 0b0011_1111)
518                            | (u32::from(c1 & 0b0001_1111) << 6))
519                            .try_into()
520                            .map_err(|_| bad_utf8())?,
521                    ),
522                    pos,
523                )));
524            }
525
526            // Get the next octet, check that it is valid.
527            let c3 = match octets.get(pos) {
528                Some(c3) => *c3,
529                None => return Err(short_input()),
530            };
531            let pos = pos + 1;
532            if c3 & 0b1100_0000 != 0b1000_0000 {
533                return Err(bad_utf8());
534            }
535
536            // If c1’s fourth-to-left bit is 0, we have the three octet case.
537            if c1 & 0b0001_0000 == 0 {
538                return Ok(Some((
539                    Symbol::Char(
540                        (u32::from(c3 & 0b0011_1111)
541                            | (u32::from(c2 & 0b0011_1111) << 6)
542                            | (u32::from(c1 & 0b0001_1111) << 12))
543                            .try_into()
544                            .map_err(|_| bad_utf8())?,
545                    ),
546                    pos,
547                )));
548            }
549
550            // Get the next octet, check that it is valid.
551            let c4 = match octets.get(pos) {
552                Some(c4) => *c4,
553                None => return Err(short_input()),
554            };
555            let pos = pos + 1;
556            if c4 & 0b1100_0000 != 0b1000_0000 {
557                return Err(bad_utf8());
558            }
559
560            Ok(Some((
561                Symbol::Char(
562                    (u32::from(c4 & 0b0011_1111)
563                        | (u32::from(c3 & 0b0011_1111) << 6)
564                        | (u32::from(c2 & 0b0011_1111) << 12)
565                        | (u32::from(c1 & 0b0000_1111) << 18))
566                        .try_into()
567                        .map_err(|_| bad_utf8())?,
568                ),
569                pos,
570            )))
571        }
572    }
573
574    /// Provides the best symbol for an octet.
575    ///
576    /// The function will use the simple escape sequence for octet values that
577    /// represent ASCII spaces, quotes, backslashes, and semicolons and the
578    /// plain ASCII value for all other printable ASCII characters. Any other
579    /// value is escaped using the decimal escape sequence.
580    #[must_use]
581    pub fn from_octet(ch: u8) -> Self {
582        if ch == b' ' || ch == b'"' || ch == b'\\' || ch == b';' {
583            Symbol::SimpleEscape(ch)
584        } else if !(0x20..0x7F).contains(&ch) {
585            Symbol::DecimalEscape(ch)
586        } else {
587            Symbol::Char(ch as char)
588        }
589    }
590
591    /// Converts the symbol into an octet if it represents one.
592    ///
593    /// Both domain names and character strings operate on bytes instead of
594    /// (Unicode) characters. These bytes can be represented by printable
595    /// ASCII characters (that is, U+0020 to U+007E), both plain or through
596    /// a simple escape, or by a decimal escape.
597    ///
598    /// This method returns such an octet or an error if the symbol doesn’t
599    /// have value representing an octet. Note that it will succeed for an
600    /// ASCII space character U+0020 which may be used as a word separator
601    /// in some cases.
602    pub fn into_octet(self) -> Result<u8, BadSymbol> {
603        match self {
604            Symbol::Char(ch) => {
605                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
606                    Ok(ch as u8)
607                } else {
608                    Err(BadSymbol(BadSymbolEnum::NonAscii))
609                }
610            }
611            Symbol::SimpleEscape(ch) | Symbol::DecimalEscape(ch) => Ok(ch),
612        }
613    }
614
615    /// Converts the symbol into an octet if it is printable ASCII.
616    ///
617    /// This is similar to [`into_octet`][Self::into_octet] but returns an
618    /// error when the resulting octet is not a printable ASCII character,
619    /// i.e., an octet of value 0x20 up to and including 0x7E.
620    pub fn into_ascii(self) -> Result<u8, BadSymbol> {
621        match self {
622            Symbol::Char(ch) => {
623                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
624                    Ok(ch as u8)
625                } else {
626                    Err(BadSymbol(BadSymbolEnum::NonAscii))
627                }
628            }
629            Symbol::SimpleEscape(ch) | Symbol::DecimalEscape(ch) => {
630                if ch >= 0x20 && ch <= 0x7E {
631                    Ok(ch)
632                } else {
633                    Err(BadSymbol(BadSymbolEnum::NonAscii))
634                }
635            }
636        }
637    }
638
639    /// Converts the symbol into a `char`.
640    ///
641    /// This will fail for a decimal escape sequence which doesn’t actually
642    /// represent a character.
643    pub fn into_char(self) -> Result<char, BadSymbol> {
644        match self {
645            Symbol::Char(ch) => Ok(ch),
646            Symbol::SimpleEscape(ch) if ch >= 0x20 && ch < 0x7F => {
647                Ok(ch.into())
648            }
649            _ => Err(BadSymbol(BadSymbolEnum::NonUtf8)),
650        }
651    }
652
653    /// Converts the symbol representing a digit into its integer value.
654    pub fn into_digit(self, base: u32) -> Result<u32, BadSymbol> {
655        if let Symbol::Char(ch) = self {
656            match ch.to_digit(base) {
657                Some(ch) => Ok(ch),
658                None => Err(BadSymbol(BadSymbolEnum::NonDigit)),
659            }
660        } else {
661            Err(BadSymbol(BadSymbolEnum::Escape))
662        }
663    }
664
665    /// Returns whether the symbol can occur as part of a word.
666    ///
667    /// This is true for all symbols other than unescaped ASCII space and
668    /// horizontal tabs, opening and closing parentheses, semicolon, and
669    /// double quote.
670    #[must_use]
671    pub fn is_word_char(self) -> bool {
672        match self {
673            Symbol::Char(ch) => {
674                ch != ' '
675                    && ch != '\t'
676                    && ch != '\n'
677                    && ch != '('
678                    && ch != ')'
679                    && ch != ';'
680                    && ch != '"'
681            }
682            _ => true,
683        }
684    }
685}
686
687//--- From
688
689impl From<char> for Symbol {
690    fn from(ch: char) -> Symbol {
691        Symbol::Char(ch)
692    }
693}
694
695//--- Display
696
697impl fmt::Display for Symbol {
698    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
699        match *self {
700            Symbol::Char(ch) => write!(f, "{}", ch),
701            Symbol::SimpleEscape(ch) => write!(f, "\\{}", ch),
702            Symbol::DecimalEscape(ch) => write!(f, "\\{:03}", ch),
703        }
704    }
705}
706
707//------------ EntrySymbol ---------------------------------------------------
708
709/// The symbols encountered in the remainder of an entry.
710///
711/// This can either be a regular symbol or the end of a token.
712#[derive(Clone, Copy, Debug, Eq, PartialEq)]
713pub enum EntrySymbol {
714    /// A regular in-token symbol.
715    Symbol(Symbol),
716
717    /// The end of a token.
718    EndOfToken,
719}
720
721//--- From
722
723impl From<Symbol> for EntrySymbol {
724    fn from(symbol: Symbol) -> Self {
725        EntrySymbol::Symbol(symbol)
726    }
727}
728
729//------------ Symbols -------------------------------------------------------
730
731/// An iterator over the symbols in a char sequence.
732///
733/// The iterator stops if a character cannot be converted into symbols. You
734/// can check if that happened via the [`ok`][Self::ok] method.
735#[derive(Clone, Debug)]
736pub struct Symbols<Chars> {
737    /// The chars of the sequence.
738    ///
739    /// This is an option so we can fuse the iterator on error.
740    chars: Result<Chars, SymbolCharsError>,
741}
742
743impl<Chars> Symbols<Chars> {
744    /// Creates a new symbols iterator atop a char iterator.
745    pub fn new(chars: Chars) -> Self {
746        Symbols { chars: Ok(chars) }
747    }
748
749    /// Checks whether there was an error converting symbols.
750    pub fn ok(self) -> Result<(), SymbolCharsError> {
751        self.chars.map(|_| ())
752    }
753
754    pub fn with<F, T, E>(chars: Chars, op: F) -> Result<T, E>
755    where
756        F: FnOnce(&mut Self) -> Result<T, E>,
757        E: From<SymbolCharsError>,
758    {
759        let mut symbols = Self::new(chars);
760        let res = op(&mut symbols)?;
761        symbols.ok()?;
762        Ok(res)
763    }
764}
765
766impl<Chars: Iterator<Item = char>> Iterator for Symbols<Chars> {
767    type Item = Symbol;
768
769    fn next(&mut self) -> Option<Self::Item> {
770        self.chars = {
771            let chars = match self.chars.as_mut() {
772                Ok(chars) => chars,
773                Err(_) => return None,
774            };
775            match Symbol::from_chars(chars) {
776                Ok(res) => return res,
777                Err(err) => Err(err),
778            }
779        };
780        None
781    }
782}
783
784//------------ IterScanner ---------------------------------------------------
785
/// A simple scanner atop an iterator of strings.
///
/// The type is generic over both the iterator and the octets sequence type
/// to use for returned data. Values are created through the octets builder
/// associated with that octets type.
pub struct IterScanner<Iter: Iterator, Octets> {
    /// The source of the tokens of the scanner.
    iter: Peekable<Iter>,

    /// A marker for the type of the output octets sequences.
    marker: PhantomData<Octets>,
}
798
799impl<Iter: Iterator, Octets> IterScanner<Iter, Octets> {
800    /// Creates a new scanner from an iterator.
801    pub fn new<I: IntoIterator<IntoIter = Iter>>(iter: I) -> Self {
802        IterScanner {
803            iter: iter.into_iter().peekable(),
804            marker: PhantomData,
805        }
806    }
807
808    /// Returns whether the iterator is exhausted.
809    pub fn is_exhausted(&mut self) -> bool {
810        self.iter.peek().is_none()
811    }
812}
813
814impl<Iter, Item, Octets> Scanner for IterScanner<Iter, Octets>
815where
816    Item: AsRef<str>,
817    Iter: Iterator<Item = Item>,
818    Octets: FromBuilder,
819    <Octets as FromBuilder>::Builder: EmptyBuilder + Composer,
820{
821    type Octets = Octets;
822    type OctetsBuilder = <Octets as FromBuilder>::Builder;
823    type Dname = Dname<Octets>;
824    type Error = StrError;
825
826    fn has_space(&self) -> bool {
827        false
828    }
829
830    fn continues(&mut self) -> bool {
831        self.iter.peek().is_some()
832    }
833
834    fn scan_symbols<F>(&mut self, mut op: F) -> Result<(), Self::Error>
835    where
836        F: FnMut(Symbol) -> Result<(), Self::Error>,
837    {
838        let token = match self.iter.next() {
839            Some(token) => token,
840            None => return Err(StrError::end_of_entry()),
841        };
842        for sym in Symbols::new(token.as_ref().chars()) {
843            op(sym)?;
844        }
845        Ok(())
846    }
847
848    fn scan_entry_symbols<F>(&mut self, mut op: F) -> Result<(), Self::Error>
849    where
850        F: FnMut(EntrySymbol) -> Result<(), Self::Error>,
851    {
852        for token in &mut self.iter {
853            for sym in Symbols::new(token.as_ref().chars()) {
854                op(sym.into())?;
855            }
856            op(EntrySymbol::EndOfToken)?;
857        }
858        Ok(())
859    }
860
861    fn convert_token<C: ConvertSymbols<Symbol, Self::Error>>(
862        &mut self,
863        mut convert: C,
864    ) -> Result<Self::Octets, Self::Error> {
865        let token = match self.iter.next() {
866            Some(token) => token,
867            None => return Err(StrError::end_of_entry()),
868        };
869        let mut res = <Octets as FromBuilder>::Builder::empty();
870
871        for sym in Symbols::new(token.as_ref().chars()) {
872            if let Some(data) = convert.process_symbol(sym)? {
873                res.append_slice(data).map_err(Into::into)?;
874            }
875        }
876
877        if let Some(data) = convert.process_tail()? {
878            res.append_slice(data).map_err(Into::into)?;
879        }
880
881        Ok(<Octets as FromBuilder>::from_builder(res))
882    }
883
884    fn convert_entry<C: ConvertSymbols<EntrySymbol, Self::Error>>(
885        &mut self,
886        mut convert: C,
887    ) -> Result<Self::Octets, Self::Error> {
888        let mut res = <Octets as FromBuilder>::Builder::empty();
889        for token in &mut self.iter {
890            for sym in Symbols::new(token.as_ref().chars()) {
891                if let Some(data) = convert.process_symbol(sym.into())? {
892                    res.append_slice(data).map_err(Into::into)?;
893                }
894            }
895        }
896        if let Some(data) = convert.process_tail()? {
897            res.append_slice(data).map_err(Into::into)?;
898        }
899        Ok(<Octets as FromBuilder>::from_builder(res))
900    }
901
902    fn scan_octets(&mut self) -> Result<Self::Octets, Self::Error> {
903        let token = match self.iter.next() {
904            Some(token) => token,
905            None => return Err(StrError::end_of_entry()),
906        };
907        let mut res = <Octets as FromBuilder>::Builder::empty();
908        for sym in Symbols::new(token.as_ref().chars()) {
909            match sym.into_octet() {
910                Ok(ch) => res.append_slice(&[ch]).map_err(Into::into)?,
911                Err(_) => return Err(StrError::custom("bad symbol")),
912            }
913        }
914        Ok(<Octets as FromBuilder>::from_builder(res))
915    }
916
917    fn scan_ascii_str<F, T>(&mut self, op: F) -> Result<T, Self::Error>
918    where
919        F: FnOnce(&str) -> Result<T, Self::Error>,
920    {
921        let res = self.scan_string()?;
922        if res.is_ascii() {
923            op(&res)
924        } else {
925            Err(StrError::custom("non-ASCII characters"))
926        }
927    }
928
929    fn scan_dname(&mut self) -> Result<Self::Dname, Self::Error> {
930        let token = match self.iter.next() {
931            Some(token) => token,
932            None => return Err(StrError::end_of_entry()),
933        };
934        Dname::from_symbols(Symbols::new(token.as_ref().chars()))
935            .map_err(|_| StrError::custom("invalid domain name"))
936    }
937
938    fn scan_charstr(&mut self) -> Result<CharStr<Self::Octets>, Self::Error> {
939        let token = match self.iter.next() {
940            Some(token) => token,
941            None => return Err(StrError::end_of_entry()),
942        };
943        let mut res =
944            CharStrBuilder::<<Octets as FromBuilder>::Builder>::new();
945        for sym in Symbols::new(token.as_ref().chars()) {
946            match sym.into_octet() {
947                Ok(ch) => res.append_slice(&[ch])?,
948                Err(_) => return Err(StrError::custom("bad symbol")),
949            }
950        }
951        Ok(res.finish())
952    }
953
954    fn scan_string(&mut self) -> Result<Str<Self::Octets>, Self::Error> {
955        let token = match self.iter.next() {
956            Some(token) => token,
957            None => return Err(StrError::end_of_entry()),
958        };
959        let mut res = <Octets as FromBuilder>::Builder::empty();
960        let mut buf = [0u8; 4];
961        for sym in Symbols::new(token.as_ref().chars()) {
962            match sym.into_char() {
963                Ok(ch) => res
964                    .append_slice(ch.encode_utf8(&mut buf).as_bytes())
965                    .map_err(Into::into)?,
966                Err(_) => return Err(StrError::custom("bad symbol")),
967            }
968        }
969        Ok(Str::from_utf8(<Octets as FromBuilder>::from_builder(res))
970            .unwrap())
971    }
972
973    fn scan_charstr_entry(&mut self) -> Result<Self::Octets, Self::Error> {
974        // XXX This implementation is probably a bit too lazy.
975        let mut res = <Octets as FromBuilder>::Builder::empty();
976        while self.iter.peek().is_some() {
977            self.scan_charstr()?.compose(&mut res).map_err(Into::into)?;
978        }
979        Ok(<Octets as FromBuilder>::from_builder(res))
980    }
981
982    fn scan_opt_unknown_marker(&mut self) -> Result<bool, Self::Error> {
983        match self.iter.peek() {
984            Some(token) if token.as_ref() == "\\#" => Ok(true),
985            _ => Ok(false),
986        }
987    }
988
989    fn octets_builder(&mut self) -> Result<Self::OctetsBuilder, Self::Error> {
990        Ok(<Octets as FromBuilder>::Builder::empty())
991    }
992}
993
994//============ Error Types ===================================================
995
996//------------ SymbolCharsError ----------------------------------------------
997
998/// An error happened when reading a symbol.
999#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1000pub struct SymbolCharsError(pub(super) SymbolCharsEnum);
1001
1002#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1003pub(super) enum SymbolCharsEnum {
1004    /// An illegal escape sequence was encountered.
1005    BadEscape,
1006
1007    /// Unexpected end of input.
1008    ///
1009    /// This can only happen in a decimal escape sequence.
1010    ShortInput,
1011}
1012
1013impl SymbolCharsError {
1014    /// Returns a static description of the error.
1015    #[must_use]
1016    pub fn as_str(self) -> &'static str {
1017        match self.0 {
1018            SymbolCharsEnum::BadEscape => "illegale escape sequence",
1019            SymbolCharsEnum::ShortInput => "unexpected end of input",
1020        }
1021    }
1022}
1023
1024//--- Display and Error
1025
1026impl fmt::Display for SymbolCharsError {
1027    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1028        f.write_str(self.as_str())
1029    }
1030}
1031
1032#[cfg(feature = "std")]
1033impl std::error::Error for SymbolCharsError {}
1034
1035//------------ SymbolOctetsError ---------------------------------------------
1036
/// An error happened when reading a symbol.
///
/// The value wraps a private enum naming the cause. Use
/// [`as_str`][Self::as_str] or the `Display` impl to get a description.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SymbolOctetsError(SymbolOctetsEnum);

/// The reasons why reading a symbol can fail.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SymbolOctetsEnum {
    /// An illegal UTF-8 sequence was encountered.
    BadUtf8,

    /// An illegal escape sequence was encountered.
    BadEscape,

    /// Unexpected end of input.
    ///
    /// This can only happen in a decimal escape sequence.
    ShortInput,
}

impl SymbolOctetsError {
    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        let Self(cause) = self;
        match cause {
            SymbolOctetsEnum::BadUtf8 => "illegal UTF-8 sequence",
            SymbolOctetsEnum::BadEscape => "illegal escape sequence",
            SymbolOctetsEnum::ShortInput => "unexpected end of data",
        }
    }
}

//--- Display and Error

impl fmt::Display for SymbolOctetsError {
    /// Writes the static description returned by `as_str`.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = self.as_str();
        formatter.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for SymbolOctetsError {}
1076
1077//------------ BadSymbol -----------------------------------------------------
1078
/// A symbol with an unexpected value was encountered.
///
/// The value wraps a private enum naming the cause. Use
/// [`as_str`][Self::as_str] or the `Display` impl to get a description.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct BadSymbol(BadSymbolEnum);

/// The reasons why a symbol can be rejected.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum BadSymbolEnum {
    /// A non-ASCII character was encountered.
    NonAscii,

    /// A non-UTF8 character was encountered.
    NonUtf8,

    /// A non-digit character was encountered.
    NonDigit,

    /// An unexpected escape sequence was encountered.
    Escape,
}

impl BadSymbol {
    /// Returns a static description of the error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        let Self(cause) = self;
        match cause {
            BadSymbolEnum::NonAscii => "non-ASCII symbol",
            BadSymbolEnum::NonUtf8 => "invalid UTF-8 sequence",
            BadSymbolEnum::NonDigit => "expected digit",
            BadSymbolEnum::Escape => "unexpected escape sequence",
        }
    }
}

//--- Display and Error

impl fmt::Display for BadSymbol {
    /// Writes the static description returned by `as_str`.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = self.as_str();
        formatter.write_str(description)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for BadSymbol {}

#[cfg(feature = "std")]
impl From<BadSymbol> for std::io::Error {
    /// Wraps the error into an IO error of kind `Other`.
    fn from(err: BadSymbol) -> Self {
        Self::new(std::io::ErrorKind::Other, err)
    }
}
1128
1129//------------ StrError ------------------------------------------------------
1130
1131/// A simple scanner error that just wraps a static str.
1132#[derive(Debug)]
1133pub struct StrError(&'static str);
1134
1135impl ScannerError for StrError {
1136    fn custom(msg: &'static str) -> Self {
1137        StrError(msg)
1138    }
1139
1140    fn end_of_entry() -> Self {
1141        Self::custom("unexpected end of entry")
1142    }
1143
1144    fn short_buf() -> Self {
1145        Self::custom("short buffer")
1146    }
1147
1148    fn trailing_tokens() -> Self {
1149        Self::custom("trailing data")
1150    }
1151}
1152
1153impl From<ShortBuf> for StrError {
1154    fn from(_: ShortBuf) -> Self {
1155        Self::short_buf()
1156    }
1157}
1158
1159impl fmt::Display for StrError {
1160    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1161        f.write_str(self.0)
1162    }
1163}
1164
1165#[cfg(feature = "std")]
1166impl std::error::Error for StrError {}
1167
1168//============ Testing =======================================================
1169
#[cfg(test)]
#[cfg(feature = "std")]
mod test {
    use super::*;

    /// Checks `Symbol::from_slice_index` at index 0 for plain characters,
    /// simple escapes, and three-digit decimal escapes.
    #[test]
    fn symbol_from_slice_index() {
        // Every character except the backslash is expected to come back as
        // a plain character symbol spanning its UTF-8 length.
        let mut utf8 = [0u8; 4];
        for ch in ('\0'..char::MAX).filter(|&ch| ch != '\\') {
            let encoded = ch.encode_utf8(&mut utf8).as_bytes();
            let expected = Ok(Some((Symbol::Char(ch), ch.len_utf8())));
            assert_eq!(
                Symbol::from_slice_index(encoded, 0),
                expected,
                "char '{}'",
                ch,
            );
        }

        // A backslash followed by a non-digit ASCII character from '0' up
        // to but excluding '\x7f' is expected to be a simple escape of two
        // octets.
        for ch in ('0'..'\x7f').filter(|ch| !ch.is_ascii_digit()) {
            let text = format!("\\{}", ch);
            let expected = Ok(Some((Symbol::SimpleEscape(ch as u8), 2)));
            assert_eq!(
                Symbol::from_slice_index(text.as_bytes(), 0),
                expected,
                "sequence \"\\{}\"",
                ch
            );
        }

        // A backslash followed by three decimal digits is expected to be a
        // decimal escape of four octets for every octet value.
        for ch in 0..256 {
            let text = format!("\\{:03}", ch);
            let expected = Ok(Some((Symbol::DecimalEscape(ch as u8), 4)));
            assert_eq!(
                Symbol::from_slice_index(text.as_bytes(), 0),
                expected,
                "sequence \"\\{:03}\"",
                ch
            );
        }
    }
}