domain/utils/
base32.rs

1//! Decoding and encoding of base 32.
2//!
3//! The base 32 encoding is defined in [RFC 4648]. It is essentially a
4//! case-insensitive version of [base64][super::base64] which is necessary
5//! when encoding binary data in domain names. The RFC defines two separate
6//! encodings, called *base32* and *base32hex*. The DNS uses the latter
7//! version, particularly in [NSEC3], for encoding binary data in domain
8//! names, because it has the property that the encoding maintains the order
9//! of the original data.
10//!
//! This module currently only implements *base32hex* but is prepared for
//! adding the other option by using the suffix `_hex` wherever distinction
//! is necessary.
14//!
15//! The module defines the type [`Decoder`] which keeps the state necessary
16//! for decoding. The various functions offered use such a decoder to decode
17//! and encode octets in various forms.
18//!
19//! [RFC 4648]: https://tools.ietf.org/html/rfc4648
20//! [NSEC3]: ../../rdata/rfc5155/index.html
21//! [`Decoder`]: struct.Decoder.html
22
23use crate::base::scan::{ConvertSymbols, EntrySymbol, ScannerError};
24use core::fmt;
25use octseq::builder::{
26    EmptyBuilder, FreezeBuilder, FromBuilder, OctetsBuilder,
27};
28#[cfg(feature = "std")]
29use std::string::String;
30
31//------------ Re-exports ----------------------------------------------------
32
33pub use super::base64::DecodeError;
34
35//------------ Convenience Functions -----------------------------------------
36
37/// Decodes a string with *base32hex* encoded data.
38///
39/// The function attempts to decode the entire string and returns the result
40/// as an `Octets` value.
41pub fn decode_hex<Octets>(s: &str) -> Result<Octets, DecodeError>
42where
43    Octets: FromBuilder,
44    <Octets as FromBuilder>::Builder: OctetsBuilder + EmptyBuilder,
45{
46    let mut decoder = Decoder::<<Octets as FromBuilder>::Builder>::new_hex();
47    for ch in s.chars() {
48        decoder.push(ch)?;
49    }
50    decoder.finalize()
51}
52
53/// Encodes binary data in *base32hex* and writes it into a format stream.
54///
55/// This function is intended to be used in implementations of formatting
56/// traits:
57///
58/// ```
59/// use core::fmt;
60/// use domain::utils::base32;
61///
62/// struct Foo<'a>(&'a [u8]);
63///
64/// impl<'a> fmt::Display for Foo<'a> {
65///     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
66///         base32::display_hex(&self.0, f)
67///     }
68/// }
69/// ```
70pub fn display_hex<B, W>(bytes: &B, f: &mut W) -> fmt::Result
71where
72    B: AsRef<[u8]> + ?Sized,
73    W: fmt::Write,
74{
75    fn ch(i: u8) -> char {
76        ENCODE_HEX_ALPHABET[i as usize]
77    }
78
79    for chunk in bytes.as_ref().chunks(5) {
80        f.write_char(ch(chunk[0] >> 3))?; // 0
81        if chunk.len() == 1 {
82            f.write_char(ch((chunk[0] & 0x07) << 2))?; // 1
83            break;
84        }
85        f.write_char(ch((chunk[0] & 0x07) << 2 | chunk[1] >> 6))?; // 1
86        f.write_char(ch((chunk[1] & 0x3F) >> 1))?; // 2
87        if chunk.len() == 2 {
88            f.write_char(ch((chunk[1] & 0x01) << 4))?; // 3
89            break;
90        }
91        f.write_char(ch((chunk[1] & 0x01) << 4 | chunk[2] >> 4))?; // 3
92        if chunk.len() == 3 {
93            f.write_char(ch((chunk[2] & 0x0F) << 1))?; // 4
94            break;
95        }
96        f.write_char(ch((chunk[2] & 0x0F) << 1 | chunk[3] >> 7))?; // 4
97        f.write_char(ch((chunk[3] & 0x7F) >> 2))?; // 5
98        if chunk.len() == 4 {
99            f.write_char(ch((chunk[3] & 0x03) << 3))?; // 6
100            break;
101        }
102        f.write_char(ch((chunk[3] & 0x03) << 3 | chunk[4] >> 5))?; // 6
103        f.write_char(ch(chunk[4] & 0x1F))?; // 7
104    }
105    Ok(())
106}
107
/// Produces the *base32hex* encoding of `bytes` as an owned string.
///
/// The string is pre-sized with eight characters for every started group
/// of five octets (plus one spare group) so that encoding does not need to
/// reallocate.
#[cfg(feature = "std")]
pub fn encode_string_hex<B: AsRef<[u8]> + ?Sized>(bytes: &B) -> String {
    let groups = bytes.as_ref().len() / 5 + 1;
    let mut encoded = String::with_capacity(groups * 8);
    display_hex(bytes, &mut encoded).unwrap();
    encoded
}
115
116/// Returns a placeholder value that implements `Display` for encoded data.
117pub fn encode_display_hex<Octets: AsRef<[u8]>>(
118    octets: &Octets,
119) -> impl fmt::Display + '_ {
120    struct Display<'a>(&'a [u8]);
121
122    impl<'a> fmt::Display for Display<'a> {
123        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
124            display_hex(self.0, f)
125        }
126    }
127
128    Display(octets.as_ref())
129}
130
/// Serialize and deserialize octets Base32 encoded or binary.
///
/// This module can be used with Serde’s `with` attribute. It will serialize
/// an octets sequence as a *base32hex* encoded string with human readable
/// serializers or as a raw octets sequence for compact serializers.
#[cfg(feature = "serde")]
pub mod serde {
    use core::fmt;
    use octseq::builder::{EmptyBuilder, FromBuilder, OctetsBuilder};
    use octseq::serde::{DeserializeOctets, SerializeOctets};

    /// Serializes `octets` for use with Serde’s `serialize_with`.
    ///
    /// If the serializer reports itself as human readable, the octets are
    /// written as a *base32hex* string. Otherwise serialization is
    /// delegated to [`SerializeOctets::serialize_octets`].
    pub fn serialize<Octets, S>(
        octets: &Octets,
        serializer: S,
    ) -> Result<S::Ok, S::Error>
    where
        Octets: AsRef<[u8]> + SerializeOctets,
        S: serde::Serializer,
    {
        if serializer.is_human_readable() {
            serializer.collect_str(&super::encode_display_hex(octets))
        } else {
            octets.serialize_octets(serializer)
        }
    }

    /// Deserializes an octets sequence for use with Serde’s
    /// `deserialize_with`.
    ///
    /// If the deserializer reports itself as human readable, a string is
    /// requested and decoded as *base32hex*; decoding failures are turned
    /// into a custom Serde error. Otherwise deserialization is delegated to
    /// [`DeserializeOctets::deserialize_with_visitor`].
    pub fn deserialize<'de, Octets, D: serde::Deserializer<'de>>(
        deserializer: D,
    ) -> Result<Octets, D::Error>
    where
        Octets: FromBuilder + DeserializeOctets<'de>,
        <Octets as FromBuilder>::Builder: EmptyBuilder,
    {
        /// Visitor that decodes strings as *base32hex* and forwards raw
        /// bytes to the wrapped visitor of the octets type.
        struct Visitor<'de, Octets: DeserializeOctets<'de>>(Octets::Visitor);

        impl<'de, Octets> serde::de::Visitor<'de> for Visitor<'de, Octets>
        where
            Octets: FromBuilder + DeserializeOctets<'de>,
            <Octets as FromBuilder>::Builder: OctetsBuilder + EmptyBuilder,
        {
            type Value = Octets;

            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.write_str("a Base32-encoded string")
            }

            // Human readable path: the string carries base32hex data.
            fn visit_str<E: serde::de::Error>(
                self,
                v: &str,
            ) -> Result<Self::Value, E> {
                super::decode_hex(v).map_err(E::custom)
            }

            // Binary paths: hand the raw bytes to the octets type's own
            // visitor unchanged.
            fn visit_borrowed_bytes<E: serde::de::Error>(
                self,
                value: &'de [u8],
            ) -> Result<Self::Value, E> {
                self.0.visit_borrowed_bytes(value)
            }

            #[cfg(feature = "std")]
            fn visit_byte_buf<E: serde::de::Error>(
                self,
                value: std::vec::Vec<u8>,
            ) -> Result<Self::Value, E> {
                self.0.visit_byte_buf(value)
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(Visitor(Octets::visitor()))
        } else {
            Octets::deserialize_with_visitor(
                deserializer,
                Visitor(Octets::visitor()),
            )
        }
    }
}
210
211//------------ Decoder -------------------------------------------------------
212
/// A base 32 decoder.
///
/// This type keeps all the state for decoding a sequence of characters
/// representing data encoded in base 32. Characters are fed in one at a
/// time via `push`; `finalize` consumes the decoder and, upon success,
/// returns the decoded data.
///
/// # Limitations
///
/// The decoder does not support padding.
pub struct Decoder<Builder> {
    /// The alphabet we are using.
    ///
    /// Indexed by ASCII code point; an entry of 0xFF marks a character
    /// that is not part of the encoding.
    alphabet: &'static [u8; 128],

    /// A buffer for the values of up to eight characters.
    ///
    /// `push` stores the value looked up in `alphabet` here, not the
    /// character itself, so every entry is a five bit quantity.
    buf: [u8; 8],

    /// The index in `buf` where we place the next value.
    ///
    /// Reset to zero by `push` whenever a full group of eight has been
    /// decoded.
    next: usize,

    /// The target or an error if something went wrong.
    ///
    /// Once this holds an error, no further octets are appended.
    target: Result<Builder, DecodeError>,
}
238
239impl<Builder: EmptyBuilder> Decoder<Builder> {
240    /// Creates a new, empty decoder using the *base32hex* variant.
241    #[must_use]
242    pub fn new_hex() -> Self {
243        Decoder {
244            alphabet: &DECODE_HEX_ALPHABET,
245            buf: [0; 8],
246            next: 0,
247            target: Ok(Builder::empty()),
248        }
249    }
250}
251
252impl<Builder: OctetsBuilder> Decoder<Builder> {
253    /// Finalizes decoding and returns the decoded data.
254    #[allow(clippy::question_mark)] // false positive
255    pub fn finalize(mut self) -> Result<Builder::Octets, DecodeError>
256    where
257        Builder: FreezeBuilder,
258    {
259        if let Err(err) = self.target {
260            return Err(err);
261        }
262
263        match self.next {
264            0 => {}
265            1 | 3 | 6 => return Err(DecodeError::ShortInput),
266            2 => {
267                self.octet_0();
268            }
269            4 => {
270                self.octet_0();
271                self.octet_1();
272            }
273            5 => {
274                self.octet_0();
275                self.octet_1();
276                self.octet_2();
277            }
278            7 => {
279                self.octet_0();
280                self.octet_1();
281                self.octet_2();
282                self.octet_3();
283            }
284            _ => unreachable!(),
285        }
286        self.target.map(FreezeBuilder::freeze)
287    }
288
289    /// Decodes one more character of data.
290    ///
291    /// Returns an error as soon as the encoded data is determined to be
292    /// illegal. It is okay to push more data after the first error. The
293    /// method will just keep returning errors.
294    pub fn push(&mut self, ch: char) -> Result<(), DecodeError> {
295        if ch > (127 as char) {
296            self.target = Err(DecodeError::IllegalChar(ch));
297            return Err(DecodeError::IllegalChar(ch));
298        }
299        let val = self.alphabet[ch as usize];
300        if val == 0xFF {
301            self.target = Err(DecodeError::IllegalChar(ch));
302            return Err(DecodeError::IllegalChar(ch));
303        }
304        self.buf[self.next] = val;
305        self.next += 1;
306
307        if self.next == 8 {
308            self.octet_0();
309            self.octet_1();
310            self.octet_2();
311            self.octet_3();
312            self.octet_4();
313            self.next = 0;
314        }
315        match self.target {
316            Ok(_) => Ok(()),
317            Err(err) => Err(err),
318        }
319    }
320
321    /// Decodes the zeroth octet in a base 32 sequence.
322    fn octet_0(&mut self) {
323        let ch = self.buf[0] << 3 | self.buf[1] >> 2;
324        self.append(ch)
325    }
326
327    /// Decodes the first octet in a base 32 sequence.
328    fn octet_1(&mut self) {
329        let ch = self.buf[1] << 6 | self.buf[2] << 1 | self.buf[3] >> 4;
330        self.append(ch)
331    }
332
333    /// Decodes the second octet in a base 32 sequence.
334    fn octet_2(&mut self) {
335        let ch = self.buf[3] << 4 | self.buf[4] >> 1;
336        self.append(ch)
337    }
338
339    /// Decodes the third octet in a base 32 sequence.
340    fn octet_3(&mut self) {
341        let ch = self.buf[4] << 7 | self.buf[5] << 2 | self.buf[6] >> 3;
342        self.append(ch)
343    }
344
345    /// Decodes the forth octet in a base 32 sequence.
346    fn octet_4(&mut self) {
347        let ch = self.buf[6] << 5 | self.buf[7];
348        self.append(ch)
349    }
350
351    /// Appends a decoded octet to the target.
352    fn append(&mut self, value: u8) {
353        let target = match self.target.as_mut() {
354            Ok(target) => target,
355            Err(_) => return,
356        };
357        if let Err(err) = target.append_slice(&[value]) {
358            self.target = Err(err.into().into());
359        }
360    }
361}
362
363//------------ SymbolConverter -----------------------------------------------
364
/// A Base 32 decoder that can be used as a converter with a scanner.
///
/// Input characters are collected in groups of eight; each complete group
/// yields five decoded octets.
#[derive(Clone, Debug)]
pub struct SymbolConverter {
    /// The alphabet we are using.
    ///
    /// Indexed by ASCII code point; an entry of 0xFF marks a character
    /// that is not part of the encoding.
    alphabet: &'static [u8; 128],

    /// A buffer for the values of up to eight input characters.
    ///
    /// The value looked up in `alphabet` is stored here rather than the
    /// character itself, so every entry is a five bit quantity.
    input: [u8; 8],

    /// The index in `input` where we place the next value.
    ///
    /// NOTE(review): an earlier version of this comment mentioned a 0xF0
    /// marker for padding; nothing in this file ever stores such a marker
    /// and padding characters are rejected as illegal data.
    next: usize,

    /// A buffer to return a slice for the output.
    output: [u8; 5],
}
386
387impl Default for SymbolConverter {
388    fn default() -> Self {
389        SymbolConverter {
390            alphabet: &DECODE_HEX_ALPHABET,
391            input: [0; 8],
392            next: 0,
393            output: Default::default(),
394        }
395    }
396}
397
398impl SymbolConverter {
399    /// Creates a new symbol converter.
400    #[must_use]
401    pub fn new() -> Self {
402        Default::default()
403    }
404
405    fn process_char<Error: ScannerError>(
406        &mut self,
407        ch: char,
408    ) -> Result<Option<&[u8]>, Error> {
409        if ch > (127 as char) {
410            return Err(Error::custom("illegal Base 32 data"));
411        }
412        let val = self.alphabet[ch as usize];
413        if val == 0xFF {
414            return Err(Error::custom("illegal Base 32 data"));
415        }
416        self.input[self.next] = val;
417        self.next += 1;
418
419        if self.next == 8 {
420            self.output = [
421                self.input[0] << 3 | self.input[1] >> 2,
422                self.input[1] << 6 | self.input[2] << 1 | self.input[3] >> 4,
423                self.input[3] << 4 | self.input[4] >> 1,
424                self.input[4] << 7 | self.input[5] << 2 | self.input[6] >> 3,
425                self.input[6] << 5 | self.input[7],
426            ];
427            self.next = 0;
428            Ok(Some(&self.output))
429        } else {
430            Ok(None)
431        }
432    }
433}
434
435impl<Sym, Error> ConvertSymbols<Sym, Error> for SymbolConverter
436where
437    Sym: Into<EntrySymbol>,
438    Error: ScannerError,
439{
440    fn process_symbol(
441        &mut self,
442        symbol: Sym,
443    ) -> Result<Option<&[u8]>, Error> {
444        match symbol.into() {
445            EntrySymbol::Symbol(symbol) => self.process_char(
446                symbol
447                    .into_char()
448                    .map_err(|_| Error::custom("illegal Base 32 data"))?,
449            ),
450            EntrySymbol::EndOfToken => Ok(None),
451        }
452    }
453
454    /// Process the end of token.
455    ///
456    /// The method may return data to be added to the output octets sequence.
457    fn process_tail(&mut self) -> Result<Option<&[u8]>, Error> {
458        match self.next {
459            0 => return Ok(None),
460            1 | 3 | 6 => return Err(Error::custom("short Base 32 input")),
461            _ => {}
462        }
463        self.output[0] = self.input[0] << 3 | self.input[1] >> 2;
464        if self.next == 2 {
465            return Ok(Some(&self.output[0..1]));
466        }
467        self.output[1] =
468            self.input[1] << 6 | self.input[2] << 1 | self.input[3] >> 4;
469        if self.next == 4 {
470            return Ok(Some(&self.output[0..2]));
471        }
472        self.output[2] = self.input[3] << 4 | self.input[4] >> 1;
473        if self.next == 5 {
474            return Ok(Some(&self.output[0..3]));
475        }
476        self.output[3] =
477            self.input[4] << 7 | self.input[5] << 2 | self.input[6] >> 3;
478        Ok(Some(&self.output[0..4]))
479    }
480}
481
482//------------ Constants -----------------------------------------------------
483
/// The alphabet used for decoding *base32hex.*
///
/// The table is indexed by ASCII code point and yields the five bit value
/// of that character. Entries of 0xFF mark characters that are not part of
/// the encoding. Only the first 128 code points are covered since the
/// alphabet consists of ASCII characters only. Letters are accepted in
/// both upper and lower case.
const DECODE_HEX_ALPHABET: [u8; 128] = {
    let mut table = [0xFF_u8; 128];

    // The digits `0` to `9` stand for the values 0 to 9.
    let mut value = 0u8;
    while value < 10 {
        table[(b'0' + value) as usize] = value;
        value += 1;
    }

    // The letters `A` to `V` (or `a` to `v`) stand for the values 10 to 31.
    while value < 32 {
        let offset = value - 10;
        table[(b'A' + offset) as usize] = value;
        table[(b'a' + offset) as usize] = value;
        value += 1;
    }

    table
};
507
/// The alphabet used for encoding *base32hex.*
///
/// Indexed by a five bit value, the table yields the character that
/// represents it: the digits for 0 to 9 followed by the upper case letters
/// `A` to `V` for 10 to 31.
const ENCODE_HEX_ALPHABET: [char; 32] = {
    let symbols: &[u8; 32] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
    let mut table = ['\0'; 32];
    let mut idx = 0;
    while idx < 32 {
        table[idx] = symbols[idx] as char;
        idx += 1;
    }
    table
};
515
516//============ Test ==========================================================
517
#[cfg(test)]
#[cfg(feature = "std")]
mod test {
    use super::*;
    use std::string::String;

    /// Pairs of plain data and its unpadded *base32hex* encoding.
    const VECTORS: &[(&str, &str)] = &[
        ("", ""),
        ("f", "CO"),
        ("fo", "CPNG"),
        ("foo", "CPNMU"),
        ("foob", "CPNMUOG"),
        ("fooba", "CPNMUOJ1"),
        ("foobar", "CPNMUOJ1E8"),
    ];

    #[test]
    #[cfg(feature = "bytes")]
    fn decode_str_hex() {
        use super::DecodeError;

        fn decode_hex(s: &str) -> Result<std::vec::Vec<u8>, DecodeError> {
            super::decode_hex(s)
        }

        for &(plain, encoded) in VECTORS {
            // Decoding is case-insensitive, so check both spellings.
            assert_eq!(decode_hex(encoded).unwrap(), plain.as_bytes());
            assert_eq!(
                decode_hex(&encoded.to_ascii_lowercase()).unwrap(),
                plain.as_bytes()
            );
        }
    }

    #[test]
    fn test_display_hex() {
        fn fmt(s: &[u8]) -> String {
            let mut out = String::new();
            display_hex(s, &mut out).unwrap();
            out
        }

        for &(plain, encoded) in VECTORS {
            assert_eq!(fmt(plain.as_bytes()), encoded);
        }
    }
}