domain/utils/
base32.rs

1//! Decoding and encoding of base 32.
2//!
3//! The base 32 encoding is defined in [RFC 4648]. It is essentially a
4//! case-insensitive version of [base64][super::base64] which is necessary
5//! when encoding binary data in domain names. The RFC defines two separate
6//! encodings, called *base32* and *base32hex*. The DNS uses the latter
7//! version, particularly in [NSEC3], for encoding binary data in domain
8//! names, because it has the property that the encoding maintains the order
9//! of the original data.
10//!
//! This module currently only implements *base32hex* but is prepared for
//! adding the other option by using the suffix `_hex` wherever distinction
//! is necessary.
14//!
//! The module defines the type [`Decoder`] which keeps the state necessary
//! for decoding. The decoding functions use such a decoder, while the
//! encoding functions write the encoded characters directly to their output.
18//!
19//! [RFC 4648]: https://tools.ietf.org/html/rfc4648
20//! [NSEC3]: ../../rdata/rfc5155/index.html
21
22use crate::base::scan::{ConvertSymbols, EntrySymbol, ScannerError};
23use core::fmt;
24use octseq::builder::{
25    EmptyBuilder, FreezeBuilder, FromBuilder, OctetsBuilder,
26};
27#[cfg(feature = "std")]
28use std::string::String;
29
30//------------ Re-exports ----------------------------------------------------
31
32pub use super::base64::DecodeError;
33
34//------------ Convenience Functions -----------------------------------------
35
36/// Decodes a string with *base32hex* encoded data.
37///
38/// The function attempts to decode the entire string and returns the result
39/// as an `Octets` value.
40pub fn decode_hex<Octets>(s: &str) -> Result<Octets, DecodeError>
41where
42    Octets: FromBuilder,
43    <Octets as FromBuilder>::Builder: OctetsBuilder + EmptyBuilder,
44{
45    let mut decoder = Decoder::<<Octets as FromBuilder>::Builder>::new_hex();
46    for ch in s.chars() {
47        decoder.push(ch)?;
48    }
49    decoder.finalize()
50}
51
52/// Encodes binary data in *base32hex* and writes it into a format stream.
53///
54/// This function is intended to be used in implementations of formatting
55/// traits:
56///
57/// ```
58/// use core::fmt;
59/// use domain::utils::base32;
60///
61/// struct Foo<'a>(&'a [u8]);
62///
63/// impl<'a> fmt::Display for Foo<'a> {
64///     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
65///         base32::display_hex(&self.0, f)
66///     }
67/// }
68/// ```
69pub fn display_hex<B, W>(bytes: &B, f: &mut W) -> fmt::Result
70where
71    B: AsRef<[u8]> + ?Sized,
72    W: fmt::Write,
73{
74    fn ch(i: u8) -> char {
75        ENCODE_HEX_ALPHABET[i as usize]
76    }
77
78    for chunk in bytes.as_ref().chunks(5) {
79        f.write_char(ch(chunk[0] >> 3))?; // 0
80        if chunk.len() == 1 {
81            f.write_char(ch((chunk[0] & 0x07) << 2))?; // 1
82            break;
83        }
84        f.write_char(ch(((chunk[0] & 0x07) << 2) | (chunk[1] >> 6)))?; // 1
85        f.write_char(ch((chunk[1] & 0x3F) >> 1))?; // 2
86        if chunk.len() == 2 {
87            f.write_char(ch((chunk[1] & 0x01) << 4))?; // 3
88            break;
89        }
90        f.write_char(ch(((chunk[1] & 0x01) << 4) | (chunk[2] >> 4)))?; // 3
91        if chunk.len() == 3 {
92            f.write_char(ch((chunk[2] & 0x0F) << 1))?; // 4
93            break;
94        }
95        f.write_char(ch(((chunk[2] & 0x0F) << 1) | (chunk[3] >> 7)))?; // 4
96        f.write_char(ch((chunk[3] & 0x7F) >> 2))?; // 5
97        if chunk.len() == 4 {
98            f.write_char(ch((chunk[3] & 0x03) << 3))?; // 6
99            break;
100        }
101        f.write_char(ch(((chunk[3] & 0x03) << 3) | (chunk[4] >> 5)))?; // 6
102        f.write_char(ch(chunk[4] & 0x1F))?; // 7
103    }
104    Ok(())
105}
106
/// Encodes binary data in *base32hex* and returns the encoded data as a string.
///
/// The string is allocated up front with room for eight characters per
/// started group of five input octets, then filled via [`display_hex`].
#[cfg(feature = "std")]
pub fn encode_string_hex<B: AsRef<[u8]> + ?Sized>(bytes: &B) -> String {
    let octets = bytes.as_ref();
    let groups = octets.len() / 5 + 1;
    let mut encoded = String::with_capacity(groups * 8);
    display_hex(octets, &mut encoded).unwrap();
    encoded
}
114
115/// Returns a placeholder value that implements `Display` for encoded data.
116pub fn encode_display_hex<Octets: AsRef<[u8]>>(
117    octets: &Octets,
118) -> impl fmt::Display + '_ {
119    struct Display<'a>(&'a [u8]);
120
121    impl fmt::Display for Display<'_> {
122        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123            display_hex(self.0, f)
124        }
125    }
126
127    Display(octets.as_ref())
128}
129
/// Serialize and deserialize octets *base32hex* encoded or binary.
///
/// This module can be used with Serde’s `with` attribute. It will serialize
/// an octets sequence as a *base32hex* encoded string with human readable
/// serializers or as a raw octets sequence for compact serializers.
#[cfg(feature = "serde")]
pub mod serde {
    use core::fmt;
    use octseq::builder::{EmptyBuilder, FromBuilder, OctetsBuilder};
    use octseq::serde::{DeserializeOctets, SerializeOctets};

    /// Serializes an octets sequence.
    ///
    /// If the serializer reports itself as human readable, the octets are
    /// written as a *base32hex* encoded string. Otherwise serialization is
    /// delegated to the octets type's own `serialize_octets`.
    pub fn serialize<Octets, S>(
        octets: &Octets,
        serializer: S,
    ) -> Result<S::Ok, S::Error>
    where
        Octets: AsRef<[u8]> + SerializeOctets,
        S: serde::Serializer,
    {
        if serializer.is_human_readable() {
            serializer.collect_str(&super::encode_display_hex(octets))
        } else {
            octets.serialize_octets(serializer)
        }
    }

    /// Deserializes an octets sequence.
    ///
    /// If the deserializer reports itself as human readable, a string is
    /// requested and decoded as *base32hex*. Otherwise deserialization is
    /// delegated to the octets type's own `deserialize_with_visitor`.
    pub fn deserialize<'de, Octets, D: serde::Deserializer<'de>>(
        deserializer: D,
    ) -> Result<Octets, D::Error>
    where
        Octets: FromBuilder + DeserializeOctets<'de>,
        <Octets as FromBuilder>::Builder: EmptyBuilder,
    {
        /// Wraps the octets type's own visitor and adds string decoding.
        struct Visitor<'de, Octets: DeserializeOctets<'de>>(Octets::Visitor);

        impl<'de, Octets> serde::de::Visitor<'de> for Visitor<'de, Octets>
        where
            Octets: FromBuilder + DeserializeOctets<'de>,
            <Octets as FromBuilder>::Builder: OctetsBuilder + EmptyBuilder,
        {
            type Value = Octets;

            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.write_str("a Base32-encoded string")
            }

            // Strings carry the *base32hex* representation.
            fn visit_str<E: serde::de::Error>(
                self,
                v: &str,
            ) -> Result<Self::Value, E> {
                super::decode_hex(v).map_err(E::custom)
            }

            // Raw bytes are handed to the wrapped visitor unchanged.
            fn visit_borrowed_bytes<E: serde::de::Error>(
                self,
                value: &'de [u8],
            ) -> Result<Self::Value, E> {
                self.0.visit_borrowed_bytes(value)
            }

            // Owned raw bytes are handed to the wrapped visitor as well.
            #[cfg(feature = "std")]
            fn visit_byte_buf<E: serde::de::Error>(
                self,
                value: std::vec::Vec<u8>,
            ) -> Result<Self::Value, E> {
                self.0.visit_byte_buf(value)
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(Visitor(Octets::visitor()))
        } else {
            Octets::deserialize_with_visitor(
                deserializer,
                Visitor(Octets::visitor()),
            )
        }
    }
}
209
210//------------ Decoder -------------------------------------------------------
211
/// A base 32 decoder.
///
/// This type keeps all the state for decoding a sequence of characters
/// representing data encoded in base 32. Characters are handed to the
/// decoder one at a time via `push`; `finalize` consumes the decoder and,
/// upon success, returns the decoded data.
///
/// # Limitations
///
/// The decoder does not support padding.
pub struct Decoder<Builder> {
    /// The alphabet we are using.
    ///
    /// The table is indexed by ASCII code and yields the value of the
    /// corresponding symbol. The decoder treats an entry of 0xFF as an
    /// illegal character.
    alphabet: &'static [u8; 128],

    /// A buffer for the values of up to eight symbols.
    ///
    /// This holds the values looked up in `alphabet` for the pushed
    /// characters rather than the characters themselves.
    buf: [u8; 8],

    /// The index in `buf` where we place the next value.
    ///
    /// It is reset to zero once a complete group of eight has been decoded.
    next: usize,

    /// The target or an error if something went wrong.
    ///
    /// Once this holds an error, no further octets are appended.
    target: Result<Builder, DecodeError>,
}
237
238impl<Builder: EmptyBuilder> Decoder<Builder> {
239    /// Creates a new, empty decoder using the *base32hex* variant.
240    #[must_use]
241    pub fn new_hex() -> Self {
242        Decoder {
243            alphabet: &DECODE_HEX_ALPHABET,
244            buf: [0; 8],
245            next: 0,
246            target: Ok(Builder::empty()),
247        }
248    }
249}
250
251impl<Builder: OctetsBuilder> Decoder<Builder> {
252    /// Finalizes decoding and returns the decoded data.
253    #[allow(clippy::question_mark)] // false positive
254    pub fn finalize(mut self) -> Result<Builder::Octets, DecodeError>
255    where
256        Builder: FreezeBuilder,
257    {
258        if let Err(err) = self.target {
259            return Err(err);
260        }
261
262        match self.next {
263            0 => {}
264            1 | 3 | 6 => return Err(DecodeError::ShortInput),
265            2 => {
266                self.octet_0();
267            }
268            4 => {
269                self.octet_0();
270                self.octet_1();
271            }
272            5 => {
273                self.octet_0();
274                self.octet_1();
275                self.octet_2();
276            }
277            7 => {
278                self.octet_0();
279                self.octet_1();
280                self.octet_2();
281                self.octet_3();
282            }
283            _ => unreachable!(),
284        }
285        self.target.map(FreezeBuilder::freeze)
286    }
287
288    /// Decodes one more character of data.
289    ///
290    /// Returns an error as soon as the encoded data is determined to be
291    /// illegal. It is okay to push more data after the first error. The
292    /// method will just keep returning errors.
293    pub fn push(&mut self, ch: char) -> Result<(), DecodeError> {
294        if ch > (127 as char) {
295            self.target = Err(DecodeError::IllegalChar(ch));
296            return Err(DecodeError::IllegalChar(ch));
297        }
298        let val = self.alphabet[ch as usize];
299        if val == 0xFF {
300            self.target = Err(DecodeError::IllegalChar(ch));
301            return Err(DecodeError::IllegalChar(ch));
302        }
303        self.buf[self.next] = val;
304        self.next += 1;
305
306        if self.next == 8 {
307            self.octet_0();
308            self.octet_1();
309            self.octet_2();
310            self.octet_3();
311            self.octet_4();
312            self.next = 0;
313        }
314        match self.target {
315            Ok(_) => Ok(()),
316            Err(err) => Err(err),
317        }
318    }
319
320    /// Decodes the zeroth octet in a base 32 sequence.
321    fn octet_0(&mut self) {
322        let ch = (self.buf[0] << 3) | (self.buf[1] >> 2);
323        self.append(ch)
324    }
325
326    /// Decodes the first octet in a base 32 sequence.
327    fn octet_1(&mut self) {
328        let ch = (self.buf[1] << 6) | (self.buf[2] << 1) | (self.buf[3] >> 4);
329        self.append(ch)
330    }
331
332    /// Decodes the second octet in a base 32 sequence.
333    fn octet_2(&mut self) {
334        let ch = (self.buf[3] << 4) | (self.buf[4] >> 1);
335        self.append(ch)
336    }
337
338    /// Decodes the third octet in a base 32 sequence.
339    fn octet_3(&mut self) {
340        let ch = (self.buf[4] << 7) | (self.buf[5] << 2) | (self.buf[6] >> 3);
341        self.append(ch)
342    }
343
344    /// Decodes the forth octet in a base 32 sequence.
345    fn octet_4(&mut self) {
346        let ch = (self.buf[6] << 5) | self.buf[7];
347        self.append(ch)
348    }
349
350    /// Appends a decoded octet to the target.
351    fn append(&mut self, value: u8) {
352        let target = match self.target.as_mut() {
353            Ok(target) => target,
354            Err(_) => return,
355        };
356        if let Err(err) = target.append_slice(&[value]) {
357            self.target = Err(err.into().into());
358        }
359    }
360}
361
362//------------ SymbolConverter -----------------------------------------------
363
/// A Base 32 decoder that can be used as a converter with a scanner.
///
/// Symbols are collected in groups of eight. Each complete group is turned
/// into five octets, and an incomplete final group is converted when the
/// tail is processed.
#[derive(Clone, Debug)]
pub struct SymbolConverter {
    /// The alphabet we are using.
    alphabet: &'static [u8; 128],

    /// A buffer for the values of up to eight input symbols.
    ///
    /// This holds the value looked up in `alphabet` for each symbol rather
    /// than the character itself.
    input: [u8; 8],

    /// The index in `input` where we place the next value.
    ///
    /// It is reset to zero once a complete group of eight has been
    /// converted. The code visible in this module stores no padding marker
    /// or other special value here.
    next: usize,

    /// A buffer to return a slice for the output.
    output: [u8; 5],
}
385
386impl Default for SymbolConverter {
387    fn default() -> Self {
388        SymbolConverter {
389            alphabet: &DECODE_HEX_ALPHABET,
390            input: [0; 8],
391            next: 0,
392            output: Default::default(),
393        }
394    }
395}
396
397impl SymbolConverter {
398    /// Creates a new symbol converter.
399    #[must_use]
400    pub fn new() -> Self {
401        Default::default()
402    }
403
404    fn process_char<Error: ScannerError>(
405        &mut self,
406        ch: char,
407    ) -> Result<Option<&[u8]>, Error> {
408        if ch > (127 as char) {
409            return Err(Error::custom("illegal Base 32 data"));
410        }
411        let val = self.alphabet[ch as usize];
412        if val == 0xFF {
413            return Err(Error::custom("illegal Base 32 data"));
414        }
415        self.input[self.next] = val;
416        self.next += 1;
417
418        if self.next == 8 {
419            self.output = [
420                (self.input[0] << 3) | (self.input[1] >> 2),
421                (self.input[1] << 6)
422                    | (self.input[2] << 1)
423                    | (self.input[3] >> 4),
424                (self.input[3] << 4) | (self.input[4] >> 1),
425                (self.input[4] << 7)
426                    | (self.input[5] << 2)
427                    | (self.input[6] >> 3),
428                (self.input[6] << 5) | self.input[7],
429            ];
430            self.next = 0;
431            Ok(Some(&self.output))
432        } else {
433            Ok(None)
434        }
435    }
436}
437
438impl<Sym, Error> ConvertSymbols<Sym, Error> for SymbolConverter
439where
440    Sym: Into<EntrySymbol>,
441    Error: ScannerError,
442{
443    fn process_symbol(
444        &mut self,
445        symbol: Sym,
446    ) -> Result<Option<&[u8]>, Error> {
447        match symbol.into() {
448            EntrySymbol::Symbol(symbol) => self.process_char(
449                symbol
450                    .into_char()
451                    .map_err(|_| Error::custom("illegal Base 32 data"))?,
452            ),
453            EntrySymbol::EndOfToken => Ok(None),
454        }
455    }
456
457    /// Process the end of token.
458    ///
459    /// The method may return data to be added to the output octets sequence.
460    fn process_tail(&mut self) -> Result<Option<&[u8]>, Error> {
461        match self.next {
462            0 => return Ok(None),
463            1 | 3 | 6 => return Err(Error::custom("short Base 32 input")),
464            _ => {}
465        }
466        self.output[0] = (self.input[0] << 3) | (self.input[1] >> 2);
467        if self.next == 2 {
468            return Ok(Some(&self.output[0..1]));
469        }
470        self.output[1] = (self.input[1] << 6)
471            | (self.input[2] << 1)
472            | (self.input[3] >> 4);
473        if self.next == 4 {
474            return Ok(Some(&self.output[0..2]));
475        }
476        self.output[2] = (self.input[3] << 4) | (self.input[4] >> 1);
477        if self.next == 5 {
478            return Ok(Some(&self.output[0..3]));
479        }
480        self.output[3] = (self.input[4] << 7)
481            | (self.input[5] << 2)
482            | (self.input[6] >> 3);
483        Ok(Some(&self.output[0..4]))
484    }
485}
486
487//------------ Constants -----------------------------------------------------
488
/// The alphabet used for decoding *base32hex.*
///
/// This maps encoding characters into their values. A value of 0xFF stands in
/// for illegal characters. We only provide the first 128 characters since the
/// alphabet will only use ASCII characters.
///
/// The digits `0` to `9` map to 0 through 9 and the letters `A` to `V` map
/// to 10 through 31, in both upper and lower case.
const DECODE_HEX_ALPHABET: [u8; 128] = {
    // Everything starts out as illegal.
    let mut table = [0xFF_u8; 128];

    let mut digit = 0;
    while digit < 10 {
        table[b'0' as usize + digit] = digit as u8;
        digit += 1;
    }

    let mut letter = 0;
    while letter < 22 {
        let value = 10 + letter as u8;
        table[b'A' as usize + letter] = value;
        table[b'a' as usize + letter] = value;
        letter += 1;
    }

    table
};
512
/// The alphabet used for encoding *base32hex.*
///
/// The entry at index `i` is the character representing the 5-bit value `i`:
/// the digits `0` to `9` followed by the upper case letters `A` to `V`.
const ENCODE_HEX_ALPHABET: [char; 32] = {
    let symbols = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
    let mut table = ['0'; 32];
    let mut idx = 0;
    while idx < 32 {
        table[idx] = symbols[idx] as char;
        idx += 1;
    }
    table
};
520
521//============ Test ==========================================================
522
#[cfg(test)]
#[cfg(feature = "std")]
mod test {
    use super::*;

    /// Decodes into a `Vec<u8>` so the tests do not depend on any optional
    /// octets type.
    fn decode_vec(s: &str) -> Result<std::vec::Vec<u8>, DecodeError> {
        decode_hex(s)
    }

    // Test vectors from RFC 4648, with the padding removed since the
    // decoder does not accept it. Both cases of the alphabet must decode.
    #[test]
    fn decode_str_hex() {
        assert_eq!(&decode_vec("").unwrap(), b"");
        assert_eq!(&decode_vec("CO").unwrap(), b"f");
        assert_eq!(&decode_vec("CPNG").unwrap(), b"fo");
        assert_eq!(&decode_vec("CPNMU").unwrap(), b"foo");
        assert_eq!(&decode_vec("CPNMUOG").unwrap(), b"foob");
        assert_eq!(&decode_vec("CPNMUOJ1").unwrap(), b"fooba");
        assert_eq!(&decode_vec("CPNMUOJ1E8").unwrap(), b"foobar");
        assert_eq!(&decode_vec("co").unwrap(), b"f");
        assert_eq!(&decode_vec("cpng").unwrap(), b"fo");
        assert_eq!(&decode_vec("cpnmu").unwrap(), b"foo");
        assert_eq!(&decode_vec("cpnmuog").unwrap(), b"foob");
        assert_eq!(&decode_vec("cpnmuoj1").unwrap(), b"fooba");
        assert_eq!(&decode_vec("cpnmuoj1e8").unwrap(), b"foobar");
    }

    // Input lengths that cannot describe whole octets and characters
    // outside the alphabet (including padding) must be rejected.
    #[test]
    fn decode_str_hex_errors() {
        assert!(matches!(decode_vec("C"), Err(DecodeError::ShortInput)));
        assert!(matches!(decode_vec("CPN"), Err(DecodeError::ShortInput)));
        assert!(matches!(
            decode_vec("CPNMUO"),
            Err(DecodeError::ShortInput)
        ));
        assert!(matches!(
            decode_vec("CO======"),
            Err(DecodeError::IllegalChar('='))
        ));
        assert!(matches!(
            decode_vec("CW"),
            Err(DecodeError::IllegalChar('W'))
        ));
        assert!(matches!(
            decode_vec("C\u{e9}"),
            Err(DecodeError::IllegalChar('\u{e9}'))
        ));
    }

    #[test]
    fn test_display_hex() {
        fn fmt(s: &[u8]) -> String {
            let mut out = String::new();
            display_hex(s, &mut out).unwrap();
            out
        }

        assert_eq!(fmt(b""), "");
        assert_eq!(fmt(b"f"), "CO");
        assert_eq!(fmt(b"fo"), "CPNG");
        assert_eq!(fmt(b"foo"), "CPNMU");
        assert_eq!(fmt(b"foob"), "CPNMUOG");
        assert_eq!(fmt(b"fooba"), "CPNMUOJ1");
        assert_eq!(fmt(b"foobar"), "CPNMUOJ1E8");
    }

    #[test]
    fn test_encode_string_hex() {
        assert_eq!(encode_string_hex(b""), "");
        assert_eq!(encode_string_hex(b"fooba"), "CPNMUOJ1");
        assert_eq!(encode_string_hex(b"foobar"), "CPNMUOJ1E8");
    }

    // Encoding followed by decoding must give back the input for every
    // tail length.
    #[test]
    fn round_trip() {
        let data: std::vec::Vec<u8> = (0u8..=255).collect();
        for len in 0..data.len() {
            let encoded = encode_string_hex(&data[..len]);
            assert_eq!(decode_vec(&encoded).unwrap(), &data[..len]);
        }
    }
}