domain/utils/
base16.rs

//! Decoding and encoding of Base 16 a.k.a. hex digits.
//!
//! The Base 16 encoding is defined in [RFC 4648]. It is plain hex encoding:
//! each octet is written as two digits, with the letters ‘A’ to ‘F’
//! (accepted in either case when decoding) standing for the values ten to
//! fifteen.
//!
//! The module defines the type [`Decoder`] which keeps the state necessary
//! for decoding. The decoding functions drive such a decoder, while the
//! encoding functions write the digits straight from a lookup table.
//!
//! [RFC 4648]: https://tools.ietf.org/html/rfc4648
12
13use crate::base::scan::{ConvertSymbols, EntrySymbol, ScannerError};
14use core::fmt;
15use octseq::builder::{
16    EmptyBuilder, FreezeBuilder, FromBuilder, OctetsBuilder,
17};
18#[cfg(feature = "std")]
19use std::string::String;
20
21//------------ Re-exports ----------------------------------------------------
22
23pub use super::base64::DecodeError;
24
25//------------ Convenience Functions -----------------------------------------
26
27/// Decodes a string with Base 16 encoded data.
28///
29/// The function attempts to decode the entire string and returns the result
30/// as an `Octets` value.
31pub fn decode<Octets>(s: &str) -> Result<Octets, DecodeError>
32where
33    Octets: FromBuilder,
34    <Octets as FromBuilder>::Builder: OctetsBuilder + EmptyBuilder,
35{
36    let mut decoder = Decoder::<<Octets as FromBuilder>::Builder>::new();
37    for ch in s.chars() {
38        decoder.push(ch)?;
39    }
40    decoder.finalize()
41}
42
/// Decodes a string with Base 16 data into a standard vec.
///
/// This is [`decode`] with the output type fixed to `Vec<u8>`.
#[cfg(feature = "std")]
pub fn decode_vec(s: &str) -> Result<std::vec::Vec<u8>, DecodeError> {
    decode::<std::vec::Vec<u8>>(s)
}
48
49/// Encodes binary data in Base 16 and writes it into a format stream.
50///
51/// This function is intended to be used in implementations of formatting
52/// traits:
53///
54/// ```
55/// use core::fmt;
56/// use domain::utils::base16;
57///
58/// struct Foo<'a>(&'a [u8]);
59///
60/// impl<'a> fmt::Display for Foo<'a> {
61///     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62///         base16::display(&self.0, f)
63///     }
64/// }
65/// ```
66pub fn display<Octets, Target>(octets: &Octets, f: &mut Target) -> fmt::Result
67where
68    Octets: AsRef<[u8]> + ?Sized,
69    Target: fmt::Write,
70{
71    for &octet in octets.as_ref() {
72        f.write_str(ENCODE_ALPHABET[usize::from(octet)])?;
73    }
74    Ok(())
75}
76
/// Returns the Base 16 encoding of `bytes` as a newly allocated string.
///
/// The string is allocated up front with room for two characters per input
/// octet.
#[cfg(feature = "std")]
pub fn encode_string<B: AsRef<[u8]> + ?Sized>(bytes: &B) -> String {
    let octets = bytes.as_ref();
    let mut encoded = String::with_capacity(octets.len() * 2);
    display(octets, &mut encoded).unwrap();
    encoded
}
84
85/// Returns a placeholder value that implements `Display` for encoded data.
86pub fn encode_display<Octets: AsRef<[u8]> + ?Sized>(
87    octets: &Octets,
88) -> impl fmt::Display + '_ {
89    struct Display<'a>(&'a [u8]);
90
91    impl<'a> fmt::Display for Display<'a> {
92        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
93            display(self.0, f)
94        }
95    }
96
97    Display(octets.as_ref())
98}
99
/// Serde support for octets that are either Base 16 encoded or binary.
///
/// This module can be used with Serde’s `with` attribute. With human
/// readable formats an octets sequence is represented as a Base 16 encoded
/// string, with compact formats as a raw octets sequence.
#[cfg(feature = "serde")]
pub mod serde {
    use core::fmt;
    use octseq::builder::{EmptyBuilder, FromBuilder, OctetsBuilder};
    use octseq::serde::{DeserializeOctets, SerializeOctets};

    /// Serializes `octets` as a Base 16 string or as raw octets.
    ///
    /// The Base 16 form is used if the serializer reports itself as human
    /// readable. Otherwise serialization is left to the octets type.
    pub fn serialize<Octets, S>(
        octets: &Octets,
        serializer: S,
    ) -> Result<S::Ok, S::Error>
    where
        Octets: AsRef<[u8]> + SerializeOctets,
        S: serde::Serializer,
    {
        if !serializer.is_human_readable() {
            return octets.serialize_octets(serializer);
        }
        serializer.collect_str(&super::encode_display(octets))
    }

    /// Deserializes octets from a Base 16 string or from raw octets.
    ///
    /// A human readable deserializer is asked for a string, which is then
    /// decoded. Otherwise deserialization is delegated to the octets type,
    /// with byte values forwarded to that type’s own visitor.
    pub fn deserialize<'de, Octets, D: serde::Deserializer<'de>>(
        deserializer: D,
    ) -> Result<Octets, D::Error>
    where
        Octets: FromBuilder + DeserializeOctets<'de>,
        <Octets as FromBuilder>::Builder: EmptyBuilder,
    {
        /// Wraps the octets type’s visitor and adds string decoding.
        struct Base16Visitor<'de, Octets: DeserializeOctets<'de>>(
            Octets::Visitor,
        );

        impl<'de, Octets> serde::de::Visitor<'de>
            for Base16Visitor<'de, Octets>
        where
            Octets: FromBuilder + DeserializeOctets<'de>,
            <Octets as FromBuilder>::Builder: OctetsBuilder + EmptyBuilder,
        {
            type Value = Octets;

            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.write_str("an Base16-encoded string")
            }

            fn visit_str<E: serde::de::Error>(
                self,
                encoded: &str,
            ) -> Result<Self::Value, E> {
                super::decode(encoded).map_err(E::custom)
            }

            fn visit_borrowed_bytes<E: serde::de::Error>(
                self,
                bytes: &'de [u8],
            ) -> Result<Self::Value, E> {
                self.0.visit_borrowed_bytes(bytes)
            }

            #[cfg(feature = "std")]
            fn visit_byte_buf<E: serde::de::Error>(
                self,
                bytes: std::vec::Vec<u8>,
            ) -> Result<Self::Value, E> {
                self.0.visit_byte_buf(bytes)
            }
        }

        let visitor: Base16Visitor<'de, Octets> =
            Base16Visitor(Octets::visitor());
        if deserializer.is_human_readable() {
            deserializer.deserialize_str(visitor)
        } else {
            Octets::deserialize_with_visitor(deserializer, visitor)
        }
    }
}
179
180//------------ Decoder -------------------------------------------------------
181
182/// A Base 16 decoder.
183///
184/// This type keeps all the state for decoding a sequence of characters
185/// representing data encoded in Base 16. Upon success, the decoder returns
186/// the decoded data.
187pub struct Decoder<Builder> {
188    /// A buffer for the first half of an octet.
189    buf: Option<u8>,
190
191    /// The target or an error if something went wrong.
192    target: Result<Builder, DecodeError>,
193}
194
195impl<Builder: EmptyBuilder> Decoder<Builder> {
196    /// Creates a new, empty decoder using the *base32hex* variant.
197    #[must_use]
198    pub fn new() -> Self {
199        Decoder {
200            buf: None,
201            target: Ok(Builder::empty()),
202        }
203    }
204}
205
206impl<Builder: OctetsBuilder> Decoder<Builder> {
207    /// Finalizes decoding and returns the decoded data.
208    pub fn finalize(self) -> Result<Builder::Octets, DecodeError>
209    where
210        Builder: FreezeBuilder,
211    {
212        if self.buf.is_some() {
213            return Err(DecodeError::ShortInput);
214        }
215
216        self.target.map(FreezeBuilder::freeze)
217    }
218
219    /// Decodes one more character of data.
220    ///
221    /// Returns an error as soon as the encoded data is determined to be
222    /// illegal. It is okay to push more data after the first error. The
223    /// method will just keep returning errors.
224    pub fn push(&mut self, ch: char) -> Result<(), DecodeError> {
225        let value = match ch.to_digit(16) {
226            Some(value) => value as u8,
227            None => {
228                self.target = Err(DecodeError::IllegalChar(ch));
229                return Err(DecodeError::IllegalChar(ch));
230            }
231        };
232        if let Some(upper) = self.buf.take() {
233            self.append(upper | value);
234        } else {
235            self.buf = Some(value << 4)
236        }
237        match self.target {
238            Ok(_) => Ok(()),
239            Err(err) => Err(err),
240        }
241    }
242
243    /// Appends a decoded octet to the target.
244    fn append(&mut self, value: u8) {
245        let target = match self.target.as_mut() {
246            Ok(target) => target,
247            Err(_) => return,
248        };
249        if let Err(err) = target.append_slice(&[value]) {
250            self.target = Err(err.into().into());
251        }
252    }
253}
254
255impl<Builder: EmptyBuilder> Default for Decoder<Builder> {
256    fn default() -> Self {
257        Self::new()
258    }
259}
260
261//------------ SymbolConverter -----------------------------------------------
262
/// A Base 16 decoder that can be used as a converter for a scanner.
///
/// The converter assembles one octet at a time from two consecutive hex
/// digits.
#[derive(Clone, Debug, Default)]
pub struct SymbolConverter {
    /// The octet being assembled; also the storage handed out to callers.
    buf: [u8; 1],

    /// Whether `buf` currently holds only the upper half of an octet.
    pending: bool,
}
272
273impl SymbolConverter {
274    /// Creates a new symbol converter.
275    #[must_use]
276    pub fn new() -> Self {
277        Default::default()
278    }
279}
280
281impl<Sym, Error> ConvertSymbols<Sym, Error> for SymbolConverter
282where
283    Sym: Into<EntrySymbol>,
284    Error: ScannerError,
285{
286    fn process_symbol(
287        &mut self,
288        symbol: Sym,
289    ) -> Result<Option<&[u8]>, Error> {
290        match symbol.into() {
291            EntrySymbol::Symbol(symbol) => {
292                let symbol = symbol
293                    .into_char()
294                    .map_err(|_| Error::custom("expected hex digits"))?
295                    .to_digit(16)
296                    .ok_or_else(|| Error::custom("expected hex digits"))?;
297
298                if self.pending {
299                    self.buf[0] |= symbol as u8;
300                    self.pending = false;
301                    Ok(Some(&self.buf))
302                } else {
303                    self.buf[0] = (symbol << 4) as u8;
304                    self.pending = true;
305                    Ok(None)
306                }
307            }
308            EntrySymbol::EndOfToken => Ok(None),
309        }
310    }
311
312    /// Process the end of token.
313    ///
314    /// The method may return data to be added to the output octets sequence.
315    fn process_tail(&mut self) -> Result<Option<&[u8]>, Error> {
316        if self.pending {
317            Err(Error::custom("uneven number of hex digits"))
318        } else {
319            Ok(None)
320        }
321    }
322}
323
324//------------ Constants -----------------------------------------------------
325
/// The alphabet used for encoding.
///
/// The entry at index `n` is the two-digit upper case encoding of the octet
/// value `n`. Each row below holds the sixteen values sharing one upper
/// digit.
///
/// We have to have this because `char::from_digit` prefers lower case letters
/// while the RFC prefers upper case.
#[rustfmt::skip]
const ENCODE_ALPHABET: [&str; 256] = [
    "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", "1F",
    "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F",
    "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E", "3F",
    "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4A", "4B", "4C", "4D", "4E", "4F",
    "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
    "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A", "6B", "6C", "6D", "6E", "6F",
    "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F",
    "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B", "8C", "8D", "8E", "8F",
    "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
    "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF",
    "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF",
    "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF",
    "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF",
    "E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF",
    "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF",
];
354
355//============ Test ==========================================================
356
#[cfg(test)]
#[cfg(feature = "std")]
mod test {
    use super::*;
    use std::string::String;

    // Decoding into a `Vec<u8>` only needs the `std` feature (see
    // `decode_vec`), so these tests are not gated on any further feature.
    #[test]
    fn decode_str() {
        assert_eq!(&decode_vec("").unwrap(), b"");
        assert_eq!(&decode_vec("F0").unwrap(), b"\xF0");
        assert_eq!(&decode_vec("F00f").unwrap(), b"\xF0\x0F");
    }

    #[test]
    fn decode_rejects_bad_input() {
        // A lone digit leaves half an octet pending.
        assert!(matches!(decode_vec("F"), Err(DecodeError::ShortInput)));
        assert!(matches!(decode_vec("F00"), Err(DecodeError::ShortInput)));

        // Anything but a hex digit is reported with the offending char.
        assert!(matches!(
            decode_vec("G0"),
            Err(DecodeError::IllegalChar('G'))
        ));
        assert!(matches!(
            decode_vec("F0 0F"),
            Err(DecodeError::IllegalChar(' '))
        ));
    }

    #[test]
    fn test_display() {
        fn fmt(s: &[u8]) -> String {
            let mut out = String::new();
            display(s, &mut out).unwrap();
            out
        }

        assert_eq!(fmt(b""), "");
        assert_eq!(fmt(b"\xf0"), "F0");
        assert_eq!(fmt(b"\xf0\x0f"), "F00F");
    }
}