pem_rfc7468/
decoder.rs

//! Decoder for PEM encapsulated data.
//!
//! From RFC 7468 Section 2:
//!
//! > Textual encoding begins with a line comprising "-----BEGIN ", a
//! > label, and "-----", and ends with a line comprising "-----END ", a
//! > label, and "-----".  Between these lines, or "encapsulation
//! > boundaries", are base64-encoded data according to Section 4 of
//! > [RFC 4648].
//!
//! [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648
12
13use crate::{
14    grammar, Base64Decoder, Error, Result, BASE64_WRAP_WIDTH, POST_ENCAPSULATION_BOUNDARY,
15    PRE_ENCAPSULATION_BOUNDARY,
16};
17use core::str;
18
19#[cfg(feature = "alloc")]
20use alloc::vec::Vec;
21
22#[cfg(feature = "std")]
23use std::io;
24
25/// Decode a PEM document according to RFC 7468's "Strict" grammar.
26///
27/// On success, writes the decoded document into the provided buffer, returning
28/// the decoded label and the portion of the provided buffer containing the
29/// decoded message.
30pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
31    let mut decoder = Decoder::new(pem).map_err(|e| check_for_headers(pem, e))?;
32    let type_label = decoder.type_label();
33    let buf = buf
34        .get_mut(..decoder.remaining_len())
35        .ok_or(Error::Length)?;
36    let decoded = decoder.decode(buf).map_err(|e| check_for_headers(pem, e))?;
37
38    if decoder.base64.is_finished() {
39        Ok((type_label, decoded))
40    } else {
41        Err(Error::Length)
42    }
43}
44
/// Decode a PEM document according to RFC 7468's "Strict" grammar into a
/// freshly allocated [`Vec`].
///
/// On success, returns the type label borrowed from `pem` and the decoded
/// message bytes.
///
/// # Errors
///
/// Returns [`Error::HeaderDisallowed`] if decoding failed with an
/// invalid-encoding error and the input contains a colon; every other parse or
/// decode error is returned unchanged.
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
    let mut pem_decoder = Decoder::new(pem).map_err(|err| check_for_headers(pem, err))?;
    let label = pem_decoder.type_label();

    let mut message = Vec::new();
    if let Err(err) = pem_decoder.decode_to_end(&mut message) {
        return Err(check_for_headers(pem, err));
    }

    Ok((label, message))
}
58
59/// Decode the encapsulation boundaries of a PEM document according to RFC 7468's "Strict" grammar.
60///
61/// On success, returning the decoded label.
62pub fn decode_label(pem: &[u8]) -> Result<&str> {
63    Ok(Encapsulation::try_from(pem)?.label())
64}
65
66/// Buffered PEM decoder.
67///
68/// Stateful buffered decoder type which decodes an input PEM document according
69/// to RFC 7468's "Strict" grammar.
70#[derive(Clone)]
71pub struct Decoder<'i> {
72    /// PEM type label.
73    type_label: &'i str,
74
75    /// Buffered Base64 decoder.
76    base64: Base64Decoder<'i>,
77}
78
79impl<'i> Decoder<'i> {
80    /// Create a new PEM [`Decoder`] with the default options.
81    ///
82    /// Uses the default 64-character line wrapping.
83    pub fn new(pem: &'i [u8]) -> Result<Self> {
84        Self::new_wrapped(pem, BASE64_WRAP_WIDTH)
85    }
86
87    /// Create a new PEM [`Decoder`] which wraps at the given line width.
88    pub fn new_wrapped(pem: &'i [u8], line_width: usize) -> Result<Self> {
89        let encapsulation = Encapsulation::try_from(pem)?;
90        let type_label = encapsulation.label();
91        let base64 = Base64Decoder::new_wrapped(encapsulation.encapsulated_text, line_width)?;
92        Ok(Self { type_label, base64 })
93    }
94
95    /// Get the PEM type label for the input document.
96    pub fn type_label(&self) -> &'i str {
97        self.type_label
98    }
99
100    /// Decode data into the provided output buffer.
101    ///
102    /// There must be at least as much remaining Base64 input to be decoded
103    /// in order to completely fill `buf`.
104    pub fn decode<'o>(&mut self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
105        Ok(self.base64.decode(buf)?)
106    }
107
108    /// Decode all of the remaining data in the input buffer into `buf`.
109    #[cfg(feature = "alloc")]
110    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
111    pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8]> {
112        Ok(self.base64.decode_to_end(buf)?)
113    }
114
115    /// Get the decoded length of the remaining PEM data after Base64 decoding.
116    pub fn remaining_len(&self) -> usize {
117        self.base64.remaining_len()
118    }
119
120    /// Are we finished decoding the PEM input?
121    pub fn is_finished(&self) -> bool {
122        self.base64.is_finished()
123    }
124}
125
126impl<'i> From<Decoder<'i>> for Base64Decoder<'i> {
127    fn from(decoder: Decoder<'i>) -> Base64Decoder<'i> {
128        decoder.base64
129    }
130}
131
/// `std::io::Read` support: every method forwards to the inner Base64
/// decoder, so reads yield the decoded message bytes.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl<'i> io::Read for Decoder<'i> {
    /// Forwarded to the Base64 decoder's `read`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.base64.read(buf)
    }

    /// Forwarded to the Base64 decoder's `read_exact`.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        self.base64.read_exact(buf)
    }

    /// Forwarded to the Base64 decoder's `read_to_end`.
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        self.base64.read_to_end(buf)
    }
}
147
/// PEM encapsulation parser.
///
/// Makes a first pass over the input to find the pre-encapsulation
/// (`-----BEGIN [...]-----`) and post-encapsulation (`-----END [...]-----`)
/// boundaries, while attempting to avoid branching on the potentially secret
/// Base64-encoded data that sits between them.
///
/// Only a single encapsulated message is supported at present. Future work
/// could extend it to provide an iterator over a series of encapsulated
/// messages.
#[derive(Clone, Copy, Debug)]
struct Encapsulation<'a> {
    /// Type label extracted from the pre/post-encapsulation boundaries.
    ///
    /// From RFC 7468 Section 2:
    ///
    /// > The type of data encoded is labeled depending on the type label in
    /// > the "-----BEGIN " line (pre-encapsulation boundary).  For example,
    /// > the line may be "-----BEGIN CERTIFICATE-----" to indicate that the
    /// > content is a PKIX certificate (see further below).  Generators MUST
    /// > put the same label on the "-----END " line (post-encapsulation
    /// > boundary) as the corresponding "-----BEGIN " line.  Labels are
    /// > formally case-sensitive, uppercase, and comprised of zero or more
    /// > characters; they do not contain consecutive spaces or hyphen-minuses,
    /// > nor do they contain spaces or hyphen-minuses at either end.  Parsers
    /// > MAY disregard the label in the post-encapsulation boundary instead of
    /// > signaling an error if there is a label mismatch: some extant
    /// > implementations require the labels to match; others do not.
    label: &'a str,

    /// Encapsulated text found between the two boundaries.
    ///
    /// This is expected to be Base64, but it is deliberately left unvalidated
    /// here so that it can be handled in constant time by the decoder.
    encapsulated_text: &'a [u8],
}
185
186impl<'a> Encapsulation<'a> {
187    /// Parse the type label and encapsulated text from between the
188    /// pre/post-encapsulation boundaries.
189    pub fn parse(data: &'a [u8]) -> Result<Self> {
190        // Strip the "preamble": optional text occurring before the pre-encapsulation boundary
191        let data = grammar::strip_preamble(data)?;
192
193        // Parse pre-encapsulation boundary (including label)
194        let data = data
195            .strip_prefix(PRE_ENCAPSULATION_BOUNDARY)
196            .ok_or(Error::PreEncapsulationBoundary)?;
197
198        let (label, body) = grammar::split_label(data).ok_or(Error::Label)?;
199
200        let mut body = match grammar::strip_trailing_eol(body).unwrap_or(body) {
201            [head @ .., b'-', b'-', b'-', b'-', b'-'] => head,
202            _ => return Err(Error::PreEncapsulationBoundary),
203        };
204
205        // Ensure body ends with a properly labeled post-encapsulation boundary
206        for &slice in [POST_ENCAPSULATION_BOUNDARY, label.as_bytes()].iter().rev() {
207            // Ensure the input ends with the post encapsulation boundary as
208            // well as a matching label
209            if !body.ends_with(slice) {
210                return Err(Error::PostEncapsulationBoundary);
211            }
212
213            let len = body.len().checked_sub(slice.len()).ok_or(Error::Length)?;
214            body = body.get(..len).ok_or(Error::PostEncapsulationBoundary)?;
215        }
216
217        let encapsulated_text =
218            grammar::strip_trailing_eol(body).ok_or(Error::PostEncapsulationBoundary)?;
219
220        Ok(Self {
221            label,
222            encapsulated_text,
223        })
224    }
225
226    /// Get the label parsed from the encapsulation boundaries.
227    pub fn label(self) -> &'a str {
228        self.label
229    }
230}
231
232impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
233    type Error = Error;
234
235    fn try_from(bytes: &'a [u8]) -> Result<Self> {
236        Self::parse(bytes)
237    }
238}
239
240/// Check for PEM headers in the input, as they are disallowed by RFC7468.
241///
242/// Returns `Error::HeaderDisallowed` if headers are encountered.
243fn check_for_headers(pem: &[u8], err: Error) -> Error {
244    if err == Error::Base64(base64ct::Error::InvalidEncoding)
245        && pem.iter().any(|&b| b == grammar::CHAR_COLON)
246    {
247        Error::HeaderDisallowed
248    } else {
249        err
250    }
251}
252
#[cfg(test)]
mod tests {
    use super::Encapsulation;

    /// Parsing the PKCS#8 example must yield the `PRIVATE KEY` label and the
    /// raw (still Base64-encoded) body without its trailing end-of-line.
    #[test]
    fn pkcs8_example() {
        let document = include_bytes!("../tests/examples/pkcs8.pem");
        let parsed = Encapsulation::parse(document).unwrap();

        assert_eq!(parsed.label, "PRIVATE KEY");

        // Same 64 bytes as the ASCII codes of the Base64 text in the example.
        assert_eq!(
            parsed.encapsulated_text,
            b"MC4CAQAwBQYDK2VwBCIEIBftnHPp22SewYmmEoMcX8VwI4IHwaqd+9LFPj/15eqF"
        );
    }
}