debugid/
lib.rs

1//! This crate provides types for identifiers of object files, such as executables, dynamic
2//! libraries or debug companion files. The concept originates in Google Breakpad and defines two
3//! types:
4//!
5//!  - [`CodeId`]: Identifies the file containing source code, i.e. the actual library or
6//!    executable. The identifier is platform dependent and implementation defined. Thus, there is
7//!    no canonical representation.
8//!  - [`DebugId`]: Identifies a debug information file, which may or may not use information from
9//!    the Code ID. The contents are also implementation defined, but as opposed to `CodeId`, the
10//!    structure is streamlined across platforms. It is also guaranteed to be 32 bytes in size.
11//!
12//! [`CodeId`]: struct.CodeId.html
13//! [`DebugId`]: struct.DebugId.html
14
15#![warn(missing_docs)]
16
17use std::error;
18use std::fmt;
19use std::fmt::Write;
20use std::str;
21
22use uuid::{Bytes, Uuid};
23
/// The error returned when a [`DebugId`](struct.DebugId.html) cannot be parsed or constructed
/// from its input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseDebugIdError;

impl fmt::Display for ParseDebugIdError {
    /// Writes the fixed, human readable error message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid debug identifier")
    }
}

impl error::Error for ParseDebugIdError {}
35
/// Switches controlling how strictly `DebugId::parse_str` interprets its input.
#[derive(Debug, Clone, Copy)]
struct ParseOptions {
    /// Accept input that has a hyphen after the first eight characters. When `false`, such
    /// input is rejected.
    allow_hyphens: bool,
    /// Reject a bare UUID without an appendix. When `false`, a bare UUID is accepted and the
    /// appendix defaults to zero.
    require_appendix: bool,
    /// Tolerate an appendix longer than eight characters by only parsing its first eight
    /// characters and ignoring the rest.
    allow_tail: bool,
}
42
43/// Unique identifier for debug information files and their debug information.
44///
45/// This type is analogous to [`CodeId`], except that it identifies a debug file instead of the
/// actual library or executable. On some platforms, a `DebugId` is an alias for a `CodeId` but the
47/// exact rules around this are complex. On Windows, the identifiers are completely different and
48/// refer to separate files.
49///
/// The canonical string representation, as produced by the `Display` implementation, is at
/// most 45 characters long and consists of:
///
/// 1. 36 character hyphenated hex representation of the UUID field
/// 2. If the appendix is non-zero: a hyphen followed by the 1-8 character lowercase hex
///    representation of the u32 appendix
54///
55/// The debug identifier is compatible to Google Breakpad. Use [`DebugId::breakpad`] to get a
56/// breakpad string representation of this debug identifier.
57///
58/// There is one exception to this: for the old PDB 2.0 format the debug identifier consists
59/// of only a 32-bit integer + age resulting in a string representation of between 9 and 16
60/// hex characters.
61///
62/// # Example
63///
64/// ```
65/// # extern crate debugid;
66/// use std::str::FromStr;
67/// use debugid::DebugId;
68///
69/// # fn foo() -> Result<(), ::debugid::ParseDebugIdError> {
70/// let id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a")?;
71/// assert_eq!("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a".to_string(), id.to_string());
72/// # Ok(())
73/// # }
74///
75/// # fn main() { foo().unwrap() }
76/// ```
77///
78/// # In-memory representation
79///
80/// The in-memory representation takes up 32 bytes and can be directly written to storage
81/// and mapped back into an object reference.
82///
83/// ```
84/// use std::str::FromStr;
85/// use debugid::DebugId;
86///
87/// let debug_id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a").unwrap();
88///
89/// let slice = &[debug_id];
90/// let ptr = slice.as_ptr() as *const u8;
91/// let len = std::mem::size_of_val(slice);
92/// let buf: &[u8] = unsafe { std::slice::from_raw_parts(ptr, len) };
93///
94/// let mut new_buf: Vec<u8> = Vec::new();
95/// std::io::copy(&mut std::io::Cursor::new(buf), &mut new_buf).unwrap();
96///
97/// let ptr = new_buf.as_ptr() as *const DebugId;
98/// let new_debug_id = unsafe { &*ptr };
99///
100/// assert_eq!(*new_debug_id, debug_id);
101/// ```
102///
/// As long as the bytes were written using the same major version of this crate you will be
/// able to read it again like this.
105///
106/// [`CodeId`]: struct.CodeId.html
107/// [`DebugId::breakpad`]: struct.DebugId.html#method.breakpad
108// This needs to be backwards compatible also in its exact in-memory byte-layout since this
109// struct is directly mapped from disk in e.g. Symbolic SymCache formats.  The first version
110// of this struct was defined as:
111//
112// ```rust
113// struct DebugId {
114//     uuid: Uuid,
115//     appendix: u32,
116//     _padding: [u8; 12],
117// }
118// ```
119//
120// For this reason the current `typ` byte represents the type of `DebugId` stored in the
121// `Bytes`:
122//
123// - `0u8`: The `bytes` field contains a UUID.
124// - `1u8`: The first 4 bytes of the `bytes` field contain a big-endian u32, the remaining
125//   bytes are 0.
126#[repr(C, packed)]
127#[derive(Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
128pub struct DebugId {
129    bytes: Bytes,
130    appendix: u32,
131    _padding: [u8; 11],
132    typ: u8,
133}
134
135impl DebugId {
136    /// Constructs an empty debug identifier, containing only zeros.
137    pub fn nil() -> Self {
138        Self::default()
139    }
140
141    /// Constructs a `DebugId` from its `uuid`.
142    pub fn from_uuid(uuid: Uuid) -> Self {
143        Self::from_parts(uuid, 0)
144    }
145
146    /// Constructs a `DebugId` from its `uuid` and `appendix` parts.
147    pub fn from_parts(uuid: Uuid, appendix: u32) -> Self {
148        DebugId {
149            bytes: *uuid.as_bytes(),
150            appendix,
151            typ: 0,
152            _padding: [0; 11],
153        }
154    }
155
156    /// Constructs a `DebugId` from a Microsoft little-endian GUID and age.
157    pub fn from_guid_age(guid: &[u8], age: u32) -> Result<Self, ParseDebugIdError> {
158        if guid.len() != 16 {
159            return Err(ParseDebugIdError);
160        }
161
162        let uuid = Uuid::from_bytes([
163            guid[3], guid[2], guid[1], guid[0], guid[5], guid[4], guid[7], guid[6], guid[8],
164            guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15],
165        ]);
166
167        Ok(DebugId::from_parts(uuid, age))
168    }
169
170    /// Constructs a `DebugId` from a PDB 2.0 timestamp and age.
171    pub fn from_pdb20(timestamp: u32, age: u32) -> Self {
172        // The big-endian byte-order here has to match the one used to read this number in
173        // the DebugId::timestamp method.
174        DebugId {
175            bytes: [
176                (timestamp >> 24) as u8,
177                (timestamp >> 16) as u8,
178                (timestamp >> 8) as u8,
179                timestamp as u8,
180                0u8,
181                0u8,
182                0u8,
183                0u8,
184                0u8,
185                0u8,
186                0u8,
187                0u8,
188                0u8,
189                0u8,
190                0u8,
191                0u8,
192            ],
193            appendix: age,
194            _padding: [0u8; 11],
195            typ: 1u8,
196        }
197    }
198
199    /// Parses a breakpad identifier from a string.
200    pub fn from_breakpad(string: &str) -> Result<Self, ParseDebugIdError> {
201        let options = ParseOptions {
202            allow_hyphens: false,
203            require_appendix: true,
204            allow_tail: false,
205        };
206        Self::parse_str(string, options).ok_or(ParseDebugIdError)
207    }
208
209    /// Returns the UUID part of the code module's debug_identifier.
210    ///
211    /// If this is a debug identifier for the PDB 2.0 format an invalid UUID is returned
212    /// where only the first 4 bytes are filled in and the remainder of the bytes are 0.
213    /// This means the UUID has variant [`uuid::Variant::NCS`] and an unknown version,
214    /// [`Uuid::get_version`] will return `None`, which is not a valid UUID.
215    ///
216    /// This may seem odd however does seem reasonable:
217    ///
218    /// - Every [`DebugId`] can be represented as [`Uuid`] and will still mostly look
219    ///   reasonable e.g. in comparisons etc.
220    /// - The PDB 2.0 format is very old and very unlikely to appear practically.
221    pub fn uuid(&self) -> Uuid {
222        Uuid::from_bytes(self.bytes)
223    }
224
225    /// Returns the appendix part of the code module's debug identifier.
226    ///
227    /// On Windows, this is an incrementing counter to identify the build.
228    /// On all other platforms, this value will always be zero.
229    pub fn appendix(&self) -> u32 {
230        self.appendix
231    }
232
233    /// Returns whether this identifier is nil, i.e. it consists only of zeros.
234    pub fn is_nil(&self) -> bool {
235        self.bytes == [0u8; 16] && self.appendix == 0
236    }
237
238    /// Returns whether this identifier is from the PDB 2.0 format.
239    pub fn is_pdb20(&self) -> bool {
240        self.typ == 1
241    }
242
243    /// Returns a wrapper which when formatted via `fmt::Display` will format a
244    /// a breakpad identifier.
245    pub fn breakpad(&self) -> BreakpadFormat<'_> {
246        BreakpadFormat { inner: self }
247    }
248
249    fn parse_str(string: &str, options: ParseOptions) -> Option<Self> {
250        let is_hyphenated = string.get(8..9) == Some("-");
251        if is_hyphenated && !options.allow_hyphens || !string.is_ascii() {
252            return None;
253        }
254
255        // Can the PDB 2.0 format match?  This can never be true for a valid UUID.
256        let min_len = if is_hyphenated { 10 } else { 9 };
257        let max_len = if is_hyphenated { 17 } else { 16 };
258        if min_len <= string.len() && string.len() <= max_len {
259            let timestamp_str = string.get(..8)?;
260            let timestamp = u32::from_str_radix(timestamp_str, 16).ok()?;
261            let appendix_str = match is_hyphenated {
262                true => string.get(9..)?,
263                false => string.get(8..)?,
264            };
265            let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
266            return Some(Self::from_pdb20(timestamp, appendix));
267        }
268
269        let uuid_len = if is_hyphenated { 36 } else { 32 };
270        let uuid = string.get(..uuid_len)?.parse().ok()?;
271        if !options.require_appendix && string.len() == uuid_len {
272            return Some(Self::from_parts(uuid, 0));
273        }
274
275        let mut appendix_str = &string[uuid_len..];
276        if is_hyphenated ^ appendix_str.starts_with('-') {
277            return None; // Require a hyphen if and only if we're hyphenated.
278        } else if is_hyphenated {
279            appendix_str = &appendix_str[1..]; // Skip the hyphen for parsing.
280        }
281
282        if options.allow_tail && appendix_str.len() > 8 {
283            appendix_str = &appendix_str[..8];
284        }
285
286        // Parse the appendix, which fails on empty strings.
287        let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
288        Some(Self::from_parts(uuid, appendix))
289    }
290
291    /// Returns the PDB 2.0 timestamp.
292    ///
293    /// Only valid if you know this is a PDB 2.0 debug identifier.
294    fn timestamp(&self) -> u32 {
295        u32::from_be_bytes([self.bytes[0], self.bytes[1], self.bytes[2], self.bytes[3]])
296    }
297}
298
299impl fmt::Debug for DebugId {
300    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
301        let uuid = self.uuid();
302        f.debug_struct("DebugId")
303            .field("uuid", &uuid.hyphenated().to_string())
304            .field("appendix", &self.appendix())
305            .finish()
306    }
307}
308
309impl fmt::Display for DebugId {
310    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
311        match self.is_pdb20() {
312            true => {
313                let timestamp = self.timestamp();
314                write!(f, "{:08X}", timestamp)?;
315            }
316            false => {
317                let uuid = self.uuid();
318                uuid.fmt(f)?;
319            }
320        }
321        if self.appendix > 0 {
322            write!(f, "-{:x}", { self.appendix })?;
323        }
324        Ok(())
325    }
326}
327
328impl str::FromStr for DebugId {
329    type Err = ParseDebugIdError;
330
331    fn from_str(string: &str) -> Result<Self, ParseDebugIdError> {
332        let options = ParseOptions {
333            allow_hyphens: true,
334            require_appendix: false,
335            allow_tail: true,
336        };
337        Self::parse_str(string, options).ok_or(ParseDebugIdError)
338    }
339}
340
341impl From<Uuid> for DebugId {
342    fn from(uuid: Uuid) -> Self {
343        DebugId::from_uuid(uuid)
344    }
345}
346
347impl From<(Uuid, u32)> for DebugId {
348    fn from(tuple: (Uuid, u32)) -> Self {
349        let (uuid, appendix) = tuple;
350        DebugId::from_parts(uuid, appendix)
351    }
352}
353
354/// Wrapper around [`DebugId`] for Breakpad formatting.
355///
356/// **Example:**
357///
358/// ```
359/// # extern crate debugid;
360/// use std::str::FromStr;
361/// use debugid::DebugId;
362///
363/// # fn foo() -> Result<(), debugid::ParseDebugIdError> {
364/// let id = DebugId::from_breakpad("DFB8E43AF2423D73A453AEB6A777EF75a")?;
365/// assert_eq!("DFB8E43AF2423D73A453AEB6A777EF75a".to_string(), id.breakpad().to_string());
366/// # Ok(())
367/// # }
368///
369/// # fn main() { foo().unwrap() }
370/// ```
371///
372/// [`DebugId`]: struct.DebugId.html
373#[derive(Debug)]
374pub struct BreakpadFormat<'a> {
375    inner: &'a DebugId,
376}
377
378impl<'a> fmt::Display for BreakpadFormat<'a> {
379    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380        match self.inner.is_pdb20() {
381            true => {
382                let timestamp = self.inner.timestamp();
383                write!(f, "{:08X}{:x}", timestamp, self.inner.appendix())
384            }
385            false => {
386                let uuid = self.inner.uuid();
387                write!(f, "{:X}{:x}", uuid.simple(), self.inner.appendix())
388            }
389        }
390    }
391}
392
/// The error type associated with parsing a [`CodeId`](struct.CodeId.html) from a string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseCodeIdError;

impl fmt::Display for ParseCodeIdError {
    /// Writes the fixed, human readable error message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid code identifier")
    }
}

impl error::Error for ParseCodeIdError {}
404
/// Unique platform-dependent identifier of code files.
///
/// This identifier assumes a string representation that depends on the platform and compiler used.
/// The representation only retains hex characters and canonically stores lower case.
///
/// There are the following known formats:
///
///  - **MachO UUID**: The unique identifier of a Mach binary, specified in the `LC_UUID` load
///    command header.
///  - **GNU Build ID**: Contents of the `.gnu.build-id` note or section contents formatted as
///    lowercase hex string.
///  - **PE Timestamp**: Timestamp and size of image values from a Windows PE header. The size of
///    image value is truncated, so the length of the `CodeId` might not be a multiple of 2.
#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct CodeId {
    // Normalized representation: only ASCII hex digits, all lowercase.
    inner: String,
}

impl CodeId {
    /// Constructs an empty code identifier.
    pub fn nil() -> Self {
        Self::default()
    }

    /// Constructs a `CodeId` from its string representation.
    ///
    /// All characters other than ASCII hex digits are dropped and the remaining characters are
    /// converted to lowercase.
    pub fn new(mut string: String) -> Self {
        string.retain(|c| c.is_ascii_hexdigit());
        string.make_ascii_lowercase();
        CodeId { inner: string }
    }

    /// Constructs a `CodeId` from a binary slice.
    ///
    /// Every byte is rendered as two lowercase hex characters.
    pub fn from_binary(slice: &[u8]) -> Self {
        let mut inner = String::with_capacity(slice.len() * 2);

        for byte in slice {
            write!(inner, "{:02x}", byte).expect("writing to a String never fails");
        }

        // The string consists of lowercase hex digits only, so it is already normalized.
        CodeId { inner }
    }

    /// Returns whether this identifier is nil, i.e. it is empty.
    pub fn is_nil(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the string representation of this code identifier.
    pub fn as_str(&self) -> &str {
        self.inner.as_str()
    }
}
457
458impl fmt::Display for CodeId {
459    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
460        f.write_str(&self.inner)
461    }
462}
463
464impl fmt::Debug for CodeId {
465    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
466        write!(f, "CodeId({})", self)
467    }
468}
469
470impl From<String> for CodeId {
471    fn from(string: String) -> Self {
472        Self::new(string)
473    }
474}
475
476impl From<&'_ str> for CodeId {
477    fn from(string: &str) -> Self {
478        Self::new(string.into())
479    }
480}
481
482impl AsRef<str> for CodeId {
483    fn as_ref(&self) -> &str {
484        self.as_str()
485    }
486}
487
488impl str::FromStr for CodeId {
489    type Err = ParseCodeIdError;
490
491    fn from_str(string: &str) -> Result<Self, ParseCodeIdError> {
492        Ok(Self::new(string.into()))
493    }
494}
495
#[cfg(feature = "serde")]
mod serde_support {
    use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor};
    use serde::ser::{Serialize, Serializer};

    use super::*;

    /// Visitor that builds a `DebugId` from a string via its `FromStr` implementation.
    struct DebugIdVisitor;

    impl<'de> Visitor<'de> for DebugIdVisitor {
        type Value = DebugId;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("DebugId")
        }

        fn visit_str<E: de::Error>(self, value: &str) -> Result<DebugId, E> {
            match value.parse::<DebugId>() {
                Ok(id) => Ok(id),
                // Report the offending input together with what was expected instead.
                Err(_) => Err(de::Error::invalid_value(Unexpected::Str(value), &self)),
            }
        }
    }

    // A `DebugId` is serialized as its `Display` string.
    impl Serialize for DebugId {
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let text = self.to_string();
            serializer.serialize_str(&text)
        }
    }

    // A `DebugId` is deserialized from a string.
    impl<'de> Deserialize<'de> for DebugId {
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            deserializer.deserialize_str(DebugIdVisitor)
        }
    }

    // A `CodeId` is serialized as its normalized string representation.
    impl Serialize for CodeId {
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            serializer.serialize_str(self.as_str())
        }
    }

    // A `CodeId` is deserialized from an owned string, which is normalized by `CodeId::new`.
    impl<'de> Deserialize<'de> for CodeId {
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            String::deserialize(deserializer).map(CodeId::new)
        }
    }
}