debugid/lib.rs
1//! This crate provides types for identifiers of object files, such as executables, dynamic
2//! libraries or debug companion files. The concept originates in Google Breakpad and defines two
3//! types:
4//!
5//! - [`CodeId`]: Identifies the file containing source code, i.e. the actual library or
6//! executable. The identifier is platform dependent and implementation defined. Thus, there is
7//! no canonical representation.
8//! - [`DebugId`]: Identifies a debug information file, which may or may not use information from
9//! the Code ID. The contents are also implementation defined, but as opposed to `CodeId`, the
10//! structure is streamlined across platforms. It is also guaranteed to be 32 bytes in size.
11//!
12//! [`CodeId`]: struct.CodeId.html
13//! [`DebugId`]: struct.DebugId.html
14
15#![warn(missing_docs)]
16
17use std::error;
18use std::fmt;
19use std::fmt::Write;
20use std::str;
21
22use uuid::{Bytes, Uuid};
23
/// Indicates an error parsing a [`DebugId`](struct.DebugId.html).
///
/// The error carries no further detail; it only signals that the input was not a valid
/// debug identifier.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ParseDebugIdError;

impl fmt::Display for ParseDebugIdError {
    /// Writes the fixed, human-readable error message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid debug identifier")
    }
}

// No source error is wrapped, so the default `Error` methods are sufficient.
impl error::Error for ParseDebugIdError {}
35
/// Switches controlling how strictly `DebugId::parse_str` interprets its input.
#[derive(Clone, Copy, Debug)]
struct ParseOptions {
    /// Accept the hyphenated form; when `false`, any hyphenated input is rejected.
    allow_hyphens: bool,
    /// Reject a UUID that is not followed by an appendix.
    require_appendix: bool,
    /// Ignore everything after the first 8 characters of a UUID's appendix.
    allow_tail: bool,
}
42
43/// Unique identifier for debug information files and their debug information.
44///
45/// This type is analogous to [`CodeId`], except that it identifies a debug file instead of the
/// actual library or executable. On some platforms, a `DebugId` is an alias for a `CodeId` but the
47/// exact rules around this are complex. On Windows, the identifiers are completely different and
48/// refer to separate files.
49///
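/// The canonical string representation, as produced by `Display`, is at most 45 characters
/// long and consists of:
///
/// 1. 36 character hyphenated hex representation of the UUID field
/// 2. optionally, a hyphen followed by the 1-8 character lowercase hex representation of the
///    u32 appendix; this part is omitted when the appendix is zero
///
/// When parsing, the non-hyphenated form (32 hex characters directly followed by the
/// appendix) is accepted as well.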
54///
/// The debug identifier is compatible with Google Breakpad. Use [`DebugId::breakpad`] to get a
/// breakpad string representation of this debug identifier.
57///
58/// There is one exception to this: for the old PDB 2.0 format the debug identifier consists
59/// of only a 32-bit integer + age resulting in a string representation of between 9 and 16
60/// hex characters.
61///
62/// # Example
63///
64/// ```
65/// # extern crate debugid;
66/// use std::str::FromStr;
67/// use debugid::DebugId;
68///
69/// # fn foo() -> Result<(), ::debugid::ParseDebugIdError> {
70/// let id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a")?;
71/// assert_eq!("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a".to_string(), id.to_string());
72/// # Ok(())
73/// # }
74///
75/// # fn main() { foo().unwrap() }
76/// ```
77///
78/// # In-memory representation
79///
80/// The in-memory representation takes up 32 bytes and can be directly written to storage
81/// and mapped back into an object reference.
82///
83/// ```
84/// use std::str::FromStr;
85/// use debugid::DebugId;
86///
87/// let debug_id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a").unwrap();
88///
89/// let slice = &[debug_id];
90/// let ptr = slice.as_ptr() as *const u8;
91/// let len = std::mem::size_of_val(slice);
92/// let buf: &[u8] = unsafe { std::slice::from_raw_parts(ptr, len) };
93///
94/// let mut new_buf: Vec<u8> = Vec::new();
95/// std::io::copy(&mut std::io::Cursor::new(buf), &mut new_buf).unwrap();
96///
97/// let ptr = new_buf.as_ptr() as *const DebugId;
98/// let new_debug_id = unsafe { &*ptr };
99///
100/// assert_eq!(*new_debug_id, debug_id);
101/// ```
102///
/// As long as the bytes were written using the same major version of this crate you will be
/// able to read them again like this.
105///
106/// [`CodeId`]: struct.CodeId.html
107/// [`DebugId::breakpad`]: struct.DebugId.html#method.breakpad
108// This needs to be backwards compatible also in its exact in-memory byte-layout since this
109// struct is directly mapped from disk in e.g. Symbolic SymCache formats. The first version
110// of this struct was defined as:
111//
112// ```rust
113// struct DebugId {
114// uuid: Uuid,
115// appendix: u32,
116// _padding: [u8; 12],
117// }
118// ```
119//
120// For this reason the current `typ` byte represents the type of `DebugId` stored in the
121// `Bytes`:
122//
123// - `0u8`: The `bytes` field contains a UUID.
124// - `1u8`: The first 4 bytes of the `bytes` field contain a big-endian u32, the remaining
125// bytes are 0.
126#[repr(C, packed)]
127#[derive(Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
128pub struct DebugId {
129 bytes: Bytes,
130 appendix: u32,
131 _padding: [u8; 11],
132 typ: u8,
133}
134
135impl DebugId {
136 /// Constructs an empty debug identifier, containing only zeros.
137 pub fn nil() -> Self {
138 Self::default()
139 }
140
141 /// Constructs a `DebugId` from its `uuid`.
142 pub fn from_uuid(uuid: Uuid) -> Self {
143 Self::from_parts(uuid, 0)
144 }
145
146 /// Constructs a `DebugId` from its `uuid` and `appendix` parts.
147 pub fn from_parts(uuid: Uuid, appendix: u32) -> Self {
148 DebugId {
149 bytes: *uuid.as_bytes(),
150 appendix,
151 typ: 0,
152 _padding: [0; 11],
153 }
154 }
155
156 /// Constructs a `DebugId` from a Microsoft little-endian GUID and age.
157 pub fn from_guid_age(guid: &[u8], age: u32) -> Result<Self, ParseDebugIdError> {
158 if guid.len() != 16 {
159 return Err(ParseDebugIdError);
160 }
161
162 let uuid = Uuid::from_bytes([
163 guid[3], guid[2], guid[1], guid[0], guid[5], guid[4], guid[7], guid[6], guid[8],
164 guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15],
165 ]);
166
167 Ok(DebugId::from_parts(uuid, age))
168 }
169
170 /// Constructs a `DebugId` from a PDB 2.0 timestamp and age.
171 pub fn from_pdb20(timestamp: u32, age: u32) -> Self {
172 // The big-endian byte-order here has to match the one used to read this number in
173 // the DebugId::timestamp method.
174 DebugId {
175 bytes: [
176 (timestamp >> 24) as u8,
177 (timestamp >> 16) as u8,
178 (timestamp >> 8) as u8,
179 timestamp as u8,
180 0u8,
181 0u8,
182 0u8,
183 0u8,
184 0u8,
185 0u8,
186 0u8,
187 0u8,
188 0u8,
189 0u8,
190 0u8,
191 0u8,
192 ],
193 appendix: age,
194 _padding: [0u8; 11],
195 typ: 1u8,
196 }
197 }
198
199 /// Parses a breakpad identifier from a string.
200 pub fn from_breakpad(string: &str) -> Result<Self, ParseDebugIdError> {
201 let options = ParseOptions {
202 allow_hyphens: false,
203 require_appendix: true,
204 allow_tail: false,
205 };
206 Self::parse_str(string, options).ok_or(ParseDebugIdError)
207 }
208
209 /// Returns the UUID part of the code module's debug_identifier.
210 ///
211 /// If this is a debug identifier for the PDB 2.0 format an invalid UUID is returned
212 /// where only the first 4 bytes are filled in and the remainder of the bytes are 0.
213 /// This means the UUID has variant [`uuid::Variant::NCS`] and an unknown version,
214 /// [`Uuid::get_version`] will return `None`, which is not a valid UUID.
215 ///
216 /// This may seem odd however does seem reasonable:
217 ///
218 /// - Every [`DebugId`] can be represented as [`Uuid`] and will still mostly look
219 /// reasonable e.g. in comparisons etc.
220 /// - The PDB 2.0 format is very old and very unlikely to appear practically.
221 pub fn uuid(&self) -> Uuid {
222 Uuid::from_bytes(self.bytes)
223 }
224
225 /// Returns the appendix part of the code module's debug identifier.
226 ///
227 /// On Windows, this is an incrementing counter to identify the build.
228 /// On all other platforms, this value will always be zero.
229 pub fn appendix(&self) -> u32 {
230 self.appendix
231 }
232
233 /// Returns whether this identifier is nil, i.e. it consists only of zeros.
234 pub fn is_nil(&self) -> bool {
235 self.bytes == [0u8; 16] && self.appendix == 0
236 }
237
238 /// Returns whether this identifier is from the PDB 2.0 format.
239 pub fn is_pdb20(&self) -> bool {
240 self.typ == 1
241 }
242
243 /// Returns a wrapper which when formatted via `fmt::Display` will format a
244 /// a breakpad identifier.
245 pub fn breakpad(&self) -> BreakpadFormat<'_> {
246 BreakpadFormat { inner: self }
247 }
248
249 fn parse_str(string: &str, options: ParseOptions) -> Option<Self> {
250 let is_hyphenated = string.get(8..9) == Some("-");
251 if is_hyphenated && !options.allow_hyphens || !string.is_ascii() {
252 return None;
253 }
254
255 // Can the PDB 2.0 format match? This can never be true for a valid UUID.
256 let min_len = if is_hyphenated { 10 } else { 9 };
257 let max_len = if is_hyphenated { 17 } else { 16 };
258 if min_len <= string.len() && string.len() <= max_len {
259 let timestamp_str = string.get(..8)?;
260 let timestamp = u32::from_str_radix(timestamp_str, 16).ok()?;
261 let appendix_str = match is_hyphenated {
262 true => string.get(9..)?,
263 false => string.get(8..)?,
264 };
265 let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
266 return Some(Self::from_pdb20(timestamp, appendix));
267 }
268
269 let uuid_len = if is_hyphenated { 36 } else { 32 };
270 let uuid = string.get(..uuid_len)?.parse().ok()?;
271 if !options.require_appendix && string.len() == uuid_len {
272 return Some(Self::from_parts(uuid, 0));
273 }
274
275 let mut appendix_str = &string[uuid_len..];
276 if is_hyphenated ^ appendix_str.starts_with('-') {
277 return None; // Require a hyphen if and only if we're hyphenated.
278 } else if is_hyphenated {
279 appendix_str = &appendix_str[1..]; // Skip the hyphen for parsing.
280 }
281
282 if options.allow_tail && appendix_str.len() > 8 {
283 appendix_str = &appendix_str[..8];
284 }
285
286 // Parse the appendix, which fails on empty strings.
287 let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
288 Some(Self::from_parts(uuid, appendix))
289 }
290
291 /// Returns the PDB 2.0 timestamp.
292 ///
293 /// Only valid if you know this is a PDB 2.0 debug identifier.
294 fn timestamp(&self) -> u32 {
295 u32::from_be_bytes([self.bytes[0], self.bytes[1], self.bytes[2], self.bytes[3]])
296 }
297}
298
299impl fmt::Debug for DebugId {
300 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
301 let uuid = self.uuid();
302 f.debug_struct("DebugId")
303 .field("uuid", &uuid.hyphenated().to_string())
304 .field("appendix", &self.appendix())
305 .finish()
306 }
307}
308
309impl fmt::Display for DebugId {
310 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
311 match self.is_pdb20() {
312 true => {
313 let timestamp = self.timestamp();
314 write!(f, "{:08X}", timestamp)?;
315 }
316 false => {
317 let uuid = self.uuid();
318 uuid.fmt(f)?;
319 }
320 }
321 if self.appendix > 0 {
322 write!(f, "-{:x}", { self.appendix })?;
323 }
324 Ok(())
325 }
326}
327
328impl str::FromStr for DebugId {
329 type Err = ParseDebugIdError;
330
331 fn from_str(string: &str) -> Result<Self, ParseDebugIdError> {
332 let options = ParseOptions {
333 allow_hyphens: true,
334 require_appendix: false,
335 allow_tail: true,
336 };
337 Self::parse_str(string, options).ok_or(ParseDebugIdError)
338 }
339}
340
341impl From<Uuid> for DebugId {
342 fn from(uuid: Uuid) -> Self {
343 DebugId::from_uuid(uuid)
344 }
345}
346
347impl From<(Uuid, u32)> for DebugId {
348 fn from(tuple: (Uuid, u32)) -> Self {
349 let (uuid, appendix) = tuple;
350 DebugId::from_parts(uuid, appendix)
351 }
352}
353
354/// Wrapper around [`DebugId`] for Breakpad formatting.
355///
356/// **Example:**
357///
358/// ```
359/// # extern crate debugid;
360/// use std::str::FromStr;
361/// use debugid::DebugId;
362///
363/// # fn foo() -> Result<(), debugid::ParseDebugIdError> {
364/// let id = DebugId::from_breakpad("DFB8E43AF2423D73A453AEB6A777EF75a")?;
365/// assert_eq!("DFB8E43AF2423D73A453AEB6A777EF75a".to_string(), id.breakpad().to_string());
366/// # Ok(())
367/// # }
368///
369/// # fn main() { foo().unwrap() }
370/// ```
371///
372/// [`DebugId`]: struct.DebugId.html
373#[derive(Debug)]
374pub struct BreakpadFormat<'a> {
375 inner: &'a DebugId,
376}
377
378impl<'a> fmt::Display for BreakpadFormat<'a> {
379 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380 match self.inner.is_pdb20() {
381 true => {
382 let timestamp = self.inner.timestamp();
383 write!(f, "{:08X}{:x}", timestamp, self.inner.appendix())
384 }
385 false => {
386 let uuid = self.inner.uuid();
387 write!(f, "{:X}{:x}", uuid.simple(), self.inner.appendix())
388 }
389 }
390 }
391}
392
/// Indicates an error parsing a [`CodeId`](struct.CodeId.html).
///
/// The error carries no further detail; it only signals that the input was not a valid
/// code identifier.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ParseCodeIdError;

impl fmt::Display for ParseCodeIdError {
    /// Writes the fixed, human-readable error message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid code identifier")
    }
}

// No source error is wrapped, so the default `Error` methods are sufficient.
impl error::Error for ParseCodeIdError {}
404
405/// Unique platform-dependent identifier of code files.
406///
407/// This identifier assumes a string representation that depends on the platform and compiler used.
408/// The representation only retains hex characters and canonically stores lower case.
409///
410/// There are the following known formats:
411///
412/// - **MachO UUID**: The unique identifier of a Mach binary, specified in the `LC_UUID` load
413/// command header.
414/// - **GNU Build ID**: Contents of the `.gnu.build-id` note or section contents formatted as
415/// lowercase hex string.
416/// - **PE Timestamp**: Timestamp and size of image values from a Windows PE header. The size of
417/// image value is truncated, so the length of the `CodeId` might not be a multiple of 2.
#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct CodeId {
    /// Normalized identifier: only ASCII hex digits, all lowercase.
    inner: String,
}

impl CodeId {
    /// Constructs an empty code identifier.
    pub fn nil() -> Self {
        CodeId {
            inner: String::new(),
        }
    }

    /// Constructs a `CodeId` from its string representation.
    ///
    /// Every character that is not an ASCII hex digit is dropped and the remaining
    /// characters are converted to lowercase.
    pub fn new(mut string: String) -> Self {
        string.retain(|ch| ch.is_ascii_hexdigit());
        string.make_ascii_lowercase();
        Self { inner: string }
    }

    /// Constructs a `CodeId` from a binary slice.
    ///
    /// Each byte is rendered as two lowercase hex digits.
    pub fn from_binary(slice: &[u8]) -> Self {
        let hex = slice.iter().fold(
            String::with_capacity(slice.len() * 2),
            |mut buffer, byte| {
                write!(&mut buffer, "{:02x}", byte).expect("");
                buffer
            },
        );

        Self::new(hex)
    }

    /// Returns whether this identifier is nil, i.e. it is empty.
    pub fn is_nil(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Returns the string representation of this code identifier.
    pub fn as_str(&self) -> &str {
        &self.inner
    }
}
457
458impl fmt::Display for CodeId {
459 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
460 f.write_str(&self.inner)
461 }
462}
463
464impl fmt::Debug for CodeId {
465 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
466 write!(f, "CodeId({})", self)
467 }
468}
469
470impl From<String> for CodeId {
471 fn from(string: String) -> Self {
472 Self::new(string)
473 }
474}
475
476impl From<&'_ str> for CodeId {
477 fn from(string: &str) -> Self {
478 Self::new(string.into())
479 }
480}
481
482impl AsRef<str> for CodeId {
483 fn as_ref(&self) -> &str {
484 self.as_str()
485 }
486}
487
488impl str::FromStr for CodeId {
489 type Err = ParseCodeIdError;
490
491 fn from_str(string: &str) -> Result<Self, ParseCodeIdError> {
492 Ok(Self::new(string.into()))
493 }
494}
495
#[cfg(feature = "serde")]
mod serde_support {
    use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor};
    use serde::ser::{Serialize, Serializer};

    use super::*;

    // A `CodeId` is serialized as its normalized string.
    impl Serialize for CodeId {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            serializer.serialize_str(self.as_str())
        }
    }

    // Any string deserializes into a `CodeId`; it is normalized by `CodeId::new`.
    impl<'de> Deserialize<'de> for CodeId {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            String::deserialize(deserializer).map(CodeId::new)
        }
    }

    // A `DebugId` is serialized as its `Display` string.
    impl Serialize for DebugId {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let text = self.to_string();
            serializer.serialize_str(&text)
        }
    }

    // A `DebugId` is deserialized by parsing a string with its `FromStr` implementation.
    impl<'de> Deserialize<'de> for DebugId {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            /// Visitor turning a string into a `DebugId`.
            struct DebugIdVisitor;

            impl<'de> Visitor<'de> for DebugIdVisitor {
                type Value = DebugId;

                fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                    formatter.write_str("DebugId")
                }

                fn visit_str<E>(self, value: &str) -> Result<DebugId, E>
                where
                    E: de::Error,
                {
                    match value.parse::<DebugId>() {
                        Ok(id) => Ok(id),
                        // Report the offending string together with the expectation above.
                        Err(_) => Err(E::invalid_value(Unexpected::Str(value), &self)),
                    }
                }
            }

            deserializer.deserialize_str(DebugIdVisitor)
        }
    }
}
543}