buildid/
elf.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
use crate::align::align_up;
use core::mem;
use core::mem::MaybeUninit;
use core::{convert::TryInto, fmt};
use log::{debug, error, warn};

// FIXME: dl_phdr_info references are actually unsafe here because of how glibc defines
// dl_phdr_info: some of its fields are only present depending on the size provided
// (glibc < 2.4 omits them).
//
// We'll probably get away with this because we're not accessing those fields, but this is
// technically wrong because those fields can still be accessed from safe code.
//

#[cfg(target_pointer_width = "64")]
mod elf {
    pub type ElfPhdr = libc::Elf64_Phdr;
}
#[cfg(target_pointer_width = "32")]
mod elf {
    pub type ElfPhdr = libc::Elf32_Phdr;
}
use elf::*;

/// Note type value that `build_id()` looks for (together with the name `GNU\0`) when
/// scanning notes for the build ID.
const NT_GNU_BUILD_ID: u32 = 3;

// TODO: experiment with using more custom DST here to avoid manually extracting fields
/// A single note, viewed as the raw bytes that hold its header, name, and descriptor.
///
/// This is a dynamically sized wrapper around `[u8]`; references to it are produced by
/// casting a `[u8]` pointer. `#[repr(transparent)]` guarantees that the wrapper has exactly
/// the layout of its only field, which is what makes that pointer cast well-defined.
#[derive(Debug)]
#[repr(transparent)]
struct Note {
    // header words followed by the (padded) name and descriptor bytes
    data: [u8],
}

/// Smallest number of bytes that can hold a note: the three `u32` header words (name
/// length, descriptor length, type). The words are `u32` regardless of pointer width, so
/// this must not be derived from `usize`.
const MIN_NOTE_SIZE: usize = mem::size_of::<u32>() * 3;

/// Reasons a byte slice could not be parsed as a note.
#[derive(Debug)]
enum NoteError {
    /// The slice is too short to contain the fixed note header; `size` is the slice length.
    MissingHeader { size: usize },
    /// The lengths in the header describe a note of `need` bytes, but only `have` bytes
    /// are available.
    Truncated { have: usize, need: usize },
}

impl fmt::Display for NoteError {
    /// Render the error as "have N bytes, but need at least M".
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Both variants share one message; only the source of the two numbers differs.
        let (available, required) = match *self {
            Self::MissingHeader { size } => (size, MIN_NOTE_SIZE),
            Self::Truncated { have, need } => (have, need),
        };

        write!(
            f,
            "have {} bytes, but need at least {}",
            available, required
        )
    }
}

impl Note {
    // NOTE: the _standards_ say to use 8 byte alignment in 64-bit land. But llvm and others note
    // that everyone actually uses 4 byte alignment. Perfect. Hopefully this always works.
    const ALIGN: usize = 4;

    /// Size of the fixed note header: three native-endian `u32` words holding the name
    /// length, the descriptor length, and the note type, in that order.
    const HEADER_SIZE: usize = 3 * core::mem::size_of::<u32>();

    /// Reinterpret `data` as a `Note` without validating its contents.
    ///
    /// The accessors below index into `data` and panic if it is shorter than the header or
    /// than the lengths the header claims, so callers should normally use `from_bytes`.
    fn from_bytes_raw(data: &[u8]) -> &Self {
        // SAFETY: this relies on `Note` being layout-compatible with its single `[u8]`
        // field, so the slice length metadata carries over unchanged; the returned
        // reference borrows `data` for the same lifetime.
        unsafe { &*(data as *const [u8] as *const Note) }
    }

    /// Parse the note at the start of `data`.
    ///
    /// Returns the note (header, name and descriptor including their padding) and the bytes
    /// that follow it.
    ///
    /// # Errors
    ///
    /// `NoteError::MissingHeader` if `data` cannot hold the header, `NoteError::Truncated`
    /// if the lengths in the header extend past the end of `data`.
    fn from_bytes(data: &[u8]) -> Result<(&Self, &[u8]), NoteError> {
        if data.len() < Self::HEADER_SIZE {
            return Err(NoteError::MissingHeader { size: data.len() });
        }

        Self::from_bytes_raw(data).split_trailing()
    }

    /// Read the `index`-th `u32` word of the header (native endianness).
    fn header_word(&self, index: usize) -> u32 {
        let width = core::mem::size_of::<u32>();
        let start = index * width;
        let word: [u8; 4] = self.data[start..(start + width)]
            .try_into()
            .expect("sub-slice is exactly one u32 wide");
        u32::from_ne_bytes(word)
    }

    /// Length of the name in bytes, as stored in the header (padding excluded).
    fn name_len(&self) -> usize {
        self.header_word(0) as usize
    }

    /// Length of the descriptor in bytes, as stored in the header (padding excluded).
    fn desc_len(&self) -> usize {
        self.header_word(1) as usize
    }

    /// Note type, as stored in the header.
    fn type_(&self) -> u32 {
        self.header_word(2)
    }

    /// The name bytes, which start directly after the header.
    fn name(&self) -> &[u8] {
        let start = Self::HEADER_SIZE;
        &self.data[start..(start + self.name_len())]
    }

    /// The descriptor bytes, which start after the name padded to `ALIGN`.
    fn desc(&self) -> &[u8] {
        let start = Self::HEADER_SIZE + align_up(self.name_len(), Self::ALIGN);
        &self.data[start..(start + self.desc_len())]
    }

    /// `len` padded to `ALIGN`, or `None` if padding would overflow `usize`.
    fn padded_len(len: usize) -> Option<usize> {
        // Assumes `align_up` rounds up to the next multiple of its second argument: if
        // `len + (ALIGN - 1)` fits, the rounded value (which is no larger) fits as well.
        len.checked_add(Self::ALIGN - 1)
            .map(|_| align_up(len, Self::ALIGN))
    }

    /// Offset one past this note's padded descriptor, or `None` on arithmetic overflow.
    fn padded_end(&self) -> Option<usize> {
        let name = Self::padded_len(self.name_len())?;
        let desc = Self::padded_len(self.desc_len())?;
        Self::HEADER_SIZE.checked_add(name)?.checked_add(desc)
    }

    /// Split `self` into the note described by its header and the bytes following it.
    ///
    /// The header lengths come straight from the mapped file, so the end offset is computed
    /// with checked arithmetic; a length that overflows `usize` (possible on 32-bit targets)
    /// is reported as `Truncated` instead of wrapping around or panicking.
    fn split_trailing(&self) -> Result<(&Self, &[u8]), NoteError> {
        let have = self.data.len();
        match self.padded_end() {
            Some(end) if end <= have => {
                let (note, rest) = self.data.split_at(end);
                Ok((Self::from_bytes_raw(note), rest))
            }
            // Either the note runs past the slice, or its size is not even representable.
            end => Err(NoteError::Truncated {
                need: end.unwrap_or(usize::MAX),
                have,
            }),
        }
    }
}

// Ideally, we'd use a trait alias instead of a type alias and construct the type out of the
// trait. But that's not stable right now (see https://github.com/rust-lang/rust/issues/41517)
//
// Trampoline handed to `dl_iterate_phdr`: `data` is the pointer `object_map` passes along,
// i.e. a pointer to a `&mut dyn FnMut`, which is recovered here and invoked with the
// object's info and the reported struct size. The closure's result is returned unchanged.
unsafe extern "C" fn phdr_cb(
    info: *mut libc::dl_phdr_info,
    size: libc::size_t,
    data: *mut libc::c_void,
) -> libc::c_int {
    let callback = &mut *(data
        as *mut &mut dyn for<'r> FnMut(&'r libc::dl_phdr_info, usize) -> libc::c_int);

    callback(&*info, size)
}

/// Call `cb` through `dl_iterate_phdr`, passing each object's `dl_phdr_info` and the size
/// value supplied with it, and return whatever `dl_iterate_phdr` returns.
///
/// NOTE(review): the closure is handed `&'static` references although the pointer it
/// receives comes from the C callback — confirm callers do not keep the `dl_phdr_info`
/// reference itself beyond the call.
fn object_map<F: FnMut(&'static libc::dl_phdr_info, usize) -> libc::c_int>(
    mut cb: F,
) -> libc::c_int {
    // Erase the closure type; a pointer to this (fat) reference is what travels through the
    // C API's single `void *` argument and is unpacked again in `phdr_cb`.
    let mut erased: &mut dyn FnMut(&'static libc::dl_phdr_info, usize) -> libc::c_int = &mut cb;
    let user_data = &mut erased as *mut _ as *mut libc::c_void;

    unsafe { libc::dl_iterate_phdr(Some(phdr_cb), user_data) }
}

/// Iterate over program headers described by a dl_phdr_info
struct PhdrIter<'a> {
    // object whose `dlpi_phdr` array is being walked
    info: &'a libc::dl_phdr_info,
    // index of the next header to yield; iteration stops once it reaches `dlpi_phnum`
    i: u16,
}

impl<'a> Iterator for PhdrIter<'a> {
    // The element type of `dlpi_phdr` depends on the pointer width; this mirrors the
    // file's `ElfPhdr` alias so the iterator also type-checks on 32-bit targets.
    #[cfg(target_pointer_width = "64")]
    type Item = &'a libc::Elf64_Phdr;
    #[cfg(target_pointer_width = "32")]
    type Item = &'a libc::Elf32_Phdr;

    /// Yield the next program header, or `None` once `dlpi_phnum` headers were produced.
    fn next(&mut self) -> Option<Self::Item> {
        if self.i >= self.info.dlpi_phnum {
            return None;
        }

        // SAFETY: relies on `dlpi_phdr` pointing at an array of `dlpi_phnum` headers, as
        // `dl_iterate_phdr` documents; `i` was checked against `dlpi_phnum` above.
        let phdr = unsafe { &*self.info.dlpi_phdr.add(usize::from(self.i)) };
        self.i += 1;
        Some(phdr)
    }
}

impl<'a> From<&'a libc::dl_phdr_info> for PhdrIter<'a> {
    /// Build an iterator positioned at the first program header of `info`.
    fn from(info: &'a libc::dl_phdr_info) -> Self {
        Self { i: 0, info }
    }
}

/// Iterate over notes stored in a PT_NOTE program section
#[derive(Debug)]
struct NoteIter<'a> {
    // bytes of the segment not yet consumed; each yielded note is split off the front
    segment: &'a [u8],
}

impl<'a> NoteIter<'a> {
    /// Create an iterator over the notes in `phdr`, which must belong to the object
    /// described by `info`.
    ///
    /// Returns `None` when `phdr` is not a `PT_NOTE` segment.
    fn new(info: &'a libc::dl_phdr_info, phdr: &'a ElfPhdr) -> Option<Self> {
        // NOTE: each dl_phdr_info describes multiple program segments. In this iterator, we're
        // only examining one of them.
        //
        // We'll also need to iterate over program segments to pick out the PT_NOTE one we need.

        if phdr.p_type != libc::PT_NOTE {
            return None;
        }

        // `dlpi_addr` is a load bias that is combined with `p_vaddr` modulo the address
        // width, so the sum may legitimately wrap; a plain `+` would panic in debug builds.
        let segment_base = info.dlpi_addr.wrapping_add(phdr.p_vaddr) as *const u8;

        // SAFETY: relies on `phdr` describing a segment of the loaded object `info`, so
        // that `p_filesz` bytes are mapped and readable at `segment_base`.
        let segment = unsafe {
            // FIXME: consider p_memsz vs p_filesz question here.
            // llvm appears to use filesz
            core::slice::from_raw_parts(segment_base, phdr.p_filesz as usize)
        };

        Some(NoteIter { segment })
    }
}

impl<'a> Iterator for NoteIter<'a> {
    type Item = Result<&'a Note, NoteError>;

    /// Parse the next note off the front of the remaining segment bytes.
    ///
    /// A parse error is yielded once; after it the iterator is exhausted, because without
    /// a valid header there is no way to find where the next note would start.
    fn next(&mut self) -> Option<Self::Item> {
        if self.segment.is_empty() {
            return None;
        }

        match Note::from_bytes(self.segment) {
            Ok((note, rest)) => {
                self.segment = rest;
                Some(Ok(note))
            }
            Err(e) => {
                // Drop the unparsable remainder so the same error is not yielded forever.
                self.segment = &[];
                Some(Err(e))
            }
        }
    }
}

/// Return the GNU build ID of the object (executable or shared library) that contains
/// this function.
///
/// The object is located with `dladdr`, then matched against the objects reported by
/// `dl_iterate_phdr`; its `PT_NOTE` segments are scanned for a note of type
/// `NT_GNU_BUILD_ID` named `GNU\0` with a non-empty descriptor, and that descriptor is
/// returned.
///
/// Returns `None` if `dladdr` fails, if no reported object matches, or if the matching
/// object has no such note.
pub fn build_id() -> Option<&'static [u8]> {
    // find the shared object that contains our own `build_id()` fn (this fn itself)
    let data = {
        let mut data = MaybeUninit::<libc::Dl_info>::uninit();
        let addr = build_id as *const libc::c_void;
        if unsafe { libc::dladdr(addr, data.as_mut_ptr()) } == 0 {
            // TODO: consider if we have fallback options here
            error!("dladdr failed to find our own symbol");
            return None;
        }

        // SAFETY: relies on `dladdr` having filled in `data` when it reports success
        // (non-zero return), which was checked above.
        unsafe { data.assume_init() }
    };

    let mut res = None;
    // FIXME: we probably should avoid ignoring `size` here so we can bounds check our
    // accesses. Basically need to treat this data as a big array we happen to have pointers
    // into, and convert those pointers to offsets.
    object_map(|info, _size| {
        // FIXME: unclear why the first PT_LOAD is the right thing to use here
        //
        // `dlpi_addr` is a load bias combined with `p_vaddr` modulo the address width, so
        // use a wrapping add; a plain `+` would panic in debug builds when the sum wraps.
        let first_load = PhdrIter::from(info)
            .find(|phdr| phdr.p_type == libc::PT_LOAD)
            .map(|phdr| info.dlpi_addr.wrapping_add(phdr.p_vaddr));

        let map_start = match first_load {
            Some(v) => v,
            None => {
                debug!(
                    "no PT_LOAD segment found in object {:?}, skipping",
                    info.dlpi_name
                );
                return 0;
            }
        };

        // check if this phdr (map_start) is the one that contains our fn (data.dli_fbase)
        //
        // Compare as `usize` so this type-checks for both 32-bit and 64-bit ELF address types.
        if map_start as usize != data.dli_fbase as usize {
            debug!(
                "map_start ({:?}) != data.dli_fbase ({:?}), skipping",
                map_start, data.dli_fbase
            );
            return 0;
        }

        'phdr: for phdr in PhdrIter::from(info) {
            let ni = match NoteIter::new(info, phdr) {
                Some(v) => v,
                None => continue,
            };

            // iterate over notes
            for note in ni {
                let note = match note {
                    Err(e) => {
                        warn!("note program segment had invalid note {}", e);
                        continue 'phdr;
                    }
                    Ok(v) => v,
                };
                if note.type_() == NT_GNU_BUILD_ID
                    && !note.desc().is_empty()
                    && note.name() == b"GNU\0"
                {
                    res = Some(note.desc());
                    break 'phdr;
                }
            }
        }

        // This was the object containing us; a non-zero return makes `dl_iterate_phdr`
        // stop instead of walking the remaining objects, none of which can match.
        1
    });

    res
}