pprof_util/lib.rs

mod cast;

use std::collections::BTreeMap;
use std::fmt;
use std::io::BufRead;
use std::io::Write;
use std::path::PathBuf;
use std::time::{Instant, SystemTime, UNIX_EPOCH};

use anyhow::bail;
use flate2::write::GzEncoder;
use flate2::Compression;
use prost::Message;

pub use cast::CastFrom;
pub use cast::TryCastFrom;

#[cfg(feature = "flamegraph")]
pub use inferno::flamegraph::Options as FlamegraphOptions;

/// Start times of the profiler.
#[derive(Copy, Clone, Debug)]
pub enum ProfStartTime {
    Instant(Instant),
    TimeImmemorial,
}

/// Helper struct to simplify building a `string_table` for the pprof format.
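///
/// Interning semantics, as a small illustrative sketch (not part of the original docs): element 0
/// is always the empty string, and inserting the same string twice returns the same index.
///
/// ```ignore
/// let mut table = StringTable::new();
/// assert_eq!(table.insert("bytes"), 1);
/// assert_eq!(table.insert("bytes"), 1);
/// assert_eq!(table.finish(), vec!["".to_string(), "bytes".to_string()]);
/// ```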
#[derive(Default)]
struct StringTable(BTreeMap<String, i64>);

impl StringTable {
    fn new() -> Self {
        // Element 0 must always be the empty string.
        let inner = [("".into(), 0)].into();
        Self(inner)
    }

    fn insert(&mut self, s: &str) -> i64 {
        if let Some(idx) = self.0.get(s) {
            *idx
        } else {
            let idx = i64::try_from(self.0.len()).expect("must fit");
            self.0.insert(s.into(), idx);
            idx
        }
    }

    fn finish(self) -> Vec<String> {
        let mut vec: Vec<_> = self.0.into_iter().collect();
        vec.sort_by_key(|(_, idx)| *idx);
        vec.into_iter().map(|(s, _)| s).collect()
    }
}

#[path = "perftools.profiles.rs"]
mod proto;

/// A single sample in the profile. The stack is a list of addresses.
#[derive(Clone, Debug)]
pub struct WeightedStack {
    pub addrs: Vec<usize>,
    pub weight: f64,
}

/// A mapping of a single shared object.
#[derive(Clone, Debug)]
pub struct Mapping {
    pub memory_start: usize,
    pub memory_end: usize,
    pub memory_offset: usize,
    pub file_offset: u64,
    pub pathname: PathBuf,
    pub build_id: Option<BuildId>,
}

/// Build ID of a shared object.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct BuildId(pub Vec<u8>);

impl fmt::Display for BuildId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for byte in &self.0 {
            write!(f, "{byte:02x}")?;
        }
        Ok(())
    }
}

/// A minimal representation of a profile that can be parsed from the jemalloc heap profile.
#[derive(Default)]
pub struct StackProfile {
    pub annotations: Vec<String>,
    // The second element is the index in `annotations`, if one exists.
    pub stacks: Vec<(WeightedStack, Option<usize>)>,
    pub mappings: Vec<Mapping>,
}

impl StackProfile {
    /// Converts the profile into the pprof format.
    ///
    /// pprof encodes profiles as gzipped protobuf messages of the Profile message type
    /// (see `pprof/profile.proto`).
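    ///
    /// A minimal usage sketch (the sample and period type labels below are illustrative, not
    /// prescribed by this crate):
    ///
    /// ```ignore
    /// let mut profile = StackProfile::default();
    /// profile.push_stack(WeightedStack { addrs: vec![0x1234], weight: 8.0 }, None);
    /// let gzipped_proto = profile.to_pprof(("inuse_space", "bytes"), ("space", "bytes"), None);
    /// ```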
    pub fn to_pprof(
        &self,
        sample_type: (&str, &str),
        period_type: (&str, &str),
        anno_key: Option<String>,
    ) -> Vec<u8> {
        let profile = self.to_pprof_proto(sample_type, period_type, anno_key);
        let encoded = profile.encode_to_vec();

        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
        gz.write_all(&encoded).unwrap();
        gz.finish().unwrap()
    }

    /// Converts the profile into the pprof Protobuf format (see `pprof/profile.proto`).
    fn to_pprof_proto(
        &self,
        sample_type: (&str, &str),
        period_type: (&str, &str),
        anno_key: Option<String>,
    ) -> proto::Profile {
        let mut profile = proto::Profile::default();
        let mut strings = StringTable::new();

        let anno_key = anno_key.unwrap_or_else(|| "annotation".into());

        profile.sample_type = vec![proto::ValueType {
            r#type: strings.insert(sample_type.0),
            unit: strings.insert(sample_type.1),
        }];
        profile.period_type = Some(proto::ValueType {
            r#type: strings.insert(period_type.0),
            unit: strings.insert(period_type.1),
        });

        profile.time_nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("now is later than UNIX epoch")
            .as_nanos()
            .try_into()
            .expect("the year 2554 is far away");

        for (mapping, mapping_id) in self.mappings.iter().zip(1..) {
            let pathname = mapping.pathname.to_string_lossy();
            let filename_idx = strings.insert(&pathname);

            let build_id_idx = match &mapping.build_id {
                Some(build_id) => strings.insert(&build_id.to_string()),
                None => 0,
            };

            profile.mapping.push(proto::Mapping {
                id: mapping_id,
                memory_start: u64::cast_from(mapping.memory_start),
                memory_limit: u64::cast_from(mapping.memory_end),
                file_offset: mapping.file_offset,
                filename: filename_idx,
                build_id: build_id_idx,
                ..Default::default()
            });

            // This is a Polar Signals-specific extension: for correct offline symbolization
            // they need access to the memory offset of mappings, but the pprof format only has a
            // field for the file offset. So we instead encode additional information about
            // mappings in magic comments. There must be exactly one comment for each mapping.

            // Take a shortcut and assume the ELF type is always `ET_DYN`. This is true for shared
            // libraries and for position-independent executables, so it should always be true for
            // any mappings we have.
            // Getting the actual information is annoying. It's in the ELF header (the `e_type`
            // field), but there is no guarantee that the full ELF header gets mapped, so we might
            // not be able to find it in memory. We could try to load it from disk instead, but
            // then we'd have to worry about blocking disk I/O.
            let elf_type = 3;

            let comment = format!(
                "executableInfo={:x};{:x};{:x}",
                elf_type, mapping.file_offset, mapping.memory_offset
            );
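            // For illustration (hypothetical values, not from a real mapping): a mapping with
            // `file_offset` 0x1000 and `memory_offset` 0x2000 produces the comment
            // "executableInfo=3;1000;2000".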
            profile.comment.push(strings.insert(&comment));
        }

        let mut location_ids = BTreeMap::new();
        #[cfg(feature = "symbolize")]
        let mut function_ids = BTreeMap::new();
        for (stack, anno) in self.iter() {
            let mut sample = proto::Sample::default();

            let value = stack.weight.trunc();
            let value = i64::try_cast_from(value).expect("no exabyte heap sizes");
            sample.value.push(value);

            for addr in stack.addrs.iter().rev() {
                // See the comment
                // [here](https://github.com/rust-lang/backtrace-rs/blob/036d4909e1fb9c08c2bb0f59ac81994e39489b2f/src/symbolize/mod.rs#L123-L147)
                // for why we need to subtract one. tl;dr: addresses in stack traces are actually
                // the return address of the called function, which is one past the call itself.
                //
                // Of course, the `call` instruction can be more than one byte, so after subtracting
                // one, we might point somewhere in the middle of it, rather than to the beginning
                // of the instruction. That's fine; symbolization tools don't seem to get confused
                // by this.
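                //
                // For example (illustrative): a return address of 0x1000 is mapped to 0x0fff,
                // the last byte of the `call` instruction that produced it.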
                let addr = u64::cast_from(*addr) - 1;

                let loc_id = *location_ids.entry(addr).or_insert_with(|| {
                    // profile.proto says the location id may be the address, but Polar Signals
                    // insists that location ids are sequential, starting with 1.
                    let id = u64::cast_from(profile.location.len()) + 1;

                    #[allow(unused_mut)] // for feature = "symbolize"
                    let mut mapping = profile
                        .mapping
                        .iter_mut()
                        .find(|m| m.memory_start <= addr && m.memory_limit > addr);

                    // If online symbolization is enabled, resolve the function and line.
                    #[allow(unused_mut)]
                    let mut line = Vec::new();
                    #[cfg(feature = "symbolize")]
                    backtrace::resolve(addr as *mut std::ffi::c_void, |symbol| {
                        let Some(symbol_name) = symbol.name() else {
                            return;
                        };
                        let function_name = format!("{symbol_name:#}");
                        let lineno = symbol.lineno().unwrap_or(0) as i64;

                        let function_id = *function_ids.entry(function_name).or_insert_with_key(
                            |function_name| {
                                let function_id = profile.function.len() as u64 + 1;
                                let system_name = String::from_utf8_lossy(symbol_name.as_bytes());
                                let filename = symbol
                                    .filename()
                                    .map(|path| path.to_string_lossy())
                                    .unwrap_or(std::borrow::Cow::Borrowed(""));

                                if let Some(ref mut mapping) = mapping {
                                    mapping.has_functions = true;
                                    mapping.has_filenames |= !filename.is_empty();
                                    mapping.has_line_numbers |= lineno > 0;
                                }

                                profile.function.push(proto::Function {
                                    id: function_id,
                                    name: strings.insert(function_name),
                                    system_name: strings.insert(&system_name),
                                    filename: strings.insert(&filename),
                                    ..Default::default()
                                });
                                function_id
                            },
                        );

                        line.push(proto::Line {
                            function_id,
                            line: lineno,
                        });

                        if let Some(ref mut mapping) = mapping {
                            mapping.has_inline_frames |= line.len() > 1;
                        }
                    });

                    profile.location.push(proto::Location {
                        id,
                        mapping_id: mapping.map_or(0, |m| m.id),
                        address: addr,
                        line,
                        ..Default::default()
                    });
                    id
                });

                sample.location_id.push(loc_id);

                if let Some(anno) = anno {
                    sample.label.push(proto::Label {
                        key: strings.insert(&anno_key),
                        str: strings.insert(anno),
                        ..Default::default()
                    })
                }
            }

            profile.sample.push(sample);
        }

        profile.string_table = strings.finish();

        profile
    }

    /// Converts the profile into a flamegraph SVG, using the given options.
    #[cfg(feature = "flamegraph")]
    pub fn to_flamegraph(&self, opts: &mut FlamegraphOptions) -> anyhow::Result<Vec<u8>> {
        use std::collections::HashMap;

        // We start from a symbolized Protobuf profile. We just pass in empty type names, since
        // they're not used in the final flamegraph.
        let profile = self.to_pprof_proto(("", ""), ("", ""), None);

        // Index locations, functions, and strings.
        let locations: HashMap<u64, proto::Location> =
            profile.location.into_iter().map(|l| (l.id, l)).collect();
        let functions: HashMap<u64, proto::Function> =
            profile.function.into_iter().map(|f| (f.id, f)).collect();
        let strings = profile.string_table;

        // Resolve stacks as function name vectors, and sum sample values per stack. Also reverse
        // the stack, since inferno expects it bottom-up.
        let mut stacks: HashMap<Vec<&str>, i64> = HashMap::new();
        for sample in profile.sample {
            let mut stack = Vec::with_capacity(sample.location_id.len());
            for location in sample.location_id.into_iter().rev() {
                let location = locations.get(&location).expect("missing location");
                for line in location.line.iter().rev() {
                    let function = functions.get(&line.function_id).expect("missing function");
                    let name = strings.get(function.name as usize).expect("missing string");
                    stack.push(name.as_str());
                }
            }
            let value = sample.value.first().expect("missing value");
            *stacks.entry(stack).or_default() += value;
        }

        // Construct stack lines for inferno.
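        // Each line uses inferno's "folded" format, e.g. (illustrative): "main;foo;bar 42",
        // i.e. the semicolon-joined stack followed by a space and the summed sample value.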
        let mut lines = stacks
            .into_iter()
            .map(|(stack, value)| format!("{} {}", stack.join(";"), value))
            .collect::<Vec<_>>();
        lines.sort();

        // Generate the flamegraph SVG.
        let mut bytes = Vec::new();
        let lines = lines.iter().map(|line| line.as_str());
        inferno::flamegraph::from_lines(opts, lines, &mut bytes)?;
        Ok(bytes)
    }
}

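/// Iterator over the stacks in a [`StackProfile`], yielding each stack together with its
/// optional annotation.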
pub struct StackProfileIter<'a> {
    inner: &'a StackProfile,
    idx: usize,
}

impl<'a> Iterator for StackProfileIter<'a> {
    type Item = (&'a WeightedStack, Option<&'a str>);

    fn next(&mut self) -> Option<Self::Item> {
        let (stack, anno) = self.inner.stacks.get(self.idx)?;
        self.idx += 1;
        let anno = anno.map(|idx| self.inner.annotations.get(idx).unwrap().as_str());
        Some((stack, anno))
    }
}

impl StackProfile {
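    /// Pushes a stack onto the profile, interning the annotation (if any) in `annotations`.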
    pub fn push_stack(&mut self, stack: WeightedStack, annotation: Option<&str>) {
        let anno_idx = if let Some(annotation) = annotation {
            Some(
                self.annotations
                    .iter()
                    .position(|anno| annotation == anno.as_str())
                    .unwrap_or_else(|| {
                        self.annotations.push(annotation.to_string());
                        self.annotations.len() - 1
                    }),
            )
        } else {
            None
        };
        self.stacks.push((stack, anno_idx))
    }

    pub fn push_mapping(&mut self, mapping: Mapping) {
        self.mappings.push(mapping);
    }

    pub fn iter(&self) -> StackProfileIter<'_> {
        StackProfileIter {
            inner: self,
            idx: 0,
        }
    }
}

/// Parse a jemalloc profile file, producing a vector of stack traces along with their weights.
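///
/// A minimal usage sketch (the file name is illustrative):
///
/// ```ignore
/// use std::io::BufReader;
///
/// let file = std::fs::File::open("jeprof.heap")?;
/// let profile = parse_jeheap(BufReader::new(file), None)?;
/// ```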
pub fn parse_jeheap<R: BufRead>(
    r: R,
    mappings: Option<&[Mapping]>,
) -> anyhow::Result<StackProfile> {
    let mut cur_stack = None;
    let mut profile = StackProfile::default();
    let mut lines = r.lines();

    let first_line = match lines.next() {
        Some(s) => s?,
        None => bail!("Heap dump file was empty"),
    };
    // The first line of the file should be e.g. "heap_v2/524288", where the trailing
    // number is the inverse probability of a byte being sampled.
    let sampling_rate: f64 = str::parse(first_line.trim_start_matches("heap_v2/"))?;

    for line in &mut lines {
        let line = line?;
        let line = line.trim();

        let words: Vec<_> = line.split_ascii_whitespace().collect();
        if !words.is_empty() && words[0] == "@" {
            if cur_stack.is_some() {
                bail!("Stack without corresponding weight!")
            }
            let mut addrs = words[1..]
                .iter()
                .map(|w| {
                    let raw = w.trim_start_matches("0x");
                    usize::from_str_radix(raw, 16)
                })
                .collect::<Result<Vec<_>, _>>()?;
            addrs.reverse();
            cur_stack = Some(addrs);
        }
        if words.len() > 2 && words[0] == "t*:" {
            if let Some(addrs) = cur_stack.take() {
                // The format here is e.g.:
                // t*: 40274: 2822125696 [0: 0]
                //
                // "t*" means summary across all threads; per-thread dumps may be supported
                // someday, but aren't yet.
                // "40274" is the number of sampled allocations (`n_objs` here).
                // On all released versions of jemalloc, "2822125696" is the total number of bytes
                // in those allocations.
                //
                // To get the predicted number of total bytes from the sample, we need to un-bias
                // it by following the logic in jeprof's `AdjustSamples`:
                // https://github.com/jemalloc/jemalloc/blob/498f47e1ec83431426cdff256c23eceade41b4ef/bin/jeprof.in#L4064-L4074
                //
                // However, this algorithm is wrong: each sample needs to be unbiased _before_ the
                // samples are added together, rather than adding them together first and then
                // unbiasing the average allocation size. But the heap profile format in released
                // versions of jemalloc does not give us access to each individual allocation, so
                // this is the best we can do (and `jeprof` does the same).
                //
                // It is usually close enough to being correct to be useful, but can be very wrong
                // when allocation sizes for the same stack vary widely (e.g., one allocation of
                // 8 MB and 1,000,000 allocations of 8 bytes).
                //
                // In the latest unreleased jemalloc sources from GitHub, the issue is worked
                // around by unbiasing the numbers for each sampled allocation and then fudging
                // them to maintain compatibility with jeprof's logic. So, once those changes are
                // released and we start using them, this will become more accurate.
                //
                // For more details, see this doc: https://github.com/jemalloc/jemalloc/pull/1902
                //
                // And this gitter conversation between me (Brennan Vincent) and David Goldblatt:
                // https://gitter.im/jemalloc/jemalloc?at=5f31b673811d3571b3bb9b6b
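                //
                // A worked example, using the illustrative numbers above and a sampling rate of
                // 524288: the mean sampled allocation is 2822125696 / 40274 ≈ 70073 bytes, so
                // ratio ≈ 70073 / 524288 ≈ 0.134, scale_factor = 1 / (1 - e^(-0.134)) ≈ 7.99,
                // and the unbiased weight is roughly 2822125696 * 7.99 ≈ 2.26e10 bytes.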
                let n_objs: f64 = str::parse(words[1].trim_end_matches(':'))?;
                let bytes_in_sampled_objs: f64 = str::parse(words[2])?;
                let ratio = (bytes_in_sampled_objs / n_objs) / sampling_rate;
                let scale_factor = 1.0 / (1.0 - (-ratio).exp());
                let weight = bytes_in_sampled_objs * scale_factor;
                profile.push_stack(WeightedStack { addrs, weight }, None);
            }
        }
    }
    if cur_stack.is_some() {
        bail!("Stack without corresponding weight!");
    }

    if let Some(mappings) = mappings {
        for mapping in mappings {
            profile.push_mapping(mapping.clone());
        }
    }

    Ok(profile)
}