mod cast;

use std::collections::BTreeMap;
use std::fmt;
use std::io::BufRead;
use std::io::Write;
use std::path::PathBuf;
use std::time::{Instant, SystemTime, UNIX_EPOCH};

use anyhow::bail;
use flate2::write::GzEncoder;
use flate2::Compression;
use prost::Message;

pub use cast::CastFrom;
pub use cast::TryCastFrom;

/// Start times of the profiler.
#[derive(Copy, Clone, Debug)]
pub enum ProfStartTime {
    Instant(Instant),
    TimeImmemorial,
}

/// Helper struct to simplify building a `string_table` for the pprof format.
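///
/// A minimal usage sketch (illustrative only; the type is private to this crate):
///
/// ```ignore
/// let mut strings = StringTable::new();
/// assert_eq!(strings.insert(""), 0);      // the empty string always occupies index 0
/// assert_eq!(strings.insert("bytes"), 1); // new strings get the next free index
/// assert_eq!(strings.insert("bytes"), 1); // repeated strings reuse their index
/// assert_eq!(strings.finish(), vec!["".to_string(), "bytes".to_string()]);
/// ```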
#[derive(Default)]
struct StringTable(BTreeMap<String, i64>);

impl StringTable {
    fn new() -> Self {
        // Element 0 must always be the empty string.
        let inner = [("".into(), 0)].into();
        Self(inner)
    }

    fn insert(&mut self, s: &str) -> i64 {
        if let Some(idx) = self.0.get(s) {
            *idx
        } else {
            let idx = i64::try_from(self.0.len()).expect("must fit");
            self.0.insert(s.into(), idx);
            idx
        }
    }

    fn finish(self) -> Vec<String> {
        let mut vec: Vec<_> = self.0.into_iter().collect();
        vec.sort_by_key(|(_, idx)| *idx);
        vec.into_iter().map(|(s, _)| s).collect()
    }
}

#[path = "perftools.profiles.rs"]
mod pprof_types;

/// A single sample in the profile. The stack is a list of addresses.
#[derive(Clone, Debug)]
pub struct WeightedStack {
    pub addrs: Vec<usize>,
    pub weight: f64,
}

/// A mapping of a single shared object.
#[derive(Clone, Debug)]
pub struct Mapping {
    pub memory_start: usize,
    pub memory_end: usize,
    pub memory_offset: usize,
    pub file_offset: u64,
    pub pathname: PathBuf,
    pub build_id: Option<BuildId>,
}

/// Build ID of a shared object.
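///
/// The `Display` impl below renders the bytes as a lowercase hex string; for
/// example, `BuildId(vec![0xab, 0x01])` displays as `ab01`.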
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct BuildId(pub Vec<u8>);

impl fmt::Display for BuildId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for byte in &self.0 {
            write!(f, "{byte:02x}")?;
        }
        Ok(())
    }
}

/// A minimal representation of a profile that can be parsed from the jemalloc heap profile.
#[derive(Default)]
pub struct StackProfile {
    pub annotations: Vec<String>,
    // The second element is the index in `annotations`, if one exists.
    pub stacks: Vec<(WeightedStack, Option<usize>)>,
    pub mappings: Vec<Mapping>,
}

impl StackProfile {
    /// Converts the profile into the pprof format.
    ///
    /// pprof encodes profiles as gzipped protobuf messages of the Profile message type
    /// (see `pprof/profile.proto`).
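    ///
    /// A minimal usage sketch (illustrative only; the sample and period types shown here
    /// are assumptions, not the only valid values):
    ///
    /// ```ignore
    /// let profile = StackProfile::default();
    /// // Produces gzipped, protobuf-encoded bytes ready to be written to a `.pb.gz` file.
    /// let bytes = profile.to_pprof(("inuse_space", "bytes"), ("space", "bytes"), None);
    /// ```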
    pub fn to_pprof(
        &self,
        sample_type: (&str, &str),
        period_type: (&str, &str),
        anno_key: Option<String>,
    ) -> Vec<u8> {
        use crate::pprof_types as proto;

        let mut profile = proto::Profile::default();
        let mut strings = StringTable::new();

        let anno_key = anno_key.unwrap_or_else(|| "annotation".into());

        profile.sample_type = vec![proto::ValueType {
            r#type: strings.insert(sample_type.0),
            unit: strings.insert(sample_type.1),
        }];
        profile.period_type = Some(proto::ValueType {
            r#type: strings.insert(period_type.0),
            unit: strings.insert(period_type.1),
        });

        profile.time_nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("now is later than UNIX epoch")
            .as_nanos()
            .try_into()
            .expect("the year 2554 is far away");

        for (mapping, mapping_id) in self.mappings.iter().zip(1..) {
            let pathname = mapping.pathname.to_string_lossy();
            let filename_idx = strings.insert(&pathname);

            let build_id_idx = match &mapping.build_id {
                Some(build_id) => strings.insert(&build_id.to_string()),
                None => 0,
            };

            profile.mapping.push(proto::Mapping {
                id: mapping_id,
                memory_start: u64::cast_from(mapping.memory_start),
                memory_limit: u64::cast_from(mapping.memory_end),
                file_offset: mapping.file_offset,
                filename: filename_idx,
                build_id: build_id_idx,
                ..Default::default()
            });

            // This is a Polar Signals-specific extension: For correct offline symbolization
            // they need access to the memory offset of mappings, but the pprof format only has a
            // field for the file offset. So we instead encode additional information about
            // mappings in magic comments. There must be exactly one comment for each mapping.

            // Take a shortcut and assume the ELF type is always `ET_DYN`. This is true for shared
            // libraries and position-independent executables, so it should always be true for
            // any mappings we have.
            // Getting the actual information is annoying. It's in the ELF header (the `e_type`
            // field), but there is no guarantee that the full ELF header gets mapped, so we might
            // not be able to find it in memory. We could try to load it from disk instead, but
            // then we'd have to worry about blocking disk I/O.
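            // For example, a mapping with `file_offset = 0x1000` and `memory_offset = 0x2000`
            // produces the comment "executableInfo=3;1000;2000" below.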
            let elf_type = 3;

            let comment = format!(
                "executableInfo={:x};{:x};{:x}",
                elf_type, mapping.file_offset, mapping.memory_offset
            );
            profile.comment.push(strings.insert(&comment));
        }

        let mut location_ids = BTreeMap::new();
        for (stack, anno) in self.iter() {
            let mut sample = proto::Sample::default();

            let value = stack.weight.trunc();
            let value = i64::try_cast_from(value).expect("no exabyte heap sizes");
            sample.value.push(value);

            for addr in stack.addrs.iter().rev() {
                // See the comment
                // [here](https://github.com/rust-lang/backtrace-rs/blob/036d4909e1fb9c08c2bb0f59ac81994e39489b2f/src/symbolize/mod.rs#L123-L147)
                // for why we need to subtract one. tl;dr: addresses
                // in stack traces are actually the return address of
                // the called function, which is one past the call
                // itself.
                //
                // Of course, the `call` instruction can be more than one byte, so after subtracting
                // one, we might point somewhere in the middle of it, rather
                // than to the beginning of the instruction. That's fine; symbolization
                // tools don't seem to get confused by this.
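                // (For example, a frame whose return address is 0x4010 is recorded at
                // address 0x400f, which points into the preceding `call` instruction.)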
                let addr = u64::cast_from(*addr) - 1;

                let loc_id = *location_ids.entry(addr).or_insert_with(|| {
                    // pprof_types.proto says the location id may be the address, but Polar Signals
                    // insists that location ids are sequential, starting with 1.
                    let id = u64::cast_from(profile.location.len()) + 1;
                    let mapping_id = profile
                        .mapping
                        .iter()
                        .find(|m| m.memory_start <= addr && m.memory_limit > addr)
                        .map_or(0, |m| m.id);
                    profile.location.push(proto::Location {
                        id,
                        mapping_id,
                        address: addr,
                        ..Default::default()
                    });
                    id
                });

                sample.location_id.push(loc_id);

                if let Some(anno) = anno {
                    sample.label.push(proto::Label {
                        key: strings.insert(&anno_key),
                        str: strings.insert(anno),
                        ..Default::default()
                    })
                }
            }

            profile.sample.push(sample);
        }

        profile.string_table = strings.finish();

        let encoded = profile.encode_to_vec();

        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
        gz.write_all(&encoded).unwrap();
        gz.finish().unwrap()
    }
}

pub struct StackProfileIter<'a> {
    inner: &'a StackProfile,
    idx: usize,
}

impl<'a> Iterator for StackProfileIter<'a> {
    type Item = (&'a WeightedStack, Option<&'a str>);

    fn next(&mut self) -> Option<Self::Item> {
        let (stack, anno) = self.inner.stacks.get(self.idx)?;
        self.idx += 1;
        let anno = anno.map(|idx| self.inner.annotations.get(idx).unwrap().as_str());
        Some((stack, anno))
    }
}

impl StackProfile {
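    /// Pushes a stack along with an optional annotation.
    ///
    /// Annotations are deduplicated: the stack stores an index into `annotations`, reusing an
    /// existing entry when the same annotation string has been pushed before.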
    pub fn push_stack(&mut self, stack: WeightedStack, annotation: Option<&str>) {
        let anno_idx = if let Some(annotation) = annotation {
            Some(
                self.annotations
                    .iter()
                    .position(|anno| annotation == anno.as_str())
                    .unwrap_or_else(|| {
                        self.annotations.push(annotation.to_string());
                        self.annotations.len() - 1
                    }),
            )
        } else {
            None
        };
        self.stacks.push((stack, anno_idx))
    }

    pub fn push_mapping(&mut self, mapping: Mapping) {
        self.mappings.push(mapping);
    }

    pub fn iter(&self) -> StackProfileIter<'_> {
        StackProfileIter {
            inner: self,
            idx: 0,
        }
    }
}

/// Parse a jemalloc profile file, producing a vector of stack traces along with their weights.
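///
/// A minimal sketch of the expected input (the addresses and counts below are made up):
///
/// ```ignore
/// let dump = "heap_v2/524288\n\
///             @ 0x55d4e1e4 0x55d4e1f0\n\
///             t*: 77: 1232128 [0: 0]\n";
/// let profile = parse_jeheap(dump.as_bytes(), None).unwrap();
/// assert_eq!(profile.stacks.len(), 1);
/// ```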
pub fn parse_jeheap<R: BufRead>(
    r: R,
    mappings: Option<&[Mapping]>,
) -> anyhow::Result<StackProfile> {
    let mut cur_stack = None;
    let mut profile = StackProfile::default();
    let mut lines = r.lines();

    let first_line = match lines.next() {
        Some(s) => s?,
        None => bail!("Heap dump file was empty"),
    };
    // The first line of the file should be e.g. "heap_v2/524288", where the trailing
    // number is the inverse probability of a byte being sampled.
    let sampling_rate: f64 = str::parse(first_line.trim_start_matches("heap_v2/"))?;

    for line in &mut lines {
        let line = line?;
        let line = line.trim();

        let words: Vec<_> = line.split_ascii_whitespace().collect();
        if !words.is_empty() && words[0] == "@" {
            if cur_stack.is_some() {
                bail!("Stack without corresponding weight!")
            }
            let mut addrs = words[1..]
                .iter()
                .map(|w| {
                    let raw = w.trim_start_matches("0x");
                    usize::from_str_radix(raw, 16)
                })
                .collect::<Result<Vec<_>, _>>()?;
            addrs.reverse();
            cur_stack = Some(addrs);
        }
        if words.len() > 2 && words[0] == "t*:" {
            if let Some(addrs) = cur_stack.take() {
                // The format here is e.g.:
                // t*: 40274: 2822125696 [0: 0]
                //
                // "t*" means summary across all threads; someday we will support per-thread dumps but don't yet.
                // "40274" is the number of sampled allocations (`n_objs` here).
                // On all released versions of jemalloc, "2822125696" is the total number of bytes in those allocations.
                //
                // To get the predicted number of total bytes from the sample, we need to un-bias it by following the logic in
                // jeprof's `AdjustSamples`: https://github.com/jemalloc/jemalloc/blob/498f47e1ec83431426cdff256c23eceade41b4ef/bin/jeprof.in#L4064-L4074
                //
                // However, this algorithm is actually wrong: you need to unbias each sample _before_ you add them together, rather
                // than adding them together first and then unbiasing the average allocation size. But the heap profile format in released versions of jemalloc
                // does not give us access to each individual allocation, so this is the best we can do (and `jeprof` does the same).
                //
                // It usually seems to be close enough to correct to be useful, but it can be very wrong if, for the same stack, there is
                // very large variance in the number of bytes allocated (e.g., if there is one allocation of 8 MB and 1,000,000 of 8 bytes).
                //
                // In the latest unreleased jemalloc sources from GitHub, the issue is worked around by unbiasing the numbers for each sampled allocation,
                // and then fudging them to maintain compatibility with jeprof's logic. So, once those changes are released and we start using them,
                // this will become more accurate.
                //
                // For more details, see this doc: https://github.com/jemalloc/jemalloc/pull/1902
                //
                // And this gitter conversation between me (Brennan Vincent) and David Goldblatt: https://gitter.im/jemalloc/jemalloc?at=5f31b673811d3571b3bb9b6b
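                //
                // Worked example with illustrative numbers: if sampling_rate = 524288, n_objs = 1000, and
                // bytes_in_sampled_objs = 52_428_800 (an average of 52_428.8 bytes per sampled allocation),
                // then ratio = 0.1, scale_factor = 1 / (1 - e^-0.1) ≈ 10.5, and weight ≈ 551 MB.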
                let n_objs: f64 = str::parse(words[1].trim_end_matches(':'))?;
                let bytes_in_sampled_objs: f64 = str::parse(words[2])?;
                let ratio = (bytes_in_sampled_objs / n_objs) / sampling_rate;
                let scale_factor = 1.0 / (1.0 - (-ratio).exp());
                let weight = bytes_in_sampled_objs * scale_factor;
                profile.push_stack(WeightedStack { addrs, weight }, None);
            }
        }
    }
    if cur_stack.is_some() {
        bail!("Stack without corresponding weight!");
    }

    if let Some(mappings) = mappings {
        for mapping in mappings {
            profile.push_mapping(mapping.clone());
        }
    }

    Ok(profile)
}