mz_prof/
lib.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use std::collections::BTreeMap;
17use std::ffi::c_void;
18use std::io::Write;
19use std::sync::atomic::AtomicBool;
20use std::time::{SystemTime, UNIX_EPOCH};
21
22use flate2::Compression;
23use flate2::write::GzEncoder;
24use mz_ore::cast::{CastFrom, TryCastFrom};
25use pprof_util::{StackProfile, WeightedStack};
26use prost::Message;
27
28mod pprof_types;
29pub mod time;
30
31#[cfg(feature = "jemalloc")]
32pub mod jemalloc;
33
/// Export formats for a stack profile.
pub trait StackProfileExt {
    /// Renders the profile as text in the `.mzfg` format (the format itself is
    /// fully described in flamegraph.js).
    ///
    /// * `symbolize` - whether a symbol table should be appended to the output.
    /// * `header_extra` - additional `(key, value)` pairs to emit as header
    ///   lines.
    fn to_mzfg(&self, symbolize: bool, header_extra: &[(&str, &str)]) -> String;

    /// Serializes the profile in the pprof format.
    ///
    /// pprof profiles are gzipped protobuf messages of the `Profile` message
    /// type (see `pprof/profile.proto`).
    ///
    /// * `sample_type` - the `(type, unit)` pair describing the sample values.
    /// * `period_type` - the `(type, unit)` pair describing the sampling
    ///   period.
    /// * `anno_key` - the label key under which stack annotations are
    ///   recorded; implementations choose the key to use when this is `None`.
    fn to_pprof(
        &self,
        sample_type: (&str, &str),
        period_type: (&str, &str),
        anno_key: Option<String>,
    ) -> Vec<u8>;
}
48
49impl StackProfileExt for StackProfile {
50    fn to_mzfg(&self, symbolize: bool, header_extra: &[(&str, &str)]) -> String {
51        // All the unwraps in this function are justified by the fact that
52        // String's fmt::Write impl is infallible.
53        use std::fmt::Write;
54        let mut builder = r#"!!! COMMENT !!!: Open with bin/fgviz /path/to/mzfg
55mz_fg_version: 1
56"#
57        .to_owned();
58        for (k, v) in header_extra {
59            assert!(!(k.contains(':') || k.contains('\n') || v.contains('\n')));
60            writeln!(&mut builder, "{k}: {v}").unwrap();
61        }
62        writeln!(&mut builder, "").unwrap();
63
64        for (WeightedStack { addrs, weight }, anno) in &self.stacks {
65            let anno = anno.map(|i| &self.annotations[i]);
66            for &addr in addrs {
67                write!(&mut builder, "{addr:#x};").unwrap();
68            }
69            write!(&mut builder, " {weight}").unwrap();
70            if let Some(anno) = anno {
71                write!(&mut builder, " {anno}").unwrap()
72            }
73            writeln!(&mut builder, "").unwrap();
74        }
75
76        if symbolize {
77            let symbols = crate::symbolize(self);
78            writeln!(&mut builder, "").unwrap();
79
80            for (addr, names) in symbols {
81                if !names.is_empty() {
82                    write!(&mut builder, "{addr:#x} ").unwrap();
83                    for mut name in names {
84                        // The client splits on semicolons, so
85                        // we have to escape them.
86                        name = name.replace('\\', "\\\\");
87                        name = name.replace(';', "\\;");
88                        write!(&mut builder, "{name};").unwrap();
89                    }
90                    writeln!(&mut builder, "").unwrap();
91                }
92            }
93        }
94
95        builder
96    }
97
98    fn to_pprof(
99        &self,
100        sample_type: (&str, &str),
101        period_type: (&str, &str),
102        anno_key: Option<String>,
103    ) -> Vec<u8> {
104        use crate::pprof_types as proto;
105
106        let mut profile = proto::Profile::default();
107        let mut strings = StringTable::new();
108
109        let anno_key = anno_key.unwrap_or_else(|| "annotation".into());
110
111        profile.sample_type = vec![proto::ValueType {
112            r#type: strings.insert(sample_type.0),
113            unit: strings.insert(sample_type.1),
114        }];
115        profile.period_type = Some(proto::ValueType {
116            r#type: strings.insert(period_type.0),
117            unit: strings.insert(period_type.1),
118        });
119
120        profile.time_nanos = SystemTime::now()
121            .duration_since(UNIX_EPOCH)
122            .expect("now is later than UNIX epoch")
123            .as_nanos()
124            .try_into()
125            .expect("the year 2554 is far away");
126
127        for (mapping, mapping_id) in self.mappings.iter().zip(1..) {
128            let pathname = mapping.pathname.to_string_lossy();
129            let filename_idx = strings.insert(&pathname);
130
131            let build_id_idx = match &mapping.build_id {
132                Some(build_id) => strings.insert(&build_id.to_string()),
133                None => 0,
134            };
135
136            profile.mapping.push(proto::Mapping {
137                id: mapping_id,
138                memory_start: u64::cast_from(mapping.memory_start),
139                memory_limit: u64::cast_from(mapping.memory_end),
140                file_offset: mapping.file_offset,
141                filename: filename_idx,
142                build_id: build_id_idx,
143                ..Default::default()
144            });
145
146            // This is a is a Polar Signals-specific extension: For correct offline symbolization
147            // they need access to the memory offset of mappings, but the pprof format only has a
148            // field for the file offset. So we instead encode additional information about
149            // mappings in magic comments. There must be exactly one comment for each mapping.
150
151            // Take a shortcut and assume the ELF type is always `ET_DYN`. This is true for shared
152            // libraries and for position-independent executable, so it should always be true for
153            // any mappings we have.
154            // Getting the actual information is annoying. It's in the ELF header (the `e_type`
155            // field), but there is no guarantee that the full ELF header gets mapped, so we might
156            // not be able to find it in memory. We could try to load it from disk instead, but
157            // then we'd have to worry about blocking disk I/O.
158            let elf_type = 3;
159
160            let comment = format!(
161                "executableInfo={:x};{:x};{:x}",
162                elf_type, mapping.file_offset, mapping.memory_offset
163            );
164            profile.comment.push(strings.insert(&comment));
165        }
166
167        let mut location_ids = BTreeMap::new();
168        for (stack, anno) in self.iter() {
169            let mut sample = proto::Sample::default();
170
171            let value = stack.weight.trunc();
172            let value = i64::try_cast_from(value).expect("no exabyte heap sizes");
173            sample.value.push(value);
174
175            for addr in stack.addrs.iter().rev() {
176                // See the comment
177                // [here](https://github.com/rust-lang/backtrace-rs/blob/036d4909e1fb9c08c2bb0f59ac81994e39489b2f/src/symbolize/mod.rs#L123-L147)
178                // for why we need to subtract one. tl;dr addresses
179                // in stack traces are actually the return address of
180                // the called function, which is one past the call
181                // itself.
182                //
183                // Of course, the `call` instruction can be more than one byte, so after subtracting
184                // one, we might point somewhere in the middle of it, rather
185                // than to the beginning of the instruction. That's fine; symbolization
186                // tools don't seem to get confused by this.
187                let addr = u64::cast_from(*addr) - 1;
188
189                let loc_id = *location_ids.entry(addr).or_insert_with(|| {
190                    // pprof_types.proto says the location id may be the address, but Polar Signals
191                    // insists that location ids are sequential, starting with 1.
192                    let id = u64::cast_from(profile.location.len()) + 1;
193                    let mapping_id = profile
194                        .mapping
195                        .iter()
196                        .find(|m| m.memory_start <= addr && m.memory_limit > addr)
197                        .map_or(0, |m| m.id);
198                    profile.location.push(proto::Location {
199                        id,
200                        mapping_id,
201                        address: addr,
202                        ..Default::default()
203                    });
204                    id
205                });
206
207                sample.location_id.push(loc_id);
208
209                if let Some(anno) = anno {
210                    sample.label.push(proto::Label {
211                        key: strings.insert(&anno_key),
212                        str: strings.insert(anno),
213                        ..Default::default()
214                    })
215                }
216            }
217
218            profile.sample.push(sample);
219        }
220
221        profile.string_table = strings.finish();
222
223        let encoded = profile.encode_to_vec();
224
225        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
226        gz.write_all(&encoded).unwrap();
227        gz.finish().unwrap()
228    }
229}
230
/// Helper struct to simplify building a `string_table` for the pprof format.
///
/// Maps every distinct string to the index it will occupy in the final table.
/// Index 0 is always the empty string; this holds for tables created through
/// either [`StringTable::new`] or [`Default::default`].
struct StringTable(BTreeMap<String, i64>);

impl Default for StringTable {
    /// Equivalent to [`StringTable::new`].
    ///
    /// This is deliberately not derived: a derived impl would produce an empty
    /// map, which hands out index 0 to the first inserted string and thereby
    /// breaks the requirement that index 0 is the empty string.
    fn default() -> Self {
        Self::new()
    }
}

impl StringTable {
    /// Creates a table that contains only the mandatory empty string.
    fn new() -> Self {
        // Element 0 must always be the empty string.
        Self(BTreeMap::from([(String::new(), 0)]))
    }

    /// Returns the index of `s`, adding `s` to the table if it is not yet
    /// present.
    ///
    /// New strings receive the next free index, so indices are handed out in
    /// insertion order. Inserting the same string again returns the index it
    /// was given the first time.
    ///
    /// # Panics
    ///
    /// Panics if the number of entries does not fit into an `i64`.
    fn insert(&mut self, s: &str) -> i64 {
        if let Some(&idx) = self.0.get(s) {
            return idx;
        }
        // Entries are never removed, so the current length is the next free
        // index.
        let idx = i64::try_from(self.0.len()).expect("must fit");
        self.0.insert(s.to_owned(), idx);
        idx
    }

    /// Consumes the table and returns its strings ordered by index, so that
    /// every string sits at the position `insert` reported for it.
    fn finish(self) -> Vec<String> {
        let mut entries: Vec<_> = self.0.into_iter().collect();
        // Indices are unique, so an unstable sort yields a deterministic order.
        entries.sort_unstable_by_key(|(_, idx)| *idx);
        entries.into_iter().map(|(s, _)| s).collect()
    }
}
258
/// Process-wide flag recording that symbolization has been run at least once.
static EVER_SYMBOLIZED: AtomicBool = AtomicBool::new(false);

/// Reports whether symbolization has ever been run in this process.
///
/// The profiler page uses this to decide whether to warn about the RAM usage
/// increase caused by the backtrace cache. That hit is paid only once, so no
/// warning is needed if it has already happened.
pub fn ever_symbolized() -> bool {
    use std::sync::atomic::Ordering;

    EVER_SYMBOLIZED.load(Ordering::SeqCst)
}
268
269/// Given some stack traces, generate a map of addresses to their
270/// corresponding symbols.
271///
272/// Each address could correspond to more than one symbol, because
273/// of inlining. (E.g. if 0x1234 comes from "g", which is inlined in "f", the corresponding vec of symbols will be ["f", "g"].)
274pub fn symbolize(profile: &StackProfile) -> BTreeMap<usize, Vec<String>> {
275    EVER_SYMBOLIZED.store(true, std::sync::atomic::Ordering::SeqCst);
276    let mut all_addrs = vec![];
277    for (stack, _annotation) in profile.stacks.iter() {
278        all_addrs.extend(stack.addrs.iter().cloned());
279    }
280    // Sort so addresses from the same images are together,
281    // to avoid thrashing `backtrace::resolve`'s cache of
282    // parsed images.
283    all_addrs.sort_unstable();
284    all_addrs.dedup();
285    all_addrs
286        .into_iter()
287        .map(|addr| {
288            let mut syms = vec![];
289            // No other known way to convert usize to pointer.
290            #[allow(clippy::as_conversions)]
291            let addr_ptr = addr as *mut c_void;
292            backtrace::resolve(addr_ptr, |sym| {
293                let name = sym
294                    .name()
295                    .map(|sn| sn.to_string())
296                    .unwrap_or_else(|| "???".to_string());
297                syms.push(name);
298            });
299            syms.reverse();
300            (addr, syms)
301        })
302        .collect()
303}