mz_prof/
lib.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use std::collections::BTreeMap;
17use std::ffi::c_void;
18use std::io::Write;
19use std::sync::atomic::AtomicBool;
20use std::time::{SystemTime, UNIX_EPOCH};
21
22use flate2::Compression;
23use flate2::write::GzEncoder;
24use mz_ore::cast::{CastFrom, TryCastFrom};
25use pprof_util::{StackProfile, WeightedStack};
26use prost::Message;
27
28mod pprof_types;
29pub mod time;
30
31#[cfg(feature = "jemalloc")]
32pub mod jemalloc;
33
/// Serialization helpers for stack profiles; implemented in this file for [`StackProfile`].
pub trait StackProfileExt {
    /// Writes out the `.mzfg` format, which is fully described in flamegraph.js.
    ///
    /// Each `(key, value)` pair in `header_extra` is emitted as an additional `key: value`
    /// header line. When `symbolize` is true, a trailing section mapping each address to its
    /// symbol names is appended.
    ///
    /// # Panics
    ///
    /// The implementation for [`StackProfile`] panics if a header key contains `:` or a
    /// newline, or if a header value contains a newline.
    fn to_mzfg(&self, symbolize: bool, header_extra: &[(&str, &str)]) -> String;
    /// Converts the profile into the pprof format.
    ///
    /// pprof encodes profiles as gzipped protobuf messages of the Profile message type
    /// (see `pprof/profile.proto`).
    ///
    /// `sample_type` and `period_type` are `(type, unit)` pairs. `anno_key` is the label key
    /// under which stack annotations are recorded; the implementation for [`StackProfile`]
    /// falls back to `"annotation"` when it is `None`.
    fn to_pprof(
        &self,
        sample_type: (&str, &str),
        period_type: (&str, &str),
        anno_key: Option<String>,
    ) -> Vec<u8>;
}
48
49impl StackProfileExt for StackProfile {
50    fn to_mzfg(&self, symbolize: bool, header_extra: &[(&str, &str)]) -> String {
51        // All the unwraps in this function are justified by the fact that
52        // String's fmt::Write impl is infallible.
53        use std::fmt::Write;
54        let mut builder = r#"!!! COMMENT !!!: Open with bin/fgviz /path/to/mzfg
55mz_fg_version: 1
56"#
57        .to_owned();
58        for (k, v) in header_extra {
59            assert!(!(k.contains(':') || k.contains('\n') || v.contains('\n')));
60            writeln!(&mut builder, "{k}: {v}").unwrap();
61        }
62        writeln!(&mut builder, "").unwrap();
63
64        for (WeightedStack { addrs, weight }, anno) in &self.stacks {
65            let anno = anno.map(|i| &self.annotations[i]);
66            for &addr in addrs {
67                write!(&mut builder, "{addr:#x};").unwrap();
68            }
69            write!(&mut builder, " {weight}").unwrap();
70            if let Some(anno) = anno {
71                write!(&mut builder, " {anno}").unwrap()
72            }
73            writeln!(&mut builder, "").unwrap();
74        }
75
76        if symbolize {
77            let symbols = crate::symbolize(self);
78            writeln!(&mut builder, "").unwrap();
79
80            for (addr, names) in symbols {
81                if !names.is_empty() {
82                    write!(&mut builder, "{addr:#x} ").unwrap();
83                    for mut name in names {
84                        // The client splits on semicolons, so
85                        // we have to escape them.
86                        name = name.replace('\\', "\\\\");
87                        name = name.replace(';', "\\;");
88                        write!(&mut builder, "{name};").unwrap();
89                    }
90                    writeln!(&mut builder, "").unwrap();
91                }
92            }
93        }
94
95        builder
96    }
97
98    fn to_pprof(
99        &self,
100        sample_type: (&str, &str),
101        period_type: (&str, &str),
102        anno_key: Option<String>,
103    ) -> Vec<u8> {
104        use crate::pprof_types as proto;
105
106        let mut profile = proto::Profile::default();
107        let mut strings = StringTable::new();
108
109        let anno_key = anno_key.unwrap_or_else(|| "annotation".into());
110
111        profile.sample_type = vec![proto::ValueType {
112            r#type: strings.insert(sample_type.0),
113            unit: strings.insert(sample_type.1),
114        }];
115        profile.period_type = Some(proto::ValueType {
116            r#type: strings.insert(period_type.0),
117            unit: strings.insert(period_type.1),
118        });
119
120        profile.time_nanos = SystemTime::now()
121            .duration_since(UNIX_EPOCH)
122            .expect("now is later than UNIX epoch")
123            .as_nanos()
124            .try_into()
125            .expect("the year 2554 is far away");
126
127        for (idx, mapping) in self.mappings.iter().enumerate() {
128            let mapping_id = u64::cast_from(idx + 1);
129            let pathname = mapping.pathname.to_string_lossy();
130            let filename_idx = strings.insert(&pathname);
131
132            let build_id_idx = match &mapping.build_id {
133                Some(build_id) => strings.insert(&build_id.to_string()),
134                None => 0,
135            };
136
137            profile.mapping.push(proto::Mapping {
138                id: mapping_id,
139                memory_start: u64::cast_from(mapping.memory_start),
140                memory_limit: u64::cast_from(mapping.memory_end),
141                file_offset: mapping.file_offset,
142                filename: filename_idx,
143                build_id: build_id_idx,
144                ..Default::default()
145            });
146
147            // This is a is a Polar Signals-specific extension: For correct offline symbolization
148            // they need access to the memory offset of mappings, but the pprof format only has a
149            // field for the file offset. So we instead encode additional information about
150            // mappings in magic comments. There must be exactly one comment for each mapping.
151
152            // Take a shortcut and assume the ELF type is always `ET_DYN`. This is true for shared
153            // libraries and for position-independent executable, so it should always be true for
154            // any mappings we have.
155            // Getting the actual information is annoying. It's in the ELF header (the `e_type`
156            // field), but there is no guarantee that the full ELF header gets mapped, so we might
157            // not be able to find it in memory. We could try to load it from disk instead, but
158            // then we'd have to worry about blocking disk I/O.
159            let elf_type = 3;
160
161            let comment = format!(
162                "executableInfo={:x};{:x};{:x}",
163                elf_type, mapping.file_offset, mapping.memory_offset
164            );
165            profile.comment.push(strings.insert(&comment));
166        }
167
168        let mut location_ids = BTreeMap::new();
169        for (stack, anno) in self.iter() {
170            let mut sample = proto::Sample::default();
171
172            let value = stack.weight.trunc();
173            let value = i64::try_cast_from(value).expect("no exabyte heap sizes");
174            sample.value.push(value);
175
176            for addr in stack.addrs.iter().rev() {
177                // See the comment
178                // [here](https://github.com/rust-lang/backtrace-rs/blob/036d4909e1fb9c08c2bb0f59ac81994e39489b2f/src/symbolize/mod.rs#L123-L147)
179                // for why we need to subtract one. tl;dr addresses
180                // in stack traces are actually the return address of
181                // the called function, which is one past the call
182                // itself.
183                //
184                // Of course, the `call` instruction can be more than one byte, so after subtracting
185                // one, we might point somewhere in the middle of it, rather
186                // than to the beginning of the instruction. That's fine; symbolization
187                // tools don't seem to get confused by this.
188                let addr = u64::cast_from(*addr) - 1;
189
190                let loc_id = *location_ids.entry(addr).or_insert_with(|| {
191                    // pprof_types.proto says the location id may be the address, but Polar Signals
192                    // insists that location ids are sequential, starting with 1.
193                    let id = u64::cast_from(profile.location.len()) + 1;
194                    let mapping_id = profile
195                        .mapping
196                        .iter()
197                        .find(|m| m.memory_start <= addr && m.memory_limit > addr)
198                        .map_or(0, |m| m.id);
199                    profile.location.push(proto::Location {
200                        id,
201                        mapping_id,
202                        address: addr,
203                        ..Default::default()
204                    });
205                    id
206                });
207
208                sample.location_id.push(loc_id);
209
210                if let Some(anno) = anno {
211                    sample.label.push(proto::Label {
212                        key: strings.insert(&anno_key),
213                        str: strings.insert(anno),
214                        ..Default::default()
215                    })
216                }
217            }
218
219            profile.sample.push(sample);
220        }
221
222        profile.string_table = strings.finish();
223
224        let encoded = profile.encode_to_vec();
225
226        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
227        gz.write_all(&encoded).unwrap();
228        gz.finish().unwrap()
229    }
230}
231
/// Helper struct to simplify building a `string_table` for the pprof format.
///
/// Maps every distinct string to the index it will occupy in the finished table. Index 0 is
/// always reserved for the empty string, regardless of how the table is constructed.
struct StringTable(BTreeMap<String, i64>);

impl Default for StringTable {
    /// Same as [`StringTable::new`]. A derived `Default` would start from an empty map and
    /// hand out index 0 to the first inserted string, breaking the empty-string invariant.
    fn default() -> Self {
        Self::new()
    }
}

impl StringTable {
    /// Creates a table that contains only the mandatory empty string at index 0.
    fn new() -> Self {
        // Element 0 must always be the empty string.
        let inner = [("".into(), 0)].into();
        Self(inner)
    }

    /// Returns the index of `s`, adding it at the next free index if it is not present yet.
    fn insert(&mut self, s: &str) -> i64 {
        // Look up by `&str` first so that a hit does not allocate an owned key.
        if let Some(idx) = self.0.get(s) {
            return *idx;
        }
        // Indices are handed out densely, so the next free index is the current size.
        let idx = i64::try_from(self.0.len()).expect("must fit");
        self.0.insert(s.into(), idx);
        idx
    }

    /// Consumes the table and returns its strings ordered by their assigned index.
    fn finish(self) -> Vec<String> {
        let mut vec: Vec<_> = self.0.into_iter().collect();
        // The map iterates in key order; indices are unique, so an unstable sort suffices.
        vec.sort_unstable_by_key(|(_, idx)| *idx);
        vec.into_iter().map(|(s, _)| s).collect()
    }
}
259
/// Process-wide flag recording whether [`symbolize`] has been called; set there and read
/// back by [`ever_symbolized`].
static EVER_SYMBOLIZED: AtomicBool = AtomicBool::new(false);
261
262/// Check whether symbolization has ever been run in this process.
263/// This controls whether we display a warning about increasing RAM usage
264/// due to the backtrace cache on the
265/// profiler page. (Because the RAM hit is one-time, we don't need to warn if it's already happened).
266pub fn ever_symbolized() -> bool {
267    EVER_SYMBOLIZED.load(std::sync::atomic::Ordering::SeqCst)
268}
269
270/// Given some stack traces, generate a map of addresses to their
271/// corresponding symbols.
272///
273/// Each address could correspond to more than one symbol, because
274/// of inlining. (E.g. if 0x1234 comes from "g", which is inlined in "f", the corresponding vec of symbols will be ["f", "g"].)
275pub fn symbolize(profile: &StackProfile) -> BTreeMap<usize, Vec<String>> {
276    EVER_SYMBOLIZED.store(true, std::sync::atomic::Ordering::SeqCst);
277    let mut all_addrs = vec![];
278    for (stack, _annotation) in profile.stacks.iter() {
279        all_addrs.extend(stack.addrs.iter().cloned());
280    }
281    // Sort so addresses from the same images are together,
282    // to avoid thrashing `backtrace::resolve`'s cache of
283    // parsed images.
284    all_addrs.sort_unstable();
285    all_addrs.dedup();
286    all_addrs
287        .into_iter()
288        .map(|addr| {
289            let mut syms = vec![];
290            // No other known way to convert usize to pointer.
291            #[allow(clippy::as_conversions)]
292            let addr_ptr = addr as *mut c_void;
293            backtrace::resolve(addr_ptr, |sym| {
294                let name = sym
295                    .name()
296                    .map(|sn| sn.to_string())
297                    .unwrap_or_else(|| "???".to_string());
298                syms.push(name);
299            });
300            syms.reverse();
301            (addr, syms)
302        })
303        .collect()
304}