hdrhistogram/serialization/
v2_deflate_serializer.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
use super::v2_serializer::{V2SerializeError, V2Serializer};
use super::{Serializer, V2_COMPRESSED_COOKIE};
use crate::core::counter::Counter;
use crate::Histogram;
use byteorder::{BigEndian, WriteBytesExt};
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::{self, Write};
use std::{self, error, fmt};

/// Errors that occur during serialization.
#[derive(Debug)]
pub enum V2DeflateSerializeError {
    /// The underlying serialization failed
    InternalSerializationError(V2SerializeError),
    /// An i/o operation failed.
    IoError(io::Error),
}

impl std::convert::From<std::io::Error> for V2DeflateSerializeError {
    fn from(e: std::io::Error) -> Self {
        V2DeflateSerializeError::IoError(e)
    }
}

impl fmt::Display for V2DeflateSerializeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            V2DeflateSerializeError::InternalSerializationError(e) => {
                write!(f, "The underlying serialization failed: {}", e)
            }
            V2DeflateSerializeError::IoError(e) => {
                write!(f, "The underlying serialization failed: {}", e)
            }
        }
    }
}

impl error::Error for V2DeflateSerializeError {
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match self {
            V2DeflateSerializeError::InternalSerializationError(e) => Some(e),
            V2DeflateSerializeError::IoError(e) => Some(e),
        }
    }
}

/// Serializer for the V2 + DEFLATE binary format.
///
/// It's called "deflate" to stay consistent with the naming used in the Java implementation, but
/// it actually uses zlib's wrapper format around plain DEFLATE.
pub struct V2DeflateSerializer {
    uncompressed_buf: Vec<u8>,
    compressed_buf: Vec<u8>,
    v2_serializer: V2Serializer,
}

impl Default for V2DeflateSerializer {
    fn default() -> Self {
        Self::new()
    }
}

impl V2DeflateSerializer {
    /// Create a new serializer.
    pub fn new() -> V2DeflateSerializer {
        V2DeflateSerializer {
            uncompressed_buf: Vec::new(),
            compressed_buf: Vec::new(),
            v2_serializer: V2Serializer::new(),
        }
    }
}

impl Serializer for V2DeflateSerializer {
    type SerializeError = V2DeflateSerializeError;

    fn serialize<T: Counter, W: Write>(
        &mut self,
        h: &Histogram<T>,
        writer: &mut W,
    ) -> Result<usize, V2DeflateSerializeError> {
        // TODO benchmark serializing in chunks rather than all at once: each uncompressed v2 chunk
        // could be compressed and written to the compressed buf, possibly using an approach like
        // that of https://github.com/HdrHistogram/HdrHistogram_rust/issues/32#issuecomment-287583055.
        // This would reduce the overall buffer size needed for plain v2 serialization, and be
        // more cache friendly.

        self.uncompressed_buf.clear();
        self.compressed_buf.clear();
        // TODO serialize directly into uncompressed_buf without the buffering inside v2_serializer
        let uncompressed_len = self
            .v2_serializer
            .serialize(h, &mut self.uncompressed_buf)
            .map_err(V2DeflateSerializeError::InternalSerializationError)?;

        debug_assert_eq!(self.uncompressed_buf.len(), uncompressed_len);
        // On randomized test histograms we get about 10% compression, but of course random data
        // doesn't compress well. Real-world data may compress better, so let's assume a more
        // optimistic 50% compression as a baseline to reserve. If we're overly optimistic that's
        // still only one more allocation the first time it's needed.
        self.compressed_buf.reserve(self.uncompressed_buf.len() / 2);

        self.compressed_buf
            .write_u32::<BigEndian>(V2_COMPRESSED_COOKIE)?;
        // placeholder for length
        self.compressed_buf.write_u32::<BigEndian>(0)?;

        // TODO pluggable compressors? configurable compression levels?
        // TODO benchmark https://github.com/sile/libflate
        // TODO if uncompressed_len is near the limit of 16-bit usize, and compression grows the
        // data instead of shrinking it (which we cannot really predict), writing to compressed_buf
        // could panic as Vec overflows its internal `usize`.

        {
            // TODO reuse deflate buf, or switch to lower-level flate2::Compress
            let mut compressor = ZlibEncoder::new(&mut self.compressed_buf, Compression::default());
            compressor.write_all(&self.uncompressed_buf[0..uncompressed_len])?;
            let _ = compressor.finish()?;
        }

        // fill in length placeholder. Won't underflow since length is always at least 8, and won't
        // overflow u32 as the largest array is about 6 million entries, so about 54MiB encoded (if
        // counter is u64).
        let total_compressed_len = self.compressed_buf.len();
        (&mut self.compressed_buf[4..8])
            .write_u32::<BigEndian>((total_compressed_len as u32) - 8)?;

        writer.write_all(&self.compressed_buf)?;

        Ok(total_compressed_len)
    }
}