Skip to main content

mz_repr/
bytes.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::fmt::{self, Display};
11use std::str::FromStr;
12
13#[cfg(any(test, feature = "proptest"))]
14use proptest_derive::Arbitrary;
15use serde::{Deserialize, Serialize};
16
17use mz_ore::cast::CastLossy;
18
19/// Struct for postgres compatible size units which is different from
20/// `bytesize::ByteSize`. Instead of MiB or GiB and so on, it uses MB, GB for the sizes
21/// with 1024 multiplier. Valid units are B, kB, MB, GB, TB with multiples of 1024
22/// where 1MB = 1024kB.
23///
24/// In postgres, each setting has a base unit (for eg. B, kB) and the value can either
25/// be integer or float. The base unit serves as the default unit if a number is provided
26/// without a unit and it's also the minimum unit in which values
27/// can be rounded to. For example, with base unit of kB, 30.1kB will be rounded to
28/// 30kB since it can't have a lower unit, but 30.1MB will be rounded to 30822kB.
29/// For [`ByteSize`], the value is an integer and the base unit is bytes (`B`).
30#[derive(
31    Debug,
32    Clone,
33    PartialEq,
34    Eq,
35    Hash,
36    PartialOrd,
37    Ord,
38    Serialize,
39    Deserialize,
40    Default
41)]
42#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
43pub struct ByteSize(u64);
44
45impl ByteSize {
46    pub const fn b(size: u64) -> ByteSize {
47        ByteSize(size)
48    }
49
50    pub const fn kb(size: u64) -> ByteSize {
51        ByteSize(size * BytesUnit::Kb.value())
52    }
53
54    pub const fn mb(size: u64) -> ByteSize {
55        ByteSize(size * BytesUnit::Mb.value())
56    }
57
58    pub const fn gb(size: u64) -> ByteSize {
59        ByteSize(size * BytesUnit::Gb.value())
60    }
61
62    pub const fn tb(size: u64) -> ByteSize {
63        ByteSize(size * BytesUnit::Tb.value())
64    }
65
66    pub fn as_bytes(&self) -> u64 {
67        self.0
68    }
69
70    fn format_string(&self) -> String {
71        match self.0 {
72            0 => "0".to_string(),
73            tb if tb % BytesUnit::Tb.value() == 0 => {
74                format!("{}{}", tb / BytesUnit::Tb.value(), BytesUnit::Tb)
75            }
76            gb if gb % BytesUnit::Gb.value() == 0 => {
77                format!("{}{}", gb / BytesUnit::Gb.value(), BytesUnit::Gb)
78            }
79            mb if mb % BytesUnit::Mb.value() == 0 => {
80                format!("{}{}", mb / BytesUnit::Mb.value(), BytesUnit::Mb)
81            }
82            kb if kb % BytesUnit::Kb.value() == 0 => {
83                format!("{}{}", kb / BytesUnit::Kb.value(), BytesUnit::Kb)
84            }
85            b => format!("{}{}", b, BytesUnit::B),
86        }
87    }
88}
89
90impl Display for ByteSize {
91    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92        f.pad(&self.format_string())
93    }
94}
95
96impl FromStr for ByteSize {
97    type Err = String;
98
99    // To behave the same as in postgres, this always
100    // rounds down to the next lower unit if possible.
101    // For example 30.9B, will be rounded to 31B, since there's no
102    // lower unit than B. But 30.1kB will be rounded to
103    // 31642B.
104    fn from_str(value: &str) -> Result<Self, Self::Err> {
105        let number: String = value
106            .chars()
107            .take_while(|c| c.is_digit(10) || c == &'.')
108            .collect();
109
110        let suffix: String = value
111            .chars()
112            .skip_while(|c| c.is_whitespace() || c.is_digit(10) || c == &'.')
113            .collect();
114
115        let unit = if suffix.is_empty() {
116            BytesUnit::B
117        } else {
118            suffix
119            .parse::<BytesUnit>()
120            .map_err(|e| format!("couldn't parse {:?} into a known SI unit, {}. Valid units are B, kB, MB, GB, and TB", suffix, e))?
121        };
122
123        let (size, unit) = if let Ok(integer) = number.parse::<u64>() {
124            (integer, unit)
125        } else {
126            let num = number
127                .parse::<f64>()
128                .map_err(|e| format!("couldn't parse {} as a number, {}", number, e))?;
129
130            // checking if number has no fractional part
131            if num.trunc() == num {
132                let size = u64::cast_lossy(num);
133                (size, unit)
134            } else {
135                match unit {
136                    BytesUnit::B => (u64::cast_lossy(num.round()), BytesUnit::B),
137                    BytesUnit::Kb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::B),
138                    BytesUnit::Mb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Kb),
139                    BytesUnit::Gb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Mb),
140                    BytesUnit::Tb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Gb),
141                }
142            }
143        };
144
145        let bytes = size
146            .checked_mul(unit.value())
147            .ok_or_else(|| "bytes value exceeds u64 range".to_string())?;
148        Ok(Self(bytes))
149    }
150}
151
152/// Valid units for representing bytes
153#[derive(
154    Debug,
155    Clone,
156    PartialEq,
157    Eq,
158    Hash,
159    PartialOrd,
160    Ord,
161    Serialize,
162    Deserialize,
163    Default
164)]
165#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
166pub enum BytesUnit {
167    #[default]
168    B,
169    Kb,
170    Mb,
171    Gb,
172    Tb,
173}
174
175impl BytesUnit {
176    const fn value(&self) -> u64 {
177        match &self {
178            BytesUnit::B => 1,
179            BytesUnit::Kb => 1_024,
180            BytesUnit::Mb => 1_048_576,
181            BytesUnit::Gb => 1_073_741_824,
182            BytesUnit::Tb => 1_099_511_627_776,
183        }
184    }
185}
186
187impl fmt::Display for BytesUnit {
188    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
189        f.write_str(match self {
190            BytesUnit::B => "B",
191            BytesUnit::Kb => "kB",
192            BytesUnit::Mb => "MB",
193            BytesUnit::Gb => "GB",
194            BytesUnit::Tb => "TB",
195        })
196    }
197}
198
199impl FromStr for BytesUnit {
200    type Err = String;
201
202    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
203        match s {
204            "B" => Ok(Self::B),
205            "kB" => Ok(Self::Kb),
206            "MB" => Ok(Self::Mb),
207            "GB" => Ok(Self::Gb),
208            "TB" => Ok(Self::Tb),
209            _ => Err(format!(
210                "invalid BytesUnit: {}. Valid units are B, kB, MB, GB, and TB",
211                s
212            )),
213        }
214    }
215}
216
#[cfg(test)]
mod tests {
    use crate::bytes::ByteSize;
    use mz_ore::assert_err;
    use proptest::prelude::*;
    use proptest::proptest;

    /// Parses `s` as a [`ByteSize`], panicking when the input is invalid.
    fn parse(s: &str) -> ByteSize {
        s.parse::<ByteSize>().unwrap()
    }

    #[mz_ore::test]
    fn test_to_string() {
        // (expected rendering, value)
        let cases = [
            ("0", ByteSize::gb(0)),
            ("1GB", ByteSize::mb(1024)),
            ("215B", ByteSize::b(215)),
            ("1kB", ByteSize::kb(1)),
            ("301kB", ByteSize::kb(301)),
            ("419MB", ByteSize::mb(419)),
            ("518GB", ByteSize::gb(518)),
            ("815TB", ByteSize::tb(815)),
            ("10kB", ByteSize::b(10240)),
            ("10MB", ByteSize::kb(10240)),
            ("10GB", ByteSize::mb(10240)),
            ("10TB", ByteSize::gb(10240)),
            ("10240TB", ByteSize::tb(10240)),
        ];

        for (expected, size) in cases {
            assert_eq!(expected.to_string(), size.to_string());
        }
    }

    #[mz_ore::test]
    fn test_parse() {
        // (input, expected value)
        let valid = [
            ("0", ByteSize::b(0)),
            ("9.9", ByteSize::b(10)),
            ("0B", ByteSize::b(0)),
            ("0MB", ByteSize::b(0)),
            ("500", ByteSize::b(500)),
            ("1kB", ByteSize::kb(1)),
            ("1.5kB", ByteSize::b(1536)),
            ("1 kB", ByteSize::kb(1)),
            ("3 MB", ByteSize::mb(3)),
            ("6 GB", ByteSize::gb(6)),
            ("4GB", ByteSize::gb(4)),
            ("88TB", ByteSize::tb(88)),
            ("521  TB", ByteSize::tb(521)),
        ];
        for (input, expected) in valid {
            assert_eq!(parse(input), expected);
        }

        // Inputs that must be rejected. The last two are here because postgres
        // is strict about matching capitalization.
        let invalid = ["", "a124GB", "1K", "B", "1gb", "1KB"];
        for input in invalid {
            assert_err!(input.parse::<ByteSize>());
        }
    }

    #[mz_ore::test]
    fn test_rounding() {
        // Each pair of strings must parse to the same value.
        let equivalent = [
            ("0", "0"),
            ("0 TB", "0"),
            ("0kB", "0"),
            ("13.89", "14B"),
            ("500", "500B"),
            ("1073741824", "1GB"),
            ("1073741824.0", "1GB"),
            ("1073741824.1", "1GB"),
            ("1073741824.9", "1073741825B"),
            ("2147483648", "2GB"),
            ("3221225472", "3GB"),
            ("4294967296", "4GB"),
            ("4294967295", "4294967295B"),
            ("1024.1", "1kB"),
            ("1024.9", "1025B"),
            ("1024.1MB", "1048678kB"),
            ("1024.9MB", "1049498kB"),
            ("1.01B", "1B"),
            ("1.01kB", "1034B"),
            ("1.0kB", "1kB"),
            ("10240B", "10kB"),
            ("1.5kB", "1536B"),
            ("30.1GB", "30822MB"),
            ("30.1MB", "30822kB"),
            ("30.1TB", "30822GB"),
            ("39.9TB", "40858GB"),
            ("30.9B", "31B"),
        ];

        for (left, right) in equivalent {
            assert_eq!(parse(left), parse(right));
        }
    }

    proptest! {
      #[mz_ore::test]
      fn proptest_bytes_roundtrips_string(og: ByteSize) {
        // A textual size does not always survive a trip through [`ByteSize`]
        // unchanged (for example, '30.1 MB' comes back as '30822 kB'), so this
        // starts from a [`ByteSize`] and checks that its rendered form is stable
        // across one parse/format cycle.
        let rendered = og.to_string();
        let reparsed = rendered.parse::<ByteSize>().expect("roundtrip");
        let rerendered = reparsed.to_string();
        prop_assert_eq!(rendered, rerendered);
      }
    }
}