mz_repr/
bytes.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::fmt::{self, Display};
11use std::str::FromStr;
12
13use proptest_derive::Arbitrary;
14use serde::{Deserialize, Serialize};
15
16use mz_ore::cast::CastLossy;
17
/// A size in bytes that is parsed and printed with postgres compatible size units,
/// which makes it different from `bytesize::ByteSize`. Instead of MiB or GiB and so on,
/// it uses MB, GB for the sizes with a 1024 multiplier. Valid units are B, kB, MB, GB
/// and TB, each 1024 times the previous one, so 1MB = 1024kB.
///
/// In postgres, each setting has a base unit (e.g. B or kB) and the value can be either
/// an integer or a float. The base unit serves as the default unit if a number is provided
/// without a unit, and it is also the smallest unit to which values can be rounded.
/// For example, with a base unit of kB, 30.1kB will be rounded to 30kB since there is
/// no lower unit, but 30.1MB will be rounded to 30822kB.
/// For [`ByteSize`], the value is an integer and the base unit is bytes (`B`).
#[derive(
    Arbitrary, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
pub struct ByteSize(u64);
33
34impl ByteSize {
35    pub const fn b(size: u64) -> ByteSize {
36        ByteSize(size)
37    }
38
39    pub const fn kb(size: u64) -> ByteSize {
40        ByteSize(size * BytesUnit::Kb.value())
41    }
42
43    pub const fn mb(size: u64) -> ByteSize {
44        ByteSize(size * BytesUnit::Mb.value())
45    }
46
47    pub const fn gb(size: u64) -> ByteSize {
48        ByteSize(size * BytesUnit::Gb.value())
49    }
50
51    pub const fn tb(size: u64) -> ByteSize {
52        ByteSize(size * BytesUnit::Tb.value())
53    }
54
55    pub fn as_bytes(&self) -> u64 {
56        self.0
57    }
58
59    fn format_string(&self) -> String {
60        match self.0 {
61            zero if zero == 0 => "0".to_string(),
62            tb if tb % BytesUnit::Tb.value() == 0 => {
63                format!("{}{}", tb / BytesUnit::Tb.value(), BytesUnit::Tb)
64            }
65            gb if gb % BytesUnit::Gb.value() == 0 => {
66                format!("{}{}", gb / BytesUnit::Gb.value(), BytesUnit::Gb)
67            }
68            mb if mb % BytesUnit::Mb.value() == 0 => {
69                format!("{}{}", mb / BytesUnit::Mb.value(), BytesUnit::Mb)
70            }
71            kb if kb % BytesUnit::Kb.value() == 0 => {
72                format!("{}{}", kb / BytesUnit::Kb.value(), BytesUnit::Kb)
73            }
74            b => format!("{}{}", b, BytesUnit::B),
75        }
76    }
77}
78
79impl Display for ByteSize {
80    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
81        f.pad(&self.format_string())
82    }
83}
84
85impl FromStr for ByteSize {
86    type Err = String;
87
88    // To behave the same as in postgres, this always
89    // rounds down to the next lower unit if possible.
90    // For example 30.9B, will be rounded to 31B, since there's no
91    // lower unit than B. But 30.1kB will be rounded to
92    // 31642B.
93    fn from_str(value: &str) -> Result<Self, Self::Err> {
94        let number: String = value
95            .chars()
96            .take_while(|c| c.is_digit(10) || c == &'.')
97            .collect();
98
99        let suffix: String = value
100            .chars()
101            .skip_while(|c| c.is_whitespace() || c.is_digit(10) || c == &'.')
102            .collect();
103
104        let unit = if suffix.is_empty() {
105            BytesUnit::B
106        } else {
107            suffix
108            .parse::<BytesUnit>()
109            .map_err(|e| format!("couldn't parse {:?} into a known SI unit, {}. Valid units are B, kB, MB, GB, and TB", suffix, e))?
110        };
111
112        let (size, unit) = if let Ok(integer) = number.parse::<u64>() {
113            (integer, unit)
114        } else {
115            let num = number
116                .parse::<f64>()
117                .map_err(|e| format!("couldn't parse {} as a number, {}", number, e))?;
118
119            // checking if number has no fractional part
120            if num.trunc() == num {
121                let size = u64::cast_lossy(num);
122                (size, unit)
123            } else {
124                match unit {
125                    BytesUnit::B => (u64::cast_lossy(num.round()), BytesUnit::B),
126                    BytesUnit::Kb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::B),
127                    BytesUnit::Mb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Kb),
128                    BytesUnit::Gb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Mb),
129                    BytesUnit::Tb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Gb),
130                }
131            }
132        };
133
134        let bytes = size
135            .checked_mul(unit.value())
136            .ok_or_else(|| "bytes value exceeds u64 range".to_string())?;
137        Ok(Self(bytes))
138    }
139}
140
/// Valid units for representing bytes.
///
/// Each unit is 1024 times the previous one, and the textual forms are
/// `B`, `kB`, `MB`, `GB` and `TB`.
#[derive(
    Arbitrary, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
pub enum BytesUnit {
    /// Bytes, written `B`. This is the default unit.
    #[default]
    B,
    /// 1024 bytes, written `kB`.
    Kb,
    /// 1024 kB, written `MB`.
    Mb,
    /// 1024 MB, written `GB`.
    Gb,
    /// 1024 GB, written `TB`.
    Tb,
}
153
154impl BytesUnit {
155    const fn value(&self) -> u64 {
156        match &self {
157            BytesUnit::B => 1,
158            BytesUnit::Kb => 1_024,
159            BytesUnit::Mb => 1_048_576,
160            BytesUnit::Gb => 1_073_741_824,
161            BytesUnit::Tb => 1_099_511_627_776,
162        }
163    }
164}
165
166impl fmt::Display for BytesUnit {
167    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
168        f.write_str(match self {
169            BytesUnit::B => "B",
170            BytesUnit::Kb => "kB",
171            BytesUnit::Mb => "MB",
172            BytesUnit::Gb => "GB",
173            BytesUnit::Tb => "TB",
174        })
175    }
176}
177
178impl FromStr for BytesUnit {
179    type Err = String;
180
181    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
182        match s {
183            "B" => Ok(Self::B),
184            "kB" => Ok(Self::Kb),
185            "MB" => Ok(Self::Mb),
186            "GB" => Ok(Self::Gb),
187            "TB" => Ok(Self::Tb),
188            _ => Err(format!(
189                "invalid BytesUnit: {}. Valid units are B, kB, MB, GB, and TB",
190                s
191            )),
192        }
193    }
194}
195
// Unit and property tests for formatting and parsing `ByteSize`.
#[cfg(test)]
mod tests {
    use crate::bytes::ByteSize;
    use mz_ore::assert_err;
    use proptest::prelude::*;
    use proptest::proptest;

    // Formatting uses the largest unit that divides the value evenly and
    // prints zero without any unit.
    #[mz_ore::test]
    fn test_to_string() {
        fn assert_to_string(expected: &str, b: ByteSize) {
            assert_eq!(expected.to_string(), b.to_string());
        }
        assert_to_string("0", ByteSize::gb(0));
        assert_to_string("1GB", ByteSize::mb(1024));
        assert_to_string("215B", ByteSize::b(215));
        assert_to_string("1kB", ByteSize::kb(1));
        assert_to_string("301kB", ByteSize::kb(301));
        assert_to_string("419MB", ByteSize::mb(419));
        assert_to_string("518GB", ByteSize::gb(518));
        assert_to_string("815TB", ByteSize::tb(815));
        assert_to_string("10kB", ByteSize::b(10240));
        assert_to_string("10MB", ByteSize::kb(10240));
        assert_to_string("10GB", ByteSize::mb(10240));
        assert_to_string("10TB", ByteSize::gb(10240));
        // TB is the largest unit, so larger values stay in TB.
        assert_to_string("10240TB", ByteSize::tb(10240));
    }

    // Parsing accepts numbers with an optional unit, optionally separated by
    // whitespace, and rejects missing numbers and unknown units.
    #[mz_ore::test]
    fn test_parse() {
        // shortcut for writing test cases
        fn parse(s: &str) -> ByteSize {
            s.parse::<ByteSize>().unwrap()
        }

        assert_eq!(parse("0"), ByteSize::b(0));
        assert_eq!(parse("9.9"), ByteSize::b(10));
        assert_eq!(parse("0B"), ByteSize::b(0));
        assert_eq!(parse("0MB"), ByteSize::b(0));
        assert_eq!(parse("500"), ByteSize::b(500));
        assert_eq!(parse("1kB"), ByteSize::kb(1));
        assert_eq!(parse("1.5kB"), ByteSize::b(1536));
        assert_eq!(parse("1 kB"), ByteSize::kb(1));
        assert_eq!(parse("3 MB"), ByteSize::mb(3));
        assert_eq!(parse("6 GB"), ByteSize::gb(6));
        assert_eq!(parse("4GB"), ByteSize::gb(4));
        assert_eq!(parse("88TB"), ByteSize::tb(88));
        assert_eq!(parse("521  TB"), ByteSize::tb(521));

        // parsing errors
        assert_err!("".parse::<ByteSize>());
        assert_err!("a124GB".parse::<ByteSize>());
        assert_err!("1K".parse::<ByteSize>());
        assert_err!("B".parse::<ByteSize>());
        // postgres is strict about matching capitalization
        assert_err!("1gb".parse::<ByteSize>());
        assert_err!("1KB".parse::<ByteSize>());
    }

    // Fractional inputs are rounded to the nearest whole number of the next
    // lower unit (or of bytes when the unit is already B). Each case compares
    // two spellings that must parse to the same size.
    #[mz_ore::test]
    fn test_rounding() {
        // shortcut for writing test cases
        fn parse(s: &str) -> ByteSize {
            s.parse::<ByteSize>().unwrap()
        }

        fn assert_equivalent(v1: &str, v2: &str) {
            assert_eq!(parse(v1), parse(v2))
        }

        assert_equivalent("0", "0");
        assert_equivalent("0 TB", "0");
        assert_equivalent("0kB", "0");
        assert_equivalent("13.89", "14B");
        assert_equivalent("500", "500B");
        assert_equivalent("1073741824", "1GB");
        assert_equivalent("1073741824.0", "1GB");
        assert_equivalent("1073741824.1", "1GB");
        assert_equivalent("1073741824.9", "1073741825B");
        assert_equivalent("2147483648", "2GB");
        assert_equivalent("3221225472", "3GB");
        assert_equivalent("4294967296", "4GB");
        assert_equivalent("4294967295", "4294967295B");
        assert_equivalent("1024.1", "1kB");
        assert_equivalent("1024.9", "1025B");
        assert_equivalent("1024.1MB", "1048678kB");
        assert_equivalent("1024.9MB", "1049498kB");
        assert_equivalent("1.01B", "1B");
        assert_equivalent("1.01kB", "1034B");
        assert_equivalent("1.0kB", "1kB");
        assert_equivalent("10240B", "10kB");
        assert_equivalent("1.5kB", "1536B");
        assert_equivalent("30.1GB", "30822MB");
        assert_equivalent("30.1MB", "30822kB");
        assert_equivalent("30.1TB", "30822GB");
        assert_equivalent("39.9TB", "40858GB");
        assert_equivalent("30.9B", "31B");
    }

    proptest! {
      // Formatting an arbitrary size, parsing it back and formatting again
      // must reproduce the first string.
      #[mz_ore::test]
      fn proptest_bytes_roundtrips_string(og: ByteSize) {
        // Not all [`ByteSize`] values can successfully roundtrip.
        // For example, '30.1 MB' will be rounded off to '30822 kB'.
        // So instead testing roundtrip of string representations to
        // [`ByteSize`] and vice versa.
        let og_string = og.to_string();
        let roundtrip = og_string.parse::<ByteSize>().expect("roundtrip").to_string();
        prop_assert_eq!(og_string, roundtrip);
      }
    }
}