1use std::fmt::{self, Display};
11use std::str::FromStr;
12
13#[cfg(any(test, feature = "proptest"))]
14use proptest_derive::Arbitrary;
15use serde::{Deserialize, Serialize};
16
17use mz_ore::cast::CastLossy;
18
19#[derive(
31 Debug,
32 Clone,
33 PartialEq,
34 Eq,
35 Hash,
36 PartialOrd,
37 Ord,
38 Serialize,
39 Deserialize,
40 Default
41)]
42#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
43pub struct ByteSize(u64);
44
45impl ByteSize {
46 pub const fn b(size: u64) -> ByteSize {
47 ByteSize(size)
48 }
49
50 pub const fn kb(size: u64) -> ByteSize {
51 ByteSize(size * BytesUnit::Kb.value())
52 }
53
54 pub const fn mb(size: u64) -> ByteSize {
55 ByteSize(size * BytesUnit::Mb.value())
56 }
57
58 pub const fn gb(size: u64) -> ByteSize {
59 ByteSize(size * BytesUnit::Gb.value())
60 }
61
62 pub const fn tb(size: u64) -> ByteSize {
63 ByteSize(size * BytesUnit::Tb.value())
64 }
65
66 pub fn as_bytes(&self) -> u64 {
67 self.0
68 }
69
70 fn format_string(&self) -> String {
71 match self.0 {
72 0 => "0".to_string(),
73 tb if tb % BytesUnit::Tb.value() == 0 => {
74 format!("{}{}", tb / BytesUnit::Tb.value(), BytesUnit::Tb)
75 }
76 gb if gb % BytesUnit::Gb.value() == 0 => {
77 format!("{}{}", gb / BytesUnit::Gb.value(), BytesUnit::Gb)
78 }
79 mb if mb % BytesUnit::Mb.value() == 0 => {
80 format!("{}{}", mb / BytesUnit::Mb.value(), BytesUnit::Mb)
81 }
82 kb if kb % BytesUnit::Kb.value() == 0 => {
83 format!("{}{}", kb / BytesUnit::Kb.value(), BytesUnit::Kb)
84 }
85 b => format!("{}{}", b, BytesUnit::B),
86 }
87 }
88}
89
90impl Display for ByteSize {
91 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92 f.pad(&self.format_string())
93 }
94}
95
96impl FromStr for ByteSize {
97 type Err = String;
98
99 fn from_str(value: &str) -> Result<Self, Self::Err> {
105 let number: String = value
106 .chars()
107 .take_while(|c| c.is_digit(10) || c == &'.')
108 .collect();
109
110 let suffix: String = value
111 .chars()
112 .skip_while(|c| c.is_whitespace() || c.is_digit(10) || c == &'.')
113 .collect();
114
115 let unit = if suffix.is_empty() {
116 BytesUnit::B
117 } else {
118 suffix
119 .parse::<BytesUnit>()
120 .map_err(|e| format!("couldn't parse {:?} into a known SI unit, {}. Valid units are B, kB, MB, GB, and TB", suffix, e))?
121 };
122
123 let (size, unit) = if let Ok(integer) = number.parse::<u64>() {
124 (integer, unit)
125 } else {
126 let num = number
127 .parse::<f64>()
128 .map_err(|e| format!("couldn't parse {} as a number, {}", number, e))?;
129
130 if num.trunc() == num {
132 let size = u64::cast_lossy(num);
133 (size, unit)
134 } else {
135 match unit {
136 BytesUnit::B => (u64::cast_lossy(num.round()), BytesUnit::B),
137 BytesUnit::Kb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::B),
138 BytesUnit::Mb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Kb),
139 BytesUnit::Gb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Mb),
140 BytesUnit::Tb => (u64::cast_lossy((num * 1024.0).round()), BytesUnit::Gb),
141 }
142 }
143 };
144
145 let bytes = size
146 .checked_mul(unit.value())
147 .ok_or_else(|| "bytes value exceeds u64 range".to_string())?;
148 Ok(Self(bytes))
149 }
150}
151
152#[derive(
154 Debug,
155 Clone,
156 PartialEq,
157 Eq,
158 Hash,
159 PartialOrd,
160 Ord,
161 Serialize,
162 Deserialize,
163 Default
164)]
165#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
166pub enum BytesUnit {
167 #[default]
168 B,
169 Kb,
170 Mb,
171 Gb,
172 Tb,
173}
174
175impl BytesUnit {
176 const fn value(&self) -> u64 {
177 match &self {
178 BytesUnit::B => 1,
179 BytesUnit::Kb => 1_024,
180 BytesUnit::Mb => 1_048_576,
181 BytesUnit::Gb => 1_073_741_824,
182 BytesUnit::Tb => 1_099_511_627_776,
183 }
184 }
185}
186
187impl fmt::Display for BytesUnit {
188 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
189 f.write_str(match self {
190 BytesUnit::B => "B",
191 BytesUnit::Kb => "kB",
192 BytesUnit::Mb => "MB",
193 BytesUnit::Gb => "GB",
194 BytesUnit::Tb => "TB",
195 })
196 }
197}
198
199impl FromStr for BytesUnit {
200 type Err = String;
201
202 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
203 match s {
204 "B" => Ok(Self::B),
205 "kB" => Ok(Self::Kb),
206 "MB" => Ok(Self::Mb),
207 "GB" => Ok(Self::Gb),
208 "TB" => Ok(Self::Tb),
209 _ => Err(format!(
210 "invalid BytesUnit: {}. Valid units are B, kB, MB, GB, and TB",
211 s
212 )),
213 }
214 }
215}
216
#[cfg(test)]
mod tests {
    use crate::bytes::ByteSize;
    use mz_ore::assert_err;
    use proptest::prelude::*;
    use proptest::proptest;

    /// Parses `text` as a `ByteSize`, panicking when it is not valid.
    fn must_parse(text: &str) -> ByteSize {
        text.parse::<ByteSize>().unwrap()
    }

    #[mz_ore::test]
    fn test_to_string() {
        // (expected rendering, value)
        let cases = [
            ("0", ByteSize::gb(0)),
            ("1GB", ByteSize::mb(1024)),
            ("215B", ByteSize::b(215)),
            ("1kB", ByteSize::kb(1)),
            ("301kB", ByteSize::kb(301)),
            ("419MB", ByteSize::mb(419)),
            ("518GB", ByteSize::gb(518)),
            ("815TB", ByteSize::tb(815)),
            ("10kB", ByteSize::b(10240)),
            ("10MB", ByteSize::kb(10240)),
            ("10GB", ByteSize::mb(10240)),
            ("10TB", ByteSize::gb(10240)),
            ("10240TB", ByteSize::tb(10240)),
        ];

        for (expected, size) in cases {
            assert_eq!(expected.to_string(), size.to_string());
        }
    }

    #[mz_ore::test]
    fn test_parse() {
        // (input, expected value)
        let valid = [
            ("0", ByteSize::b(0)),
            ("9.9", ByteSize::b(10)),
            ("0B", ByteSize::b(0)),
            ("0MB", ByteSize::b(0)),
            ("500", ByteSize::b(500)),
            ("1kB", ByteSize::kb(1)),
            ("1.5kB", ByteSize::b(1536)),
            ("1 kB", ByteSize::kb(1)),
            ("3 MB", ByteSize::mb(3)),
            ("6 GB", ByteSize::gb(6)),
            ("4GB", ByteSize::gb(4)),
            ("88TB", ByteSize::tb(88)),
            ("521 TB", ByteSize::tb(521)),
        ];
        for (input, expected) in valid {
            assert_eq!(must_parse(input), expected);
        }

        // Inputs that must be rejected: empty, missing number, bad unit case.
        let invalid = ["", "a124GB", "1K", "B", "1gb", "1KB"];
        for input in invalid {
            assert_err!(input.parse::<ByteSize>());
        }
    }

    #[mz_ore::test]
    fn test_rounding() {
        // Each pair must parse to the same value.
        let equivalent = [
            ("0", "0"),
            ("0 TB", "0"),
            ("0kB", "0"),
            ("13.89", "14B"),
            ("500", "500B"),
            ("1073741824", "1GB"),
            ("1073741824.0", "1GB"),
            ("1073741824.1", "1GB"),
            ("1073741824.9", "1073741825B"),
            ("2147483648", "2GB"),
            ("3221225472", "3GB"),
            ("4294967296", "4GB"),
            ("4294967295", "4294967295B"),
            ("1024.1", "1kB"),
            ("1024.9", "1025B"),
            ("1024.1MB", "1048678kB"),
            ("1024.9MB", "1049498kB"),
            ("1.01B", "1B"),
            ("1.01kB", "1034B"),
            ("1.0kB", "1kB"),
            ("10240B", "10kB"),
            ("1.5kB", "1536B"),
            ("30.1GB", "30822MB"),
            ("30.1MB", "30822kB"),
            ("30.1TB", "30822GB"),
            ("39.9TB", "40858GB"),
            ("30.9B", "31B"),
        ];

        for (left, right) in equivalent {
            assert_eq!(must_parse(left), must_parse(right));
        }
    }

    proptest! {
        #[mz_ore::test]
        fn proptest_bytes_roundtrips_string(size: ByteSize) {
            // Rendering, parsing and rendering again must give the same text.
            let rendered = size.to_string();
            let reparsed = rendered.parse::<ByteSize>().expect("roundtrip");
            let rerendered = reparsed.to_string();
            prop_assert_eq!(rendered, rerendered);
        }
    }
}