mz_repr/adt/
array.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A multi-dimensional array data type.
11
12use std::cmp::Ordering;
13use std::error::Error;
14use std::{fmt, mem};
15
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_persist_types::columnar::FixedSizeCodec;
19use mz_proto::{RustType, TryFromProtoError};
20use proptest_derive::Arbitrary;
21use serde::{Deserialize, Serialize};
22
23use crate::row::DatumList;
24
25include!(concat!(env!("OUT_DIR"), "/mz_repr.adt.array.rs"));
26
27/// The maximum number of dimensions permitted in an array.
28pub const MAX_ARRAY_DIMENSIONS: u8 = 6;
29
30/// A variable-length multi-dimensional array.
31#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
32pub struct Array<'a> {
33    /// The elements in the array.
34    pub(crate) elements: DatumList<'a>,
35    /// The dimensions of the array.
36    pub(crate) dims: ArrayDimensions<'a>,
37}
38
39impl<'a> Array<'a> {
40    /// Returns the dimensions of the array.
41    pub fn dims(&self) -> ArrayDimensions<'a> {
42        self.dims
43    }
44
45    /// Returns the elements of the array.
46    pub fn elements(&self) -> DatumList<'a> {
47        self.elements
48    }
49}
50
51/// The dimensions of an [`Array`].
52#[derive(Clone, Copy, Eq, PartialEq, Hash)]
53pub struct ArrayDimensions<'a> {
54    pub(crate) data: &'a [u8],
55}
56
57impl ArrayDimensions<'_> {
58    /// Returns the number of dimensions in the array as a [`u8`].
59    pub fn ndims(&self) -> u8 {
60        let ndims = self.data.len() / (mem::size_of::<usize>() * 2);
61        ndims.try_into().expect("ndims is known to fit in a u8")
62    }
63
64    /// Returns the number of the dimensions in the array as a [`usize`].
65    pub fn len(&self) -> usize {
66        self.ndims().into()
67    }
68
69    /// Reports whether the number of dimensions in the array is zero.
70    pub fn is_empty(&self) -> bool {
71        self.len() == 0
72    }
73}
74
75impl Ord for ArrayDimensions<'_> {
76    fn cmp(&self, other: &Self) -> Ordering {
77        self.ndims().cmp(&other.ndims())
78    }
79}
80
81impl PartialOrd for ArrayDimensions<'_> {
82    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
83        Some(self.cmp(other))
84    }
85}
86
87impl fmt::Debug for ArrayDimensions<'_> {
88    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89        f.debug_list().entries(*self).finish()
90    }
91}
92
93impl<'a> IntoIterator for ArrayDimensions<'a> {
94    type Item = ArrayDimension;
95    type IntoIter = ArrayDimensionsIter<'a>;
96
97    fn into_iter(self) -> ArrayDimensionsIter<'a> {
98        ArrayDimensionsIter { data: self.data }
99    }
100}
101
102/// An iterator over the dimensions in an [`ArrayDimensions`].
103#[derive(Debug)]
104pub struct ArrayDimensionsIter<'a> {
105    data: &'a [u8],
106}
107
108impl Iterator for ArrayDimensionsIter<'_> {
109    type Item = ArrayDimension;
110
111    fn next(&mut self) -> Option<ArrayDimension> {
112        if self.data.is_empty() {
113            None
114        } else {
115            let sz = mem::size_of::<usize>();
116            let lower_bound = isize::from_ne_bytes(self.data[..sz].try_into().unwrap());
117            self.data = &self.data[sz..];
118            let length = usize::from_ne_bytes(self.data[..sz].try_into().unwrap());
119            self.data = &self.data[sz..];
120            Some(ArrayDimension {
121                lower_bound,
122                length,
123            })
124        }
125    }
126}
127
128/// The specification of one dimension of an [`Array`].
129#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
130pub struct ArrayDimension {
131    /// The "logical" index at which this dimension begins. This value has no bearing on the
132    /// physical layout of the data, only how users want to use its indices (which may be negative).
133    pub lower_bound: isize,
134    /// The number of elements in this array.
135    pub length: usize,
136}
137
138impl ArrayDimension {
139    /// Presents the "logical indices" of the array, i.e. those that are revealed to the user.
140    ///
141    /// # Panics
142    /// - If the array contain more than [`isize::MAX`] elements (i.e. more than 9EB of data).
143    pub fn dimension_bounds(&self) -> (isize, isize) {
144        (
145            self.lower_bound,
146            self.lower_bound
147                + isize::try_from(self.length).expect("fewer than isize::MAX elements")
148                - 1,
149        )
150    }
151}
152
153/// An error that can occur when constructing an array.
154#[derive(
155    Arbitrary,
156    Clone,
157    Copy,
158    Debug,
159    Eq,
160    PartialEq,
161    Hash,
162    Ord,
163    PartialOrd,
164    Serialize,
165    Deserialize,
166    MzReflect,
167)]
168pub enum InvalidArrayError {
169    /// The number of dimensions in the array exceeds [`MAX_ARRAY_DIMENSIONS]`.
170    TooManyDimensions(usize),
171    /// The number of array elements does not match the cardinality derived from
172    /// its dimensions.
173    WrongCardinality { actual: usize, expected: usize },
174}
175
176impl fmt::Display for InvalidArrayError {
177    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
178        match self {
179            InvalidArrayError::TooManyDimensions(n) => write!(
180                f,
181                "number of array dimensions ({}) exceeds the maximum allowed ({})",
182                n, MAX_ARRAY_DIMENSIONS
183            ),
184            InvalidArrayError::WrongCardinality { actual, expected } => write!(
185                f,
186                "number of array elements ({}) does not match declared cardinality ({})",
187                actual, expected
188            ),
189        }
190    }
191}
192
193impl Error for InvalidArrayError {
194    fn source(&self) -> Option<&(dyn Error + 'static)> {
195        None
196    }
197}
198
199impl RustType<ProtoInvalidArrayError> for InvalidArrayError {
200    fn into_proto(&self) -> ProtoInvalidArrayError {
201        use Kind::*;
202        use proto_invalid_array_error::*;
203        let kind = match self {
204            InvalidArrayError::TooManyDimensions(dims) => TooManyDimensions(dims.into_proto()),
205            InvalidArrayError::WrongCardinality { actual, expected } => {
206                WrongCardinality(ProtoWrongCardinality {
207                    actual: actual.into_proto(),
208                    expected: expected.into_proto(),
209                })
210            }
211        };
212        ProtoInvalidArrayError { kind: Some(kind) }
213    }
214
215    fn from_proto(proto: ProtoInvalidArrayError) -> Result<Self, TryFromProtoError> {
216        use proto_invalid_array_error::Kind::*;
217        match proto.kind {
218            Some(kind) => match kind {
219                TooManyDimensions(dims) => Ok(InvalidArrayError::TooManyDimensions(
220                    usize::from_proto(dims)?,
221                )),
222                WrongCardinality(v) => Ok(InvalidArrayError::WrongCardinality {
223                    actual: usize::from_proto(v.actual)?,
224                    expected: usize::from_proto(v.expected)?,
225                }),
226            },
227            None => Err(TryFromProtoError::missing_field(
228                "`ProtoInvalidArrayError::kind`",
229            )),
230        }
231    }
232}
233
234/// An encoded packed variant of [`ArrayDimension`].
235///
236/// We uphold the variant that [`PackedArrayDimension`] sorts the same as
237/// [`ArrayDimension`].
238#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
239pub struct PackedArrayDimension([u8; Self::SIZE]);
240
241// `as` conversions are okay here because we're doing bit level logic to make
242// sure the sort order of the packed binary is correct. This is implementation
243// is proptest-ed below.
244#[allow(clippy::as_conversions)]
245impl FixedSizeCodec<ArrayDimension> for PackedArrayDimension {
246    const SIZE: usize = 16;
247
248    fn as_bytes(&self) -> &[u8] {
249        &self.0
250    }
251
252    fn from_bytes(slice: &[u8]) -> Result<Self, String> {
253        let buf: [u8; Self::SIZE] = slice.try_into().map_err(|_| {
254            format!(
255                "size for PackedArrayDimension is {} bytes, got {}",
256                Self::SIZE,
257                slice.len()
258            )
259        })?;
260        Ok(PackedArrayDimension(buf))
261    }
262
263    fn from_value(value: ArrayDimension) -> Self {
264        let mut buf = [0; 16];
265
266        let lower_bound = (i64::cast_from(value.lower_bound) as u64) ^ (0x8000_0000_0000_0000u64);
267        buf[..8].copy_from_slice(&lower_bound.to_be_bytes());
268        let length = u64::cast_from(value.length);
269        buf[8..].copy_from_slice(&length.to_be_bytes());
270
271        PackedArrayDimension(buf)
272    }
273
274    fn into_value(self) -> ArrayDimension {
275        let mut lower_bound: [u8; 8] = self.0[..8].try_into().unwrap();
276        lower_bound.copy_from_slice(&self.0[..8]);
277        let lower_bound = u64::from_be_bytes(lower_bound) ^ 0x8000_0000_0000_0000u64;
278
279        let mut length: [u8; 8] = self.0[8..].try_into().unwrap();
280        length.copy_from_slice(&self.0[8..]);
281        let length = u64::from_be_bytes(length);
282
283        ArrayDimension {
284            lower_bound: isize::cast_from(lower_bound as i64),
285            length: usize::cast_from(length),
286        }
287    }
288}
289
290#[cfg(test)]
291mod tests {
292    use mz_ore::assert_ok;
293    use mz_proto::protobuf_roundtrip;
294    use proptest::prelude::*;
295
296    use super::*;
297
298    proptest! {
299        #[mz_ore::test]
300        fn invalid_array_error_protobuf_roundtrip(expect in any::<InvalidArrayError>()) {
301            let actual = protobuf_roundtrip::<_, ProtoInvalidArrayError>(&expect);
302            assert_ok!(actual);
303            assert_eq!(actual.unwrap(), expect);
304        }
305    }
306
307    fn arb_array_dimension() -> impl Strategy<Value = ArrayDimension> {
308        (any::<isize>(), any::<usize>()).prop_map(|(lower, length)| ArrayDimension {
309            lower_bound: lower,
310            length,
311        })
312    }
313
314    #[mz_ore::test]
315    fn proptest_packed_array_dimension_roundtrip() {
316        fn test(og: ArrayDimension) {
317            let packed = PackedArrayDimension::from_value(og);
318            let rnd = packed.into_value();
319            assert_eq!(og, rnd);
320        }
321
322        proptest!(|(dim in arb_array_dimension())| test(dim))
323    }
324
325    #[mz_ore::test]
326    fn proptest_packed_array_dimension_sorts() {
327        fn test(mut og: Vec<ArrayDimension>) {
328            let mut packed: Vec<_> = og
329                .iter()
330                .copied()
331                .map(PackedArrayDimension::from_value)
332                .collect();
333
334            packed.sort();
335            og.sort();
336
337            let rnd: Vec<_> = packed
338                .into_iter()
339                .map(PackedArrayDimension::into_value)
340                .collect();
341            assert_eq!(og, rnd);
342        }
343
344        let strat = proptest::collection::vec(arb_array_dimension(), 0..16);
345        proptest!(|(dim in strat)| test(dim))
346    }
347}