Skip to main content

mz_repr/adt/
array.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A multi-dimensional array data type.
11
12use std::cmp::Ordering;
13use std::error::Error;
14use std::{fmt, mem};
15
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_persist_types::columnar::FixedSizeCodec;
19use mz_proto::{RustType, TryFromProtoError};
20use proptest_derive::Arbitrary;
21use serde::{Deserialize, Serialize};
22
23use crate::Datum;
24use crate::row::DatumList;
25use crate::scalar::SqlScalarType;
26
27include!(concat!(env!("OUT_DIR"), "/mz_repr.adt.array.rs"));
28
29/// The maximum number of dimensions permitted in an array.
30pub const MAX_ARRAY_DIMENSIONS: u8 = 6;
31
32/// A variable-length multi-dimensional array.
33///
34/// The type parameter `T` represents the element type of the array. It is a
35/// phantom parameter propagated through the inner [`DatumList`] — the actual
36/// elements are stored as serialized bytes and `T` is not enforced at
37/// runtime. It is up to the caller to ensure `T` matches the actual element
38/// type. The default `T = Datum<'a>` means existing code that writes
39/// `Array<'a>` continues to work unchanged.
40#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
41pub struct Array<'a, T = Datum<'a>> {
42    /// The elements in the array.
43    pub(crate) elements: DatumList<'a, T>,
44    /// The dimensions of the array.
45    pub(crate) dims: ArrayDimensions<'a>,
46}
47
48impl<'a, T> Array<'a, T> {
49    /// Returns the dimensions of the array.
50    pub fn dims(&self) -> ArrayDimensions<'a> {
51        self.dims
52    }
53
54    /// Returns the elements of the array.
55    pub fn elements(&self) -> DatumList<'a, T> {
56        self.elements
57    }
58
59    /// Returns true if this array's dimensions are valid for the Int2Vector type.
60    /// Int2Vector is 1-D; empty arrays use 0 dimensions (PostgreSQL convention).
61    pub fn has_int2vector_dims(&self) -> bool {
62        self.dims().len() == 1
63            || (self.dims().len() == 0 && self.elements().iter().next().is_none())
64    }
65}
66
67impl<'a> crate::scalar::SqlContainerType for Array<'a, Datum<'a>> {
68    fn unwrap_element_type(container: &SqlScalarType) -> &SqlScalarType {
69        container.unwrap_array_element_type()
70    }
71    fn wrap_element_type(element: SqlScalarType) -> SqlScalarType {
72        SqlScalarType::Array(Box::new(element))
73    }
74}
75
76/// The dimensions of an [`Array`].
77#[derive(Clone, Copy, Eq, PartialEq, Hash)]
78pub struct ArrayDimensions<'a> {
79    pub(crate) data: &'a [u8],
80}
81
82impl Default for ArrayDimensions<'static> {
83    fn default() -> Self {
84        Self { data: &[] }
85    }
86}
87
88impl ArrayDimensions<'_> {
89    /// Returns the number of dimensions in the array as a [`u8`].
90    pub fn ndims(&self) -> u8 {
91        let ndims = self.data.len() / (mem::size_of::<usize>() * 2);
92        ndims.try_into().expect("ndims is known to fit in a u8")
93    }
94
95    /// Returns the number of the dimensions in the array as a [`usize`].
96    pub fn len(&self) -> usize {
97        self.ndims().into()
98    }
99
100    /// Reports whether the number of dimensions in the array is zero.
101    pub fn is_empty(&self) -> bool {
102        self.len() == 0
103    }
104}
105
106impl Ord for ArrayDimensions<'_> {
107    fn cmp(&self, other: &Self) -> Ordering {
108        self.ndims().cmp(&other.ndims())
109    }
110}
111
112impl PartialOrd for ArrayDimensions<'_> {
113    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
114        Some(self.cmp(other))
115    }
116}
117
118impl fmt::Debug for ArrayDimensions<'_> {
119    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
120        f.debug_list().entries(*self).finish()
121    }
122}
123
124impl<'a> IntoIterator for ArrayDimensions<'a> {
125    type Item = ArrayDimension;
126    type IntoIter = ArrayDimensionsIter<'a>;
127
128    fn into_iter(self) -> ArrayDimensionsIter<'a> {
129        ArrayDimensionsIter { data: self.data }
130    }
131}
132
133/// An iterator over the dimensions in an [`ArrayDimensions`].
134#[derive(Debug)]
135pub struct ArrayDimensionsIter<'a> {
136    data: &'a [u8],
137}
138
139impl Iterator for ArrayDimensionsIter<'_> {
140    type Item = ArrayDimension;
141
142    fn next(&mut self) -> Option<ArrayDimension> {
143        if self.data.is_empty() {
144            None
145        } else {
146            let sz = mem::size_of::<usize>();
147            let lower_bound = isize::from_ne_bytes(self.data[..sz].try_into().unwrap());
148            self.data = &self.data[sz..];
149            let length = usize::from_ne_bytes(self.data[..sz].try_into().unwrap());
150            self.data = &self.data[sz..];
151            Some(ArrayDimension {
152                lower_bound,
153                length,
154            })
155        }
156    }
157}
158
159/// The specification of one dimension of an [`Array`].
160#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
161pub struct ArrayDimension {
162    /// The "logical" index at which this dimension begins. This value has no bearing on the
163    /// physical layout of the data, only how users want to use its indices (which may be negative).
164    pub lower_bound: isize,
165    /// The number of elements in this array.
166    pub length: usize,
167}
168
169impl ArrayDimension {
170    /// Presents the "logical indices" of the array, i.e. those that are revealed to the user.
171    ///
172    /// # Panics
173    /// - If the array contain more than [`isize::MAX`] elements (i.e. more than 9EB of data).
174    pub fn dimension_bounds(&self) -> (isize, isize) {
175        (
176            self.lower_bound,
177            self.lower_bound
178                + isize::try_from(self.length).expect("fewer than isize::MAX elements")
179                - 1,
180        )
181    }
182}
183
184/// An error that can occur when constructing an array.
185#[derive(
186    Arbitrary,
187    Clone,
188    Copy,
189    Debug,
190    Eq,
191    PartialEq,
192    Hash,
193    Ord,
194    PartialOrd,
195    Serialize,
196    Deserialize,
197    MzReflect
198)]
199pub enum InvalidArrayError {
200    /// The number of dimensions in the array exceeds [`MAX_ARRAY_DIMENSIONS]`.
201    TooManyDimensions(usize),
202    /// The number of array elements does not match the cardinality derived from
203    /// its dimensions.
204    WrongCardinality { actual: usize, expected: usize },
205}
206
207impl fmt::Display for InvalidArrayError {
208    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209        match self {
210            InvalidArrayError::TooManyDimensions(n) => write!(
211                f,
212                "number of array dimensions ({}) exceeds the maximum allowed ({})",
213                n, MAX_ARRAY_DIMENSIONS
214            ),
215            InvalidArrayError::WrongCardinality { actual, expected } => write!(
216                f,
217                "number of array elements ({}) does not match declared cardinality ({})",
218                actual, expected
219            ),
220        }
221    }
222}
223
224impl Error for InvalidArrayError {
225    fn source(&self) -> Option<&(dyn Error + 'static)> {
226        None
227    }
228}
229
230impl RustType<ProtoInvalidArrayError> for InvalidArrayError {
231    fn into_proto(&self) -> ProtoInvalidArrayError {
232        use Kind::*;
233        use proto_invalid_array_error::*;
234        let kind = match self {
235            InvalidArrayError::TooManyDimensions(dims) => TooManyDimensions(dims.into_proto()),
236            InvalidArrayError::WrongCardinality { actual, expected } => {
237                WrongCardinality(ProtoWrongCardinality {
238                    actual: actual.into_proto(),
239                    expected: expected.into_proto(),
240                })
241            }
242        };
243        ProtoInvalidArrayError { kind: Some(kind) }
244    }
245
246    fn from_proto(proto: ProtoInvalidArrayError) -> Result<Self, TryFromProtoError> {
247        use proto_invalid_array_error::Kind::*;
248        match proto.kind {
249            Some(kind) => match kind {
250                TooManyDimensions(dims) => Ok(InvalidArrayError::TooManyDimensions(
251                    usize::from_proto(dims)?,
252                )),
253                WrongCardinality(v) => Ok(InvalidArrayError::WrongCardinality {
254                    actual: usize::from_proto(v.actual)?,
255                    expected: usize::from_proto(v.expected)?,
256                }),
257            },
258            None => Err(TryFromProtoError::missing_field(
259                "`ProtoInvalidArrayError::kind`",
260            )),
261        }
262    }
263}
264
265/// An encoded packed variant of [`ArrayDimension`].
266///
267/// We uphold the variant that [`PackedArrayDimension`] sorts the same as
268/// [`ArrayDimension`].
269#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
270pub struct PackedArrayDimension([u8; Self::SIZE]);
271
272// `as` conversions are okay here because we're doing bit level logic to make
273// sure the sort order of the packed binary is correct. This is implementation
274// is proptest-ed below.
275#[allow(clippy::as_conversions)]
276impl FixedSizeCodec<ArrayDimension> for PackedArrayDimension {
277    const SIZE: usize = 16;
278
279    fn as_bytes(&self) -> &[u8] {
280        &self.0
281    }
282
283    fn from_bytes(slice: &[u8]) -> Result<Self, String> {
284        let buf: [u8; Self::SIZE] = slice.try_into().map_err(|_| {
285            format!(
286                "size for PackedArrayDimension is {} bytes, got {}",
287                Self::SIZE,
288                slice.len()
289            )
290        })?;
291        Ok(PackedArrayDimension(buf))
292    }
293
294    fn from_value(value: ArrayDimension) -> Self {
295        let mut buf = [0; 16];
296
297        let lower_bound = (i64::cast_from(value.lower_bound) as u64) ^ (0x8000_0000_0000_0000u64);
298        buf[..8].copy_from_slice(&lower_bound.to_be_bytes());
299        let length = u64::cast_from(value.length);
300        buf[8..].copy_from_slice(&length.to_be_bytes());
301
302        PackedArrayDimension(buf)
303    }
304
305    fn into_value(self) -> ArrayDimension {
306        let mut lower_bound: [u8; 8] = self.0[..8].try_into().unwrap();
307        lower_bound.copy_from_slice(&self.0[..8]);
308        let lower_bound = u64::from_be_bytes(lower_bound) ^ 0x8000_0000_0000_0000u64;
309
310        let mut length: [u8; 8] = self.0[8..].try_into().unwrap();
311        length.copy_from_slice(&self.0[8..]);
312        let length = u64::from_be_bytes(length);
313
314        ArrayDimension {
315            lower_bound: isize::cast_from(lower_bound as i64),
316            length: usize::cast_from(length),
317        }
318    }
319}
320
321#[cfg(test)]
322mod tests {
323    use std::iter::empty;
324
325    use mz_ore::assert_ok;
326    use mz_proto::protobuf_roundtrip;
327    use proptest::prelude::*;
328
329    use crate::Datum;
330    use crate::row::Row;
331
332    use super::*;
333
334    #[mz_ore::test]
335    fn test_has_int2vector_dims() {
336        // 1-D array with elements: valid for int2vector
337        let mut row = Row::default();
338        row.packer()
339            .try_push_array(
340                &[ArrayDimension {
341                    lower_bound: 1,
342                    length: 2,
343                }],
344                [Datum::Int16(1), Datum::Int16(2)],
345            )
346            .unwrap();
347        let arr = row.unpack_first().unwrap_array();
348        assert!(
349            arr.has_int2vector_dims(),
350            "1-D array should have int2vector dims"
351        );
352
353        // 1-D empty array (length 0): valid
354        let mut row = Row::default();
355        row.packer()
356            .try_push_array(
357                &[ArrayDimension {
358                    lower_bound: 1,
359                    length: 0,
360                }],
361                empty::<Datum>(),
362            )
363            .unwrap();
364        let arr = row.unpack_first().unwrap_array();
365        assert!(
366            arr.has_int2vector_dims(),
367            "1-D empty array should have int2vector dims"
368        );
369
370        // 0-D empty array (PostgreSQL convention for empty): valid
371        let mut row = Row::default();
372        row.packer().try_push_array(&[], empty::<Datum>()).unwrap();
373        let arr = row.unpack_first().unwrap_array();
374        assert!(
375            arr.has_int2vector_dims(),
376            "0-D empty array should have int2vector dims"
377        );
378
379        // 2-D array: invalid for int2vector
380        let mut row = Row::default();
381        row.packer()
382            .try_push_array(
383                &[
384                    ArrayDimension {
385                        lower_bound: 1,
386                        length: 1,
387                    },
388                    ArrayDimension {
389                        lower_bound: 1,
390                        length: 2,
391                    },
392                ],
393                [Datum::Int16(1), Datum::Int16(2)],
394            )
395            .unwrap();
396        let arr = row.unpack_first().unwrap_array();
397        assert!(
398            !arr.has_int2vector_dims(),
399            "2-D array should not have int2vector dims"
400        );
401    }
402
403    proptest! {
404        #[mz_ore::test]
405        fn invalid_array_error_protobuf_roundtrip(expect in any::<InvalidArrayError>()) {
406            let actual = protobuf_roundtrip::<_, ProtoInvalidArrayError>(&expect);
407            assert_ok!(actual);
408            assert_eq!(actual.unwrap(), expect);
409        }
410    }
411
412    fn arb_array_dimension() -> impl Strategy<Value = ArrayDimension> {
413        (any::<isize>(), any::<usize>()).prop_map(|(lower, length)| ArrayDimension {
414            lower_bound: lower,
415            length,
416        })
417    }
418
419    #[mz_ore::test]
420    fn proptest_packed_array_dimension_roundtrip() {
421        fn test(og: ArrayDimension) {
422            let packed = PackedArrayDimension::from_value(og);
423            let rnd = packed.into_value();
424            assert_eq!(og, rnd);
425        }
426
427        proptest!(|(dim in arb_array_dimension())| test(dim))
428    }
429
430    #[mz_ore::test]
431    fn proptest_packed_array_dimension_sorts() {
432        fn test(mut og: Vec<ArrayDimension>) {
433            let mut packed: Vec<_> = og
434                .iter()
435                .copied()
436                .map(PackedArrayDimension::from_value)
437                .collect();
438
439            packed.sort();
440            og.sort();
441
442            let rnd: Vec<_> = packed
443                .into_iter()
444                .map(PackedArrayDimension::into_value)
445                .collect();
446            assert_eq!(og, rnd);
447        }
448
449        let strat = proptest::collection::vec(arb_array_dimension(), 0..16);
450        proptest!(|(dim in strat)| test(dim))
451    }
452}