Skip to main content

mz_repr/adt/
array.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A multi-dimensional array data type.
11
12use std::cmp::Ordering;
13use std::error::Error;
14use std::{fmt, mem};
15
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_persist_types::columnar::FixedSizeCodec;
19use mz_proto::{RustType, TryFromProtoError};
20#[cfg(any(test, feature = "proptest"))]
21use proptest_derive::Arbitrary;
22use serde::{Deserialize, Serialize};
23
24use crate::Datum;
25use crate::row::DatumList;
26use crate::scalar::SqlScalarType;
27
28include!(concat!(env!("OUT_DIR"), "/mz_repr.adt.array.rs"));
29
30/// The maximum number of dimensions permitted in an array.
31pub const MAX_ARRAY_DIMENSIONS: u8 = 6;
32
33/// A variable-length multi-dimensional array.
34///
35/// The type parameter `T` represents the element type of the array. It is a
36/// phantom parameter propagated through the inner [`DatumList`] — the actual
37/// elements are stored as serialized bytes and `T` is not enforced at
38/// runtime. It is up to the caller to ensure `T` matches the actual element
39/// type. The default `T = Datum<'a>` means existing code that writes
40/// `Array<'a>` continues to work unchanged.
41#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
42pub struct Array<'a, T = Datum<'a>> {
43    /// The elements in the array.
44    pub(crate) elements: DatumList<'a, T>,
45    /// The dimensions of the array.
46    pub(crate) dims: ArrayDimensions<'a>,
47}
48
49impl<'a, T> Array<'a, T> {
50    /// Returns the dimensions of the array.
51    pub fn dims(&self) -> ArrayDimensions<'a> {
52        self.dims
53    }
54
55    /// Returns the elements of the array.
56    pub fn elements(&self) -> DatumList<'a, T> {
57        self.elements
58    }
59
60    /// Returns true if this array's dimensions are valid for the Int2Vector type.
61    /// Int2Vector is 1-D; empty arrays use 0 dimensions (PostgreSQL convention).
62    pub fn has_int2vector_dims(&self) -> bool {
63        self.dims().len() == 1
64            || (self.dims().len() == 0 && self.elements().iter().next().is_none())
65    }
66}
67
68impl<'a> crate::scalar::SqlContainerType for Array<'a, Datum<'a>> {
69    fn unwrap_element_type(container: &SqlScalarType) -> &SqlScalarType {
70        container.unwrap_array_element_type()
71    }
72    fn wrap_element_type(element: SqlScalarType) -> SqlScalarType {
73        SqlScalarType::Array(Box::new(element))
74    }
75}
76
77/// The dimensions of an [`Array`].
78#[derive(Clone, Copy, Eq, PartialEq, Hash)]
79pub struct ArrayDimensions<'a> {
80    pub(crate) data: &'a [u8],
81}
82
83impl Default for ArrayDimensions<'static> {
84    fn default() -> Self {
85        Self { data: &[] }
86    }
87}
88
89impl ArrayDimensions<'_> {
90    /// Returns the number of dimensions in the array as a [`u8`].
91    pub fn ndims(&self) -> u8 {
92        let ndims = self.data.len() / (mem::size_of::<usize>() * 2);
93        ndims.try_into().expect("ndims is known to fit in a u8")
94    }
95
96    /// Returns the number of the dimensions in the array as a [`usize`].
97    pub fn len(&self) -> usize {
98        self.ndims().into()
99    }
100
101    /// Reports whether the number of dimensions in the array is zero.
102    pub fn is_empty(&self) -> bool {
103        self.len() == 0
104    }
105}
106
107impl Ord for ArrayDimensions<'_> {
108    fn cmp(&self, other: &Self) -> Ordering {
109        self.ndims().cmp(&other.ndims())
110    }
111}
112
113impl PartialOrd for ArrayDimensions<'_> {
114    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
115        Some(self.cmp(other))
116    }
117}
118
119impl fmt::Debug for ArrayDimensions<'_> {
120    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121        f.debug_list().entries(*self).finish()
122    }
123}
124
125impl<'a> IntoIterator for ArrayDimensions<'a> {
126    type Item = ArrayDimension;
127    type IntoIter = ArrayDimensionsIter<'a>;
128
129    fn into_iter(self) -> ArrayDimensionsIter<'a> {
130        ArrayDimensionsIter { data: self.data }
131    }
132}
133
134/// An iterator over the dimensions in an [`ArrayDimensions`].
135#[derive(Debug)]
136pub struct ArrayDimensionsIter<'a> {
137    data: &'a [u8],
138}
139
140impl Iterator for ArrayDimensionsIter<'_> {
141    type Item = ArrayDimension;
142
143    fn next(&mut self) -> Option<ArrayDimension> {
144        if self.data.is_empty() {
145            None
146        } else {
147            let sz = mem::size_of::<usize>();
148            let lower_bound = isize::from_ne_bytes(self.data[..sz].try_into().unwrap());
149            self.data = &self.data[sz..];
150            let length = usize::from_ne_bytes(self.data[..sz].try_into().unwrap());
151            self.data = &self.data[sz..];
152            Some(ArrayDimension {
153                lower_bound,
154                length,
155            })
156        }
157    }
158}
159
160/// The specification of one dimension of an [`Array`].
161#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
162pub struct ArrayDimension {
163    /// The "logical" index at which this dimension begins. This value has no bearing on the
164    /// physical layout of the data, only how users want to use its indices (which may be negative).
165    pub lower_bound: isize,
166    /// The number of elements in this array.
167    pub length: usize,
168}
169
170impl ArrayDimension {
171    /// Presents the "logical indices" of the array, i.e. those that are revealed to the user.
172    ///
173    /// # Panics
174    /// - If the array contain more than [`isize::MAX`] elements (i.e. more than 9EB of data).
175    pub fn dimension_bounds(&self) -> (isize, isize) {
176        (
177            self.lower_bound,
178            self.lower_bound
179                + isize::try_from(self.length).expect("fewer than isize::MAX elements")
180                - 1,
181        )
182    }
183}
184
185/// An error that can occur when constructing an array.
186#[derive(
187    Clone,
188    Copy,
189    Debug,
190    Eq,
191    PartialEq,
192    Hash,
193    Ord,
194    PartialOrd,
195    Serialize,
196    Deserialize,
197    MzReflect
198)]
199#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
200pub enum InvalidArrayError {
201    /// The number of dimensions in the array exceeds [`MAX_ARRAY_DIMENSIONS]`.
202    TooManyDimensions(usize),
203    /// The number of array elements does not match the cardinality derived from
204    /// its dimensions.
205    WrongCardinality { actual: usize, expected: usize },
206}
207
208impl fmt::Display for InvalidArrayError {
209    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
210        match self {
211            InvalidArrayError::TooManyDimensions(n) => write!(
212                f,
213                "number of array dimensions ({}) exceeds the maximum allowed ({})",
214                n, MAX_ARRAY_DIMENSIONS
215            ),
216            InvalidArrayError::WrongCardinality { actual, expected } => write!(
217                f,
218                "number of array elements ({}) does not match declared cardinality ({})",
219                actual, expected
220            ),
221        }
222    }
223}
224
225impl Error for InvalidArrayError {
226    fn source(&self) -> Option<&(dyn Error + 'static)> {
227        None
228    }
229}
230
231impl RustType<ProtoInvalidArrayError> for InvalidArrayError {
232    fn into_proto(&self) -> ProtoInvalidArrayError {
233        use Kind::*;
234        use proto_invalid_array_error::*;
235        let kind = match self {
236            InvalidArrayError::TooManyDimensions(dims) => TooManyDimensions(dims.into_proto()),
237            InvalidArrayError::WrongCardinality { actual, expected } => {
238                WrongCardinality(ProtoWrongCardinality {
239                    actual: actual.into_proto(),
240                    expected: expected.into_proto(),
241                })
242            }
243        };
244        ProtoInvalidArrayError { kind: Some(kind) }
245    }
246
247    fn from_proto(proto: ProtoInvalidArrayError) -> Result<Self, TryFromProtoError> {
248        use proto_invalid_array_error::Kind::*;
249        match proto.kind {
250            Some(kind) => match kind {
251                TooManyDimensions(dims) => Ok(InvalidArrayError::TooManyDimensions(
252                    usize::from_proto(dims)?,
253                )),
254                WrongCardinality(v) => Ok(InvalidArrayError::WrongCardinality {
255                    actual: usize::from_proto(v.actual)?,
256                    expected: usize::from_proto(v.expected)?,
257                }),
258            },
259            None => Err(TryFromProtoError::missing_field(
260                "`ProtoInvalidArrayError::kind`",
261            )),
262        }
263    }
264}
265
266/// An encoded packed variant of [`ArrayDimension`].
267///
268/// We uphold the variant that [`PackedArrayDimension`] sorts the same as
269/// [`ArrayDimension`].
270#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
271pub struct PackedArrayDimension([u8; Self::SIZE]);
272
273// `as` conversions are okay here because we're doing bit level logic to make
274// sure the sort order of the packed binary is correct. This is implementation
275// is proptest-ed below.
276#[allow(clippy::as_conversions)]
277impl FixedSizeCodec<ArrayDimension> for PackedArrayDimension {
278    const SIZE: usize = 16;
279
280    fn as_bytes(&self) -> &[u8] {
281        &self.0
282    }
283
284    fn from_bytes(slice: &[u8]) -> Result<Self, String> {
285        let buf: [u8; Self::SIZE] = slice.try_into().map_err(|_| {
286            format!(
287                "size for PackedArrayDimension is {} bytes, got {}",
288                Self::SIZE,
289                slice.len()
290            )
291        })?;
292        Ok(PackedArrayDimension(buf))
293    }
294
295    fn from_value(value: ArrayDimension) -> Self {
296        let mut buf = [0; 16];
297
298        let lower_bound = (i64::cast_from(value.lower_bound) as u64) ^ (0x8000_0000_0000_0000u64);
299        buf[..8].copy_from_slice(&lower_bound.to_be_bytes());
300        let length = u64::cast_from(value.length);
301        buf[8..].copy_from_slice(&length.to_be_bytes());
302
303        PackedArrayDimension(buf)
304    }
305
306    fn into_value(self) -> ArrayDimension {
307        let mut lower_bound: [u8; 8] = self.0[..8].try_into().unwrap();
308        lower_bound.copy_from_slice(&self.0[..8]);
309        let lower_bound = u64::from_be_bytes(lower_bound) ^ 0x8000_0000_0000_0000u64;
310
311        let mut length: [u8; 8] = self.0[8..].try_into().unwrap();
312        length.copy_from_slice(&self.0[8..]);
313        let length = u64::from_be_bytes(length);
314
315        ArrayDimension {
316            lower_bound: isize::cast_from(lower_bound as i64),
317            length: usize::cast_from(length),
318        }
319    }
320}
321
322#[cfg(test)]
323mod tests {
324    use std::iter::empty;
325
326    use mz_ore::assert_ok;
327    use mz_proto::protobuf_roundtrip;
328    use proptest::prelude::*;
329
330    use crate::Datum;
331    use crate::row::Row;
332
333    use super::*;
334
335    #[mz_ore::test]
336    fn test_has_int2vector_dims() {
337        // 1-D array with elements: valid for int2vector
338        let mut row = Row::default();
339        row.packer()
340            .try_push_array(
341                &[ArrayDimension {
342                    lower_bound: 1,
343                    length: 2,
344                }],
345                [Datum::Int16(1), Datum::Int16(2)],
346            )
347            .unwrap();
348        let arr = row.unpack_first().unwrap_array();
349        assert!(
350            arr.has_int2vector_dims(),
351            "1-D array should have int2vector dims"
352        );
353
354        // 1-D empty array (length 0): valid
355        let mut row = Row::default();
356        row.packer()
357            .try_push_array(
358                &[ArrayDimension {
359                    lower_bound: 1,
360                    length: 0,
361                }],
362                empty::<Datum>(),
363            )
364            .unwrap();
365        let arr = row.unpack_first().unwrap_array();
366        assert!(
367            arr.has_int2vector_dims(),
368            "1-D empty array should have int2vector dims"
369        );
370
371        // 0-D empty array (PostgreSQL convention for empty): valid
372        let mut row = Row::default();
373        row.packer().try_push_array(&[], empty::<Datum>()).unwrap();
374        let arr = row.unpack_first().unwrap_array();
375        assert!(
376            arr.has_int2vector_dims(),
377            "0-D empty array should have int2vector dims"
378        );
379
380        // 2-D array: invalid for int2vector
381        let mut row = Row::default();
382        row.packer()
383            .try_push_array(
384                &[
385                    ArrayDimension {
386                        lower_bound: 1,
387                        length: 1,
388                    },
389                    ArrayDimension {
390                        lower_bound: 1,
391                        length: 2,
392                    },
393                ],
394                [Datum::Int16(1), Datum::Int16(2)],
395            )
396            .unwrap();
397        let arr = row.unpack_first().unwrap_array();
398        assert!(
399            !arr.has_int2vector_dims(),
400            "2-D array should not have int2vector dims"
401        );
402    }
403
404    proptest! {
405        #[mz_ore::test]
406        fn invalid_array_error_protobuf_roundtrip(expect in any::<InvalidArrayError>()) {
407            let actual = protobuf_roundtrip::<_, ProtoInvalidArrayError>(&expect);
408            assert_ok!(actual);
409            assert_eq!(actual.unwrap(), expect);
410        }
411    }
412
413    fn arb_array_dimension() -> impl Strategy<Value = ArrayDimension> {
414        (any::<isize>(), any::<usize>()).prop_map(|(lower, length)| ArrayDimension {
415            lower_bound: lower,
416            length,
417        })
418    }
419
420    #[mz_ore::test]
421    fn proptest_packed_array_dimension_roundtrip() {
422        fn test(og: ArrayDimension) {
423            let packed = PackedArrayDimension::from_value(og);
424            let rnd = packed.into_value();
425            assert_eq!(og, rnd);
426        }
427
428        proptest!(|(dim in arb_array_dimension())| test(dim))
429    }
430
431    #[mz_ore::test]
432    fn proptest_packed_array_dimension_sorts() {
433        fn test(mut og: Vec<ArrayDimension>) {
434            let mut packed: Vec<_> = og
435                .iter()
436                .copied()
437                .map(PackedArrayDimension::from_value)
438                .collect();
439
440            packed.sort();
441            og.sort();
442
443            let rnd: Vec<_> = packed
444                .into_iter()
445                .map(PackedArrayDimension::into_value)
446                .collect();
447            assert_eq!(og, rnd);
448        }
449
450        let strat = proptest::collection::vec(arb_array_dimension(), 0..16);
451        proptest!(|(dim in strat)| test(dim))
452    }
453}