Skip to main content

mz_repr/adt/
array.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! A multi-dimensional array data type.
11
12use std::cmp::Ordering;
13use std::error::Error;
14use std::{fmt, mem};
15
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_persist_types::columnar::FixedSizeCodec;
19use mz_proto::{RustType, TryFromProtoError};
20use proptest_derive::Arbitrary;
21use serde::{Deserialize, Serialize};
22
23use crate::row::DatumList;
24
25include!(concat!(env!("OUT_DIR"), "/mz_repr.adt.array.rs"));
26
27/// The maximum number of dimensions permitted in an array.
28pub const MAX_ARRAY_DIMENSIONS: u8 = 6;
29
30/// A variable-length multi-dimensional array.
31#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
32pub struct Array<'a> {
33    /// The elements in the array.
34    pub(crate) elements: DatumList<'a>,
35    /// The dimensions of the array.
36    pub(crate) dims: ArrayDimensions<'a>,
37}
38
39impl<'a> Array<'a> {
40    /// Returns the dimensions of the array.
41    pub fn dims(&self) -> ArrayDimensions<'a> {
42        self.dims
43    }
44
45    /// Returns the elements of the array.
46    pub fn elements(&self) -> DatumList<'a> {
47        self.elements
48    }
49
50    /// Returns true if this array's dimensions are valid for the Int2Vector type.
51    /// Int2Vector is 1-D; empty arrays use 0 dimensions (PostgreSQL convention).
52    pub fn has_int2vector_dims(&self) -> bool {
53        self.dims().len() == 1
54            || (self.dims().len() == 0 && self.elements().iter().next().is_none())
55    }
56}
57
58/// The dimensions of an [`Array`].
59#[derive(Clone, Copy, Eq, PartialEq, Hash)]
60pub struct ArrayDimensions<'a> {
61    pub(crate) data: &'a [u8],
62}
63
64impl Default for ArrayDimensions<'static> {
65    fn default() -> Self {
66        Self { data: &[] }
67    }
68}
69
70impl ArrayDimensions<'_> {
71    /// Returns the number of dimensions in the array as a [`u8`].
72    pub fn ndims(&self) -> u8 {
73        let ndims = self.data.len() / (mem::size_of::<usize>() * 2);
74        ndims.try_into().expect("ndims is known to fit in a u8")
75    }
76
77    /// Returns the number of the dimensions in the array as a [`usize`].
78    pub fn len(&self) -> usize {
79        self.ndims().into()
80    }
81
82    /// Reports whether the number of dimensions in the array is zero.
83    pub fn is_empty(&self) -> bool {
84        self.len() == 0
85    }
86}
87
88impl Ord for ArrayDimensions<'_> {
89    fn cmp(&self, other: &Self) -> Ordering {
90        self.ndims().cmp(&other.ndims())
91    }
92}
93
94impl PartialOrd for ArrayDimensions<'_> {
95    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
96        Some(self.cmp(other))
97    }
98}
99
100impl fmt::Debug for ArrayDimensions<'_> {
101    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
102        f.debug_list().entries(*self).finish()
103    }
104}
105
106impl<'a> IntoIterator for ArrayDimensions<'a> {
107    type Item = ArrayDimension;
108    type IntoIter = ArrayDimensionsIter<'a>;
109
110    fn into_iter(self) -> ArrayDimensionsIter<'a> {
111        ArrayDimensionsIter { data: self.data }
112    }
113}
114
115/// An iterator over the dimensions in an [`ArrayDimensions`].
116#[derive(Debug)]
117pub struct ArrayDimensionsIter<'a> {
118    data: &'a [u8],
119}
120
121impl Iterator for ArrayDimensionsIter<'_> {
122    type Item = ArrayDimension;
123
124    fn next(&mut self) -> Option<ArrayDimension> {
125        if self.data.is_empty() {
126            None
127        } else {
128            let sz = mem::size_of::<usize>();
129            let lower_bound = isize::from_ne_bytes(self.data[..sz].try_into().unwrap());
130            self.data = &self.data[sz..];
131            let length = usize::from_ne_bytes(self.data[..sz].try_into().unwrap());
132            self.data = &self.data[sz..];
133            Some(ArrayDimension {
134                lower_bound,
135                length,
136            })
137        }
138    }
139}
140
141/// The specification of one dimension of an [`Array`].
142#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
143pub struct ArrayDimension {
144    /// The "logical" index at which this dimension begins. This value has no bearing on the
145    /// physical layout of the data, only how users want to use its indices (which may be negative).
146    pub lower_bound: isize,
147    /// The number of elements in this array.
148    pub length: usize,
149}
150
151impl ArrayDimension {
152    /// Presents the "logical indices" of the array, i.e. those that are revealed to the user.
153    ///
154    /// # Panics
155    /// - If the array contain more than [`isize::MAX`] elements (i.e. more than 9EB of data).
156    pub fn dimension_bounds(&self) -> (isize, isize) {
157        (
158            self.lower_bound,
159            self.lower_bound
160                + isize::try_from(self.length).expect("fewer than isize::MAX elements")
161                - 1,
162        )
163    }
164}
165
166/// An error that can occur when constructing an array.
167#[derive(
168    Arbitrary,
169    Clone,
170    Copy,
171    Debug,
172    Eq,
173    PartialEq,
174    Hash,
175    Ord,
176    PartialOrd,
177    Serialize,
178    Deserialize,
179    MzReflect
180)]
181pub enum InvalidArrayError {
182    /// The number of dimensions in the array exceeds [`MAX_ARRAY_DIMENSIONS]`.
183    TooManyDimensions(usize),
184    /// The number of array elements does not match the cardinality derived from
185    /// its dimensions.
186    WrongCardinality { actual: usize, expected: usize },
187}
188
189impl fmt::Display for InvalidArrayError {
190    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
191        match self {
192            InvalidArrayError::TooManyDimensions(n) => write!(
193                f,
194                "number of array dimensions ({}) exceeds the maximum allowed ({})",
195                n, MAX_ARRAY_DIMENSIONS
196            ),
197            InvalidArrayError::WrongCardinality { actual, expected } => write!(
198                f,
199                "number of array elements ({}) does not match declared cardinality ({})",
200                actual, expected
201            ),
202        }
203    }
204}
205
206impl Error for InvalidArrayError {
207    fn source(&self) -> Option<&(dyn Error + 'static)> {
208        None
209    }
210}
211
212impl RustType<ProtoInvalidArrayError> for InvalidArrayError {
213    fn into_proto(&self) -> ProtoInvalidArrayError {
214        use Kind::*;
215        use proto_invalid_array_error::*;
216        let kind = match self {
217            InvalidArrayError::TooManyDimensions(dims) => TooManyDimensions(dims.into_proto()),
218            InvalidArrayError::WrongCardinality { actual, expected } => {
219                WrongCardinality(ProtoWrongCardinality {
220                    actual: actual.into_proto(),
221                    expected: expected.into_proto(),
222                })
223            }
224        };
225        ProtoInvalidArrayError { kind: Some(kind) }
226    }
227
228    fn from_proto(proto: ProtoInvalidArrayError) -> Result<Self, TryFromProtoError> {
229        use proto_invalid_array_error::Kind::*;
230        match proto.kind {
231            Some(kind) => match kind {
232                TooManyDimensions(dims) => Ok(InvalidArrayError::TooManyDimensions(
233                    usize::from_proto(dims)?,
234                )),
235                WrongCardinality(v) => Ok(InvalidArrayError::WrongCardinality {
236                    actual: usize::from_proto(v.actual)?,
237                    expected: usize::from_proto(v.expected)?,
238                }),
239            },
240            None => Err(TryFromProtoError::missing_field(
241                "`ProtoInvalidArrayError::kind`",
242            )),
243        }
244    }
245}
246
247/// An encoded packed variant of [`ArrayDimension`].
248///
249/// We uphold the variant that [`PackedArrayDimension`] sorts the same as
250/// [`ArrayDimension`].
251#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
252pub struct PackedArrayDimension([u8; Self::SIZE]);
253
254// `as` conversions are okay here because we're doing bit level logic to make
255// sure the sort order of the packed binary is correct. This is implementation
256// is proptest-ed below.
257#[allow(clippy::as_conversions)]
258impl FixedSizeCodec<ArrayDimension> for PackedArrayDimension {
259    const SIZE: usize = 16;
260
261    fn as_bytes(&self) -> &[u8] {
262        &self.0
263    }
264
265    fn from_bytes(slice: &[u8]) -> Result<Self, String> {
266        let buf: [u8; Self::SIZE] = slice.try_into().map_err(|_| {
267            format!(
268                "size for PackedArrayDimension is {} bytes, got {}",
269                Self::SIZE,
270                slice.len()
271            )
272        })?;
273        Ok(PackedArrayDimension(buf))
274    }
275
276    fn from_value(value: ArrayDimension) -> Self {
277        let mut buf = [0; 16];
278
279        let lower_bound = (i64::cast_from(value.lower_bound) as u64) ^ (0x8000_0000_0000_0000u64);
280        buf[..8].copy_from_slice(&lower_bound.to_be_bytes());
281        let length = u64::cast_from(value.length);
282        buf[8..].copy_from_slice(&length.to_be_bytes());
283
284        PackedArrayDimension(buf)
285    }
286
287    fn into_value(self) -> ArrayDimension {
288        let mut lower_bound: [u8; 8] = self.0[..8].try_into().unwrap();
289        lower_bound.copy_from_slice(&self.0[..8]);
290        let lower_bound = u64::from_be_bytes(lower_bound) ^ 0x8000_0000_0000_0000u64;
291
292        let mut length: [u8; 8] = self.0[8..].try_into().unwrap();
293        length.copy_from_slice(&self.0[8..]);
294        let length = u64::from_be_bytes(length);
295
296        ArrayDimension {
297            lower_bound: isize::cast_from(lower_bound as i64),
298            length: usize::cast_from(length),
299        }
300    }
301}
302
303#[cfg(test)]
304mod tests {
305    use std::iter::empty;
306
307    use mz_ore::assert_ok;
308    use mz_proto::protobuf_roundtrip;
309    use proptest::prelude::*;
310
311    use crate::Datum;
312    use crate::row::Row;
313
314    use super::*;
315
316    #[mz_ore::test]
317    fn test_has_int2vector_dims() {
318        // 1-D array with elements: valid for int2vector
319        let mut row = Row::default();
320        row.packer()
321            .try_push_array(
322                &[ArrayDimension {
323                    lower_bound: 1,
324                    length: 2,
325                }],
326                [Datum::Int16(1), Datum::Int16(2)],
327            )
328            .unwrap();
329        let arr = row.unpack_first().unwrap_array();
330        assert!(
331            arr.has_int2vector_dims(),
332            "1-D array should have int2vector dims"
333        );
334
335        // 1-D empty array (length 0): valid
336        let mut row = Row::default();
337        row.packer()
338            .try_push_array(
339                &[ArrayDimension {
340                    lower_bound: 1,
341                    length: 0,
342                }],
343                empty::<Datum>(),
344            )
345            .unwrap();
346        let arr = row.unpack_first().unwrap_array();
347        assert!(
348            arr.has_int2vector_dims(),
349            "1-D empty array should have int2vector dims"
350        );
351
352        // 0-D empty array (PostgreSQL convention for empty): valid
353        let mut row = Row::default();
354        row.packer().try_push_array(&[], empty::<Datum>()).unwrap();
355        let arr = row.unpack_first().unwrap_array();
356        assert!(
357            arr.has_int2vector_dims(),
358            "0-D empty array should have int2vector dims"
359        );
360
361        // 2-D array: invalid for int2vector
362        let mut row = Row::default();
363        row.packer()
364            .try_push_array(
365                &[
366                    ArrayDimension {
367                        lower_bound: 1,
368                        length: 1,
369                    },
370                    ArrayDimension {
371                        lower_bound: 1,
372                        length: 2,
373                    },
374                ],
375                [Datum::Int16(1), Datum::Int16(2)],
376            )
377            .unwrap();
378        let arr = row.unpack_first().unwrap_array();
379        assert!(
380            !arr.has_int2vector_dims(),
381            "2-D array should not have int2vector dims"
382        );
383    }
384
385    proptest! {
386        #[mz_ore::test]
387        fn invalid_array_error_protobuf_roundtrip(expect in any::<InvalidArrayError>()) {
388            let actual = protobuf_roundtrip::<_, ProtoInvalidArrayError>(&expect);
389            assert_ok!(actual);
390            assert_eq!(actual.unwrap(), expect);
391        }
392    }
393
394    fn arb_array_dimension() -> impl Strategy<Value = ArrayDimension> {
395        (any::<isize>(), any::<usize>()).prop_map(|(lower, length)| ArrayDimension {
396            lower_bound: lower,
397            length,
398        })
399    }
400
401    #[mz_ore::test]
402    fn proptest_packed_array_dimension_roundtrip() {
403        fn test(og: ArrayDimension) {
404            let packed = PackedArrayDimension::from_value(og);
405            let rnd = packed.into_value();
406            assert_eq!(og, rnd);
407        }
408
409        proptest!(|(dim in arb_array_dimension())| test(dim))
410    }
411
412    #[mz_ore::test]
413    fn proptest_packed_array_dimension_sorts() {
414        fn test(mut og: Vec<ArrayDimension>) {
415            let mut packed: Vec<_> = og
416                .iter()
417                .copied()
418                .map(PackedArrayDimension::from_value)
419                .collect();
420
421            packed.sort();
422            og.sort();
423
424            let rnd: Vec<_> = packed
425                .into_iter()
426                .map(PackedArrayDimension::into_value)
427                .collect();
428            assert_eq!(og, rnd);
429        }
430
431        let strat = proptest::collection::vec(arb_array_dimension(), 0..16);
432        proptest!(|(dim in strat)| test(dim))
433    }
434}