arrow_array/builder/
map_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::{Array, ArrayRef, MapArray, StructArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::{NullBuffer, NullBufferBuilder};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`MapArray`]
28///
29/// ```
30/// # use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
31/// # use arrow_array::{Int32Array, StringArray};
32///
33/// let string_builder = StringBuilder::new();
34/// let int_builder = Int32Builder::with_capacity(4);
35///
36/// // Construct `[{"joe": 1}, {"blogs": 2, "foo": 4}, {}, null]`
37/// let mut builder = MapBuilder::new(None, string_builder, int_builder);
38///
39/// builder.keys().append_value("joe");
40/// builder.values().append_value(1);
41/// builder.append(true).unwrap();
42///
43/// builder.keys().append_value("blogs");
44/// builder.values().append_value(2);
45/// builder.keys().append_value("foo");
46/// builder.values().append_value(4);
47/// builder.append(true).unwrap();
48/// builder.append(true).unwrap();
49/// builder.append(false).unwrap();
50///
51/// let array = builder.finish();
52/// assert_eq!(array.value_offsets(), &[0, 1, 3, 3, 3]);
53/// assert_eq!(array.values().as_ref(), &Int32Array::from(vec![1, 2, 4]));
54/// assert_eq!(array.keys().as_ref(), &StringArray::from(vec!["joe", "blogs", "foo"]));
55///
56/// ```
57#[derive(Debug)]
58pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
59    offsets_builder: BufferBuilder<i32>,
60    null_buffer_builder: NullBufferBuilder,
61    field_names: MapFieldNames,
62    key_builder: K,
63    value_builder: V,
64    value_field: Option<FieldRef>,
65}
66
/// Names assigned to the [`Field`]s that make up a [`MapArray`]
///
/// The [`Default`] implementation yields `entries`, `keys` and `values`.
#[derive(Debug, Clone)]
pub struct MapFieldNames {
    /// Name of the [`Field`] holding the map entries
    pub entry: String,
    /// Name of the [`Field`] holding the map keys
    pub key: String,
    /// Name of the [`Field`] holding the map values
    pub value: String,
}

impl Default for MapFieldNames {
    /// Returns the names `entries`, `keys` and `values`.
    fn default() -> Self {
        let entry = String::from("entries");
        let key = String::from("keys");
        let value = String::from("values");
        Self { entry, key, value }
    }
}
87
88impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
89    /// Creates a new `MapBuilder`
90    pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
91        let capacity = key_builder.len();
92        Self::with_capacity(field_names, key_builder, value_builder, capacity)
93    }
94
95    /// Creates a new `MapBuilder` with capacity
96    pub fn with_capacity(
97        field_names: Option<MapFieldNames>,
98        key_builder: K,
99        value_builder: V,
100        capacity: usize,
101    ) -> Self {
102        let mut offsets_builder = BufferBuilder::<i32>::new(capacity + 1);
103        offsets_builder.append(0);
104        Self {
105            offsets_builder,
106            null_buffer_builder: NullBufferBuilder::new(capacity),
107            field_names: field_names.unwrap_or_default(),
108            key_builder,
109            value_builder,
110            value_field: None,
111        }
112    }
113
114    /// Override the field passed to [`MapBuilder::new`]
115    ///
116    /// By default a nullable field is created with the name `values`
117    ///
118    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
119    /// field's data type does not match that of `V`
120    pub fn with_values_field(self, field: impl Into<FieldRef>) -> Self {
121        Self {
122            value_field: Some(field.into()),
123            ..self
124        }
125    }
126
127    /// Returns the key array builder of the map
128    pub fn keys(&mut self) -> &mut K {
129        &mut self.key_builder
130    }
131
132    /// Returns the value array builder of the map
133    pub fn values(&mut self) -> &mut V {
134        &mut self.value_builder
135    }
136
137    /// Returns both the key and value array builders of the map
138    pub fn entries(&mut self) -> (&mut K, &mut V) {
139        (&mut self.key_builder, &mut self.value_builder)
140    }
141
142    /// Finish the current map array slot
143    ///
144    /// Returns an error if the key and values builders are in an inconsistent state.
145    #[inline]
146    pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> {
147        if self.key_builder.len() != self.value_builder.len() {
148            return Err(ArrowError::InvalidArgumentError(format!(
149                "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}",
150                self.key_builder.len(),
151                self.value_builder.len()
152            )));
153        }
154        self.offsets_builder.append(self.key_builder.len() as i32);
155        self.null_buffer_builder.append(is_valid);
156        Ok(())
157    }
158
159    /// Builds the [`MapArray`]
160    pub fn finish(&mut self) -> MapArray {
161        let len = self.len();
162        // Build the keys
163        let keys_arr = self.key_builder.finish();
164        let values_arr = self.value_builder.finish();
165        let offset_buffer = self.offsets_builder.finish();
166        self.offsets_builder.append(0);
167        let null_bit_buffer = self.null_buffer_builder.finish();
168
169        self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
170    }
171
172    /// Builds the [`MapArray`] without resetting the builder.
173    pub fn finish_cloned(&self) -> MapArray {
174        let len = self.len();
175        // Build the keys
176        let keys_arr = self.key_builder.finish_cloned();
177        let values_arr = self.value_builder.finish_cloned();
178        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
179        let nulls = self.null_buffer_builder.finish_cloned();
180        self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
181    }
182
183    fn finish_helper(
184        &self,
185        keys_arr: Arc<dyn Array>,
186        values_arr: Arc<dyn Array>,
187        offset_buffer: Buffer,
188        nulls: Option<NullBuffer>,
189        len: usize,
190    ) -> MapArray {
191        assert!(
192            keys_arr.null_count() == 0,
193            "Keys array must have no null values, found {} null value(s)",
194            keys_arr.null_count()
195        );
196
197        let keys_field = Arc::new(Field::new(
198            self.field_names.key.as_str(),
199            keys_arr.data_type().clone(),
200            false, // always non-nullable
201        ));
202        let values_field = match &self.value_field {
203            Some(f) => f.clone(),
204            None => Arc::new(Field::new(
205                self.field_names.value.as_str(),
206                values_arr.data_type().clone(),
207                true,
208            )),
209        };
210
211        let struct_array =
212            StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
213
214        let map_field = Arc::new(Field::new(
215            self.field_names.entry.as_str(),
216            struct_array.data_type().clone(),
217            false, // always non-nullable
218        ));
219        let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys
220            .len(len)
221            .add_buffer(offset_buffer)
222            .add_child_data(struct_array.into_data())
223            .nulls(nulls);
224
225        let array_data = unsafe { array_data.build_unchecked() };
226
227        MapArray::from(array_data)
228    }
229
230    /// Returns the current null buffer as a slice
231    pub fn validity_slice(&self) -> Option<&[u8]> {
232        self.null_buffer_builder.as_slice()
233    }
234}
235
236impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
237    fn len(&self) -> usize {
238        self.null_buffer_builder.len()
239    }
240
241    fn finish(&mut self) -> ArrayRef {
242        Arc::new(self.finish())
243    }
244
245    /// Builds the array without resetting the builder.
246    fn finish_cloned(&self) -> ArrayRef {
247        Arc::new(self.finish_cloned())
248    }
249
250    fn as_any(&self) -> &dyn Any {
251        self
252    }
253
254    fn as_any_mut(&mut self) -> &mut dyn Any {
255        self
256    }
257
258    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
259        self
260    }
261}
262
#[cfg(test)]
mod tests {
    use crate::builder::{make_builder, Int32Builder, StringBuilder};
    use crate::{Int32Array, StringArray};

    use super::*;

    /// Data type expected for a map built with the default field names,
    /// `Int32` keys and the given values field.
    fn int32_keyed_map_type(values_field: FieldRef) -> DataType {
        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let entries_type = DataType::Struct(vec![keys_field, values_field].into());
        let entries_field = Field::new("entries", entries_type, false);
        DataType::Map(Arc::new(entries_field), false)
    }

    #[test]
    #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
    fn test_map_builder_with_null_keys_panics() {
        let key_builder = StringBuilder::new();
        let value_builder = Int32Builder::new();
        let mut builder = MapBuilder::new(None, key_builder, value_builder);

        // A null key paired with a valid value is accepted by `append` ...
        builder.keys().append_null();
        builder.values().append_value(42);
        builder.append(true).unwrap();

        // ... and only rejected once the array is built.
        builder.finish();
    }

    #[test]
    fn test_boxed_map_builder() {
        let keys_builder = make_builder(&DataType::Utf8, 5);
        let values_builder = make_builder(&DataType::Int32, 5);
        let mut builder = MapBuilder::new(None, keys_builder, values_builder);

        // The boxed builders have to be downcast before values can be pushed.
        let string_keys = builder
            .keys()
            .as_any_mut()
            .downcast_mut::<StringBuilder>()
            .expect("should be an StringBuilder");
        string_keys.append_value("1");

        let int_values = builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        int_values.append_value(42);

        builder.append(true).unwrap();

        let map_array = builder.finish();

        let built_keys = map_array
            .keys()
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("should be an StringArray");
        assert_eq!(built_keys.value(0), "1");

        let built_values = map_array
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("should be an Int32Array");
        assert_eq!(built_values.value(0), 42);
    }

    #[test]
    fn test_with_values_field() {
        let value_field = Arc::new(Field::new("bars", DataType::Int32, false));
        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
            .with_values_field(value_field.clone());

        builder.keys().append_value(1);
        builder.values().append_value(2);
        builder.append(true).unwrap();
        // A null map slot is fine here: the field's nullability refers to the
        // nullability of the values, not of the map slots.
        builder.append(false).unwrap();
        builder.keys().append_value(3);
        builder.values().append_value(4);
        builder.append(true).unwrap();

        let first = builder.finish();
        assert_eq!(first.len(), 3);
        assert_eq!(
            first.data_type(),
            &int32_keyed_map_type(value_field.clone())
        );

        // The custom values field must survive the reset done by `finish`.
        builder.keys().append_value(5);
        builder.values().append_value(6);
        builder.append(true).unwrap();

        let second = builder.finish();
        assert_eq!(second.len(), 1);
        assert_eq!(second.data_type(), &int32_keyed_map_type(value_field));
    }
}