1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::{Array, ArrayRef, MapArray, StructArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::{NullBuffer, NullBufferBuilder};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
27#[derive(Debug)]
58pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
59 offsets_builder: BufferBuilder<i32>,
60 null_buffer_builder: NullBufferBuilder,
61 field_names: MapFieldNames,
62 key_builder: K,
63 value_builder: V,
64 value_field: Option<FieldRef>,
65}
66
/// The field names used by a [`MapBuilder`] when it assembles the map's
/// data type.
#[derive(Clone, Debug)]
pub struct MapFieldNames {
    /// Name of the field holding the key/value struct (the map "entry").
    pub entry: String,
    /// Name of the key field inside each entry.
    pub key: String,
    /// Name of the value field inside each entry.
    pub value: String,
}
77
78impl Default for MapFieldNames {
79 fn default() -> Self {
80 Self {
81 entry: "entries".to_string(),
82 key: "keys".to_string(),
83 value: "values".to_string(),
84 }
85 }
86}
87
88impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
89 pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
91 let capacity = key_builder.len();
92 Self::with_capacity(field_names, key_builder, value_builder, capacity)
93 }
94
95 pub fn with_capacity(
97 field_names: Option<MapFieldNames>,
98 key_builder: K,
99 value_builder: V,
100 capacity: usize,
101 ) -> Self {
102 let mut offsets_builder = BufferBuilder::<i32>::new(capacity + 1);
103 offsets_builder.append(0);
104 Self {
105 offsets_builder,
106 null_buffer_builder: NullBufferBuilder::new(capacity),
107 field_names: field_names.unwrap_or_default(),
108 key_builder,
109 value_builder,
110 value_field: None,
111 }
112 }
113
114 pub fn with_values_field(self, field: impl Into<FieldRef>) -> Self {
121 Self {
122 value_field: Some(field.into()),
123 ..self
124 }
125 }
126
127 pub fn keys(&mut self) -> &mut K {
129 &mut self.key_builder
130 }
131
132 pub fn values(&mut self) -> &mut V {
134 &mut self.value_builder
135 }
136
137 pub fn entries(&mut self) -> (&mut K, &mut V) {
139 (&mut self.key_builder, &mut self.value_builder)
140 }
141
142 #[inline]
146 pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> {
147 if self.key_builder.len() != self.value_builder.len() {
148 return Err(ArrowError::InvalidArgumentError(format!(
149 "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}",
150 self.key_builder.len(),
151 self.value_builder.len()
152 )));
153 }
154 self.offsets_builder.append(self.key_builder.len() as i32);
155 self.null_buffer_builder.append(is_valid);
156 Ok(())
157 }
158
159 pub fn finish(&mut self) -> MapArray {
161 let len = self.len();
162 let keys_arr = self.key_builder.finish();
164 let values_arr = self.value_builder.finish();
165 let offset_buffer = self.offsets_builder.finish();
166 self.offsets_builder.append(0);
167 let null_bit_buffer = self.null_buffer_builder.finish();
168
169 self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
170 }
171
172 pub fn finish_cloned(&self) -> MapArray {
174 let len = self.len();
175 let keys_arr = self.key_builder.finish_cloned();
177 let values_arr = self.value_builder.finish_cloned();
178 let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
179 let nulls = self.null_buffer_builder.finish_cloned();
180 self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
181 }
182
183 fn finish_helper(
184 &self,
185 keys_arr: Arc<dyn Array>,
186 values_arr: Arc<dyn Array>,
187 offset_buffer: Buffer,
188 nulls: Option<NullBuffer>,
189 len: usize,
190 ) -> MapArray {
191 assert!(
192 keys_arr.null_count() == 0,
193 "Keys array must have no null values, found {} null value(s)",
194 keys_arr.null_count()
195 );
196
197 let keys_field = Arc::new(Field::new(
198 self.field_names.key.as_str(),
199 keys_arr.data_type().clone(),
200 false, ));
202 let values_field = match &self.value_field {
203 Some(f) => f.clone(),
204 None => Arc::new(Field::new(
205 self.field_names.value.as_str(),
206 values_arr.data_type().clone(),
207 true,
208 )),
209 };
210
211 let struct_array =
212 StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
213
214 let map_field = Arc::new(Field::new(
215 self.field_names.entry.as_str(),
216 struct_array.data_type().clone(),
217 false, ));
219 let array_data = ArrayData::builder(DataType::Map(map_field, false)) .len(len)
221 .add_buffer(offset_buffer)
222 .add_child_data(struct_array.into_data())
223 .nulls(nulls);
224
225 let array_data = unsafe { array_data.build_unchecked() };
226
227 MapArray::from(array_data)
228 }
229
230 pub fn validity_slice(&self) -> Option<&[u8]> {
232 self.null_buffer_builder.as_slice()
233 }
234}
235
236impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
237 fn len(&self) -> usize {
238 self.null_buffer_builder.len()
239 }
240
241 fn finish(&mut self) -> ArrayRef {
242 Arc::new(self.finish())
243 }
244
245 fn finish_cloned(&self) -> ArrayRef {
247 Arc::new(self.finish_cloned())
248 }
249
250 fn as_any(&self) -> &dyn Any {
251 self
252 }
253
254 fn as_any_mut(&mut self) -> &mut dyn Any {
255 self
256 }
257
258 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
259 self
260 }
261}
262
#[cfg(test)]
mod tests {
    use crate::builder::{make_builder, Int32Builder, StringBuilder};
    use crate::{Int32Array, StringArray};

    use super::*;

    /// Map data type expected for default field names, non-nullable `Int32`
    /// keys and the given value field.
    fn expected_map_type(value_field: FieldRef) -> DataType {
        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let entries = Field::new(
            "entries",
            DataType::Struct(vec![key_field, value_field].into()),
            false,
        );
        DataType::Map(Arc::new(entries), false)
    }

    #[test]
    #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
    fn test_map_builder_with_null_keys_panics() {
        let mut builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
        builder.keys().append_null();
        builder.values().append_value(42);
        builder.append(true).unwrap();

        // Finishing must reject the null key.
        builder.finish();
    }

    #[test]
    fn test_boxed_map_builder() {
        let keys_builder = make_builder(&DataType::Utf8, 5);
        let values_builder = make_builder(&DataType::Int32, 5);

        let mut builder = MapBuilder::new(None, keys_builder, values_builder);

        let string_keys = builder
            .keys()
            .as_any_mut()
            .downcast_mut::<StringBuilder>()
            .expect("should be an StringBuilder");
        string_keys.append_value("1");

        let int_values = builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        int_values.append_value(42);

        builder.append(true).unwrap();

        let map_array = builder.finish();

        let keys = map_array
            .keys()
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("should be an StringArray");
        assert_eq!(keys.value(0), "1");

        let values = map_array
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("should be an Int32Array");
        assert_eq!(values.value(0), 42);
    }

    #[test]
    fn test_with_values_field() {
        let value_field = Arc::new(Field::new("bars", DataType::Int32, false));
        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
            .with_values_field(value_field.clone());

        // First slot: one entry.
        builder.keys().append_value(1);
        builder.values().append_value(2);
        builder.append(true).unwrap();
        // Second slot: null map.
        builder.append(false).unwrap();
        // Third slot: one entry.
        builder.keys().append_value(3);
        builder.values().append_value(4);
        builder.append(true).unwrap();
        let map = builder.finish();

        assert_eq!(map.len(), 3);
        assert_eq!(map.data_type(), &expected_map_type(value_field.clone()));

        // The builder must be reusable after `finish` and keep the field override.
        builder.keys().append_value(5);
        builder.values().append_value(6);
        builder.append(true).unwrap();
        let map = builder.finish();

        assert_eq!(map.len(), 1);
        assert_eq!(map.data_type(), &expected_map_type(value_field));
    }
}