arrow_array/builder/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines push-based APIs for constructing arrays
19//!
20//! # Basic Usage
21//!
22//! Builders can be used to build simple, non-nested arrays
23//!
24//! ```
25//! # use arrow_array::builder::Int32Builder;
26//! # use arrow_array::PrimitiveArray;
27//! let mut a = Int32Builder::new();
28//! a.append_value(1);
29//! a.append_null();
30//! a.append_value(2);
31//! let a = a.finish();
32//!
33//! assert_eq!(a, PrimitiveArray::from(vec![Some(1), None, Some(2)]));
34//! ```
35//!
36//! ```
37//! # use arrow_array::builder::StringBuilder;
38//! # use arrow_array::{Array, StringArray};
39//! let mut a = StringBuilder::new();
40//! a.append_value("foo");
41//! a.append_value("bar");
42//! a.append_null();
43//! let a = a.finish();
44//!
45//! assert_eq!(a, StringArray::from_iter([Some("foo"), Some("bar"), None]));
46//! ```
47//!
48//! # Nested Usage
49//!
50//! Builders can also be used to build more complex nested arrays, such as lists
51//!
52//! ```
53//! # use arrow_array::builder::{Int32Builder, ListBuilder};
54//! # use arrow_array::ListArray;
55//! # use arrow_array::types::Int32Type;
56//! let mut a = ListBuilder::new(Int32Builder::new());
57//! // [1, 2]
58//! a.values().append_value(1);
59//! a.values().append_value(2);
60//! a.append(true);
61//! // null
62//! a.append(false);
63//! // []
64//! a.append(true);
65//! // [3, null]
66//! a.values().append_value(3);
67//! a.values().append_null();
68//! a.append(true);
69//!
70//! // [[1, 2], null, [], [3, null]]
71//! let a = a.finish();
72//!
73//! assert_eq!(a, ListArray::from_iter_primitive::<Int32Type, _, _>([
74//! Some(vec![Some(1), Some(2)]),
75//! None,
76//! Some(vec![]),
77//! Some(vec![Some(3), None])]
78//! ))
79//! ```
80//!
81//! # Custom Builders
82//!
83//! It is common to have a collection of statically defined Rust types that
84//! you want to convert to Arrow arrays.
85//!
86//! An example of doing so is below
87//!
88//! ```
89//! # use std::any::Any;
90//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder};
91//! # use arrow_array::{ArrayRef, RecordBatch, StructArray};
92//! # use arrow_schema::{DataType, Field};
93//! # use std::sync::Arc;
94//! /// A custom row representation
95//! struct MyRow {
96//! i32: i32,
97//! optional_i32: Option<i32>,
98//! string: Option<String>,
99//! i32_list: Option<Vec<Option<i32>>>,
100//! }
101//!
//! /// Converts `Vec<MyRow>` into `StructArray`
103//! #[derive(Debug, Default)]
104//! struct MyRowBuilder {
105//! i32: Int32Builder,
106//! string: StringBuilder,
107//! i32_list: ListBuilder<Int32Builder>,
108//! }
109//!
110//! impl MyRowBuilder {
111//! fn append(&mut self, row: &MyRow) {
112//! self.i32.append_value(row.i32);
113//! self.string.append_option(row.string.as_ref());
114//! self.i32_list.append_option(row.i32_list.as_ref().map(|x| x.iter().copied()));
115//! }
116//!
117//! /// Note: returns StructArray to allow nesting within another array if desired
118//! fn finish(&mut self) -> StructArray {
119//! let i32 = Arc::new(self.i32.finish()) as ArrayRef;
120//! let i32_field = Arc::new(Field::new("i32", DataType::Int32, false));
121//!
122//! let string = Arc::new(self.string.finish()) as ArrayRef;
//! let string_field = Arc::new(Field::new("string", DataType::Utf8, true));
124//!
125//! let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
126//! let value_field = Arc::new(Field::new("item", DataType::Int32, true));
127//! let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true));
128//!
129//! StructArray::from(vec![
130//! (i32_field, i32),
131//! (string_field, string),
132//! (i32_list_field, i32_list),
133//! ])
134//! }
135//! }
136//!
137//! impl<'a> Extend<&'a MyRow> for MyRowBuilder {
138//! fn extend<T: IntoIterator<Item = &'a MyRow>>(&mut self, iter: T) {
139//! iter.into_iter().for_each(|row| self.append(row));
140//! }
141//! }
142//!
143//! /// Converts a slice of [`MyRow`] to a [`RecordBatch`]
144//! fn rows_to_batch(rows: &[MyRow]) -> RecordBatch {
145//! let mut builder = MyRowBuilder::default();
146//! builder.extend(rows);
147//! RecordBatch::from(&builder.finish())
148//! }
149//! ```
150
151pub use arrow_buffer::BooleanBufferBuilder;
152
153mod boolean_builder;
154pub use boolean_builder::*;
155mod buffer_builder;
156pub use buffer_builder::*;
157mod fixed_size_binary_builder;
158pub use fixed_size_binary_builder::*;
159mod fixed_size_list_builder;
160pub use fixed_size_list_builder::*;
161mod generic_bytes_builder;
162pub use generic_bytes_builder::*;
163mod generic_list_builder;
164pub use generic_list_builder::*;
165mod map_builder;
166pub use map_builder::*;
167mod null_builder;
168pub use null_builder::*;
169mod primitive_builder;
170pub use primitive_builder::*;
171mod primitive_dictionary_builder;
172pub use primitive_dictionary_builder::*;
173mod primitive_run_builder;
174pub use primitive_run_builder::*;
175mod struct_builder;
176pub use struct_builder::*;
177mod generic_bytes_dictionary_builder;
178pub use generic_bytes_dictionary_builder::*;
179mod generic_byte_run_builder;
180pub use generic_byte_run_builder::*;
181mod generic_bytes_view_builder;
182pub use generic_bytes_view_builder::*;
183mod union_builder;
184
185pub use union_builder::*;
186
187use crate::ArrayRef;
188use std::any::Any;
189
190/// Trait for dealing with different array builders at runtime
191///
192/// # Example
193///
194/// ```
195/// // Create
196/// # use arrow_array::{ArrayRef, StringArray};
197/// # use arrow_array::builder::{ArrayBuilder, Float64Builder, Int64Builder, StringBuilder};
198///
199/// let mut data_builders: Vec<Box<dyn ArrayBuilder>> = vec![
200/// Box::new(Float64Builder::new()),
201/// Box::new(Int64Builder::new()),
202/// Box::new(StringBuilder::new()),
203/// ];
204///
205/// // Fill
206/// data_builders[0]
207/// .as_any_mut()
208/// .downcast_mut::<Float64Builder>()
209/// .unwrap()
210/// .append_value(3.14);
211/// data_builders[1]
212/// .as_any_mut()
213/// .downcast_mut::<Int64Builder>()
214/// .unwrap()
215/// .append_value(-1);
216/// data_builders[2]
217/// .as_any_mut()
218/// .downcast_mut::<StringBuilder>()
219/// .unwrap()
220/// .append_value("🍎");
221///
222/// // Finish
223/// let array_refs: Vec<ArrayRef> = data_builders
224/// .iter_mut()
225/// .map(|builder| builder.finish())
226/// .collect();
227/// assert_eq!(array_refs[0].len(), 1);
228/// assert_eq!(array_refs[1].is_null(0), false);
229/// assert_eq!(
230/// array_refs[2]
231/// .as_any()
232/// .downcast_ref::<StringArray>()
233/// .unwrap()
234/// .value(0),
235/// "🍎"
236/// );
237/// ```
238pub trait ArrayBuilder: Any + Send + Sync {
239 /// Returns the number of array slots in the builder
240 fn len(&self) -> usize;
241
242 /// Returns whether number of array slots is zero
243 fn is_empty(&self) -> bool {
244 self.len() == 0
245 }
246
247 /// Builds the array
248 fn finish(&mut self) -> ArrayRef;
249
250 /// Builds the array without resetting the underlying builder.
251 fn finish_cloned(&self) -> ArrayRef;
252
253 /// Returns the builder as a non-mutable `Any` reference.
254 ///
255 /// This is most useful when one wants to call non-mutable APIs on a specific builder
256 /// type. In this case, one can first cast this into a `Any`, and then use
257 /// `downcast_ref` to get a reference on the specific builder.
258 fn as_any(&self) -> &dyn Any;
259
260 /// Returns the builder as a mutable `Any` reference.
261 ///
262 /// This is most useful when one wants to call mutable APIs on a specific builder
263 /// type. In this case, one can first cast this into a `Any`, and then use
264 /// `downcast_mut` to get a reference on the specific builder.
265 fn as_any_mut(&mut self) -> &mut dyn Any;
266
267 /// Returns the boxed builder as a box of `Any`.
268 fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
269}
270
271impl ArrayBuilder for Box<dyn ArrayBuilder> {
272 fn len(&self) -> usize {
273 (**self).len()
274 }
275
276 fn is_empty(&self) -> bool {
277 (**self).is_empty()
278 }
279
280 fn finish(&mut self) -> ArrayRef {
281 (**self).finish()
282 }
283
284 fn finish_cloned(&self) -> ArrayRef {
285 (**self).finish_cloned()
286 }
287
288 fn as_any(&self) -> &dyn Any {
289 (**self).as_any()
290 }
291
292 fn as_any_mut(&mut self) -> &mut dyn Any {
293 (**self).as_any_mut()
294 }
295
296 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
297 self
298 }
299}
300
301/// Builder for [`ListArray`](crate::array::ListArray)
302pub type ListBuilder<T> = GenericListBuilder<i32, T>;
303
304/// Builder for [`LargeListArray`](crate::array::LargeListArray)
305pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
306
307/// Builder for [`BinaryArray`](crate::array::BinaryArray)
308///
309/// See examples on [`GenericBinaryBuilder`]
310pub type BinaryBuilder = GenericBinaryBuilder<i32>;
311
312/// Builder for [`LargeBinaryArray`](crate::array::LargeBinaryArray)
313///
314/// See examples on [`GenericBinaryBuilder`]
315pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
316
317/// Builder for [`StringArray`](crate::array::StringArray)
318///
319/// See examples on [`GenericStringBuilder`]
320pub type StringBuilder = GenericStringBuilder<i32>;
321
322/// Builder for [`LargeStringArray`](crate::array::LargeStringArray)
323///
324/// See examples on [`GenericStringBuilder`]
325pub type LargeStringBuilder = GenericStringBuilder<i64>;