arrow_array/builder/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines push-based APIs for constructing arrays
19//!
20//! # Basic Usage
21//!
22//! Builders can be used to build simple, non-nested arrays
23//!
24//! ```
25//! # use arrow_array::builder::Int32Builder;
26//! # use arrow_array::PrimitiveArray;
27//! let mut a = Int32Builder::new();
28//! a.append_value(1);
29//! a.append_null();
30//! a.append_value(2);
31//! let a = a.finish();
32//!
33//! assert_eq!(a, PrimitiveArray::from(vec![Some(1), None, Some(2)]));
34//! ```
35//!
36//! ```
37//! # use arrow_array::builder::StringBuilder;
38//! # use arrow_array::{Array, StringArray};
39//! let mut a = StringBuilder::new();
40//! a.append_value("foo");
41//! a.append_value("bar");
42//! a.append_null();
43//! let a = a.finish();
44//!
45//! assert_eq!(a, StringArray::from_iter([Some("foo"), Some("bar"), None]));
46//! ```
47//!
48//! # Nested Usage
49//!
50//! Builders can also be used to build more complex nested arrays, such as lists
51//!
52//! ```
53//! # use arrow_array::builder::{Int32Builder, ListBuilder};
54//! # use arrow_array::ListArray;
55//! # use arrow_array::types::Int32Type;
56//! let mut a = ListBuilder::new(Int32Builder::new());
57//! // [1, 2]
58//! a.values().append_value(1);
59//! a.values().append_value(2);
60//! a.append(true);
61//! // null
62//! a.append(false);
63//! // []
64//! a.append(true);
65//! // [3, null]
66//! a.values().append_value(3);
67//! a.values().append_null();
68//! a.append(true);
69//!
70//! // [[1, 2], null, [], [3, null]]
71//! let a = a.finish();
72//!
73//! assert_eq!(a, ListArray::from_iter_primitive::<Int32Type, _, _>([
74//!     Some(vec![Some(1), Some(2)]),
75//!     None,
76//!     Some(vec![]),
77//!     Some(vec![Some(3), None])]
78//! ))
79//! ```
80//!
81//! # Custom Builders
82//!
83//! It is common to have a collection of statically defined Rust types that
84//! you want to convert to Arrow arrays.
85//!
86//! An example of doing so is below
87//!
88//! ```
89//! # use std::any::Any;
90//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder};
91//! # use arrow_array::{ArrayRef, RecordBatch, StructArray};
92//! # use arrow_schema::{DataType, Field};
93//! # use std::sync::Arc;
94//! /// A custom row representation
95//! struct MyRow {
96//!     i32: i32,
97//!     optional_i32: Option<i32>,
98//!     string: Option<String>,
99//!     i32_list: Option<Vec<Option<i32>>>,
100//! }
101//!
//! /// Converts `Vec<MyRow>` into `StructArray`
103//! #[derive(Debug, Default)]
104//! struct MyRowBuilder {
105//!     i32: Int32Builder,
106//!     string: StringBuilder,
107//!     i32_list: ListBuilder<Int32Builder>,
108//! }
109//!
110//! impl MyRowBuilder {
111//!     fn append(&mut self, row: &MyRow) {
112//!         self.i32.append_value(row.i32);
113//!         self.string.append_option(row.string.as_ref());
114//!         self.i32_list.append_option(row.i32_list.as_ref().map(|x| x.iter().copied()));
115//!     }
116//!
117//!     /// Note: returns StructArray to allow nesting within another array if desired
118//!     fn finish(&mut self) -> StructArray {
119//!         let i32 = Arc::new(self.i32.finish()) as ArrayRef;
120//!         let i32_field = Arc::new(Field::new("i32", DataType::Int32, false));
121//!
122//!         let string = Arc::new(self.string.finish()) as ArrayRef;
//!         let string_field = Arc::new(Field::new("string", DataType::Utf8, true));
124//!
125//!         let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
126//!         let value_field = Arc::new(Field::new("item", DataType::Int32, true));
127//!         let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true));
128//!
129//!         StructArray::from(vec![
130//!             (i32_field, i32),
131//!             (string_field, string),
132//!             (i32_list_field, i32_list),
133//!         ])
134//!     }
135//! }
136//!
137//! impl<'a> Extend<&'a MyRow> for MyRowBuilder {
138//!     fn extend<T: IntoIterator<Item = &'a MyRow>>(&mut self, iter: T) {
139//!         iter.into_iter().for_each(|row| self.append(row));
140//!     }
141//! }
142//!
143//! /// Converts a slice of [`MyRow`] to a [`RecordBatch`]
144//! fn rows_to_batch(rows: &[MyRow]) -> RecordBatch {
145//!     let mut builder = MyRowBuilder::default();
146//!     builder.extend(rows);
147//!     RecordBatch::from(&builder.finish())
148//! }
149//! ```
150
151pub use arrow_buffer::BooleanBufferBuilder;
152
153mod boolean_builder;
154pub use boolean_builder::*;
155mod buffer_builder;
156pub use buffer_builder::*;
157mod fixed_size_binary_builder;
158pub use fixed_size_binary_builder::*;
159mod fixed_size_list_builder;
160pub use fixed_size_list_builder::*;
161mod generic_bytes_builder;
162pub use generic_bytes_builder::*;
163mod generic_list_builder;
164pub use generic_list_builder::*;
165mod map_builder;
166pub use map_builder::*;
167mod null_builder;
168pub use null_builder::*;
169mod primitive_builder;
170pub use primitive_builder::*;
171mod primitive_dictionary_builder;
172pub use primitive_dictionary_builder::*;
173mod primitive_run_builder;
174pub use primitive_run_builder::*;
175mod struct_builder;
176pub use struct_builder::*;
177mod generic_bytes_dictionary_builder;
178pub use generic_bytes_dictionary_builder::*;
179mod generic_byte_run_builder;
180pub use generic_byte_run_builder::*;
181mod generic_bytes_view_builder;
182pub use generic_bytes_view_builder::*;
183mod union_builder;
184
185pub use union_builder::*;
186
187use crate::ArrayRef;
188use std::any::Any;
189
190/// Trait for dealing with different array builders at runtime
191///
192/// # Example
193///
194/// ```
195/// // Create
196/// # use arrow_array::{ArrayRef, StringArray};
197/// # use arrow_array::builder::{ArrayBuilder, Float64Builder, Int64Builder, StringBuilder};
198///
199/// let mut data_builders: Vec<Box<dyn ArrayBuilder>> = vec![
200///     Box::new(Float64Builder::new()),
201///     Box::new(Int64Builder::new()),
202///     Box::new(StringBuilder::new()),
203/// ];
204///
205/// // Fill
206/// data_builders[0]
207///     .as_any_mut()
208///     .downcast_mut::<Float64Builder>()
209///     .unwrap()
210///     .append_value(3.14);
211/// data_builders[1]
212///     .as_any_mut()
213///     .downcast_mut::<Int64Builder>()
214///     .unwrap()
215///     .append_value(-1);
216/// data_builders[2]
217///     .as_any_mut()
218///     .downcast_mut::<StringBuilder>()
219///     .unwrap()
220///     .append_value("🍎");
221///
222/// // Finish
223/// let array_refs: Vec<ArrayRef> = data_builders
224///     .iter_mut()
225///     .map(|builder| builder.finish())
226///     .collect();
227/// assert_eq!(array_refs[0].len(), 1);
228/// assert_eq!(array_refs[1].is_null(0), false);
229/// assert_eq!(
230///     array_refs[2]
231///         .as_any()
232///         .downcast_ref::<StringArray>()
233///         .unwrap()
234///         .value(0),
235///     "🍎"
236/// );
237/// ```
238pub trait ArrayBuilder: Any + Send + Sync {
239    /// Returns the number of array slots in the builder
240    fn len(&self) -> usize;
241
242    /// Returns whether number of array slots is zero
243    fn is_empty(&self) -> bool {
244        self.len() == 0
245    }
246
247    /// Builds the array
248    fn finish(&mut self) -> ArrayRef;
249
250    /// Builds the array without resetting the underlying builder.
251    fn finish_cloned(&self) -> ArrayRef;
252
253    /// Returns the builder as a non-mutable `Any` reference.
254    ///
255    /// This is most useful when one wants to call non-mutable APIs on a specific builder
256    /// type. In this case, one can first cast this into a `Any`, and then use
257    /// `downcast_ref` to get a reference on the specific builder.
258    fn as_any(&self) -> &dyn Any;
259
260    /// Returns the builder as a mutable `Any` reference.
261    ///
262    /// This is most useful when one wants to call mutable APIs on a specific builder
263    /// type. In this case, one can first cast this into a `Any`, and then use
264    /// `downcast_mut` to get a reference on the specific builder.
265    fn as_any_mut(&mut self) -> &mut dyn Any;
266
267    /// Returns the boxed builder as a box of `Any`.
268    fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
269}
270
271impl ArrayBuilder for Box<dyn ArrayBuilder> {
272    fn len(&self) -> usize {
273        (**self).len()
274    }
275
276    fn is_empty(&self) -> bool {
277        (**self).is_empty()
278    }
279
280    fn finish(&mut self) -> ArrayRef {
281        (**self).finish()
282    }
283
284    fn finish_cloned(&self) -> ArrayRef {
285        (**self).finish_cloned()
286    }
287
288    fn as_any(&self) -> &dyn Any {
289        (**self).as_any()
290    }
291
292    fn as_any_mut(&mut self) -> &mut dyn Any {
293        (**self).as_any_mut()
294    }
295
296    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
297        self
298    }
299}
300
301/// Builder for [`ListArray`](crate::array::ListArray)
302pub type ListBuilder<T> = GenericListBuilder<i32, T>;
303
304/// Builder for [`LargeListArray`](crate::array::LargeListArray)
305pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
306
307/// Builder for [`BinaryArray`](crate::array::BinaryArray)
308///
309/// See examples on [`GenericBinaryBuilder`]
310pub type BinaryBuilder = GenericBinaryBuilder<i32>;
311
312/// Builder for [`LargeBinaryArray`](crate::array::LargeBinaryArray)
313///
314/// See examples on [`GenericBinaryBuilder`]
315pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
316
317/// Builder for [`StringArray`](crate::array::StringArray)
318///
319/// See examples on [`GenericStringBuilder`]
320pub type StringBuilder = GenericStringBuilder<i32>;
321
322/// Builder for [`LargeStringArray`](crate::array::LargeStringArray)
323///
324/// See examples on [`GenericStringBuilder`]
325pub type LargeStringBuilder = GenericStringBuilder<i64>;