duckdb/core/
vector.rs

1use std::{any::Any, ffi::CString, slice};
2
3use libduckdb_sys::{
4    duckdb_array_type_array_size, duckdb_array_vector_get_child, duckdb_validity_row_is_valid, DuckDbString,
5};
6
7use super::LogicalTypeHandle;
8use crate::ffi::{
9    duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve,
10    duckdb_list_vector_set_size, duckdb_struct_type_child_count, duckdb_struct_type_child_name,
11    duckdb_struct_vector_get_child, duckdb_validity_set_row_invalid, duckdb_vector,
12    duckdb_vector_assign_string_element, duckdb_vector_assign_string_element_len,
13    duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type, duckdb_vector_get_data,
14    duckdb_vector_get_validity, duckdb_vector_size,
15};
16
/// Common interface for vector wrappers that can be viewed as [`Any`].
pub trait Vector {
    /// Exposes this vector as a shared `dyn Any` reference, e.g. so a caller
    /// holding a trait object can downcast to the concrete vector type.
    fn as_any(&self) -> &dyn Any;

    /// Exposes this vector as a mutable `dyn Any` reference, e.g. so a caller
    /// holding a trait object can downcast to the concrete vector type.
    fn as_mut_any(&mut self) -> &mut dyn Any;
}
24
25/// A flat vector
26pub struct FlatVector {
27    ptr: duckdb_vector,
28    capacity: usize,
29}
30
31impl From<duckdb_vector> for FlatVector {
32    fn from(ptr: duckdb_vector) -> Self {
33        Self {
34            ptr,
35            capacity: unsafe { duckdb_vector_size() as usize },
36        }
37    }
38}
39
40impl Vector for FlatVector {
41    fn as_any(&self) -> &dyn Any {
42        self
43    }
44
45    fn as_mut_any(&mut self) -> &mut dyn Any {
46        self
47    }
48}
49
50impl FlatVector {
51    fn with_capacity(ptr: duckdb_vector, capacity: usize) -> Self {
52        Self { ptr, capacity }
53    }
54
55    /// Returns the capacity of the vector
56    pub fn capacity(&self) -> usize {
57        self.capacity
58    }
59
60    /// Returns true if the row at the given index is null
61    pub fn row_is_null(&self, row: u64) -> bool {
62        // use idx_t entry_idx = row_idx / 64; idx_t idx_in_entry = row_idx % 64; bool is_valid = validity_mask[entry_idx] & (1 « idx_in_entry);
63        // as the row is valid function is slower
64        let valid = unsafe {
65            let validity = duckdb_vector_get_validity(self.ptr);
66
67            // validity can return a NULL pointer if the entire vector is valid
68            if validity.is_null() {
69                return false;
70            }
71
72            duckdb_validity_row_is_valid(validity, row)
73        };
74
75        !valid
76    }
77
78    /// Returns an unsafe mutable pointer to the vector’s
79    pub fn as_mut_ptr<T>(&self) -> *mut T {
80        unsafe { duckdb_vector_get_data(self.ptr).cast() }
81    }
82
83    /// Returns a slice of the vector
84    pub fn as_slice<T>(&self) -> &[T] {
85        unsafe { slice::from_raw_parts(self.as_mut_ptr(), self.capacity()) }
86    }
87
88    /// Returns a slice of the vector up to a certain length
89    pub fn as_slice_with_len<T>(&self, len: usize) -> &[T] {
90        unsafe { slice::from_raw_parts(self.as_mut_ptr(), len) }
91    }
92
93    /// Returns a mutable slice of the vector
94    pub fn as_mut_slice<T>(&mut self) -> &mut [T] {
95        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.capacity()) }
96    }
97
98    /// Returns a mutable slice of the vector up to a certain length
99    pub fn as_mut_slice_with_len<T>(&mut self, len: usize) -> &mut [T] {
100        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) }
101    }
102
103    /// Returns the logical type of the vector
104    pub fn logical_type(&self) -> LogicalTypeHandle {
105        unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
106    }
107
108    /// Set row as null
109    pub fn set_null(&mut self, row: usize) {
110        unsafe {
111            duckdb_vector_ensure_validity_writable(self.ptr);
112            let idx = duckdb_vector_get_validity(self.ptr);
113            duckdb_validity_set_row_invalid(idx, row as u64);
114        }
115    }
116
117    /// Copy data to the vector.
118    pub fn copy<T: Copy>(&mut self, data: &[T]) {
119        assert!(data.len() <= self.capacity());
120        self.as_mut_slice::<T>()[0..data.len()].copy_from_slice(data);
121    }
122}
123
/// Types that can store a value of type `T` at a given row index.
pub trait Inserter<T> {
    /// Stores `value` at position `index`.
    fn insert(&self, index: usize, value: T);
}
129
130impl Inserter<CString> for FlatVector {
131    fn insert(&self, index: usize, value: CString) {
132        unsafe {
133            duckdb_vector_assign_string_element(self.ptr, index as u64, value.as_ptr());
134        }
135    }
136}
137
138impl Inserter<&str> for FlatVector {
139    fn insert(&self, index: usize, value: &str) {
140        let cstr = CString::new(value.as_bytes()).unwrap();
141        unsafe {
142            duckdb_vector_assign_string_element(self.ptr, index as u64, cstr.as_ptr());
143        }
144    }
145}
146
147impl Inserter<&String> for FlatVector {
148    fn insert(&self, index: usize, value: &String) {
149        self.insert(index, value.as_str());
150    }
151}
152
153impl Inserter<&[u8]> for FlatVector {
154    fn insert(&self, index: usize, value: &[u8]) {
155        let value_size = value.len();
156        unsafe {
157            // This function also works for binary data. https://duckdb.org/docs/api/c/api#duckdb_vector_assign_string_element_len
158            duckdb_vector_assign_string_element_len(
159                self.ptr,
160                index as u64,
161                value.as_ptr() as *const ::std::os::raw::c_char,
162                value_size as u64,
163            );
164        }
165    }
166}
167
168impl Inserter<&Vec<u8>> for FlatVector {
169    fn insert(&self, index: usize, value: &Vec<u8>) {
170        self.insert(index, value.as_slice());
171    }
172}
173
174/// A list vector.
175pub struct ListVector {
176    /// ListVector does not own the vector pointer.
177    entries: FlatVector,
178}
179
180impl From<duckdb_vector> for ListVector {
181    fn from(ptr: duckdb_vector) -> Self {
182        Self {
183            entries: FlatVector::from(ptr),
184        }
185    }
186}
187
188impl ListVector {
189    /// Returns the number of entries in the list vector.
190    pub fn len(&self) -> usize {
191        unsafe { duckdb_list_vector_get_size(self.entries.ptr) as usize }
192    }
193
194    /// Returns true if the list vector is empty.
195    pub fn is_empty(&self) -> bool {
196        self.len() == 0
197    }
198
199    /// Returns the child vector.
200    // TODO: not ideal interface. Where should we keep capacity.
201    pub fn child(&self, capacity: usize) -> FlatVector {
202        self.reserve(capacity);
203        FlatVector::with_capacity(unsafe { duckdb_list_vector_get_child(self.entries.ptr) }, capacity)
204    }
205
206    /// Take the child as [StructVector].
207    pub fn struct_child(&self, capacity: usize) -> StructVector {
208        self.reserve(capacity);
209        StructVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
210    }
211
212    /// Take the child as [ArrayVector].
213    pub fn array_child(&self) -> ArrayVector {
214        ArrayVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
215    }
216
217    /// Take the child as [ListVector].
218    pub fn list_child(&self) -> Self {
219        Self::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
220    }
221
222    /// Set primitive data to the child node.
223    pub fn set_child<T: Copy>(&self, data: &[T]) {
224        self.child(data.len()).copy(data);
225        self.set_len(data.len());
226    }
227
228    /// Set offset and length to the entry.
229    pub fn set_entry(&mut self, idx: usize, offset: usize, length: usize) {
230        self.entries.as_mut_slice::<duckdb_list_entry>()[idx].offset = offset as u64;
231        self.entries.as_mut_slice::<duckdb_list_entry>()[idx].length = length as u64;
232    }
233
234    /// Get offset and length for the entry at index.
235    pub fn get_entry(&self, idx: usize) -> (usize, usize) {
236        let entry = self.entries.as_slice::<duckdb_list_entry>()[idx];
237        (entry.offset as usize, entry.length as usize)
238    }
239
240    /// Set row as null
241    pub fn set_null(&mut self, row: usize) {
242        unsafe {
243            duckdb_vector_ensure_validity_writable(self.entries.ptr);
244            let idx = duckdb_vector_get_validity(self.entries.ptr);
245            duckdb_validity_set_row_invalid(idx, row as u64);
246        }
247    }
248
249    /// Reserve the capacity for its child node.
250    fn reserve(&self, capacity: usize) {
251        unsafe {
252            duckdb_list_vector_reserve(self.entries.ptr, capacity as u64);
253        }
254    }
255
256    /// Set the length of the list vector.
257    pub fn set_len(&self, new_len: usize) {
258        unsafe {
259            duckdb_list_vector_set_size(self.entries.ptr, new_len as u64);
260        }
261    }
262}
263
264/// A array vector. (fixed-size list)
265pub struct ArrayVector {
266    ptr: duckdb_vector,
267}
268
269impl From<duckdb_vector> for ArrayVector {
270    fn from(ptr: duckdb_vector) -> Self {
271        Self { ptr }
272    }
273}
274
275impl ArrayVector {
276    /// Get the logical type of this ArrayVector.
277    pub fn logical_type(&self) -> LogicalTypeHandle {
278        unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
279    }
280
281    /// Returns the size of the array type.
282    pub fn get_array_size(&self) -> u64 {
283        let ty = self.logical_type();
284        unsafe { duckdb_array_type_array_size(ty.ptr) as u64 }
285    }
286
287    /// Returns the child vector.
288    /// capacity should be a multiple of the array size.
289    // TODO: not ideal interface. Where should we keep count.
290    pub fn child(&self, capacity: usize) -> FlatVector {
291        FlatVector::with_capacity(unsafe { duckdb_array_vector_get_child(self.ptr) }, capacity)
292    }
293
294    /// Set primitive data to the child node.
295    pub fn set_child<T: Copy>(&self, data: &[T]) {
296        self.child(data.len()).copy(data);
297    }
298
299    /// Set row as null
300    pub fn set_null(&mut self, row: usize) {
301        unsafe {
302            duckdb_vector_ensure_validity_writable(self.ptr);
303            let idx = duckdb_vector_get_validity(self.ptr);
304            duckdb_validity_set_row_invalid(idx, row as u64);
305        }
306    }
307}
308
309/// A struct vector.
310pub struct StructVector {
311    ptr: duckdb_vector,
312}
313
314impl From<duckdb_vector> for StructVector {
315    fn from(ptr: duckdb_vector) -> Self {
316        Self { ptr }
317    }
318}
319
320impl StructVector {
321    /// Returns the child by idx in the list vector.
322    pub fn child(&self, idx: usize, capacity: usize) -> FlatVector {
323        FlatVector::with_capacity(
324            unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) },
325            capacity,
326        )
327    }
328
329    /// Take the child as [StructVector].
330    pub fn struct_vector_child(&self, idx: usize) -> Self {
331        Self::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
332    }
333
334    /// Take the child as [ListVector].
335    pub fn list_vector_child(&self, idx: usize) -> ListVector {
336        ListVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
337    }
338
339    /// Take the child as [ArrayVector].
340    pub fn array_vector_child(&self, idx: usize) -> ArrayVector {
341        ArrayVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
342    }
343
344    /// Get the logical type of this struct vector.
345    pub fn logical_type(&self) -> LogicalTypeHandle {
346        unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
347    }
348
349    /// Get the name of the child by idx.
350    pub fn child_name(&self, idx: usize) -> DuckDbString {
351        let logical_type = self.logical_type();
352        unsafe {
353            let child_name_ptr = duckdb_struct_type_child_name(logical_type.ptr, idx as u64);
354            DuckDbString::from_ptr(child_name_ptr)
355        }
356    }
357
358    /// Get the number of children.
359    pub fn num_children(&self) -> usize {
360        let logical_type = self.logical_type();
361        unsafe { duckdb_struct_type_child_count(logical_type.ptr) as usize }
362    }
363
364    /// Set row as null
365    pub fn set_null(&mut self, row: usize) {
366        unsafe {
367            duckdb_vector_ensure_validity_writable(self.ptr);
368            let idx = duckdb_vector_get_validity(self.ptr);
369            duckdb_validity_set_row_invalid(idx, row as u64);
370        }
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{DataChunkHandle, LogicalTypeHandle, LogicalTypeId};
    use std::ffi::CString;

    /// Exercises the `&str`, `&String` and `CString` inserters on a VARCHAR column.
    #[test]
    fn test_insert_string_values() {
        let chunk = DataChunkHandle::new(&[LogicalTypeId::Varchar.into()]);
        let strings = chunk.flat_vector(0);
        chunk.set_len(3);

        strings.insert(0, "first");

        let second = String::from("second");
        strings.insert(1, &second);

        let third = CString::new("third").unwrap();
        strings.insert(2, third);
    }

    /// Exercises the `&[u8]` and `&Vec<u8>` inserters on a BLOB column.
    #[test]
    fn test_insert_byte_values() {
        let chunk = DataChunkHandle::new(&[LogicalTypeId::Blob.into()]);
        let blobs = chunk.flat_vector(0);
        chunk.set_len(2);

        blobs.insert(0, b"hello world".as_slice());

        let owned: Vec<u8> = vec![0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64];
        blobs.insert(1, &owned);
    }

    /// Entries written with `set_entry` are read back unchanged by `get_entry`.
    #[test]
    fn test_list_vector_get_entry() {
        let list_type = LogicalTypeHandle::list(&LogicalTypeId::Integer.into());
        let chunk = DataChunkHandle::new(&[list_type]);
        chunk.set_len(3);

        let mut list_vector = chunk.list_vector(0);

        // (offset, length) for rows 0, 1 and 2.
        let expected: [(usize, usize); 3] = [(0, 2), (2, 1), (3, 2)];

        for (row, &(offset, length)) in expected.iter().enumerate() {
            list_vector.set_entry(row, offset, length);
        }

        for (row, &entry) in expected.iter().enumerate() {
            assert_eq!(list_vector.get_entry(row), entry);
        }
    }
}