Struct parquet::arrow::arrow_writer::ArrowWriter
pub struct ArrowWriter<W: Write> { /* private fields */ }
Encodes RecordBatch to parquet

Writes Arrow RecordBatches to a Parquet writer. Multiple RecordBatches will be encoded into the same row group, up to max_row_group_size rows. Any remaining rows will be flushed on close, so the final row group in the output file may contain fewer than max_row_group_size rows.
use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::ParquetRecordBatchReader;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut buffer = Vec::new();
let mut writer = ArrowWriter::try_new(&mut buffer, to_write.schema(), None).unwrap();
writer.write(&to_write).unwrap();
writer.close().unwrap();

let mut reader = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 1024).unwrap();
let read = reader.next().unwrap().unwrap();

assert_eq!(to_write, read);
§Memory Limiting
The nature of parquet forces buffering of an entire row group before it can be flushed to the underlying writer. Data is buffered in its encoded form, to reduce memory usage, but if writing rows containing large strings or very nested data, this may still result in non-trivial memory usage.
ArrowWriter::in_progress_size can be used to track the size of the buffered row group, and potentially trigger an early flush of a row group based on a memory threshold and/or global memory pressure. However, users should be aware that smaller row groups will result in higher metadata overheads, and may worsen compression ratios and query performance.
// Assumes `writer` is an open ArrowWriter and `batch` is a RecordBatch
writer.write(&batch).unwrap();
// Trigger an early flush if the buffered, encoded size exceeds 1_000_000 bytes
if writer.in_progress_size() > 1_000_000 {
    writer.flush().unwrap();
}
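A fuller, self-contained sketch of this pattern, assuming batches arrive from some iterator; the 1_000_000-byte threshold is illustrative, not a recommendation:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;

let col = Arc::new(Int64Array::from_iter_values(0..1024)) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), None).unwrap();
for batch in std::iter::repeat(batch).take(10) {
    writer.write(&batch).unwrap();
    // Flush the in-progress row group early once its encoded size crosses the threshold
    if writer.in_progress_size() > 1_000_000 {
        writer.flush().unwrap();
    }
}
writer.close().unwrap();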
Implementations§
impl<W: Write + Send> ArrowWriter<W>
pub fn try_new(
    writer: W,
    arrow_schema: SchemaRef,
    props: Option<WriterProperties>,
) -> Result<Self>
Try to create a new Arrow writer

The writer will fail if:
- a SerializedFileWriter cannot be created from the ParquetWriter
- the Arrow schema contains unsupported datatypes such as Unions
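For example, explicit WriterProperties can be supplied via the props argument; the tiny max_row_group_size below is purely illustrative:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;
use parquet::file::properties::WriterProperties;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

// Cap row groups at 2 rows so the 3-row batch spans two row groups
let props = WriterProperties::builder()
    .set_max_row_group_size(2)
    .build();
let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), Some(props)).unwrap();
writer.write(&batch).unwrap();
writer.close().unwrap();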
pub fn try_new_with_options(
    writer: W,
    arrow_schema: SchemaRef,
    options: ArrowWriterOptions,
) -> Result<Self>
Try to create a new Arrow writer with ArrowWriterOptions.

The writer will fail if:
- a SerializedFileWriter cannot be created from the ParquetWriter
- the Arrow schema contains unsupported datatypes such as Unions
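A minimal sketch, assuming ArrowWriterOptions exposes new, with_properties and with_skip_arrow_metadata builder methods (consult the options type in your parquet version for the full set):

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::arrow_writer::{ArrowWriter, ArrowWriterOptions};
use parquet::file::properties::WriterProperties;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let options = ArrowWriterOptions::new()
    .with_properties(WriterProperties::builder().build())
    // Don't embed the serialized Arrow schema in the Parquet metadata
    .with_skip_arrow_metadata(true);
let mut writer =
    ArrowWriter::try_new_with_options(Vec::new(), batch.schema(), options).unwrap();
writer.write(&batch).unwrap();
writer.close().unwrap();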
pub fn flushed_row_groups(&self) -> &[RowGroupMetaDataPtr]
Returns metadata for any flushed row groups
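For instance, the metadata of completed row groups can be inspected after an explicit flush; num_rows and total_byte_size are assumed here to be the usual RowGroupMetaData accessors:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
writer.flush().unwrap(); // complete the in-progress row group
for rg in writer.flushed_row_groups() {
    println!("rows: {}, uncompressed bytes: {}", rg.num_rows(), rg.total_byte_size());
}
writer.close().unwrap();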
pub fn in_progress_size(&self) -> usize
Returns the estimated length in bytes of the current in-progress row group
pub fn in_progress_rows(&self) -> usize
Returns the number of rows buffered in the in-progress row group
pub fn bytes_written(&self) -> usize
Returns the number of bytes written by this instance
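Together with in_progress_size and in_progress_rows, this allows a writer to be monitored as data arrives; a small sketch:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
// All 3 rows are still buffered: the default row group size limit is far larger
assert_eq!(writer.in_progress_rows(), 3);
println!(
    "buffered: {} bytes, written to output: {} bytes",
    writer.in_progress_size(),
    writer.bytes_written()
);
writer.close().unwrap();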
pub fn write(&mut self, batch: &RecordBatch) -> Result<()>
Encodes the provided RecordBatch

If this would cause the current row group to exceed WriterProperties::max_row_group_size rows, the contents of batch will be written to one or more row groups such that all but the final row group in the file contain WriterProperties::max_row_group_size rows.

This will fail if the batch’s schema does not match the writer’s schema.
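A sketch of this splitting behaviour with an artificially small limit of 2 rows per row group; the assertions reflect the documented behaviour above:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;
use parquet::file::properties::WriterProperties;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3, 4, 5])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let props = WriterProperties::builder().set_max_row_group_size(2).build();
let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), Some(props)).unwrap();
writer.write(&batch).unwrap();
// Two full row groups of 2 rows each were flushed; the 5th row remains buffered
assert_eq!(writer.flushed_row_groups().len(), 2);
assert_eq!(writer.in_progress_rows(), 1);
writer.close().unwrap();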
pub fn append_key_value_metadata(&mut self, kv_metadata: KeyValue)
Additional KeyValue metadata to be written in addition to those from WriterProperties

This method provides a way to append kv_metadata after writing RecordBatches
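For example, assuming KeyValue is the thrift-generated type re-exported from parquet::file::metadata, whose constructor takes a key and an optional value:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;
use parquet::file::metadata::KeyValue;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
// Attach application-specific metadata after the data has been written
writer.append_key_value_metadata(KeyValue::new("my_key".to_string(), "my_value".to_string()));
writer.close().unwrap();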
pub fn inner_mut(&mut self) -> &mut W
Returns a mutable reference to the underlying writer.
It is inadvisable to write directly to the underlying writer; doing so will likely result in a corrupt Parquet file
pub fn into_inner(self) -> Result<W>
Flushes any outstanding data and returns the underlying writer.
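For example, to finalize writing and recover an owned output buffer:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
// Flushes outstanding data and hands back the underlying Vec<u8>
let buffer: Vec<u8> = writer.into_inner().unwrap();
assert!(!buffer.is_empty());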
pub fn finish(&mut self) -> Result<FileMetaData>
Close and finalize the underlying Parquet writer
Unlike Self::close this does not consume self
Attempting to write after calling finish will result in an error
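finish is useful when the writer cannot be consumed, e.g. when it is owned by a struct; a minimal sketch:

use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use parquet::arrow::ArrowWriter;

let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter([("col", col)]).unwrap();

let mut writer = ArrowWriter::try_new(Vec::new(), batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
let _metadata = writer.finish().unwrap(); // writes the footer; writer is not consumed
// The writer can still be inspected afterwards...
assert!(writer.bytes_written() > 0);
// ...but further writes return an error, as documented above
assert!(writer.write(&batch).is_err());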
pub fn close(self) -> Result<FileMetaData>
Close and finalize the underlying Parquet writer