iceberg/
table.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Table API for Apache Iceberg
19
20use std::sync::Arc;
21
22use crate::arrow::ArrowReaderBuilder;
23use crate::inspect::MetadataTable;
24use crate::io::FileIO;
25use crate::io::object_cache::ObjectCache;
26use crate::scan::TableScanBuilder;
27use crate::spec::{TableMetadata, TableMetadataRef};
28use crate::{Error, ErrorKind, Result, TableIdent};
29
30/// Builder to create table scan.
31pub struct TableBuilder {
32    file_io: Option<FileIO>,
33    metadata_location: Option<String>,
34    metadata: Option<TableMetadataRef>,
35    identifier: Option<TableIdent>,
36    readonly: bool,
37    disable_cache: bool,
38    cache_size_bytes: Option<u64>,
39}
40
41impl TableBuilder {
42    pub(crate) fn new() -> Self {
43        Self {
44            file_io: None,
45            metadata_location: None,
46            metadata: None,
47            identifier: None,
48            readonly: false,
49            disable_cache: false,
50            cache_size_bytes: None,
51        }
52    }
53
54    /// required - sets the necessary FileIO to use for the table
55    pub fn file_io(mut self, file_io: FileIO) -> Self {
56        self.file_io = Some(file_io);
57        self
58    }
59
60    /// optional - sets the tables metadata location
61    pub fn metadata_location<T: Into<String>>(mut self, metadata_location: T) -> Self {
62        self.metadata_location = Some(metadata_location.into());
63        self
64    }
65
66    /// required - passes in the TableMetadata to use for the Table
67    pub fn metadata<T: Into<TableMetadataRef>>(mut self, metadata: T) -> Self {
68        self.metadata = Some(metadata.into());
69        self
70    }
71
72    /// required - passes in the TableIdent to use for the Table
73    pub fn identifier(mut self, identifier: TableIdent) -> Self {
74        self.identifier = Some(identifier);
75        self
76    }
77
78    /// specifies if the Table is readonly or not (default not)
79    pub fn readonly(mut self, readonly: bool) -> Self {
80        self.readonly = readonly;
81        self
82    }
83
84    /// specifies if the Table's metadata cache will be disabled,
85    /// so that reads of Manifests and ManifestLists will never
86    /// get cached.
87    pub fn disable_cache(mut self) -> Self {
88        self.disable_cache = true;
89        self
90    }
91
92    /// optionally set a non-default metadata cache size
93    pub fn cache_size_bytes(mut self, cache_size_bytes: u64) -> Self {
94        self.cache_size_bytes = Some(cache_size_bytes);
95        self
96    }
97
98    /// build the Table
99    pub fn build(self) -> Result<Table> {
100        let Self {
101            file_io,
102            metadata_location,
103            metadata,
104            identifier,
105            readonly,
106            disable_cache,
107            cache_size_bytes,
108        } = self;
109
110        let Some(file_io) = file_io else {
111            return Err(Error::new(
112                ErrorKind::DataInvalid,
113                "FileIO must be provided with TableBuilder.file_io()",
114            ));
115        };
116
117        let Some(metadata) = metadata else {
118            return Err(Error::new(
119                ErrorKind::DataInvalid,
120                "TableMetadataRef must be provided with TableBuilder.metadata()",
121            ));
122        };
123
124        let Some(identifier) = identifier else {
125            return Err(Error::new(
126                ErrorKind::DataInvalid,
127                "TableIdent must be provided with TableBuilder.identifier()",
128            ));
129        };
130
131        let object_cache = if disable_cache {
132            Arc::new(ObjectCache::with_disabled_cache(file_io.clone()))
133        } else if let Some(cache_size_bytes) = cache_size_bytes {
134            Arc::new(ObjectCache::new_with_capacity(
135                file_io.clone(),
136                cache_size_bytes,
137            ))
138        } else {
139            Arc::new(ObjectCache::new(file_io.clone()))
140        };
141
142        Ok(Table {
143            file_io,
144            metadata_location,
145            metadata,
146            identifier,
147            readonly,
148            object_cache,
149        })
150    }
151}
152
153/// Table represents a table in the catalog.
154#[derive(Debug, Clone)]
155pub struct Table {
156    file_io: FileIO,
157    metadata_location: Option<String>,
158    metadata: TableMetadataRef,
159    identifier: TableIdent,
160    readonly: bool,
161    object_cache: Arc<ObjectCache>,
162}
163
164impl Table {
165    /// Sets the [`Table`] metadata and returns an updated instance with the new metadata applied.
166    pub(crate) fn with_metadata(mut self, metadata: TableMetadataRef) -> Self {
167        self.metadata = metadata;
168        self
169    }
170
171    /// Sets the [`Table`] metadata location and returns an updated instance.
172    pub(crate) fn with_metadata_location(mut self, metadata_location: String) -> Self {
173        self.metadata_location = Some(metadata_location);
174        self
175    }
176
177    /// Returns a TableBuilder to build a table
178    pub fn builder() -> TableBuilder {
179        TableBuilder::new()
180    }
181
182    /// Returns table identifier.
183    pub fn identifier(&self) -> &TableIdent {
184        &self.identifier
185    }
186    /// Returns current metadata.
187    pub fn metadata(&self) -> &TableMetadata {
188        &self.metadata
189    }
190
191    /// Returns current metadata ref.
192    pub fn metadata_ref(&self) -> TableMetadataRef {
193        self.metadata.clone()
194    }
195
196    /// Returns current metadata location.
197    pub fn metadata_location(&self) -> Option<&str> {
198        self.metadata_location.as_deref()
199    }
200
201    /// Returns current metadata location in a result.
202    pub fn metadata_location_result(&self) -> Result<&str> {
203        self.metadata_location.as_deref().ok_or(Error::new(
204            ErrorKind::DataInvalid,
205            format!(
206                "Metadata location does not exist for table: {}",
207                self.identifier
208            ),
209        ))
210    }
211
212    /// Returns file io used in this table.
213    pub fn file_io(&self) -> &FileIO {
214        &self.file_io
215    }
216
217    /// Returns this table's object cache
218    pub(crate) fn object_cache(&self) -> Arc<ObjectCache> {
219        self.object_cache.clone()
220    }
221
222    /// Creates a table scan.
223    pub fn scan(&self) -> TableScanBuilder<'_> {
224        TableScanBuilder::new(self)
225    }
226
227    /// Creates a metadata table which provides table-like APIs for inspecting metadata.
228    /// See [`MetadataTable`] for more details.
229    pub fn inspect(&self) -> MetadataTable<'_> {
230        MetadataTable::new(self)
231    }
232
233    /// Returns the flag indicating whether the `Table` is readonly or not
234    pub fn readonly(&self) -> bool {
235        self.readonly
236    }
237
238    /// Create a reader for the table.
239    pub fn reader_builder(&self) -> ArrowReaderBuilder {
240        ArrowReaderBuilder::new(self.file_io.clone())
241    }
242}
243
244/// `StaticTable` is a read-only table struct that can be created from a metadata file or from `TableMetaData` without a catalog.
245/// It can only be used to read metadata and for table scan.
246/// # Examples
247///
248/// ```rust, no_run
249/// # use iceberg::io::FileIO;
250/// # use iceberg::table::StaticTable;
251/// # use iceberg::TableIdent;
252/// # async fn example() {
253/// let metadata_file_location = "s3://bucket_name/path/to/metadata.json";
254/// let file_io = FileIO::from_path(&metadata_file_location)
255///     .unwrap()
256///     .build()
257///     .unwrap();
258/// let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
259/// let static_table =
260///     StaticTable::from_metadata_file(&metadata_file_location, static_identifier, file_io)
261///         .await
262///         .unwrap();
263/// let snapshot_id = static_table
264///     .metadata()
265///     .current_snapshot()
266///     .unwrap()
267///     .snapshot_id();
268/// # }
269/// ```
270#[derive(Debug, Clone)]
271pub struct StaticTable(Table);
272
273impl StaticTable {
274    /// Creates a static table from a given `TableMetadata` and `FileIO`
275    pub async fn from_metadata(
276        metadata: TableMetadata,
277        table_ident: TableIdent,
278        file_io: FileIO,
279    ) -> Result<Self> {
280        let table = Table::builder()
281            .metadata(metadata)
282            .identifier(table_ident)
283            .file_io(file_io.clone())
284            .readonly(true)
285            .build();
286
287        Ok(Self(table?))
288    }
289    /// Creates a static table directly from metadata file and `FileIO`
290    pub async fn from_metadata_file(
291        metadata_location: &str,
292        table_ident: TableIdent,
293        file_io: FileIO,
294    ) -> Result<Self> {
295        let metadata_file = file_io.new_input(metadata_location)?;
296        let metadata_file_content = metadata_file.read().await?;
297        let metadata = serde_json::from_slice::<TableMetadata>(&metadata_file_content)?;
298
299        let table = Table::builder()
300            .metadata(metadata)
301            .metadata_location(metadata_location)
302            .identifier(table_ident)
303            .file_io(file_io.clone())
304            .readonly(true)
305            .build();
306
307        Ok(Self(table?))
308    }
309
310    /// Create a TableScanBuilder for the static table.
311    pub fn scan(&self) -> TableScanBuilder<'_> {
312        self.0.scan()
313    }
314
315    /// Get TableMetadataRef for the static table
316    pub fn metadata(&self) -> TableMetadataRef {
317        self.0.metadata_ref()
318    }
319
320    /// Consumes the `StaticTable` and return it as a `Table`
321    /// Please use this method carefully as the Table it returns remains detached from a catalog
322    /// and can't be used to perform modifications on the table.
323    pub fn into_table(self) -> Table {
324        self.0
325    }
326
327    /// Create a reader for the table.
328    pub fn reader_builder(&self) -> ArrowReaderBuilder {
329        ArrowReaderBuilder::new(self.0.file_io.clone())
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// File name of the metadata fixture shared by every test in this module.
    const METADATA_FILE_NAME: &str = "TableMetadataV2Valid.json";

    /// Path of the shared metadata fixture under the crate's `testdata` directory.
    fn fixture_path() -> String {
        format!(
            "{}/testdata/table_metadata/{}",
            env!("CARGO_MANIFEST_DIR"),
            METADATA_FILE_NAME
        )
    }

    /// Builds a `FileIO` from `path` via `FileIO::from_path`.
    fn file_io_for(path: &str) -> FileIO {
        FileIO::from_path(path).unwrap().build().unwrap()
    }

    /// Loads the metadata file at `path` as a `StaticTable` identified as
    /// `static_ns.static_table`.
    async fn load_static_table(path: &str) -> StaticTable {
        let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
        StaticTable::from_metadata_file(path, static_identifier, file_io_for(path))
            .await
            .unwrap()
    }

    #[tokio::test]
    async fn test_static_table_from_file() {
        let static_table = load_static_table(&fixture_path()).await;
        let snapshot_id = static_table
            .metadata()
            .current_snapshot()
            .unwrap()
            .snapshot_id();
        assert_eq!(
            snapshot_id, 3055729675574597004,
            "snapshot id from metadata don't match"
        );
    }

    #[tokio::test]
    async fn test_static_into_table() {
        let metadata_file_path = fixture_path();
        let table = load_static_table(&metadata_file_path).await.into_table();
        assert!(table.readonly());
        assert_eq!(table.identifier.name(), "static_table");
        assert_eq!(
            table.metadata_location(),
            Some(metadata_file_path.as_str())
        );
    }

    #[tokio::test]
    async fn test_table_readonly_flag() {
        let metadata_file_path = fixture_path();
        let file_io = file_io_for(&metadata_file_path);
        let metadata_file = file_io.new_input(metadata_file_path.as_str()).unwrap();
        let metadata_file_content = metadata_file.read().await.unwrap();
        let table_metadata =
            serde_json::from_slice::<TableMetadata>(&metadata_file_content).unwrap();
        let static_identifier = TableIdent::from_strs(["ns", "table"]).unwrap();
        // `file_io` is not used after this point, so it is moved into the builder.
        let table = Table::builder()
            .metadata(table_metadata)
            .identifier(static_identifier)
            .file_io(file_io)
            .build()
            .unwrap();
        assert!(!table.readonly());
        assert_eq!(table.identifier.name(), "table");
    }
}