iceberg/transform/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Transform function used to compute partition values.
19
20use arrow_array::ArrayRef;
21
22use crate::spec::{Datum, Transform};
23use crate::{Error, ErrorKind, Result};
24
25mod bucket;
26mod identity;
27mod temporal;
28mod truncate;
29mod void;
30
31/// TransformFunction is a trait that defines the interface for all transform functions.
32pub trait TransformFunction: Send + Sync {
33    /// transform will take an input array and transform it into a new array.
34    /// The implementation of this function will need to check and downcast the input to specific
35    /// type.
36    fn transform(&self, input: ArrayRef) -> Result<ArrayRef>;
37    /// transform_literal will take an input literal and transform it into a new literal.
38    fn transform_literal(&self, input: &Datum) -> Result<Option<Datum>>;
39    /// A thin wrapper around `transform_literal`
40    /// to return an error even when it's `None`.
41    fn transform_literal_result(&self, input: &Datum) -> Result<Datum> {
42        self.transform_literal(input)?.ok_or_else(|| {
43            Error::new(
44                ErrorKind::Unexpected,
45                format!("Returns 'None' for literal {}", input),
46            )
47        })
48    }
49}
50
/// A boxed [`TransformFunction`] trait object.
///
/// This is the type returned by [`create_transform_function`], which picks the concrete
/// implementation based on the requested `Transform`.
pub type BoxedTransformFunction = Box<dyn TransformFunction>;
53
54/// create_transform_function creates a boxed trait object of TransformFunction from a Transform.
55pub fn create_transform_function(transform: &Transform) -> Result<BoxedTransformFunction> {
56    match transform {
57        Transform::Identity => Ok(Box::new(identity::Identity {})),
58        Transform::Void => Ok(Box::new(void::Void {})),
59        Transform::Year => Ok(Box::new(temporal::Year {})),
60        Transform::Month => Ok(Box::new(temporal::Month {})),
61        Transform::Day => Ok(Box::new(temporal::Day {})),
62        Transform::Hour => Ok(Box::new(temporal::Hour {})),
63        Transform::Bucket(mod_n) => Ok(Box::new(bucket::Bucket::new(*mod_n))),
64        Transform::Truncate(width) => Ok(Box::new(truncate::Truncate::new(*width))),
65        Transform::Unknown => Err(crate::error::Error::new(
66            crate::ErrorKind::FeatureUnsupported,
67            "Transform Unknown is not implemented",
68        )),
69    }
70}
71
72#[cfg(test)]
73mod test {
74    use std::collections::HashSet;
75    use std::sync::Arc;
76
77    use crate::Result;
78    use crate::expr::accessor::StructAccessor;
79    use crate::expr::{
80        BinaryExpression, BoundPredicate, BoundReference, PredicateOperator, SetExpression,
81    };
82    use crate::spec::{Datum, NestedField, NestedFieldRef, PrimitiveType, Transform, Type};
83
    /// A utility struct (test fixture)
    /// used for testing the projection on `Transform`.
    pub(crate) struct TestProjectionFixture {
        /// Transform whose `project` method is exercised by `assert_projection`.
        transform: Transform,
        /// Name handed both to `project` and to the bound references built for predicates.
        name: String,
        /// Field shared (via `Arc`) by the bound references built for predicates.
        field: NestedFieldRef,
    }
91
92    impl TestProjectionFixture {
93        pub(crate) fn new(
94            transform: Transform,
95            name: impl Into<String>,
96            field: NestedField,
97        ) -> Self {
98            TestProjectionFixture {
99                transform,
100                name: name.into(),
101                field: Arc::new(field),
102            }
103        }
104        pub(crate) fn binary_predicate(
105            &self,
106            op: PredicateOperator,
107            literal: Datum,
108        ) -> BoundPredicate {
109            BoundPredicate::Binary(BinaryExpression::new(
110                op,
111                BoundReference::new(
112                    self.name.clone(),
113                    self.field.clone(),
114                    Arc::new(StructAccessor::new(1, PrimitiveType::Boolean)),
115                ),
116                literal,
117            ))
118        }
119        pub(crate) fn set_predicate(
120            &self,
121            op: PredicateOperator,
122            literals: Vec<Datum>,
123        ) -> BoundPredicate {
124            BoundPredicate::Set(SetExpression::new(
125                op,
126                BoundReference::new(
127                    self.name.clone(),
128                    self.field.clone(),
129                    Arc::new(StructAccessor::new(1, PrimitiveType::Boolean)),
130                ),
131                HashSet::from_iter(literals),
132            ))
133        }
134        pub(crate) fn assert_projection(
135            &self,
136            predicate: &BoundPredicate,
137            expected: Option<&str>,
138        ) -> Result<()> {
139            let result = self.transform.project(&self.name, predicate)?;
140            match expected {
141                Some(exp) => assert_eq!(format!("{}", result.unwrap()), exp),
142                None => assert!(result.is_none()),
143            }
144            Ok(())
145        }
146    }
147
    /// A utility struct, test fixture
    /// used for testing the transform on `Transform`.
    ///
    /// Each field holds the value that `assert_transform` expects the transform under
    /// test to produce.
    pub(crate) struct TestTransformFixture {
        /// Expected `Display` output of the transform.
        pub display: String,
        /// Expected JSON serialization; it must also deserialize back to the same transform.
        pub json: String,
        /// Expected result of `Transform::dedup_name`.
        pub dedup_name: String,
        /// Expected result of `Transform::preserves_order`.
        pub preserves_order: bool,
        /// Pairs of (other transform, expected result of `satisfies_order_of(other)`).
        pub satisfies_order_of: Vec<(Transform, bool)>,
        /// Pairs of (input type, expected result type); `None` means `result_type` is
        /// expected to return an error for that input.
        pub trans_types: Vec<(Type, Option<Type>)>,
    }
158
159    impl TestTransformFixture {
160        #[track_caller]
161        pub(crate) fn assert_transform(&self, trans: Transform) {
162            assert_eq!(self.display, format!("{trans}"));
163            assert_eq!(self.json, serde_json::to_string(&trans).unwrap());
164            assert_eq!(trans, serde_json::from_str(self.json.as_str()).unwrap());
165            assert_eq!(self.dedup_name, trans.dedup_name());
166            assert_eq!(self.preserves_order, trans.preserves_order());
167
168            for (other_trans, satisfies_order_of) in &self.satisfies_order_of {
169                assert_eq!(
170                    satisfies_order_of,
171                    &trans.satisfies_order_of(other_trans),
172                    "Failed to check satisfies order {}, {}, {}",
173                    trans,
174                    other_trans,
175                    satisfies_order_of
176                );
177            }
178
179            for (i, (input_type, result_type)) in self.trans_types.iter().enumerate() {
180                let actual = trans.result_type(input_type).ok();
181                assert_eq!(
182                    result_type, &actual,
183                    "type mismatch at index {}, input: {}, expected: {:?}, actual: {:?}",
184                    i, input_type, result_type, actual
185                );
186            }
187        }
188    }
189}