parquet/geospatial/
bounding_box.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Bounding box for GEOMETRY and GEOGRAPHY types, represented as a min/max
//! pair of coordinate values along each axis.
20//!
21//! Derived from the parquet format spec: <https://github.com/apache/parquet-format/blob/master/Geospatial.md>
22//!
23//!
24
25use crate::file::metadata::HeapSize;
26
/// Min/max bounds of a set of geospatial coordinates along the X and Y axes and,
/// optionally, the Z and M axes.
///
/// A geospatial instance has at least two coordinate dimensions: X and Y for 2D coordinates of each point.
/// X represents longitude/easting and Y represents latitude/northing. A geospatial instance can optionally
/// have Z and/or M values associated with each point.
///
/// The Z values introduce the third dimension coordinate, typically used to indicate height or elevation.
///
/// M values allow tracking a value in a fourth dimension. These can represent:
/// - Linear reference values (e.g., highway milepost)
/// - Timestamps
/// - Other values defined by the CRS
///
/// The bounding box is defined as min/max value pairs of coordinates from each axis. X and Y values are
/// always present, while Z and M are omitted for 2D geospatial instances.
///
/// When calculating a bounding box:
/// - Null or NaN values in a coordinate dimension are skipped
/// - If a dimension has only null/NaN values, that dimension is omitted
/// - If either X or Y dimension is missing, no bounding box is produced
/// - Example: POINT (1 NaN) contributes to X but not to Y, Z, or M dimensions
///
/// Special cases:
/// - For X values only, xmin may exceed xmax. In this case, a point matches if x >= xmin OR x <= xmax
/// - This wraparound can occur when the bounding box crosses the antimeridian line.
/// - In geographic terms: xmin=westernmost, xmax=easternmost, ymin=southernmost, ymax=northernmost
///
/// For GEOGRAPHY types:
/// - X values must be within [-180, 180] (longitude)
/// - Y values must be within [-90, 90] (latitude)
///
/// Derived from the parquet format [spec][bounding-box-spec]
///
/// # Examples
///
/// ```
/// use parquet::geospatial::bounding_box::BoundingBox;
///
/// // 2D bounding box; arguments are grouped per axis: (xmin, xmax, ymin, ymax)
/// let bbox_2d = BoundingBox::new(0.0, 100.0, 0.0, 100.0);
///
/// // 3D bounding box with elevation
/// let bbox_3d = BoundingBox::new(0.0, 100.0, 0.0, 100.0)
///     .with_zrange(0.0, 1000.0);
///
/// // 3D bounding box with elevation and measured value
/// let bbox_3d_m = BoundingBox::new(0.0, 100.0, 0.0, 100.0)
///     .with_zrange(0.0, 1000.0)
///     .with_mrange(0.0, 1000.0);
/// ```
///
/// [bounding-box-spec]: https://github.com/apache/parquet-format/blob/master/Geospatial.md#bounding-box
#[derive(Clone, Debug, PartialEq)]
pub struct BoundingBox {
    /// X coordinates (longitude or easting): (min, max)
    x_range: (f64, f64),
    /// Y coordinates (latitude or northing): (min, max)
    y_range: (f64, f64),
    /// Z coordinates (elevation/height): (min, max), if present
    z_range: Option<(f64, f64)>,
    /// M coordinates (measured value): (min, max), if present
    m_range: Option<(f64, f64)>,
}
88
89impl BoundingBox {
90    /// Creates a new bounding box with the specified coordinates.
91    pub fn new(xmin: f64, xmax: f64, ymin: f64, ymax: f64) -> Self {
92        Self {
93            x_range: (xmin, xmax),
94            y_range: (ymin, ymax),
95            z_range: None,
96            m_range: None,
97        }
98    }
99
100    /// Updates the bounding box with specified X-coordinate range.
101    pub fn with_xrange(mut self, xmin: f64, xmax: f64) -> Self {
102        self.x_range = (xmin, xmax);
103        self
104    }
105
106    /// Updates the bounding box with specified Y-coordinate range.
107    pub fn with_yrange(mut self, ymin: f64, ymax: f64) -> Self {
108        self.y_range = (ymin, ymax);
109        self
110    }
111
112    /// Creates a new bounding box with the specified Z-coordinate range.
113    pub fn with_zrange(mut self, zmin: f64, zmax: f64) -> Self {
114        self.z_range = Some((zmin, zmax));
115        self
116    }
117
118    /// Creates a new bounding box with the specified M-coordinate range.
119    pub fn with_mrange(mut self, mmin: f64, mmax: f64) -> Self {
120        self.m_range = Some((mmin, mmax));
121        self
122    }
123
124    /// Returns the minimum x-coordinate.
125    pub fn get_xmin(&self) -> f64 {
126        self.x_range.0
127    }
128
129    /// Returns the maximum x-coordinate.
130    pub fn get_xmax(&self) -> f64 {
131        self.x_range.1
132    }
133
134    /// Returns the minimum y-coordinate.
135    pub fn get_ymin(&self) -> f64 {
136        self.y_range.0
137    }
138
139    /// Returns the maximum y-coordinate.
140    pub fn get_ymax(&self) -> f64 {
141        self.y_range.1
142    }
143
144    /// Returns the minimum z-coordinate, if present.
145    pub fn get_zmin(&self) -> Option<f64> {
146        self.z_range.map(|z| z.0)
147    }
148
149    /// Returns the maximum z-coordinate, if present.
150    pub fn get_zmax(&self) -> Option<f64> {
151        self.z_range.map(|z| z.1)
152    }
153
154    /// Returns the minimum m-value (measure), if present.
155    pub fn get_mmin(&self) -> Option<f64> {
156        self.m_range.map(|m| m.0)
157    }
158
159    /// Returns the maximum m-value (measure), if present.
160    pub fn get_mmax(&self) -> Option<f64> {
161        self.m_range.map(|m| m.1)
162    }
163
164    /// Returns `true` if both zmin and zmax are present.
165    pub fn is_z_valid(&self) -> bool {
166        self.z_range.is_some()
167    }
168
169    /// Returns `true` if both mmin and mmax are present.
170    pub fn is_m_valid(&self) -> bool {
171        self.m_range.is_some()
172    }
173}
174
175impl HeapSize for BoundingBox {
176    fn heap_size(&self) -> usize {
177        0 // no heap allocations
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the 2D constructor and every accessor on a box with no Z or M range.
    #[test]
    fn test_bounding_box() {
        // Every bound is distinct so that a min/max or x/y mix-up is detected.
        let bbox = BoundingBox::new(-1.0, 2.0, -3.0, 4.0);
        assert_eq!(bbox.get_xmin(), -1.0);
        assert_eq!(bbox.get_xmax(), 2.0);
        assert_eq!(bbox.get_ymin(), -3.0);
        assert_eq!(bbox.get_ymax(), 4.0);
        assert_eq!(bbox.get_zmin(), None);
        assert_eq!(bbox.get_zmax(), None);
        assert_eq!(bbox.get_mmin(), None);
        assert_eq!(bbox.get_mmax(), None);
        assert!(!bbox.is_z_valid());
        assert!(!bbox.is_m_valid());
    }

    /// Setting only the Z range leaves the M range absent.
    #[test]
    fn test_bounding_box_with_zrange() {
        let bbox_z = BoundingBox::new(-1.0, 2.0, -3.0, 4.0).with_zrange(5.0, 15.0);
        assert_eq!(bbox_z.get_zmin(), Some(5.0));
        assert_eq!(bbox_z.get_zmax(), Some(15.0));
        assert!(bbox_z.is_z_valid());
        assert!(!bbox_z.is_m_valid());
    }

    /// Setting only the M range leaves the Z range absent.
    #[test]
    fn test_bounding_box_with_mrange() {
        let bbox_m = BoundingBox::new(-1.0, 2.0, -3.0, 4.0).with_mrange(10.0, 20.0);
        assert_eq!(bbox_m.get_mmin(), Some(10.0));
        assert_eq!(bbox_m.get_mmax(), Some(20.0));
        assert!(!bbox_m.is_z_valid());
        assert!(bbox_m.is_m_valid());
    }

    /// Z and M ranges can be combined and do not disturb X and Y.
    #[test]
    fn test_bounding_box_with_zrange_and_mrange() {
        let bbox_zm = BoundingBox::new(-1.0, 2.0, -3.0, 4.0)
            .with_zrange(5.0, 15.0)
            .with_mrange(10.0, 20.0);
        assert_eq!(bbox_zm.get_xmin(), -1.0);
        assert_eq!(bbox_zm.get_xmax(), 2.0);
        assert_eq!(bbox_zm.get_ymin(), -3.0);
        assert_eq!(bbox_zm.get_ymax(), 4.0);
        assert_eq!(bbox_zm.get_zmin(), Some(5.0));
        assert_eq!(bbox_zm.get_zmax(), Some(15.0));
        assert_eq!(bbox_zm.get_mmin(), Some(10.0));
        assert_eq!(bbox_zm.get_mmax(), Some(20.0));
        assert!(bbox_zm.is_z_valid());
        assert!(bbox_zm.is_m_valid());
    }

    /// `with_xrange` / `with_yrange` replace only their own axis.
    #[test]
    fn test_bounding_box_with_xrange_and_yrange() {
        let bbox = BoundingBox::new(-1.0, 2.0, -3.0, 4.0)
            .with_zrange(5.0, 15.0)
            .with_xrange(100.0, 200.0);
        assert_eq!(bbox.get_xmin(), 100.0);
        assert_eq!(bbox.get_xmax(), 200.0);
        assert_eq!(bbox.get_ymin(), -3.0);
        assert_eq!(bbox.get_ymax(), 4.0);
        assert_eq!(bbox.get_zmin(), Some(5.0));
        assert_eq!(bbox.get_zmax(), Some(15.0));

        let bbox = bbox.with_yrange(-40.0, 40.0);
        assert_eq!(bbox.get_xmin(), 100.0);
        assert_eq!(bbox.get_xmax(), 200.0);
        assert_eq!(bbox.get_ymin(), -40.0);
        assert_eq!(bbox.get_ymax(), 40.0);
        assert!(bbox.is_z_valid());
        assert!(!bbox.is_m_valid());
    }

    /// An X range with xmin > xmax (antimeridian wraparound) is stored verbatim.
    #[test]
    fn test_bounding_box_x_wraparound_is_preserved() {
        let bbox = BoundingBox::new(170.0, -170.0, -10.0, 10.0);
        assert_eq!(bbox.get_xmin(), 170.0);
        assert_eq!(bbox.get_xmax(), -170.0);
    }
}