parquet/geospatial/bounding_box.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Bounding box for GEOMETRY or GEOGRAPHY types, represented as a min/max
//! pair of coordinate values for each axis.
20//!
21//! Derived from the parquet format spec: <https://github.com/apache/parquet-format/blob/master/Geospatial.md>
22//!
23//!
24
25use crate::file::metadata::HeapSize;
26
27/// A geospatial instance has at least two coordinate dimensions: X and Y for 2D coordinates of each point.
28/// X represents longitude/easting and Y represents latitude/northing. A geospatial instance can optionally
29/// have Z and/or M values associated with each point.
30///
31/// The Z values introduce the third dimension coordinate, typically used to indicate height or elevation.
32///
33/// M values allow tracking a value in a fourth dimension. These can represent:
34/// - Linear reference values (e.g., highway milepost)
35/// - Timestamps
36/// - Other values defined by the CRS
37///
38/// The bounding box is defined as min/max value pairs of coordinates from each axis. X and Y values are
39/// always present, while Z and M are omitted for 2D geospatial instances.
40///
41/// When calculating a bounding box:
42/// - Null or NaN values in a coordinate dimension are skipped
43/// - If a dimension has only null/NaN values, that dimension is omitted
44/// - If either X or Y dimension is missing, no bounding box is produced
45/// - Example: POINT (1 NaN) contributes to X but not to Y, Z, or M dimensions
46///
47/// Special cases:
48/// - For X values only, xmin may exceed xmax. In this case, a point matches if x >= xmin OR x <= xmax
49/// - This wraparound can occur when the bounding box crosses the antimeridian line.
50/// - In geographic terms: xmin=westernmost, xmax=easternmost, ymin=southernmost, ymax=northernmost
51///
52/// For GEOGRAPHY types:
53/// - X values must be within [-180, 180] (longitude)
54/// - Y values must be within [-90, 90] (latitude)
55///
56/// Derived from the parquet format [spec][bounding-box-spec]
57///
58/// # Examples
59///
/// ```
/// use parquet::geospatial::bounding_box::BoundingBox;
///
/// // 2D bounding box; arguments are ordered (xmin, xmax, ymin, ymax)
/// let bbox_2d = BoundingBox::new(0.0, 100.0, 0.0, 100.0);
///
/// // 3D bounding box with elevation
/// let bbox_3d = BoundingBox::new(0.0, 100.0, 0.0, 100.0)
///     .with_zrange(0.0, 1000.0);
///
/// // 3D bounding box with elevation and measured value
/// let bbox_3d_m = BoundingBox::new(0.0, 100.0, 0.0, 100.0)
///     .with_zrange(0.0, 1000.0)
///     .with_mrange(0.0, 1000.0);
/// ```
75///
76/// [bounding-box-spec]: https://github.com/apache/parquet-format/blob/master/Geospatial.md#bounding-box
#[derive(Clone, Debug, PartialEq)]
pub struct BoundingBox {
    /// Extent along the X axis (longitude or easting), stored as `(min, max)`.
    x_range: (f64, f64),
    /// Extent along the Y axis (latitude or northing), stored as `(min, max)`.
    y_range: (f64, f64),
    /// Extent along the Z axis (elevation/height) as `(min, max)`; `None` when absent.
    z_range: Option<(f64, f64)>,
    /// Extent of the M (measured) values as `(min, max)`; `None` when absent.
    m_range: Option<(f64, f64)>,
}

impl BoundingBox {
    /// Builds a 2D bounding box from its X and Y extents.
    ///
    /// Note the argument order: both X bounds come first, followed by both Y
    /// bounds. The values are stored exactly as given, and the Z and M ranges
    /// start out unset.
    pub fn new(xmin: f64, xmax: f64, ymin: f64, ymax: f64) -> Self {
        let x_range = (xmin, xmax);
        let y_range = (ymin, ymax);
        Self {
            x_range,
            y_range,
            z_range: None,
            m_range: None,
        }
    }

    /// Replaces the X range, leaving every other dimension untouched.
    pub fn with_xrange(self, xmin: f64, xmax: f64) -> Self {
        Self {
            x_range: (xmin, xmax),
            ..self
        }
    }

    /// Replaces the Y range, leaving every other dimension untouched.
    pub fn with_yrange(self, ymin: f64, ymax: f64) -> Self {
        Self {
            y_range: (ymin, ymax),
            ..self
        }
    }

    /// Sets the Z range on this bounding box and returns it.
    pub fn with_zrange(self, zmin: f64, zmax: f64) -> Self {
        Self {
            z_range: Some((zmin, zmax)),
            ..self
        }
    }

    /// Sets the M range on this bounding box and returns it.
    pub fn with_mrange(self, mmin: f64, mmax: f64) -> Self {
        Self {
            m_range: Some((mmin, mmax)),
            ..self
        }
    }

    /// Lower bound of the X range.
    pub fn get_xmin(&self) -> f64 {
        let (lo, _) = self.x_range;
        lo
    }

    /// Upper bound of the X range.
    pub fn get_xmax(&self) -> f64 {
        let (_, hi) = self.x_range;
        hi
    }

    /// Lower bound of the Y range.
    pub fn get_ymin(&self) -> f64 {
        let (lo, _) = self.y_range;
        lo
    }

    /// Upper bound of the Y range.
    pub fn get_ymax(&self) -> f64 {
        let (_, hi) = self.y_range;
        hi
    }

    /// Lower bound of the Z range, or `None` when no Z range is set.
    pub fn get_zmin(&self) -> Option<f64> {
        let (lo, _) = self.z_range?;
        Some(lo)
    }

    /// Upper bound of the Z range, or `None` when no Z range is set.
    pub fn get_zmax(&self) -> Option<f64> {
        let (_, hi) = self.z_range?;
        Some(hi)
    }

    /// Lower bound of the M (measure) range, or `None` when no M range is set.
    pub fn get_mmin(&self) -> Option<f64> {
        let (lo, _) = self.m_range?;
        Some(lo)
    }

    /// Upper bound of the M (measure) range, or `None` when no M range is set.
    pub fn get_mmax(&self) -> Option<f64> {
        let (_, hi) = self.m_range?;
        Some(hi)
    }

    /// Returns `true` when a Z range is set.
    ///
    /// Both bounds live in a single `Option`, so they are always present
    /// (or absent) together.
    pub fn is_z_valid(&self) -> bool {
        self.z_range.is_some()
    }

    /// Returns `true` when an M range is set.
    ///
    /// Both bounds live in a single `Option`, so they are always present
    /// (or absent) together.
    pub fn is_m_valid(&self) -> bool {
        self.m_range.is_some()
    }
}
174
175impl HeapSize for BoundingBox {
176 fn heap_size(&self) -> usize {
177 0 // no heap allocations
178 }
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises construction, every getter, and the optional Z/M builders.
    ///
    /// Each bound gets a distinct value so that a swapped constructor argument
    /// or a getter reading the wrong tuple slot fails the test.
    #[test]
    fn test_bounding_box() {
        let bbox = BoundingBox::new(-1.0, 2.0, -3.0, 4.0);
        assert_eq!(bbox.get_xmin(), -1.0);
        assert_eq!(bbox.get_xmax(), 2.0);
        assert_eq!(bbox.get_ymin(), -3.0);
        assert_eq!(bbox.get_ymax(), 4.0);
        assert_eq!(bbox.get_zmin(), None);
        assert_eq!(bbox.get_zmax(), None);
        assert_eq!(bbox.get_mmin(), None);
        assert_eq!(bbox.get_mmax(), None);
        assert!(!bbox.is_z_valid());
        assert!(!bbox.is_m_valid());

        // Z range only.
        let bbox_z = BoundingBox::new(-1.0, 2.0, -3.0, 4.0).with_zrange(5.0, 15.0);
        assert_eq!(bbox_z.get_zmin(), Some(5.0));
        assert_eq!(bbox_z.get_zmax(), Some(15.0));
        assert!(bbox_z.is_z_valid());
        assert!(!bbox_z.is_m_valid());

        // M range only.
        let bbox_m = BoundingBox::new(-1.0, 2.0, -3.0, 4.0).with_mrange(10.0, 20.0);
        assert_eq!(bbox_m.get_mmin(), Some(10.0));
        assert_eq!(bbox_m.get_mmax(), Some(20.0));
        assert!(!bbox_m.is_z_valid());
        assert!(bbox_m.is_m_valid());

        // Z and M ranges together; X and Y must be left untouched.
        let bbox_zm = BoundingBox::new(-1.0, 2.0, -3.0, 4.0)
            .with_zrange(5.0, 15.0)
            .with_mrange(10.0, 20.0);
        assert_eq!(bbox_zm.get_xmin(), -1.0);
        assert_eq!(bbox_zm.get_xmax(), 2.0);
        assert_eq!(bbox_zm.get_ymin(), -3.0);
        assert_eq!(bbox_zm.get_ymax(), 4.0);
        assert_eq!(bbox_zm.get_zmin(), Some(5.0));
        assert_eq!(bbox_zm.get_zmax(), Some(15.0));
        assert_eq!(bbox_zm.get_mmin(), Some(10.0));
        assert_eq!(bbox_zm.get_mmax(), Some(20.0));
        assert!(bbox_zm.is_z_valid());
        assert!(bbox_zm.is_m_valid());
    }

    /// `with_xrange` / `with_yrange` overwrite only their own dimension.
    #[test]
    fn test_with_xrange_and_yrange() {
        let base = BoundingBox::new(-1.0, 2.0, -3.0, 4.0)
            .with_zrange(5.0, 15.0)
            .with_mrange(10.0, 20.0);

        let moved_x = base.clone().with_xrange(30.0, 40.0);
        assert_eq!(moved_x.get_xmin(), 30.0);
        assert_eq!(moved_x.get_xmax(), 40.0);
        assert_eq!(moved_x.get_ymin(), -3.0);
        assert_eq!(moved_x.get_ymax(), 4.0);
        assert_eq!(moved_x.get_zmin(), Some(5.0));
        assert_eq!(moved_x.get_zmax(), Some(15.0));
        assert_eq!(moved_x.get_mmin(), Some(10.0));
        assert_eq!(moved_x.get_mmax(), Some(20.0));

        let moved_y = base.clone().with_yrange(50.0, 60.0);
        assert_eq!(moved_y.get_xmin(), -1.0);
        assert_eq!(moved_y.get_xmax(), 2.0);
        assert_eq!(moved_y.get_ymin(), 50.0);
        assert_eq!(moved_y.get_ymax(), 60.0);
        assert_eq!(moved_y.get_zmin(), Some(5.0));
        assert_eq!(moved_y.get_zmax(), Some(15.0));
        assert_eq!(moved_y.get_mmin(), Some(10.0));
        assert_eq!(moved_y.get_mmax(), Some(20.0));
    }

    /// An X range with xmin > xmax (a box crossing the antimeridian) is stored
    /// as given rather than being reordered.
    #[test]
    fn test_wraparound_xrange_is_preserved() {
        let bbox = BoundingBox::new(170.0, -170.0, -10.0, 10.0);
        assert_eq!(bbox.get_xmin(), 170.0);
        assert_eq!(bbox.get_xmax(), -170.0);
        assert_eq!(bbox.get_ymin(), -10.0);
        assert_eq!(bbox.get_ymax(), 10.0);
    }
}