use std::str::FromStr;
use std::{fmt, str};
pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
use crate::format as parquet;
use crate::errors::{ParquetError, Result};
pub use crate::format::{
BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
StringType, TimeType, TimeUnit, TimestampType, UUIDType,
};
/// Physical column types known to this module.
///
/// Every variant has a same-named counterpart on `parquet::Type`; the
/// `TryFrom<parquet::Type>` / `From<Type>` impls in this file convert between
/// the two. `Display` prints the variant name verbatim and `FromStr` parses it
/// back (additionally accepting `"BINARY"` for [`Type::BYTE_ARRAY`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum Type {
BOOLEAN,
INT32,
INT64,
// Note: the default sort order for INT96 is `SortOrder::UNDEFINED`
// (see `ColumnOrder::get_default_sort_order`).
INT96,
FLOAT,
DOUBLE,
BYTE_ARRAY,
FIXED_LEN_BYTE_ARRAY,
}
/// Converted-type annotations known to this module.
///
/// Apart from [`ConvertedType::NONE`], every variant has a same-named
/// counterpart on `parquet::ConvertedType`. `NONE` stands for an absent
/// annotation: the conversions in this file map it to and from
/// `Option::<parquet::ConvertedType>::None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum ConvertedType {
/// No converted type; corresponds to `None` on the thrift side.
NONE,
UTF8,
MAP,
MAP_KEY_VALUE,
LIST,
ENUM,
DECIMAL,
DATE,
TIME_MILLIS,
TIME_MICROS,
TIMESTAMP_MILLIS,
TIMESTAMP_MICROS,
UINT_8,
UINT_16,
UINT_32,
UINT_64,
INT_8,
INT_16,
INT_32,
INT_64,
JSON,
BSON,
INTERVAL,
}
/// Logical type annotations known to this module.
///
/// Each variant corresponds to one variant of `parquet::LogicalType` (see the
/// `From` impls in both directions in this file). Variants that carry
/// parameters on the thrift side expose them here as named fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
String,
Map,
List,
Enum,
/// Decimal annotation; both fields are copied as-is to/from `DecimalType`.
Decimal {
scale: i32,
precision: i32,
},
Date,
/// Time annotation. A `NANOS` unit has no `ConvertedType` equivalent and
/// converts to `ConvertedType::NONE`.
Time {
// NOTE(review): presumably "is adjusted to UTC" — confirm against the format spec.
is_adjusted_to_u_t_c: bool,
unit: TimeUnit,
},
/// Timestamp annotation. A `NANOS` unit has no `ConvertedType` equivalent
/// and converts to `ConvertedType::NONE`.
Timestamp {
// NOTE(review): presumably "is adjusted to UTC" — confirm against the format spec.
is_adjusted_to_u_t_c: bool,
unit: TimeUnit,
},
/// Integer annotation. Only bit widths 8, 16, 32 and 64 can be converted to
/// a `ConvertedType`; any other width makes that conversion panic.
Integer {
bit_width: i8,
is_signed: bool,
},
Unknown,
Json,
Bson,
Uuid,
Float16,
}
/// Field repetition kinds known to this module.
///
/// Each variant has a same-named counterpart on
/// `parquet::FieldRepetitionType`; `FromStr` accepts exactly the upper-case
/// variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum Repetition {
REQUIRED,
OPTIONAL,
REPEATED,
}
/// Encodings known to this module.
///
/// Each variant has a same-named counterpart on `parquet::Encoding`.
/// `Ord`/`PartialOrd` are derived, so the ordering of values follows the
/// declaration order below — do not reorder variants casually.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum Encoding {
PLAIN,
PLAIN_DICTIONARY,
RLE,
// Deprecated variant: code in this file that still has to name it does so
// under `#[allow(deprecated)]`.
#[deprecated(
note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
)]
BIT_PACKED,
DELTA_BINARY_PACKED,
DELTA_LENGTH_BYTE_ARRAY,
DELTA_BYTE_ARRAY,
RLE_DICTIONARY,
BYTE_STREAM_SPLIT,
}
impl FromStr for Encoding {
type Err = ParquetError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"PLAIN" | "plain" => Ok(Encoding::PLAIN),
"PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
"RLE" | "rle" => Ok(Encoding::RLE),
#[allow(deprecated)]
"BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
"DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
"DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
}
"DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
"RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
"BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
_ => Err(general_err!("unknown encoding: {}", s)),
}
}
}
/// Compression codecs known to this module.
///
/// `GZIP`, `BROTLI` and `ZSTD` carry a codec-specific level. The level is not
/// part of `parquet::CompressionCodec`: converting to the thrift codec drops
/// it, and converting from the thrift codec fills in the level type's
/// `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum Compression {
UNCOMPRESSED,
SNAPPY,
GZIP(GzipLevel),
LZO,
BROTLI(BrotliLevel),
LZ4,
ZSTD(ZstdLevel),
LZ4_RAW,
}
/// Splits a compression setting of the form `CODEC` or `CODEC(LEVEL)` into the
/// codec name and an optional numeric level.
///
/// Everything before the first `(` is returned as the codec name. When a `(`
/// is present, the remainder must be an unsigned integer immediately followed
/// by a closing `)`.
///
/// # Errors
/// Returns `ParquetError::General("invalid compression level: …")` when the
/// text after `(` does not end in `)` or the enclosed text is not a valid
/// `u32`. Malformed input such as `"gzip("` is reported as an error instead of
/// panicking on an out-of-range slice.
fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
    match str_setting.split_once('(') {
        Some((codec, level_str)) => {
            // `strip_suffix` both verifies the closing parenthesis and avoids
            // byte-index arithmetic that could underflow or split a character.
            let level = level_str
                .strip_suffix(')')
                .and_then(|digits| digits.parse::<u32>().ok())
                .ok_or_else(|| {
                    ParquetError::General(format!("invalid compression level: {}", level_str))
                })?;
            Ok((codec, Some(level)))
        }
        None => Ok((str_setting, None)),
    }
}
/// Ensures that no compression level was supplied for a codec that does not
/// take one.
///
/// # Errors
/// Returns a general error when `level` is `Some(_)`.
fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
    match level {
        None => Ok(()),
        Some(_) => Err(ParquetError::General("level is not support".to_string())),
    }
}
/// Returns the compression level, or an error naming `codec` when the level
/// is missing.
///
/// The error message is built lazily, so no string is formatted or allocated
/// when a level is present.
///
/// # Errors
/// Returns `ParquetError::General("<codec> require level")` when `level` is
/// `None`.
fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
    level.ok_or_else(|| ParquetError::General(format!("{} require level", codec)))
}
impl FromStr for Compression {
    type Err = ParquetError;

    /// Parses a compression setting such as `"snappy"` or `"ZSTD(3)"`.
    ///
    /// Codec names are accepted fully upper-case or fully lower-case. `GZIP`,
    /// `BROTLI` and `ZSTD` require a level in parentheses; every other codec
    /// rejects one.
    ///
    /// # Errors
    /// Fails when the setting cannot be split into codec and level, when a
    /// level is missing or unexpectedly present, when the level type's
    /// `try_new` rejects the level, or when the codec name is unknown.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let (codec, level) = split_compression_string(s)?;
        match codec {
            "UNCOMPRESSED" | "uncompressed" => {
                check_level_is_none(&level).map(|()| Self::UNCOMPRESSED)
            }
            "SNAPPY" | "snappy" => check_level_is_none(&level).map(|()| Self::SNAPPY),
            "GZIP" | "gzip" => {
                let gzip_level = GzipLevel::try_new(require_level(codec, level)?)?;
                Ok(Self::GZIP(gzip_level))
            }
            "LZO" | "lzo" => check_level_is_none(&level).map(|()| Self::LZO),
            "BROTLI" | "brotli" => {
                let brotli_level = BrotliLevel::try_new(require_level(codec, level)?)?;
                Ok(Self::BROTLI(brotli_level))
            }
            "LZ4" | "lz4" => check_level_is_none(&level).map(|()| Self::LZ4),
            "ZSTD" | "zstd" => {
                let raw_level = require_level(codec, level)?;
                let zstd_level = ZstdLevel::try_new(raw_level as i32)?;
                Ok(Self::ZSTD(zstd_level))
            }
            "LZ4_RAW" | "lz4_raw" => check_level_is_none(&level).map(|()| Self::LZ4_RAW),
            _ => Err(ParquetError::General(format!(
                "unsupport compression {codec}"
            ))),
        }
    }
}
/// Page types known to this module.
///
/// Each variant has a same-named counterpart on `parquet::PageType`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum PageType {
DATA_PAGE,
INDEX_PAGE,
DICTIONARY_PAGE,
DATA_PAGE_V2,
}
/// Sort order assigned to a column's values, as computed by
/// `ColumnOrder::get_sort_order` from the column's logical, converted and
/// physical types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum SortOrder {
SIGNED,
UNSIGNED,
/// No sort order is assigned (for example for map and list types).
UNDEFINED,
}
impl SortOrder {
    /// Returns `true` only for [`SortOrder::SIGNED`]; both `UNSIGNED` and
    /// `UNDEFINED` report `false`.
    pub fn is_signed(&self) -> bool {
        match self {
            Self::SIGNED => true,
            Self::UNSIGNED | Self::UNDEFINED => false,
        }
    }
}
/// Column order: either an order defined by the column's type, carrying the
/// resulting [`SortOrder`], or undefined.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Variant names are deliberately upper-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
/// Order defined by the column type; holds the sort order to use.
TYPE_DEFINED_ORDER(SortOrder),
/// No column order; `ColumnOrder::sort_order` reports `SortOrder::SIGNED`
/// for this variant.
UNDEFINED,
}
impl ColumnOrder {
pub fn get_sort_order(
logical_type: Option<LogicalType>,
converted_type: ConvertedType,
physical_type: Type,
) -> SortOrder {
match logical_type {
Some(logical) => match logical {
LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
SortOrder::UNSIGNED
}
LogicalType::Integer { is_signed, .. } => match is_signed {
true => SortOrder::SIGNED,
false => SortOrder::UNSIGNED,
},
LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
LogicalType::Decimal { .. } => SortOrder::SIGNED,
LogicalType::Date => SortOrder::SIGNED,
LogicalType::Time { .. } => SortOrder::SIGNED,
LogicalType::Timestamp { .. } => SortOrder::SIGNED,
LogicalType::Unknown => SortOrder::UNDEFINED,
LogicalType::Uuid => SortOrder::UNSIGNED,
LogicalType::Float16 => SortOrder::SIGNED,
},
None => Self::get_converted_sort_order(converted_type, physical_type),
}
}
fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
match converted_type {
ConvertedType::UTF8
| ConvertedType::JSON
| ConvertedType::BSON
| ConvertedType::ENUM => SortOrder::UNSIGNED,
ConvertedType::INT_8
| ConvertedType::INT_16
| ConvertedType::INT_32
| ConvertedType::INT_64 => SortOrder::SIGNED,
ConvertedType::UINT_8
| ConvertedType::UINT_16
| ConvertedType::UINT_32
| ConvertedType::UINT_64 => SortOrder::UNSIGNED,
ConvertedType::DECIMAL => SortOrder::SIGNED,
ConvertedType::DATE => SortOrder::SIGNED,
ConvertedType::TIME_MILLIS
| ConvertedType::TIME_MICROS
| ConvertedType::TIMESTAMP_MILLIS
| ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
ConvertedType::INTERVAL => SortOrder::UNDEFINED,
ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
SortOrder::UNDEFINED
}
ConvertedType::NONE => Self::get_default_sort_order(physical_type),
}
}
fn get_default_sort_order(physical_type: Type) -> SortOrder {
match physical_type {
Type::BOOLEAN => SortOrder::UNSIGNED,
Type::INT32 | Type::INT64 => SortOrder::SIGNED,
Type::INT96 => SortOrder::UNDEFINED,
Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
}
}
pub fn sort_order(&self) -> SortOrder {
match *self {
ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
ColumnOrder::UNDEFINED => SortOrder::SIGNED,
}
}
}
impl fmt::Display for Type {
    /// Writes the same text as the derived `Debug` representation, i.e. the
    /// variant name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for ConvertedType {
    /// Writes the same text as the derived `Debug` representation, i.e. the
    /// variant name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for Repetition {
    /// Writes the same text as the derived `Debug` representation, i.e. the
    /// variant name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for Encoding {
    /// Writes the same text as the derived `Debug` representation, i.e. the
    /// variant name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for Compression {
    /// Writes the same text as the derived `Debug` representation, including
    /// the `Debug` form of any level payload.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for PageType {
    /// Writes the same text as the derived `Debug` representation, i.e. the
    /// variant name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for SortOrder {
    /// Writes the same text as the derived `Debug` representation, i.e. the
    /// variant name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl fmt::Display for ColumnOrder {
    /// Writes the same text as the derived `Debug` representation, including
    /// the wrapped sort order for `TYPE_DEFINED_ORDER`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
impl TryFrom<parquet::Type> for Type {
    type Error = ParquetError;

    /// Converts a thrift physical type into [`Type`].
    ///
    /// # Errors
    /// Returns a general error carrying the raw value when it matches none of
    /// the known physical types.
    fn try_from(value: parquet::Type) -> Result<Self> {
        match value {
            parquet::Type::BOOLEAN => Ok(Self::BOOLEAN),
            parquet::Type::INT32 => Ok(Self::INT32),
            parquet::Type::INT64 => Ok(Self::INT64),
            parquet::Type::INT96 => Ok(Self::INT96),
            parquet::Type::FLOAT => Ok(Self::FLOAT),
            parquet::Type::DOUBLE => Ok(Self::DOUBLE),
            parquet::Type::BYTE_ARRAY => Ok(Self::BYTE_ARRAY),
            parquet::Type::FIXED_LEN_BYTE_ARRAY => Ok(Self::FIXED_LEN_BYTE_ARRAY),
            _ => Err(general_err!("unexpected parquet type: {}", value.0)),
        }
    }
}
impl From<Type> for parquet::Type {
    /// Converts a [`Type`] into the same-named thrift physical type.
    fn from(value: Type) -> Self {
        match value {
            Type::BOOLEAN => Self::BOOLEAN,
            Type::INT32 => Self::INT32,
            Type::INT64 => Self::INT64,
            Type::INT96 => Self::INT96,
            Type::FLOAT => Self::FLOAT,
            Type::DOUBLE => Self::DOUBLE,
            Type::BYTE_ARRAY => Self::BYTE_ARRAY,
            Type::FIXED_LEN_BYTE_ARRAY => Self::FIXED_LEN_BYTE_ARRAY,
        }
    }
}
impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
    type Error = ParquetError;

    /// Converts an optional thrift converted type into [`ConvertedType`],
    /// mapping `None` to [`ConvertedType::NONE`].
    ///
    /// # Errors
    /// Returns a general error carrying the raw value when it matches none of
    /// the known converted types.
    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
        let value = match option {
            Some(value) => value,
            None => return Ok(Self::NONE),
        };

        match value {
            parquet::ConvertedType::UTF8 => Ok(Self::UTF8),
            parquet::ConvertedType::MAP => Ok(Self::MAP),
            parquet::ConvertedType::MAP_KEY_VALUE => Ok(Self::MAP_KEY_VALUE),
            parquet::ConvertedType::LIST => Ok(Self::LIST),
            parquet::ConvertedType::ENUM => Ok(Self::ENUM),
            parquet::ConvertedType::DECIMAL => Ok(Self::DECIMAL),
            parquet::ConvertedType::DATE => Ok(Self::DATE),
            parquet::ConvertedType::TIME_MILLIS => Ok(Self::TIME_MILLIS),
            parquet::ConvertedType::TIME_MICROS => Ok(Self::TIME_MICROS),
            parquet::ConvertedType::TIMESTAMP_MILLIS => Ok(Self::TIMESTAMP_MILLIS),
            parquet::ConvertedType::TIMESTAMP_MICROS => Ok(Self::TIMESTAMP_MICROS),
            parquet::ConvertedType::UINT_8 => Ok(Self::UINT_8),
            parquet::ConvertedType::UINT_16 => Ok(Self::UINT_16),
            parquet::ConvertedType::UINT_32 => Ok(Self::UINT_32),
            parquet::ConvertedType::UINT_64 => Ok(Self::UINT_64),
            parquet::ConvertedType::INT_8 => Ok(Self::INT_8),
            parquet::ConvertedType::INT_16 => Ok(Self::INT_16),
            parquet::ConvertedType::INT_32 => Ok(Self::INT_32),
            parquet::ConvertedType::INT_64 => Ok(Self::INT_64),
            parquet::ConvertedType::JSON => Ok(Self::JSON),
            parquet::ConvertedType::BSON => Ok(Self::BSON),
            parquet::ConvertedType::INTERVAL => Ok(Self::INTERVAL),
            _ => Err(general_err!(
                "unexpected parquet converted type: {}",
                value.0
            )),
        }
    }
}
impl From<ConvertedType> for Option<parquet::ConvertedType> {
    /// Converts a [`ConvertedType`] into the optional thrift converted type;
    /// [`ConvertedType::NONE`] becomes `None`, everything else `Some(_)`.
    fn from(value: ConvertedType) -> Self {
        let thrift = match value {
            ConvertedType::NONE => return None,
            ConvertedType::UTF8 => parquet::ConvertedType::UTF8,
            ConvertedType::MAP => parquet::ConvertedType::MAP,
            ConvertedType::MAP_KEY_VALUE => parquet::ConvertedType::MAP_KEY_VALUE,
            ConvertedType::LIST => parquet::ConvertedType::LIST,
            ConvertedType::ENUM => parquet::ConvertedType::ENUM,
            ConvertedType::DECIMAL => parquet::ConvertedType::DECIMAL,
            ConvertedType::DATE => parquet::ConvertedType::DATE,
            ConvertedType::TIME_MILLIS => parquet::ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MICROS => parquet::ConvertedType::TIME_MICROS,
            ConvertedType::TIMESTAMP_MILLIS => parquet::ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MICROS => parquet::ConvertedType::TIMESTAMP_MICROS,
            ConvertedType::UINT_8 => parquet::ConvertedType::UINT_8,
            ConvertedType::UINT_16 => parquet::ConvertedType::UINT_16,
            ConvertedType::UINT_32 => parquet::ConvertedType::UINT_32,
            ConvertedType::UINT_64 => parquet::ConvertedType::UINT_64,
            ConvertedType::INT_8 => parquet::ConvertedType::INT_8,
            ConvertedType::INT_16 => parquet::ConvertedType::INT_16,
            ConvertedType::INT_32 => parquet::ConvertedType::INT_32,
            ConvertedType::INT_64 => parquet::ConvertedType::INT_64,
            ConvertedType::JSON => parquet::ConvertedType::JSON,
            ConvertedType::BSON => parquet::ConvertedType::BSON,
            ConvertedType::INTERVAL => parquet::ConvertedType::INTERVAL,
        };
        Some(thrift)
    }
}
impl From<parquet::LogicalType> for LogicalType {
fn from(value: parquet::LogicalType) -> Self {
match value {
parquet::LogicalType::STRING(_) => LogicalType::String,
parquet::LogicalType::MAP(_) => LogicalType::Map,
parquet::LogicalType::LIST(_) => LogicalType::List,
parquet::LogicalType::ENUM(_) => LogicalType::Enum,
parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
scale: t.scale,
precision: t.precision,
},
parquet::LogicalType::DATE(_) => LogicalType::Date,
parquet::LogicalType::TIME(t) => LogicalType::Time {
is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
unit: t.unit,
},
parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
unit: t.unit,
},
parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
bit_width: t.bit_width,
is_signed: t.is_signed,
},
parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
parquet::LogicalType::JSON(_) => LogicalType::Json,
parquet::LogicalType::BSON(_) => LogicalType::Bson,
parquet::LogicalType::UUID(_) => LogicalType::Uuid,
parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
}
}
}
impl From<LogicalType> for parquet::LogicalType {
fn from(value: LogicalType) -> Self {
match value {
LogicalType::String => parquet::LogicalType::STRING(Default::default()),
LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
LogicalType::List => parquet::LogicalType::LIST(Default::default()),
LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
LogicalType::Decimal { scale, precision } => {
parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
}
LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
LogicalType::Time {
is_adjusted_to_u_t_c,
unit,
} => parquet::LogicalType::TIME(TimeType {
is_adjusted_to_u_t_c,
unit,
}),
LogicalType::Timestamp {
is_adjusted_to_u_t_c,
unit,
} => parquet::LogicalType::TIMESTAMP(TimestampType {
is_adjusted_to_u_t_c,
unit,
}),
LogicalType::Integer {
bit_width,
is_signed,
} => parquet::LogicalType::INTEGER(IntType {
bit_width,
is_signed,
}),
LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
}
}
}
impl From<Option<LogicalType>> for ConvertedType {
fn from(value: Option<LogicalType>) -> Self {
match value {
Some(value) => match value {
LogicalType::String => ConvertedType::UTF8,
LogicalType::Map => ConvertedType::MAP,
LogicalType::List => ConvertedType::LIST,
LogicalType::Enum => ConvertedType::ENUM,
LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
LogicalType::Date => ConvertedType::DATE,
LogicalType::Time { unit, .. } => match unit {
TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
TimeUnit::NANOS(_) => ConvertedType::NONE,
},
LogicalType::Timestamp { unit, .. } => match unit {
TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
TimeUnit::NANOS(_) => ConvertedType::NONE,
},
LogicalType::Integer {
bit_width,
is_signed,
} => match (bit_width, is_signed) {
(8, true) => ConvertedType::INT_8,
(16, true) => ConvertedType::INT_16,
(32, true) => ConvertedType::INT_32,
(64, true) => ConvertedType::INT_64,
(8, false) => ConvertedType::UINT_8,
(16, false) => ConvertedType::UINT_16,
(32, false) => ConvertedType::UINT_32,
(64, false) => ConvertedType::UINT_64,
t => panic!("Integer type {t:?} is not supported"),
},
LogicalType::Json => ConvertedType::JSON,
LogicalType::Bson => ConvertedType::BSON,
LogicalType::Uuid | LogicalType::Float16 | LogicalType::Unknown => {
ConvertedType::NONE
}
},
None => ConvertedType::NONE,
}
}
}
impl TryFrom<parquet::FieldRepetitionType> for Repetition {
    type Error = ParquetError;

    /// Converts a thrift field repetition type into [`Repetition`].
    ///
    /// # Errors
    /// Returns a general error carrying the raw value when it matches none of
    /// the known repetition types.
    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
        match value {
            parquet::FieldRepetitionType::REQUIRED => Ok(Self::REQUIRED),
            parquet::FieldRepetitionType::OPTIONAL => Ok(Self::OPTIONAL),
            parquet::FieldRepetitionType::REPEATED => Ok(Self::REPEATED),
            _ => Err(general_err!(
                "unexpected parquet repetition type: {}",
                value.0
            )),
        }
    }
}
impl From<Repetition> for parquet::FieldRepetitionType {
    /// Converts a [`Repetition`] into the same-named thrift repetition type.
    fn from(value: Repetition) -> Self {
        match value {
            Repetition::REQUIRED => Self::REQUIRED,
            Repetition::OPTIONAL => Self::OPTIONAL,
            Repetition::REPEATED => Self::REPEATED,
        }
    }
}
impl TryFrom<parquet::Encoding> for Encoding {
    type Error = ParquetError;

    /// Converts a thrift encoding into [`Encoding`].
    ///
    /// # Errors
    /// Returns a general error carrying the raw value when it matches none of
    /// the known encodings.
    fn try_from(value: parquet::Encoding) -> Result<Self> {
        match value {
            parquet::Encoding::PLAIN => Ok(Self::PLAIN),
            parquet::Encoding::PLAIN_DICTIONARY => Ok(Self::PLAIN_DICTIONARY),
            parquet::Encoding::RLE => Ok(Self::RLE),
            // The deprecated variant must still be convertible.
            #[allow(deprecated)]
            parquet::Encoding::BIT_PACKED => Ok(Self::BIT_PACKED),
            parquet::Encoding::DELTA_BINARY_PACKED => Ok(Self::DELTA_BINARY_PACKED),
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Ok(Self::DELTA_LENGTH_BYTE_ARRAY),
            parquet::Encoding::DELTA_BYTE_ARRAY => Ok(Self::DELTA_BYTE_ARRAY),
            parquet::Encoding::RLE_DICTIONARY => Ok(Self::RLE_DICTIONARY),
            parquet::Encoding::BYTE_STREAM_SPLIT => Ok(Self::BYTE_STREAM_SPLIT),
            _ => Err(general_err!("unexpected parquet encoding: {}", value.0)),
        }
    }
}
impl From<Encoding> for parquet::Encoding {
    /// Converts an [`Encoding`] into the same-named thrift encoding.
    fn from(value: Encoding) -> Self {
        match value {
            Encoding::PLAIN => Self::PLAIN,
            Encoding::PLAIN_DICTIONARY => Self::PLAIN_DICTIONARY,
            Encoding::RLE => Self::RLE,
            // The deprecated variant must still be convertible.
            #[allow(deprecated)]
            Encoding::BIT_PACKED => Self::BIT_PACKED,
            Encoding::DELTA_BINARY_PACKED => Self::DELTA_BINARY_PACKED,
            Encoding::DELTA_LENGTH_BYTE_ARRAY => Self::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY => Self::DELTA_BYTE_ARRAY,
            Encoding::RLE_DICTIONARY => Self::RLE_DICTIONARY,
            Encoding::BYTE_STREAM_SPLIT => Self::BYTE_STREAM_SPLIT,
        }
    }
}
impl TryFrom<parquet::CompressionCodec> for Compression {
    type Error = ParquetError;

    /// Converts a thrift compression codec into [`Compression`]. Codecs that
    /// carry a level (`GZIP`, `BROTLI`, `ZSTD`) receive the level type's
    /// `Default` value, since the thrift codec holds no level.
    ///
    /// # Errors
    /// Returns a general error carrying the raw value when it matches none of
    /// the known codecs.
    fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
        match value {
            parquet::CompressionCodec::UNCOMPRESSED => Ok(Self::UNCOMPRESSED),
            parquet::CompressionCodec::SNAPPY => Ok(Self::SNAPPY),
            parquet::CompressionCodec::GZIP => Ok(Self::GZIP(Default::default())),
            parquet::CompressionCodec::LZO => Ok(Self::LZO),
            parquet::CompressionCodec::BROTLI => Ok(Self::BROTLI(Default::default())),
            parquet::CompressionCodec::LZ4 => Ok(Self::LZ4),
            parquet::CompressionCodec::ZSTD => Ok(Self::ZSTD(Default::default())),
            parquet::CompressionCodec::LZ4_RAW => Ok(Self::LZ4_RAW),
            _ => Err(general_err!(
                "unexpected parquet compression codec: {}",
                value.0
            )),
        }
    }
}
impl From<Compression> for parquet::CompressionCodec {
    /// Converts a [`Compression`] into the thrift codec. Any compression level
    /// is discarded because the thrift codec has no place for it.
    fn from(value: Compression) -> Self {
        match value {
            Compression::UNCOMPRESSED => Self::UNCOMPRESSED,
            Compression::SNAPPY => Self::SNAPPY,
            Compression::GZIP(_) => Self::GZIP,
            Compression::LZO => Self::LZO,
            Compression::BROTLI(_) => Self::BROTLI,
            Compression::LZ4 => Self::LZ4,
            Compression::ZSTD(_) => Self::ZSTD,
            Compression::LZ4_RAW => Self::LZ4_RAW,
        }
    }
}
impl TryFrom<parquet::PageType> for PageType {
    type Error = ParquetError;

    /// Converts a thrift page type into [`PageType`].
    ///
    /// # Errors
    /// Returns a general error carrying the raw value when it matches none of
    /// the known page types.
    fn try_from(value: parquet::PageType) -> Result<Self> {
        match value {
            parquet::PageType::DATA_PAGE => Ok(Self::DATA_PAGE),
            parquet::PageType::INDEX_PAGE => Ok(Self::INDEX_PAGE),
            parquet::PageType::DICTIONARY_PAGE => Ok(Self::DICTIONARY_PAGE),
            parquet::PageType::DATA_PAGE_V2 => Ok(Self::DATA_PAGE_V2),
            _ => Err(general_err!("unexpected parquet page type: {}", value.0)),
        }
    }
}
impl From<PageType> for parquet::PageType {
    /// Converts a [`PageType`] into the same-named thrift page type.
    fn from(value: PageType) -> Self {
        match value {
            PageType::DATA_PAGE => Self::DATA_PAGE,
            PageType::INDEX_PAGE => Self::INDEX_PAGE,
            PageType::DICTIONARY_PAGE => Self::DICTIONARY_PAGE,
            PageType::DATA_PAGE_V2 => Self::DATA_PAGE_V2,
        }
    }
}
impl str::FromStr for Repetition {
    type Err = ParquetError;

    /// Parses the exact upper-case variant name (`"REQUIRED"`, `"OPTIONAL"`
    /// or `"REPEATED"`).
    ///
    /// # Errors
    /// Returns a general error quoting the input for any other string.
    fn from_str(s: &str) -> Result<Self> {
        let repetition = match s {
            "REQUIRED" => Self::REQUIRED,
            "OPTIONAL" => Self::OPTIONAL,
            "REPEATED" => Self::REPEATED,
            other => return Err(general_err!("Invalid parquet repetition {}", other)),
        };
        Ok(repetition)
    }
}
impl str::FromStr for Type {
    type Err = ParquetError;

    /// Parses the exact upper-case variant name. `"BINARY"` is accepted as an
    /// additional spelling of [`Type::BYTE_ARRAY`].
    ///
    /// # Errors
    /// Returns a general error quoting the input for any other string.
    fn from_str(s: &str) -> Result<Self> {
        let physical = match s {
            "BOOLEAN" => Self::BOOLEAN,
            "INT32" => Self::INT32,
            "INT64" => Self::INT64,
            "INT96" => Self::INT96,
            "FLOAT" => Self::FLOAT,
            "DOUBLE" => Self::DOUBLE,
            "BYTE_ARRAY" | "BINARY" => Self::BYTE_ARRAY,
            "FIXED_LEN_BYTE_ARRAY" => Self::FIXED_LEN_BYTE_ARRAY,
            other => return Err(general_err!("Invalid parquet type {}", other)),
        };
        Ok(physical)
    }
}
impl str::FromStr for ConvertedType {
    type Err = ParquetError;

    /// Parses the exact upper-case variant name, including `"NONE"`.
    ///
    /// # Errors
    /// Returns a general error quoting the input for any other string.
    fn from_str(s: &str) -> Result<Self> {
        let converted = match s {
            "NONE" => Self::NONE,
            "UTF8" => Self::UTF8,
            "MAP" => Self::MAP,
            "MAP_KEY_VALUE" => Self::MAP_KEY_VALUE,
            "LIST" => Self::LIST,
            "ENUM" => Self::ENUM,
            "DECIMAL" => Self::DECIMAL,
            "DATE" => Self::DATE,
            "TIME_MILLIS" => Self::TIME_MILLIS,
            "TIME_MICROS" => Self::TIME_MICROS,
            "TIMESTAMP_MILLIS" => Self::TIMESTAMP_MILLIS,
            "TIMESTAMP_MICROS" => Self::TIMESTAMP_MICROS,
            "UINT_8" => Self::UINT_8,
            "UINT_16" => Self::UINT_16,
            "UINT_32" => Self::UINT_32,
            "UINT_64" => Self::UINT_64,
            "INT_8" => Self::INT_8,
            "INT_16" => Self::INT_16,
            "INT_32" => Self::INT_32,
            "INT_64" => Self::INT_64,
            "JSON" => Self::JSON,
            "BSON" => Self::BSON,
            "INTERVAL" => Self::INTERVAL,
            other => return Err(general_err!("Invalid parquet converted type {}", other)),
        };
        Ok(converted)
    }
}
impl str::FromStr for LogicalType {
type Err = ParquetError;
fn from_str(s: &str) -> Result<Self> {
match s {
"INTEGER" => Ok(LogicalType::Integer {
bit_width: 8,
is_signed: false,
}),
"MAP" => Ok(LogicalType::Map),
"LIST" => Ok(LogicalType::List),
"ENUM" => Ok(LogicalType::Enum),
"DECIMAL" => Ok(LogicalType::Decimal {
precision: -1,
scale: -1,
}),
"DATE" => Ok(LogicalType::Date),
"TIME" => Ok(LogicalType::Time {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
}),
"TIMESTAMP" => Ok(LogicalType::Timestamp {
is_adjusted_to_u_t_c: false,
unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
}),
"STRING" => Ok(LogicalType::String),
"JSON" => Ok(LogicalType::Json),
"BSON" => Ok(LogicalType::Bson),
"UUID" => Ok(LogicalType::Uuid),
"UNKNOWN" => Ok(LogicalType::Unknown),
"INTERVAL" => Err(general_err!(
"Interval parquet logical type not yet supported"
)),
"FLOAT16" => Ok(LogicalType::Float16),
other => Err(general_err!("Invalid parquet logical type {}", other)),
}
}
}
#[cfg(test)]
#[allow(deprecated)] mod tests {
use super::*;
#[test]
fn test_display_type() {
assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
assert_eq!(Type::INT32.to_string(), "INT32");
assert_eq!(Type::INT64.to_string(), "INT64");
assert_eq!(Type::INT96.to_string(), "INT96");
assert_eq!(Type::FLOAT.to_string(), "FLOAT");
assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
assert_eq!(
Type::FIXED_LEN_BYTE_ARRAY.to_string(),
"FIXED_LEN_BYTE_ARRAY"
);
}
#[test]
fn test_from_type() {
assert_eq!(
Type::try_from(parquet::Type::BOOLEAN).unwrap(),
Type::BOOLEAN
);
assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
assert_eq!(
Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
Type::BYTE_ARRAY
);
assert_eq!(
Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
Type::FIXED_LEN_BYTE_ARRAY
);
}
#[test]
fn test_into_type() {
assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
assert_eq!(parquet::Type::INT32, Type::INT32.into());
assert_eq!(parquet::Type::INT64, Type::INT64.into());
assert_eq!(parquet::Type::INT96, Type::INT96.into());
assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
assert_eq!(
parquet::Type::FIXED_LEN_BYTE_ARRAY,
Type::FIXED_LEN_BYTE_ARRAY.into()
);
}
#[test]
fn test_from_string_into_type() {
assert_eq!(
Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
Type::BOOLEAN
);
assert_eq!(
Type::INT32.to_string().parse::<Type>().unwrap(),
Type::INT32
);
assert_eq!(
Type::INT64.to_string().parse::<Type>().unwrap(),
Type::INT64
);
assert_eq!(
Type::INT96.to_string().parse::<Type>().unwrap(),
Type::INT96
);
assert_eq!(
Type::FLOAT.to_string().parse::<Type>().unwrap(),
Type::FLOAT
);
assert_eq!(
Type::DOUBLE.to_string().parse::<Type>().unwrap(),
Type::DOUBLE
);
assert_eq!(
Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
Type::BYTE_ARRAY
);
assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
assert_eq!(
Type::FIXED_LEN_BYTE_ARRAY
.to_string()
.parse::<Type>()
.unwrap(),
Type::FIXED_LEN_BYTE_ARRAY
);
}
#[test]
fn test_display_converted_type() {
assert_eq!(ConvertedType::NONE.to_string(), "NONE");
assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
assert_eq!(ConvertedType::MAP.to_string(), "MAP");
assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
assert_eq!(ConvertedType::LIST.to_string(), "LIST");
assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
assert_eq!(ConvertedType::DATE.to_string(), "DATE");
assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
assert_eq!(ConvertedType::DATE.to_string(), "DATE");
assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
assert_eq!(
ConvertedType::TIMESTAMP_MILLIS.to_string(),
"TIMESTAMP_MILLIS"
);
assert_eq!(
ConvertedType::TIMESTAMP_MICROS.to_string(),
"TIMESTAMP_MICROS"
);
assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
assert_eq!(ConvertedType::JSON.to_string(), "JSON");
assert_eq!(ConvertedType::BSON.to_string(), "BSON");
assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
}
#[test]
fn test_from_converted_type() {
let parquet_conv_none: Option<parquet::ConvertedType> = None;
assert_eq!(
ConvertedType::try_from(parquet_conv_none).unwrap(),
ConvertedType::NONE
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(),
ConvertedType::UTF8
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(),
ConvertedType::MAP
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(),
ConvertedType::MAP_KEY_VALUE
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(),
ConvertedType::LIST
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(),
ConvertedType::ENUM
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
ConvertedType::DECIMAL
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(),
ConvertedType::DATE
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(),
ConvertedType::TIME_MILLIS
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(),
ConvertedType::TIME_MICROS
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)).unwrap(),
ConvertedType::TIMESTAMP_MILLIS
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)).unwrap(),
ConvertedType::TIMESTAMP_MICROS
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(),
ConvertedType::UINT_8
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(),
ConvertedType::UINT_16
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(),
ConvertedType::UINT_32
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(),
ConvertedType::UINT_64
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(),
ConvertedType::INT_8
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(),
ConvertedType::INT_16
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(),
ConvertedType::INT_32
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(),
ConvertedType::INT_64
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(),
ConvertedType::JSON
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(),
ConvertedType::BSON
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(),
ConvertedType::INTERVAL
);
assert_eq!(
ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
ConvertedType::DECIMAL
)
}
#[test]
fn test_into_converted_type() {
let converted_type: Option<parquet::ConvertedType> = None;
assert_eq!(converted_type, ConvertedType::NONE.into());
assert_eq!(
Some(parquet::ConvertedType::UTF8),
ConvertedType::UTF8.into()
);
assert_eq!(Some(parquet::ConvertedType::MAP), ConvertedType::MAP.into());
assert_eq!(
Some(parquet::ConvertedType::MAP_KEY_VALUE),
ConvertedType::MAP_KEY_VALUE.into()
);
assert_eq!(
Some(parquet::ConvertedType::LIST),
ConvertedType::LIST.into()
);
assert_eq!(
Some(parquet::ConvertedType::ENUM),
ConvertedType::ENUM.into()
);
assert_eq!(
Some(parquet::ConvertedType::DECIMAL),
ConvertedType::DECIMAL.into()
);
assert_eq!(
Some(parquet::ConvertedType::DATE),
ConvertedType::DATE.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIME_MILLIS),
ConvertedType::TIME_MILLIS.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIME_MICROS),
ConvertedType::TIME_MICROS.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
ConvertedType::TIMESTAMP_MILLIS.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIMESTAMP_MICROS),
ConvertedType::TIMESTAMP_MICROS.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_8),
ConvertedType::UINT_8.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_16),
ConvertedType::UINT_16.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_32),
ConvertedType::UINT_32.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_64),
ConvertedType::UINT_64.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_8),
ConvertedType::INT_8.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_16),
ConvertedType::INT_16.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_32),
ConvertedType::INT_32.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_64),
ConvertedType::INT_64.into()
);
assert_eq!(
Some(parquet::ConvertedType::JSON),
ConvertedType::JSON.into()
);
assert_eq!(
Some(parquet::ConvertedType::BSON),
ConvertedType::BSON.into()
);
assert_eq!(
Some(parquet::ConvertedType::INTERVAL),
ConvertedType::INTERVAL.into()
);
assert_eq!(
Some(parquet::ConvertedType::DECIMAL),
ConvertedType::DECIMAL.into()
)
}
/// Checks that every `ConvertedType` variant survives a
/// `to_string()` -> `parse::<ConvertedType>()` round trip unchanged.
#[test]
fn test_from_string_into_converted_type() {
    // Each variant is listed exactly once; the hand-unrolled version
    // checked DECIMAL twice.
    let variants = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];
    for variant in variants {
        let rendered = variant.to_string();
        let reparsed = rendered.parse::<ConvertedType>().unwrap();
        assert_eq!(reparsed, variant, "round trip failed for {}", rendered);
    }
}
/// Checks the `Option<LogicalType>` -> `ConvertedType` conversion for an
/// absent logical type and for every `LogicalType` variant.
#[test]
fn test_logical_to_converted_type() {
    // Small constructors keep the expectation table readable.
    fn time(unit: TimeUnit, is_adjusted_to_u_t_c: bool) -> LogicalType {
        LogicalType::Time {
            unit,
            is_adjusted_to_u_t_c,
        }
    }
    fn timestamp(unit: TimeUnit, is_adjusted_to_u_t_c: bool) -> LogicalType {
        LogicalType::Timestamp {
            unit,
            is_adjusted_to_u_t_c,
        }
    }
    fn integer(bit_width: i8, is_signed: bool) -> LogicalType {
        LogicalType::Integer {
            bit_width,
            is_signed,
        }
    }

    // (input logical type, expected converted type), in the original order.
    let expectations: Vec<(Option<LogicalType>, ConvertedType)> = vec![
        (None, ConvertedType::NONE),
        (
            Some(LogicalType::Decimal {
                precision: 20,
                scale: 5,
            }),
            ConvertedType::DECIMAL,
        ),
        (Some(LogicalType::Bson), ConvertedType::BSON),
        (Some(LogicalType::Json), ConvertedType::JSON),
        (Some(LogicalType::String), ConvertedType::UTF8),
        (Some(LogicalType::Date), ConvertedType::DATE),
        (
            Some(time(TimeUnit::MILLIS(Default::default()), true)),
            ConvertedType::TIME_MILLIS,
        ),
        (
            Some(time(TimeUnit::MICROS(Default::default()), true)),
            ConvertedType::TIME_MICROS,
        ),
        (
            Some(time(TimeUnit::NANOS(Default::default()), false)),
            ConvertedType::NONE,
        ),
        (
            Some(timestamp(TimeUnit::MILLIS(Default::default()), true)),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            Some(timestamp(TimeUnit::MICROS(Default::default()), false)),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (
            Some(timestamp(TimeUnit::NANOS(Default::default()), false)),
            ConvertedType::NONE,
        ),
        (Some(integer(8, false)), ConvertedType::UINT_8),
        (Some(integer(8, true)), ConvertedType::INT_8),
        (Some(integer(16, false)), ConvertedType::UINT_16),
        (Some(integer(16, true)), ConvertedType::INT_16),
        (Some(integer(32, false)), ConvertedType::UINT_32),
        (Some(integer(32, true)), ConvertedType::INT_32),
        (Some(integer(64, false)), ConvertedType::UINT_64),
        (Some(integer(64, true)), ConvertedType::INT_64),
        (Some(LogicalType::List), ConvertedType::LIST),
        (Some(LogicalType::Map), ConvertedType::MAP),
        (Some(LogicalType::Uuid), ConvertedType::NONE),
        (Some(LogicalType::Enum), ConvertedType::ENUM),
        (Some(LogicalType::Float16), ConvertedType::NONE),
        (Some(LogicalType::Unknown), ConvertedType::NONE),
    ];

    for (logical, converted) in expectations {
        assert_eq!(ConvertedType::from(logical), converted);
    }
}
/// Checks the `to_string()` rendering of each `Repetition` variant.
#[test]
fn test_display_repetition() {
    let rendered = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];
    for (repetition, text) in rendered {
        assert_eq!(repetition.to_string(), text);
    }
}
/// Checks `Repetition::try_from` for each `parquet::FieldRepetitionType`
/// constant exercised here.
#[test]
fn test_from_repetition() {
    // (thrift-level value, expected crate-level value)
    let pairs = [
        (
            parquet::FieldRepetitionType::REQUIRED,
            Repetition::REQUIRED,
        ),
        (
            parquet::FieldRepetitionType::OPTIONAL,
            Repetition::OPTIONAL,
        ),
        (
            parquet::FieldRepetitionType::REPEATED,
            Repetition::REPEATED,
        ),
    ];
    for (thrift, native) in pairs {
        let converted = Repetition::try_from(thrift).unwrap();
        assert_eq!(converted, native);
    }
}
/// Checks that each `Repetition` variant converts (via `into()`) to the
/// matching `parquet::FieldRepetitionType` constant.
#[test]
fn test_into_repetition() {
    // (crate-level value, expected thrift-level value)
    let pairs = [
        (
            Repetition::REQUIRED,
            parquet::FieldRepetitionType::REQUIRED,
        ),
        (
            Repetition::OPTIONAL,
            parquet::FieldRepetitionType::OPTIONAL,
        ),
        (
            Repetition::REPEATED,
            parquet::FieldRepetitionType::REPEATED,
        ),
    ];
    for (native, thrift) in pairs {
        let converted: parquet::FieldRepetitionType = native.into();
        assert_eq!(thrift, converted);
    }
}
/// Checks that each `Repetition` variant survives a
/// `to_string()` -> `parse::<Repetition>()` round trip.
#[test]
fn test_from_string_into_repetition() {
    let variants = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];
    for variant in variants {
        let text = variant.to_string();
        let reparsed = text.parse::<Repetition>().unwrap();
        assert_eq!(reparsed, variant);
    }
}
/// Checks the `to_string()` rendering of every `Encoding` variant,
/// including `BYTE_STREAM_SPLIT`, which was previously left out.
#[test]
fn test_display_encoding() {
    let rendered = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        // BIT_PACKED is deprecated but must still render correctly.
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
        (Encoding::BYTE_STREAM_SPLIT, "BYTE_STREAM_SPLIT"),
    ];
    for (encoding, text) in rendered {
        assert_eq!(encoding.to_string(), text);
    }
}
/// Checks `Encoding::try_from` for every `parquet::Encoding` constant that
/// has a crate-level counterpart, including `RLE_DICTIONARY` and
/// `BYTE_STREAM_SPLIT`, which were previously not covered.
#[test]
fn test_from_encoding() {
    // (thrift-level value, expected crate-level value)
    let pairs = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (
            parquet::Encoding::PLAIN_DICTIONARY,
            Encoding::PLAIN_DICTIONARY,
        ),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (
            parquet::Encoding::DELTA_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY,
        ),
        (parquet::Encoding::RLE_DICTIONARY, Encoding::RLE_DICTIONARY),
        (
            parquet::Encoding::BYTE_STREAM_SPLIT,
            Encoding::BYTE_STREAM_SPLIT,
        ),
    ];
    for (thrift, native) in pairs {
        assert_eq!(Encoding::try_from(thrift).unwrap(), native);
    }
}
/// Checks that every `Encoding` variant converts (via `into()`) to the
/// matching `parquet::Encoding` constant, including `RLE_DICTIONARY` and
/// `BYTE_STREAM_SPLIT`, which were previously not covered.
#[test]
fn test_into_encoding() {
    // (crate-level value, expected thrift-level value)
    let pairs = [
        (Encoding::PLAIN, parquet::Encoding::PLAIN),
        (
            Encoding::PLAIN_DICTIONARY,
            parquet::Encoding::PLAIN_DICTIONARY,
        ),
        (Encoding::RLE, parquet::Encoding::RLE),
        (Encoding::BIT_PACKED, parquet::Encoding::BIT_PACKED),
        (
            Encoding::DELTA_BINARY_PACKED,
            parquet::Encoding::DELTA_BINARY_PACKED,
        ),
        (
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (
            Encoding::DELTA_BYTE_ARRAY,
            parquet::Encoding::DELTA_BYTE_ARRAY,
        ),
        (Encoding::RLE_DICTIONARY, parquet::Encoding::RLE_DICTIONARY),
        (
            Encoding::BYTE_STREAM_SPLIT,
            parquet::Encoding::BYTE_STREAM_SPLIT,
        ),
    ];
    for (native, thrift) in pairs {
        let converted: parquet::Encoding = native.into();
        assert_eq!(thrift, converted);
    }
}
/// Checks the `to_string()` rendering of every `Compression` codec,
/// using default levels for the codecs that carry one. `LZ4_RAW` was
/// previously not covered.
#[test]
fn test_display_compression() {
    let rendered = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(Default::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(Default::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(Default::default()), "ZSTD(ZstdLevel(1))"),
        (Compression::LZ4_RAW, "LZ4_RAW"),
    ];
    for (codec, text) in rendered {
        assert_eq!(codec.to_string(), text);
    }
}
/// Checks `Compression::try_from` for every `parquet::CompressionCodec`
/// constant; codecs that carry a level are expected to come back with the
/// default level. `LZ4_RAW` was previously not covered.
#[test]
fn test_from_compression() {
    // (thrift-level codec, expected crate-level codec)
    let pairs = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4_RAW, Compression::LZ4_RAW),
    ];
    for (thrift, native) in pairs {
        assert_eq!(Compression::try_from(thrift).unwrap(), native);
    }
}
/// Checks that every `Compression` codec converts (via `into()`) to the
/// matching `parquet::CompressionCodec` constant. `LZ4_RAW` was previously
/// not covered.
#[test]
fn test_into_compression() {
    // (crate-level codec, expected thrift-level codec)
    let pairs = [
        (
            Compression::UNCOMPRESSED,
            parquet::CompressionCodec::UNCOMPRESSED,
        ),
        (Compression::SNAPPY, parquet::CompressionCodec::SNAPPY),
        (
            Compression::GZIP(Default::default()),
            parquet::CompressionCodec::GZIP,
        ),
        (Compression::LZO, parquet::CompressionCodec::LZO),
        (
            Compression::BROTLI(Default::default()),
            parquet::CompressionCodec::BROTLI,
        ),
        (Compression::LZ4, parquet::CompressionCodec::LZ4),
        (
            Compression::ZSTD(Default::default()),
            parquet::CompressionCodec::ZSTD,
        ),
        (Compression::LZ4_RAW, parquet::CompressionCodec::LZ4_RAW),
    ];
    for (native, thrift) in pairs {
        let converted: parquet::CompressionCodec = native.into();
        assert_eq!(thrift, converted);
    }
}
/// Checks the `to_string()` rendering of each `PageType` variant.
#[test]
fn test_display_page_type() {
    let rendered = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];
    for (page_type, text) in rendered {
        assert_eq!(page_type.to_string(), text);
    }
}
/// Checks `PageType::try_from` for each `parquet::PageType` constant
/// exercised here.
#[test]
fn test_from_page_type() {
    // (thrift-level value, expected crate-level value)
    let pairs = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (
            parquet::PageType::DICTIONARY_PAGE,
            PageType::DICTIONARY_PAGE,
        ),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];
    for (thrift, native) in pairs {
        let converted = PageType::try_from(thrift).unwrap();
        assert_eq!(converted, native);
    }
}
/// Checks that each `PageType` variant converts (via `into()`) to the
/// matching `parquet::PageType` constant.
#[test]
fn test_into_page_type() {
    // (crate-level value, expected thrift-level value)
    let pairs = [
        (PageType::DATA_PAGE, parquet::PageType::DATA_PAGE),
        (PageType::INDEX_PAGE, parquet::PageType::INDEX_PAGE),
        (
            PageType::DICTIONARY_PAGE,
            parquet::PageType::DICTIONARY_PAGE,
        ),
        (PageType::DATA_PAGE_V2, parquet::PageType::DATA_PAGE_V2),
    ];
    for (native, thrift) in pairs {
        let converted: parquet::PageType = native.into();
        assert_eq!(thrift, converted);
    }
}
/// Checks the `to_string()` rendering of each `SortOrder` variant.
#[test]
fn test_display_sort_order() {
    let rendered = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (order, text) in rendered {
        assert_eq!(order.to_string(), text);
    }
}
/// Checks the `to_string()` rendering of `ColumnOrder`, both for
/// `TYPE_DEFINED_ORDER` wrapping each sort order and for `UNDEFINED`.
#[test]
fn test_display_column_order() {
    let rendered = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            "TYPE_DEFINED_ORDER(SIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            "TYPE_DEFINED_ORDER(UNSIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            "TYPE_DEFINED_ORDER(UNDEFINED)",
        ),
        (ColumnOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (column_order, text) in rendered {
        assert_eq!(column_order.to_string(), text);
    }
}
/// Checks the sort order that `ColumnOrder::get_sort_order` reports when a
/// logical type is supplied (converted type `NONE`, physical type
/// `BYTE_ARRAY`).
///
/// The signed integer cases now cover bit widths 8, 16, 32 and 64, mirroring
/// the unsigned cases; previously the 8-bit case was repeated four times.
#[test]
fn test_column_order_get_logical_type_sort_order() {
    // Asserts that every logical type in `types` yields `expected_order`.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        for tpe in types {
            assert_eq!(
                ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
                expected_order
            );
        }
    }

    // Builds one integer logical type per supported bit width.
    fn integers(is_signed: bool) -> Vec<LogicalType> {
        [8i8, 16, 32, 64]
            .iter()
            .map(|&bit_width| LogicalType::Integer {
                bit_width,
                is_signed,
            })
            .collect()
    }

    // Unsigned order
    let mut unsigned = vec![
        LogicalType::String,
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Enum,
        LogicalType::Uuid,
    ];
    unsigned.extend(integers(false));
    check_sort_order(unsigned, SortOrder::UNSIGNED);

    // Signed order
    let mut signed = integers(true);
    signed.extend(vec![
        LogicalType::Decimal {
            scale: 20,
            precision: 4,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Float16,
    ]);
    check_sort_order(signed, SortOrder::SIGNED);

    // Undefined order
    let undefined = vec![LogicalType::List, LogicalType::Map];
    check_sort_order(undefined, SortOrder::UNDEFINED);
}
/// Checks the sort order that `ColumnOrder::get_sort_order` reports when
/// only a converted type is supplied (no logical type, physical type
/// `BYTE_ARRAY`).
#[test]
fn test_column_order_get_converted_type_sort_order() {
    // Expected sort order paired with the converted types that must map to
    // it, in the same order the cases were originally checked.
    let groups: Vec<(SortOrder, Vec<ConvertedType>)> = vec![
        (
            SortOrder::UNSIGNED,
            vec![
                ConvertedType::UTF8,
                ConvertedType::JSON,
                ConvertedType::BSON,
                ConvertedType::ENUM,
                ConvertedType::UINT_8,
                ConvertedType::UINT_16,
                ConvertedType::UINT_32,
                ConvertedType::UINT_64,
            ],
        ),
        (
            SortOrder::SIGNED,
            vec![
                ConvertedType::INT_8,
                ConvertedType::INT_16,
                ConvertedType::INT_32,
                ConvertedType::INT_64,
                ConvertedType::DECIMAL,
                ConvertedType::DATE,
                ConvertedType::TIME_MILLIS,
                ConvertedType::TIME_MICROS,
                ConvertedType::TIMESTAMP_MILLIS,
                ConvertedType::TIMESTAMP_MICROS,
            ],
        ),
        (
            SortOrder::UNDEFINED,
            vec![
                ConvertedType::LIST,
                ConvertedType::MAP,
                ConvertedType::MAP_KEY_VALUE,
                ConvertedType::INTERVAL,
            ],
        ),
        // With no converted type the test expects UNSIGNED for BYTE_ARRAY.
        (SortOrder::UNSIGNED, vec![ConvertedType::NONE]),
    ];

    for (expected_order, converted_types) in groups {
        for converted in converted_types {
            let actual = ColumnOrder::get_sort_order(None, converted, Type::BYTE_ARRAY);
            assert_eq!(actual, expected_order);
        }
    }
}
/// Checks the value returned by `ColumnOrder::get_default_sort_order` for
/// each physical `Type`.
#[test]
fn test_column_order_get_default_sort_order() {
    // (physical type, expected default sort order)
    let defaults = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];
    for (physical, expected) in defaults {
        assert_eq!(ColumnOrder::get_default_sort_order(physical), expected);
    }
}
/// Checks `ColumnOrder::sort_order()`: `TYPE_DEFINED_ORDER` is expected to
/// return the wrapped order, and `UNDEFINED` is expected to return `SIGNED`.
#[test]
fn test_column_order_sort_order() {
    let cases = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            SortOrder::SIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            SortOrder::UNSIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            SortOrder::UNDEFINED,
        ),
        (ColumnOrder::UNDEFINED, SortOrder::SIGNED),
    ];
    for (column_order, expected) in cases {
        assert_eq!(column_order.sort_order(), expected);
    }
}
/// Checks `str::parse::<Encoding>()` for the accepted spellings exercised
/// here, and the error message produced for an unknown name.
#[test]
fn test_parse_encoding() {
    // (input text, expected encoding)
    let accepted = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
        // The lower-case spelling is accepted as well.
        ("byte_stream_split", Encoding::BYTE_STREAM_SPLIT),
    ];
    for (text, expected) in accepted {
        let encoding: Encoding = text.parse().unwrap();
        assert_eq!(encoding, expected);
    }

    // An unknown name must be rejected with a descriptive error.
    let err = match "plain_xxx".parse::<Encoding>() {
        Err(err) => err,
        Ok(e) => {
            panic!("Should not be able to parse {:?}", e);
        }
    };
    assert_eq!(err.to_string(), "Parquet error: unknown encoding: plain_xxx");
}
/// Checks `str::parse::<Compression>()` for the accepted spellings
/// exercised here (with and without an explicit level), and that invalid
/// input is rejected.
///
/// The rejection cases previously parsed the strings as `Encoding`, so the
/// `Compression` parser's error path was never exercised.
#[test]
fn test_parse_compression() {
    // (input text, expected codec)
    let accepted = [
        ("snappy", Compression::SNAPPY),
        ("lzo", Compression::LZO),
        ("zstd(3)", Compression::ZSTD(ZstdLevel::try_new(3).unwrap())),
        ("LZ4_RAW", Compression::LZ4_RAW),
        ("uncompressed", Compression::UNCOMPRESSED),
        ("gzip(9)", Compression::GZIP(GzipLevel::try_new(9).unwrap())),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
    ];
    for (text, expected) in accepted {
        let compress: Compression = text.parse().unwrap();
        assert_eq!(compress, expected);
    }

    // Unknown codec name, then an invalid (negative) compression level.
    // Only rejection is asserted; the exact message text is not pinned here.
    for text in ["plain_xxx", "gzip(-10)"] {
        assert!(
            text.parse::<Compression>().is_err(),
            "expected `{}` to be rejected as a compression setting",
            text
        );
    }
}
}