1use std::str::FromStr;
22use std::{fmt, str};
23
24pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
25use crate::format as parquet;
26
27use crate::errors::{ParquetError, Result};
28
29pub use crate::format::{
31 BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
32 StringType, TimeType, TimeUnit, TimestampType, UUIDType,
33};
34
/// Physical types a column value can be stored as.
///
/// Converts to and from the `crate::format` representation (`parquet::Type`) through
/// the `TryFrom`/`From` impls in this file, displays as the variant name, and parses
/// back from that name via `FromStr`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
pub enum Type {
    /// Boolean physical type; its default sort order is unsigned.
    BOOLEAN,
    /// 32-bit integer physical type (per its name); its default sort order is signed.
    INT32,
    /// 64-bit integer physical type (per its name); its default sort order is signed.
    INT64,
    /// 96-bit physical type (per its name); no default sort order is defined for it.
    INT96,
    /// `FLOAT` physical type; its default sort order is signed.
    FLOAT,
    /// `DOUBLE` physical type; its default sort order is signed.
    DOUBLE,
    /// Byte-array physical type; `FromStr` also accepts `"BINARY"` for it. Its default
    /// sort order is unsigned.
    BYTE_ARRAY,
    /// Fixed-length byte-array physical type (per its name); its default sort order is
    /// unsigned.
    FIXED_LEN_BYTE_ARRAY,
}
67
/// Converted-type annotation of a column.
///
/// Mirrors `Option<parquet::ConvertedType>`: an absent value corresponds to
/// [`ConvertedType::NONE`]. It can also be derived from an optional [`LogicalType`]
/// (see `From<Option<LogicalType>>` in this file); logical types without a counterpart
/// map to `NONE`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No converted type. The sort order then falls back to the physical type.
    NONE,
    /// Counterpart of `LogicalType::String`; sorted as unsigned.
    UTF8,

    /// Counterpart of `LogicalType::Map`; no sort order is defined for it.
    MAP,

    /// Map key/value annotation (per its name); no sort order is defined for it.
    MAP_KEY_VALUE,

    /// Counterpart of `LogicalType::List`; no sort order is defined for it.
    LIST,

    /// Counterpart of `LogicalType::Enum`; sorted as unsigned.
    ENUM,

    /// Counterpart of `LogicalType::Decimal`; sorted as signed.
    DECIMAL,

    /// Counterpart of `LogicalType::Date`; sorted as signed.
    DATE,

    /// Counterpart of `LogicalType::Time` with a `MILLIS` unit; sorted as signed.
    TIME_MILLIS,

    /// Counterpart of `LogicalType::Time` with a `MICROS` unit; sorted as signed.
    TIME_MICROS,

    /// Counterpart of `LogicalType::Timestamp` with a `MILLIS` unit; sorted as signed.
    TIMESTAMP_MILLIS,

    /// Counterpart of `LogicalType::Timestamp` with a `MICROS` unit; sorted as signed.
    TIMESTAMP_MICROS,

    /// Counterpart of an unsigned `LogicalType::Integer` with `bit_width` 8; sorted as
    /// unsigned.
    UINT_8,

    /// Counterpart of an unsigned `LogicalType::Integer` with `bit_width` 16; sorted as
    /// unsigned.
    UINT_16,

    /// Counterpart of an unsigned `LogicalType::Integer` with `bit_width` 32; sorted as
    /// unsigned.
    UINT_32,

    /// Counterpart of an unsigned `LogicalType::Integer` with `bit_width` 64; sorted as
    /// unsigned.
    UINT_64,

    /// Counterpart of a signed `LogicalType::Integer` with `bit_width` 8; sorted as signed.
    INT_8,

    /// Counterpart of a signed `LogicalType::Integer` with `bit_width` 16; sorted as signed.
    INT_16,

    /// Counterpart of a signed `LogicalType::Integer` with `bit_width` 32; sorted as signed.
    INT_32,

    /// Counterpart of a signed `LogicalType::Integer` with `bit_width` 64; sorted as signed.
    INT_64,

    /// Counterpart of `LogicalType::Json`; sorted as unsigned.
    JSON,

    /// Counterpart of `LogicalType::Bson`; sorted as unsigned.
    BSON,

    /// Interval annotation (per its name); no sort order is defined for it, and no
    /// `LogicalType` in this file converts to it.
    INTERVAL,
}
172
/// Logical-type annotation of a column; the crate-side mirror of `parquet::LogicalType`.
///
/// Parameterised variants carry the same fields as the corresponding `crate::format`
/// structs (`DecimalType`, `TimeType`, `TimestampType`, `IntType`), which the `From`
/// impls in this file copy in both directions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// String annotation; converts to `ConvertedType::UTF8` and sorts as unsigned.
    String,
    /// Map annotation; converts to `ConvertedType::MAP`, no sort order is defined.
    Map,
    /// List annotation; converts to `ConvertedType::LIST`, no sort order is defined.
    List,
    /// Enum annotation; converts to `ConvertedType::ENUM` and sorts as unsigned.
    Enum,
    /// Decimal annotation; converts to `ConvertedType::DECIMAL` and sorts as signed.
    Decimal {
        /// Scale, copied to and from `DecimalType::scale`.
        scale: i32,
        /// Precision, copied to and from `DecimalType::precision`.
        precision: i32,
    },
    /// Date annotation; converts to `ConvertedType::DATE` and sorts as signed.
    Date,
    /// Time annotation; sorts as signed. Only `MILLIS`/`MICROS` units have a converted type.
    Time {
        /// Copied to and from `TimeType::is_adjusted_to_u_t_c`.
        is_adjusted_to_u_t_c: bool,
        /// Unit of the value, copied to and from `TimeType::unit`.
        unit: TimeUnit,
    },
    /// Timestamp annotation; sorts as signed. Only `MILLIS`/`MICROS` units have a
    /// converted type.
    Timestamp {
        /// Copied to and from `TimestampType::is_adjusted_to_u_t_c`.
        is_adjusted_to_u_t_c: bool,
        /// Unit of the value, copied to and from `TimestampType::unit`.
        unit: TimeUnit,
    },
    /// Integer annotation; sorts as signed or unsigned according to `is_signed`.
    Integer {
        /// Bit width; the conversion to `ConvertedType` supports 8, 16, 32 and 64 only.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// Unknown annotation; converts to `ConvertedType::NONE`, no sort order is defined.
    Unknown,
    /// JSON annotation; converts to `ConvertedType::JSON` and sorts as unsigned.
    Json,
    /// BSON annotation; converts to `ConvertedType::BSON` and sorts as unsigned.
    Bson,
    /// UUID annotation; converts to `ConvertedType::NONE` and sorts as unsigned.
    Uuid,
    /// Float16 annotation; converts to `ConvertedType::NONE` and sorts as signed.
    Float16,
}
232
/// Repetition of a field; the crate-side mirror of `parquet::FieldRepetitionType`.
///
/// Displays as, and parses from, the upper-case variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Repetition {
    /// Maps to `parquet::FieldRepetitionType::REQUIRED`.
    REQUIRED,
    /// Maps to `parquet::FieldRepetitionType::OPTIONAL`.
    OPTIONAL,
    /// Maps to `parquet::FieldRepetitionType::REPEATED`.
    REPEATED,
}
247
/// Encodings known to this crate; the crate-side mirror of `parquet::Encoding`.
///
/// The derived `Ord`/`PartialOrd` follow declaration order, so reordering the variants
/// would change comparison results. `FromStr` accepts each name in all-upper or
/// all-lower case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[allow(non_camel_case_types)]
pub enum Encoding {
    /// Maps to `parquet::Encoding::PLAIN`.
    PLAIN,

    /// Maps to `parquet::Encoding::PLAIN_DICTIONARY`.
    PLAIN_DICTIONARY,

    /// Maps to `parquet::Encoding::RLE`.
    RLE,

    /// Maps to `parquet::Encoding::BIT_PACKED`. Deprecated; see the attribute note.
    #[deprecated(
        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
    )]
    BIT_PACKED,

    /// Maps to `parquet::Encoding::DELTA_BINARY_PACKED`.
    DELTA_BINARY_PACKED,

    /// Maps to `parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY`.
    DELTA_LENGTH_BYTE_ARRAY,

    /// Maps to `parquet::Encoding::DELTA_BYTE_ARRAY`.
    DELTA_BYTE_ARRAY,

    /// Maps to `parquet::Encoding::RLE_DICTIONARY`.
    RLE_DICTIONARY,

    /// Maps to `parquet::Encoding::BYTE_STREAM_SPLIT`.
    BYTE_STREAM_SPLIT,
}
340
341impl FromStr for Encoding {
342 type Err = ParquetError;
343
344 fn from_str(s: &str) -> Result<Self, Self::Err> {
345 match s {
346 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
347 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
348 "RLE" | "rle" => Ok(Encoding::RLE),
349 #[allow(deprecated)]
350 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
351 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
352 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
353 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
354 }
355 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
356 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
357 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
358 _ => Err(general_err!("unknown encoding: {}", s)),
359 }
360 }
361}
362
/// Compression codecs known to this crate; the crate-side mirror of
/// `parquet::CompressionCodec`.
///
/// `GZIP`, `BROTLI` and `ZSTD` additionally carry a compression level. The level is not
/// part of `parquet::CompressionCodec`: converting from it fills in `Default::default()`,
/// and converting to it drops the level.
///
/// `FromStr` accepts the codec name in all-upper or all-lower case, followed by
/// `(<level>)` for the codecs that carry one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// Maps to `parquet::CompressionCodec::UNCOMPRESSED`; takes no level.
    UNCOMPRESSED,
    /// Maps to `parquet::CompressionCodec::SNAPPY`; takes no level.
    SNAPPY,
    /// Maps to `parquet::CompressionCodec::GZIP`, with its compression level.
    GZIP(GzipLevel),
    /// Maps to `parquet::CompressionCodec::LZO`; takes no level.
    LZO,
    /// Maps to `parquet::CompressionCodec::BROTLI`, with its compression level.
    BROTLI(BrotliLevel),
    /// Maps to `parquet::CompressionCodec::LZ4`; takes no level.
    LZ4,
    /// Maps to `parquet::CompressionCodec::ZSTD`, with its compression level.
    ZSTD(ZstdLevel),
    /// Maps to `parquet::CompressionCodec::LZ4_RAW`; takes no level.
    LZ4_RAW,
}
401
402impl Compression {
403 pub(crate) fn codec_to_string(self) -> String {
406 format!("{:?}", self).split('(').next().unwrap().to_owned()
407 }
408}
409
410fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
411 let split_setting = str_setting.split_once('(');
412
413 match split_setting {
414 Some((codec, level_str)) => {
415 let level = &level_str[..level_str.len() - 1]
416 .parse::<u32>()
417 .map_err(|_| {
418 ParquetError::General(format!("invalid compression level: {}", level_str))
419 })?;
420 Ok((codec, Some(*level)))
421 }
422 None => Ok((str_setting, None)),
423 }
424}
425
426fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
427 if level.is_some() {
428 return Err(ParquetError::General("level is not support".to_string()));
429 }
430
431 Ok(())
432}
433
434fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
435 level.ok_or(ParquetError::General(format!("{} require level", codec)))
436}
437
438impl FromStr for Compression {
439 type Err = ParquetError;
440
441 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
442 let (codec, level) = split_compression_string(s)?;
443
444 let c = match codec {
445 "UNCOMPRESSED" | "uncompressed" => {
446 check_level_is_none(&level)?;
447 Compression::UNCOMPRESSED
448 }
449 "SNAPPY" | "snappy" => {
450 check_level_is_none(&level)?;
451 Compression::SNAPPY
452 }
453 "GZIP" | "gzip" => {
454 let level = require_level(codec, level)?;
455 Compression::GZIP(GzipLevel::try_new(level)?)
456 }
457 "LZO" | "lzo" => {
458 check_level_is_none(&level)?;
459 Compression::LZO
460 }
461 "BROTLI" | "brotli" => {
462 let level = require_level(codec, level)?;
463 Compression::BROTLI(BrotliLevel::try_new(level)?)
464 }
465 "LZ4" | "lz4" => {
466 check_level_is_none(&level)?;
467 Compression::LZ4
468 }
469 "ZSTD" | "zstd" => {
470 let level = require_level(codec, level)?;
471 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
472 }
473 "LZ4_RAW" | "lz4_raw" => {
474 check_level_is_none(&level)?;
475 Compression::LZ4_RAW
476 }
477 _ => {
478 return Err(ParquetError::General(format!(
479 "unsupport compression {codec}"
480 )));
481 }
482 };
483
484 Ok(c)
485 }
486}
487
/// Page types known to this crate; the crate-side mirror of `parquet::PageType`.
///
/// Displays as the variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum PageType {
    /// Maps to `parquet::PageType::DATA_PAGE`.
    DATA_PAGE,
    /// Maps to `parquet::PageType::INDEX_PAGE`.
    INDEX_PAGE,
    /// Maps to `parquet::PageType::DICTIONARY_PAGE`.
    DICTIONARY_PAGE,
    /// Maps to `parquet::PageType::DATA_PAGE_V2`.
    DATA_PAGE_V2,
}
505
/// Sort order of a column's values, as computed by `ColumnOrder::get_sort_order`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed comparison; the only variant for which `is_signed` returns `true`.
    SIGNED,
    /// Unsigned comparison.
    UNSIGNED,
    /// No sort order is defined.
    UNDEFINED,
}
527
528impl SortOrder {
529 pub fn is_signed(&self) -> bool {
531 matches!(self, Self::SIGNED)
532 }
533}
534
/// Column order of a column, optionally carrying the sort order of its values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Type-defined order, carrying the sort order that `sort_order()` returns.
    TYPE_DEFINED_ORDER(SortOrder),
    /// Undefined column order; `sort_order()` reports `SortOrder::SIGNED` for it.
    UNDEFINED,
}
550
551impl ColumnOrder {
552 pub fn get_sort_order(
554 logical_type: Option<LogicalType>,
555 converted_type: ConvertedType,
556 physical_type: Type,
557 ) -> SortOrder {
558 match logical_type {
560 Some(logical) => match logical {
561 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
562 SortOrder::UNSIGNED
563 }
564 LogicalType::Integer { is_signed, .. } => match is_signed {
565 true => SortOrder::SIGNED,
566 false => SortOrder::UNSIGNED,
567 },
568 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
569 LogicalType::Decimal { .. } => SortOrder::SIGNED,
570 LogicalType::Date => SortOrder::SIGNED,
571 LogicalType::Time { .. } => SortOrder::SIGNED,
572 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
573 LogicalType::Unknown => SortOrder::UNDEFINED,
574 LogicalType::Uuid => SortOrder::UNSIGNED,
575 LogicalType::Float16 => SortOrder::SIGNED,
576 },
577 None => Self::get_converted_sort_order(converted_type, physical_type),
579 }
580 }
581
582 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
583 match converted_type {
584 ConvertedType::UTF8
586 | ConvertedType::JSON
587 | ConvertedType::BSON
588 | ConvertedType::ENUM => SortOrder::UNSIGNED,
589
590 ConvertedType::INT_8
591 | ConvertedType::INT_16
592 | ConvertedType::INT_32
593 | ConvertedType::INT_64 => SortOrder::SIGNED,
594
595 ConvertedType::UINT_8
596 | ConvertedType::UINT_16
597 | ConvertedType::UINT_32
598 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
599
600 ConvertedType::DECIMAL => SortOrder::SIGNED,
602
603 ConvertedType::DATE => SortOrder::SIGNED,
604
605 ConvertedType::TIME_MILLIS
606 | ConvertedType::TIME_MICROS
607 | ConvertedType::TIMESTAMP_MILLIS
608 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
609
610 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
611
612 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
613 SortOrder::UNDEFINED
614 }
615
616 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
618 }
619 }
620
621 fn get_default_sort_order(physical_type: Type) -> SortOrder {
623 match physical_type {
624 Type::BOOLEAN => SortOrder::UNSIGNED,
626 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
627 Type::INT96 => SortOrder::UNDEFINED,
628 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
635 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
637 }
638 }
639
640 pub fn sort_order(&self) -> SortOrder {
642 match *self {
643 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
644 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
645 }
646 }
647}
648
649impl fmt::Display for Type {
650 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
651 write!(f, "{self:?}")
652 }
653}
654
655impl fmt::Display for ConvertedType {
656 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
657 write!(f, "{self:?}")
658 }
659}
660
661impl fmt::Display for Repetition {
662 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
663 write!(f, "{self:?}")
664 }
665}
666
667impl fmt::Display for Encoding {
668 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
669 write!(f, "{self:?}")
670 }
671}
672
673impl fmt::Display for Compression {
674 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
675 write!(f, "{self:?}")
676 }
677}
678
679impl fmt::Display for PageType {
680 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
681 write!(f, "{self:?}")
682 }
683}
684
685impl fmt::Display for SortOrder {
686 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
687 write!(f, "{self:?}")
688 }
689}
690
691impl fmt::Display for ColumnOrder {
692 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
693 write!(f, "{self:?}")
694 }
695}
696
697impl TryFrom<parquet::Type> for Type {
701 type Error = ParquetError;
702
703 fn try_from(value: parquet::Type) -> Result<Self> {
704 Ok(match value {
705 parquet::Type::BOOLEAN => Type::BOOLEAN,
706 parquet::Type::INT32 => Type::INT32,
707 parquet::Type::INT64 => Type::INT64,
708 parquet::Type::INT96 => Type::INT96,
709 parquet::Type::FLOAT => Type::FLOAT,
710 parquet::Type::DOUBLE => Type::DOUBLE,
711 parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
712 parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
713 _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
714 })
715 }
716}
717
718impl From<Type> for parquet::Type {
719 fn from(value: Type) -> Self {
720 match value {
721 Type::BOOLEAN => parquet::Type::BOOLEAN,
722 Type::INT32 => parquet::Type::INT32,
723 Type::INT64 => parquet::Type::INT64,
724 Type::INT96 => parquet::Type::INT96,
725 Type::FLOAT => parquet::Type::FLOAT,
726 Type::DOUBLE => parquet::Type::DOUBLE,
727 Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
728 Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
729 }
730 }
731}
732
733impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
737 type Error = ParquetError;
738
739 fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
740 Ok(match option {
741 None => ConvertedType::NONE,
742 Some(value) => match value {
743 parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
744 parquet::ConvertedType::MAP => ConvertedType::MAP,
745 parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
746 parquet::ConvertedType::LIST => ConvertedType::LIST,
747 parquet::ConvertedType::ENUM => ConvertedType::ENUM,
748 parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
749 parquet::ConvertedType::DATE => ConvertedType::DATE,
750 parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
751 parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
752 parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
753 parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
754 parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
755 parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
756 parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
757 parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
758 parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
759 parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
760 parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
761 parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
762 parquet::ConvertedType::JSON => ConvertedType::JSON,
763 parquet::ConvertedType::BSON => ConvertedType::BSON,
764 parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
765 _ => {
766 return Err(general_err!(
767 "unexpected parquet converted type: {}",
768 value.0
769 ))
770 }
771 },
772 })
773 }
774}
775
776impl From<ConvertedType> for Option<parquet::ConvertedType> {
777 fn from(value: ConvertedType) -> Self {
778 match value {
779 ConvertedType::NONE => None,
780 ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
781 ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
782 ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
783 ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
784 ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
785 ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
786 ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
787 ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
788 ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
789 ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
790 ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
791 ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
792 ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
793 ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
794 ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
795 ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
796 ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
797 ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
798 ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
799 ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
800 ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
801 ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
802 }
803 }
804}
805
806impl From<parquet::LogicalType> for LogicalType {
810 fn from(value: parquet::LogicalType) -> Self {
811 match value {
812 parquet::LogicalType::STRING(_) => LogicalType::String,
813 parquet::LogicalType::MAP(_) => LogicalType::Map,
814 parquet::LogicalType::LIST(_) => LogicalType::List,
815 parquet::LogicalType::ENUM(_) => LogicalType::Enum,
816 parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
817 scale: t.scale,
818 precision: t.precision,
819 },
820 parquet::LogicalType::DATE(_) => LogicalType::Date,
821 parquet::LogicalType::TIME(t) => LogicalType::Time {
822 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
823 unit: t.unit,
824 },
825 parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
826 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
827 unit: t.unit,
828 },
829 parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
830 bit_width: t.bit_width,
831 is_signed: t.is_signed,
832 },
833 parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
834 parquet::LogicalType::JSON(_) => LogicalType::Json,
835 parquet::LogicalType::BSON(_) => LogicalType::Bson,
836 parquet::LogicalType::UUID(_) => LogicalType::Uuid,
837 parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
838 }
839 }
840}
841
842impl From<LogicalType> for parquet::LogicalType {
843 fn from(value: LogicalType) -> Self {
844 match value {
845 LogicalType::String => parquet::LogicalType::STRING(Default::default()),
846 LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
847 LogicalType::List => parquet::LogicalType::LIST(Default::default()),
848 LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
849 LogicalType::Decimal { scale, precision } => {
850 parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
851 }
852 LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
853 LogicalType::Time {
854 is_adjusted_to_u_t_c,
855 unit,
856 } => parquet::LogicalType::TIME(TimeType {
857 is_adjusted_to_u_t_c,
858 unit,
859 }),
860 LogicalType::Timestamp {
861 is_adjusted_to_u_t_c,
862 unit,
863 } => parquet::LogicalType::TIMESTAMP(TimestampType {
864 is_adjusted_to_u_t_c,
865 unit,
866 }),
867 LogicalType::Integer {
868 bit_width,
869 is_signed,
870 } => parquet::LogicalType::INTEGER(IntType {
871 bit_width,
872 is_signed,
873 }),
874 LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
875 LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
876 LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
877 LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
878 LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
879 }
880 }
881}
882
883impl From<Option<LogicalType>> for ConvertedType {
893 fn from(value: Option<LogicalType>) -> Self {
894 match value {
895 Some(value) => match value {
896 LogicalType::String => ConvertedType::UTF8,
897 LogicalType::Map => ConvertedType::MAP,
898 LogicalType::List => ConvertedType::LIST,
899 LogicalType::Enum => ConvertedType::ENUM,
900 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
901 LogicalType::Date => ConvertedType::DATE,
902 LogicalType::Time { unit, .. } => match unit {
903 TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
904 TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
905 TimeUnit::NANOS(_) => ConvertedType::NONE,
906 },
907 LogicalType::Timestamp { unit, .. } => match unit {
908 TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
909 TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
910 TimeUnit::NANOS(_) => ConvertedType::NONE,
911 },
912 LogicalType::Integer {
913 bit_width,
914 is_signed,
915 } => match (bit_width, is_signed) {
916 (8, true) => ConvertedType::INT_8,
917 (16, true) => ConvertedType::INT_16,
918 (32, true) => ConvertedType::INT_32,
919 (64, true) => ConvertedType::INT_64,
920 (8, false) => ConvertedType::UINT_8,
921 (16, false) => ConvertedType::UINT_16,
922 (32, false) => ConvertedType::UINT_32,
923 (64, false) => ConvertedType::UINT_64,
924 t => panic!("Integer type {t:?} is not supported"),
925 },
926 LogicalType::Json => ConvertedType::JSON,
927 LogicalType::Bson => ConvertedType::BSON,
928 LogicalType::Uuid | LogicalType::Float16 | LogicalType::Unknown => {
929 ConvertedType::NONE
930 }
931 },
932 None => ConvertedType::NONE,
933 }
934 }
935}
936
937impl TryFrom<parquet::FieldRepetitionType> for Repetition {
941 type Error = ParquetError;
942
943 fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
944 Ok(match value {
945 parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
946 parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
947 parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
948 _ => {
949 return Err(general_err!(
950 "unexpected parquet repetition type: {}",
951 value.0
952 ))
953 }
954 })
955 }
956}
957
958impl From<Repetition> for parquet::FieldRepetitionType {
959 fn from(value: Repetition) -> Self {
960 match value {
961 Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
962 Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
963 Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
964 }
965 }
966}
967
968impl TryFrom<parquet::Encoding> for Encoding {
972 type Error = ParquetError;
973
974 fn try_from(value: parquet::Encoding) -> Result<Self> {
975 Ok(match value {
976 parquet::Encoding::PLAIN => Encoding::PLAIN,
977 parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
978 parquet::Encoding::RLE => Encoding::RLE,
979 #[allow(deprecated)]
980 parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
981 parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
982 parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
983 parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
984 parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
985 parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
986 _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
987 })
988 }
989}
990
991impl From<Encoding> for parquet::Encoding {
992 fn from(value: Encoding) -> Self {
993 match value {
994 Encoding::PLAIN => parquet::Encoding::PLAIN,
995 Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
996 Encoding::RLE => parquet::Encoding::RLE,
997 #[allow(deprecated)]
998 Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
999 Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
1000 Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1001 Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
1002 Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
1003 Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
1004 }
1005 }
1006}
1007
1008impl TryFrom<parquet::CompressionCodec> for Compression {
1012 type Error = ParquetError;
1013
1014 fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
1015 Ok(match value {
1016 parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
1017 parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
1018 parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
1019 parquet::CompressionCodec::LZO => Compression::LZO,
1020 parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
1021 parquet::CompressionCodec::LZ4 => Compression::LZ4,
1022 parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
1023 parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
1024 _ => {
1025 return Err(general_err!(
1026 "unexpected parquet compression codec: {}",
1027 value.0
1028 ))
1029 }
1030 })
1031 }
1032}
1033
1034impl From<Compression> for parquet::CompressionCodec {
1035 fn from(value: Compression) -> Self {
1036 match value {
1037 Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
1038 Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
1039 Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
1040 Compression::LZO => parquet::CompressionCodec::LZO,
1041 Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
1042 Compression::LZ4 => parquet::CompressionCodec::LZ4,
1043 Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
1044 Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
1045 }
1046 }
1047}
1048
1049impl TryFrom<parquet::PageType> for PageType {
1053 type Error = ParquetError;
1054
1055 fn try_from(value: parquet::PageType) -> Result<Self> {
1056 Ok(match value {
1057 parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
1058 parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
1059 parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
1060 parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
1061 _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
1062 })
1063 }
1064}
1065
1066impl From<PageType> for parquet::PageType {
1067 fn from(value: PageType) -> Self {
1068 match value {
1069 PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
1070 PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
1071 PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
1072 PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
1073 }
1074 }
1075}
1076
1077impl str::FromStr for Repetition {
1081 type Err = ParquetError;
1082
1083 fn from_str(s: &str) -> Result<Self> {
1084 match s {
1085 "REQUIRED" => Ok(Repetition::REQUIRED),
1086 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1087 "REPEATED" => Ok(Repetition::REPEATED),
1088 other => Err(general_err!("Invalid parquet repetition {}", other)),
1089 }
1090 }
1091}
1092
1093impl str::FromStr for Type {
1094 type Err = ParquetError;
1095
1096 fn from_str(s: &str) -> Result<Self> {
1097 match s {
1098 "BOOLEAN" => Ok(Type::BOOLEAN),
1099 "INT32" => Ok(Type::INT32),
1100 "INT64" => Ok(Type::INT64),
1101 "INT96" => Ok(Type::INT96),
1102 "FLOAT" => Ok(Type::FLOAT),
1103 "DOUBLE" => Ok(Type::DOUBLE),
1104 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1105 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1106 other => Err(general_err!("Invalid parquet type {}", other)),
1107 }
1108 }
1109}
1110
1111impl str::FromStr for ConvertedType {
1112 type Err = ParquetError;
1113
1114 fn from_str(s: &str) -> Result<Self> {
1115 match s {
1116 "NONE" => Ok(ConvertedType::NONE),
1117 "UTF8" => Ok(ConvertedType::UTF8),
1118 "MAP" => Ok(ConvertedType::MAP),
1119 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1120 "LIST" => Ok(ConvertedType::LIST),
1121 "ENUM" => Ok(ConvertedType::ENUM),
1122 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1123 "DATE" => Ok(ConvertedType::DATE),
1124 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1125 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1126 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1127 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1128 "UINT_8" => Ok(ConvertedType::UINT_8),
1129 "UINT_16" => Ok(ConvertedType::UINT_16),
1130 "UINT_32" => Ok(ConvertedType::UINT_32),
1131 "UINT_64" => Ok(ConvertedType::UINT_64),
1132 "INT_8" => Ok(ConvertedType::INT_8),
1133 "INT_16" => Ok(ConvertedType::INT_16),
1134 "INT_32" => Ok(ConvertedType::INT_32),
1135 "INT_64" => Ok(ConvertedType::INT_64),
1136 "JSON" => Ok(ConvertedType::JSON),
1137 "BSON" => Ok(ConvertedType::BSON),
1138 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1139 other => Err(general_err!("Invalid parquet converted type {}", other)),
1140 }
1141 }
1142}
1143
1144impl str::FromStr for LogicalType {
1145 type Err = ParquetError;
1146
1147 fn from_str(s: &str) -> Result<Self> {
1148 match s {
1149 "INTEGER" => Ok(LogicalType::Integer {
1151 bit_width: 8,
1152 is_signed: false,
1153 }),
1154 "MAP" => Ok(LogicalType::Map),
1155 "LIST" => Ok(LogicalType::List),
1156 "ENUM" => Ok(LogicalType::Enum),
1157 "DECIMAL" => Ok(LogicalType::Decimal {
1158 precision: -1,
1159 scale: -1,
1160 }),
1161 "DATE" => Ok(LogicalType::Date),
1162 "TIME" => Ok(LogicalType::Time {
1163 is_adjusted_to_u_t_c: false,
1164 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1165 }),
1166 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1167 is_adjusted_to_u_t_c: false,
1168 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1169 }),
1170 "STRING" => Ok(LogicalType::String),
1171 "JSON" => Ok(LogicalType::Json),
1172 "BSON" => Ok(LogicalType::Bson),
1173 "UUID" => Ok(LogicalType::Uuid),
1174 "UNKNOWN" => Ok(LogicalType::Unknown),
1175 "INTERVAL" => Err(general_err!(
1176 "Interval parquet logical type not yet supported"
1177 )),
1178 "FLOAT16" => Ok(LogicalType::Float16),
1179 other => Err(general_err!("Invalid parquet logical type {}", other)),
1180 }
1181 }
1182}
1183
1184#[cfg(test)]
1185#[allow(deprecated)] mod tests {
1187 use super::*;
1188
1189 #[test]
1190 fn test_display_type() {
1191 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1192 assert_eq!(Type::INT32.to_string(), "INT32");
1193 assert_eq!(Type::INT64.to_string(), "INT64");
1194 assert_eq!(Type::INT96.to_string(), "INT96");
1195 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1196 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1197 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1198 assert_eq!(
1199 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1200 "FIXED_LEN_BYTE_ARRAY"
1201 );
1202 }
1203
1204 #[test]
1205 fn test_from_type() {
1206 assert_eq!(
1207 Type::try_from(parquet::Type::BOOLEAN).unwrap(),
1208 Type::BOOLEAN
1209 );
1210 assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
1211 assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
1212 assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
1213 assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
1214 assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
1215 assert_eq!(
1216 Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
1217 Type::BYTE_ARRAY
1218 );
1219 assert_eq!(
1220 Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
1221 Type::FIXED_LEN_BYTE_ARRAY
1222 );
1223 }
1224
1225 #[test]
1226 fn test_into_type() {
1227 assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
1228 assert_eq!(parquet::Type::INT32, Type::INT32.into());
1229 assert_eq!(parquet::Type::INT64, Type::INT64.into());
1230 assert_eq!(parquet::Type::INT96, Type::INT96.into());
1231 assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
1232 assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
1233 assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
1234 assert_eq!(
1235 parquet::Type::FIXED_LEN_BYTE_ARRAY,
1236 Type::FIXED_LEN_BYTE_ARRAY.into()
1237 );
1238 }
1239
1240 #[test]
1241 fn test_from_string_into_type() {
1242 assert_eq!(
1243 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1244 Type::BOOLEAN
1245 );
1246 assert_eq!(
1247 Type::INT32.to_string().parse::<Type>().unwrap(),
1248 Type::INT32
1249 );
1250 assert_eq!(
1251 Type::INT64.to_string().parse::<Type>().unwrap(),
1252 Type::INT64
1253 );
1254 assert_eq!(
1255 Type::INT96.to_string().parse::<Type>().unwrap(),
1256 Type::INT96
1257 );
1258 assert_eq!(
1259 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1260 Type::FLOAT
1261 );
1262 assert_eq!(
1263 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1264 Type::DOUBLE
1265 );
1266 assert_eq!(
1267 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1268 Type::BYTE_ARRAY
1269 );
1270 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1271 assert_eq!(
1272 Type::FIXED_LEN_BYTE_ARRAY
1273 .to_string()
1274 .parse::<Type>()
1275 .unwrap(),
1276 Type::FIXED_LEN_BYTE_ARRAY
1277 );
1278 }
1279
1280 #[test]
1281 fn test_display_converted_type() {
1282 assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1283 assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1284 assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1285 assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1286 assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1287 assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1288 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1289 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1290 assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1291 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1292 assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1293 assert_eq!(
1294 ConvertedType::TIMESTAMP_MILLIS.to_string(),
1295 "TIMESTAMP_MILLIS"
1296 );
1297 assert_eq!(
1298 ConvertedType::TIMESTAMP_MICROS.to_string(),
1299 "TIMESTAMP_MICROS"
1300 );
1301 assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1302 assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1303 assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1304 assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1305 assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1306 assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1307 assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1308 assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1309 assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1310 assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1311 assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1312 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1313 }
1314
/// Checks the conversion from the optional thrift converted type into
/// `ConvertedType`, one case per variant.
#[test]
fn test_from_converted_type() {
    // An absent thrift value is expected to become `ConvertedType::NONE`.
    let parquet_conv_none: Option<parquet::ConvertedType> = None;
    assert_eq!(
        ConvertedType::try_from(parquet_conv_none).unwrap(),
        ConvertedType::NONE
    );

    // (thrift value, expected crate value); each variant is listed once.
    let cases = [
        (parquet::ConvertedType::UTF8, ConvertedType::UTF8),
        (parquet::ConvertedType::MAP, ConvertedType::MAP),
        (
            parquet::ConvertedType::MAP_KEY_VALUE,
            ConvertedType::MAP_KEY_VALUE,
        ),
        (parquet::ConvertedType::LIST, ConvertedType::LIST),
        (parquet::ConvertedType::ENUM, ConvertedType::ENUM),
        (parquet::ConvertedType::DECIMAL, ConvertedType::DECIMAL),
        (parquet::ConvertedType::DATE, ConvertedType::DATE),
        (
            parquet::ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MILLIS,
        ),
        (
            parquet::ConvertedType::TIME_MICROS,
            ConvertedType::TIME_MICROS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MICROS,
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (parquet::ConvertedType::UINT_8, ConvertedType::UINT_8),
        (parquet::ConvertedType::UINT_16, ConvertedType::UINT_16),
        (parquet::ConvertedType::UINT_32, ConvertedType::UINT_32),
        (parquet::ConvertedType::UINT_64, ConvertedType::UINT_64),
        (parquet::ConvertedType::INT_8, ConvertedType::INT_8),
        (parquet::ConvertedType::INT_16, ConvertedType::INT_16),
        (parquet::ConvertedType::INT_32, ConvertedType::INT_32),
        (parquet::ConvertedType::INT_64, ConvertedType::INT_64),
        (parquet::ConvertedType::JSON, ConvertedType::JSON),
        (parquet::ConvertedType::BSON, ConvertedType::BSON),
        (parquet::ConvertedType::INTERVAL, ConvertedType::INTERVAL),
    ];

    for (thrift, expected) in cases {
        assert_eq!(ConvertedType::try_from(Some(thrift)).unwrap(), expected);
    }
}
1415
/// Checks the conversion from `ConvertedType` into the optional thrift
/// converted type, one case per variant.
#[test]
fn test_into_converted_type() {
    // `ConvertedType::NONE` is expected to become an absent thrift value.
    let converted_type: Option<parquet::ConvertedType> = None;
    assert_eq!(converted_type, ConvertedType::NONE.into());

    // (expected thrift value, crate value); each variant is listed once.
    let cases = [
        (parquet::ConvertedType::UTF8, ConvertedType::UTF8),
        (parquet::ConvertedType::MAP, ConvertedType::MAP),
        (
            parquet::ConvertedType::MAP_KEY_VALUE,
            ConvertedType::MAP_KEY_VALUE,
        ),
        (parquet::ConvertedType::LIST, ConvertedType::LIST),
        (parquet::ConvertedType::ENUM, ConvertedType::ENUM),
        (parquet::ConvertedType::DECIMAL, ConvertedType::DECIMAL),
        (parquet::ConvertedType::DATE, ConvertedType::DATE),
        (
            parquet::ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MILLIS,
        ),
        (
            parquet::ConvertedType::TIME_MICROS,
            ConvertedType::TIME_MICROS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MICROS,
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (parquet::ConvertedType::UINT_8, ConvertedType::UINT_8),
        (parquet::ConvertedType::UINT_16, ConvertedType::UINT_16),
        (parquet::ConvertedType::UINT_32, ConvertedType::UINT_32),
        (parquet::ConvertedType::UINT_64, ConvertedType::UINT_64),
        (parquet::ConvertedType::INT_8, ConvertedType::INT_8),
        (parquet::ConvertedType::INT_16, ConvertedType::INT_16),
        (parquet::ConvertedType::INT_32, ConvertedType::INT_32),
        (parquet::ConvertedType::INT_64, ConvertedType::INT_64),
        (parquet::ConvertedType::JSON, ConvertedType::JSON),
        (parquet::ConvertedType::BSON, ConvertedType::BSON),
        (parquet::ConvertedType::INTERVAL, ConvertedType::INTERVAL),
    ];

    for (thrift, internal) in cases {
        let converted: Option<parquet::ConvertedType> = internal.into();
        assert_eq!(Some(thrift), converted);
    }
}
1510
/// Checks that displaying a `ConvertedType` and parsing the resulting string
/// yields the same variant again.
#[test]
fn test_from_string_into_converted_type() {
    // Every variant appears exactly once, in enum declaration order.
    let variants = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];

    for variant in variants {
        let round_tripped = variant.to_string().parse::<ConvertedType>().unwrap();
        assert_eq!(round_tripped, variant);
    }
}
1682
/// Checks the mapping from an optional `LogicalType` to the `ConvertedType`
/// this test expects for it.
#[test]
fn test_logical_to_converted_type() {
    // No logical type at all is expected to map to `NONE`.
    let logical_none: Option<LogicalType> = None;
    assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);

    // Small constructors to keep the table below readable.
    let integer = |bit_width: i8, is_signed: bool| LogicalType::Integer {
        bit_width,
        is_signed,
    };
    let time = |unit: TimeUnit, is_adjusted_to_u_t_c: bool| LogicalType::Time {
        unit,
        is_adjusted_to_u_t_c,
    };
    let timestamp = |unit: TimeUnit, is_adjusted_to_u_t_c: bool| LogicalType::Timestamp {
        unit,
        is_adjusted_to_u_t_c,
    };

    // (logical type, expected converted type)
    let cases = [
        (
            LogicalType::Decimal {
                precision: 20,
                scale: 5,
            },
            ConvertedType::DECIMAL,
        ),
        (LogicalType::Bson, ConvertedType::BSON),
        (LogicalType::Json, ConvertedType::JSON),
        (LogicalType::String, ConvertedType::UTF8),
        (LogicalType::Date, ConvertedType::DATE),
        (
            time(TimeUnit::MILLIS(Default::default()), true),
            ConvertedType::TIME_MILLIS,
        ),
        (
            time(TimeUnit::MICROS(Default::default()), true),
            ConvertedType::TIME_MICROS,
        ),
        // The nanosecond cases are expected to map to NONE.
        (
            time(TimeUnit::NANOS(Default::default()), false),
            ConvertedType::NONE,
        ),
        (
            timestamp(TimeUnit::MILLIS(Default::default()), true),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            timestamp(TimeUnit::MICROS(Default::default()), false),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (
            timestamp(TimeUnit::NANOS(Default::default()), false),
            ConvertedType::NONE,
        ),
        (integer(8, false), ConvertedType::UINT_8),
        (integer(8, true), ConvertedType::INT_8),
        (integer(16, false), ConvertedType::UINT_16),
        (integer(16, true), ConvertedType::INT_16),
        (integer(32, false), ConvertedType::UINT_32),
        (integer(32, true), ConvertedType::INT_32),
        (integer(64, false), ConvertedType::UINT_64),
        (integer(64, true), ConvertedType::INT_64),
        (LogicalType::List, ConvertedType::LIST),
        (LogicalType::Map, ConvertedType::MAP),
        (LogicalType::Uuid, ConvertedType::NONE),
        (LogicalType::Enum, ConvertedType::ENUM),
        (LogicalType::Float16, ConvertedType::NONE),
        (LogicalType::Unknown, ConvertedType::NONE),
    ];

    for (logical, expected) in cases {
        assert_eq!(ConvertedType::from(Some(logical)), expected);
    }
}
1833
/// Checks that each `Repetition` variant is displayed as its variant name.
#[test]
fn test_display_repetition() {
    let cases = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];

    for (repetition, expected) in cases {
        assert_eq!(repetition.to_string(), expected);
    }
}
1840
/// Checks the conversion from the thrift `FieldRepetitionType` into
/// `Repetition`.
#[test]
fn test_from_repetition() {
    // (thrift value, expected crate value)
    let cases = [
        (parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED),
        (parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL),
        (parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED),
    ];

    for (thrift, expected) in cases {
        assert_eq!(Repetition::try_from(thrift).unwrap(), expected);
    }
}
1856
/// Checks the conversion from `Repetition` into the thrift
/// `FieldRepetitionType`.
#[test]
fn test_into_repetition() {
    // (expected thrift value, crate value)
    let cases = [
        (parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED),
        (parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL),
        (parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED),
    ];

    for (thrift, internal) in cases {
        let converted: parquet::FieldRepetitionType = internal.into();
        assert_eq!(thrift, converted);
    }
}
1872
/// Checks that displaying a `Repetition` and parsing the resulting string
/// yields the same variant again.
#[test]
fn test_from_string_into_repetition() {
    let variants = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];

    for variant in variants {
        let round_tripped = variant.to_string().parse::<Repetition>().unwrap();
        assert_eq!(round_tripped, variant);
    }
}
1897
/// Checks that the listed `Encoding` variants are displayed as their variant
/// names.
#[test]
fn test_display_encoding() {
    let cases = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
    ];

    for (encoding, expected) in cases {
        assert_eq!(encoding.to_string(), expected);
    }
}
1915
/// Checks the conversion from the thrift `Encoding` into the crate's
/// `Encoding` for the listed variants.
#[test]
fn test_from_encoding() {
    // (thrift value, expected crate value)
    let cases = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (
            parquet::Encoding::PLAIN_DICTIONARY,
            Encoding::PLAIN_DICTIONARY,
        ),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (
            parquet::Encoding::DELTA_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY,
        ),
    ];

    for (thrift, expected) in cases {
        assert_eq!(Encoding::try_from(thrift).unwrap(), expected);
    }
}
1947
/// Checks the conversion from the crate's `Encoding` into the thrift
/// `Encoding` for the listed variants.
#[test]
fn test_into_encoding() {
    // (expected thrift value, crate value)
    let cases = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (
            parquet::Encoding::PLAIN_DICTIONARY,
            Encoding::PLAIN_DICTIONARY,
        ),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (
            parquet::Encoding::DELTA_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY,
        ),
    ];

    for (thrift, internal) in cases {
        let converted: parquet::Encoding = internal.into();
        assert_eq!(thrift, converted);
    }
}
1970
/// Checks that `codec_to_string` yields the bare codec name; for ZSTD the
/// expected string carries no level.
#[test]
fn test_compression_codec_to_string() {
    let uncompressed = Compression::UNCOMPRESSED.codec_to_string();
    assert_eq!(uncompressed, "UNCOMPRESSED");

    let zstd = Compression::ZSTD(ZstdLevel::default()).codec_to_string();
    assert_eq!(zstd, "ZSTD");
}
1979
/// Checks the `Display` output of the listed `Compression` variants; codecs
/// that carry a level are built with their default level.
#[test]
fn test_display_compression() {
    let cases = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(Default::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(Default::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(Default::default()), "ZSTD(ZstdLevel(1))"),
    ];

    for (compression, expected) in cases {
        assert_eq!(compression.to_string(), expected);
    }
}
1999
/// Checks the conversion from the thrift `CompressionCodec` into
/// `Compression`; codecs that carry a level are expected at their default.
#[test]
fn test_from_compression() {
    // (thrift codec, expected crate value)
    let cases = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
    ];

    for (codec, expected) in cases {
        assert_eq!(Compression::try_from(codec).unwrap(), expected);
    }
}
2031
/// Checks the conversion from `Compression` into the thrift
/// `CompressionCodec`.
#[test]
fn test_into_compression() {
    // (expected thrift codec, crate value)
    let cases = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
    ];

    for (codec, internal) in cases {
        let converted: parquet::CompressionCodec = internal.into();
        assert_eq!(codec, converted);
    }
}
2057
/// Checks that each listed `PageType` variant is displayed as its variant
/// name.
#[test]
fn test_display_page_type() {
    let cases = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];

    for (page_type, expected) in cases {
        assert_eq!(page_type.to_string(), expected);
    }
}
2065
/// Checks the conversion from the thrift `PageType` into the crate's
/// `PageType`.
#[test]
fn test_from_page_type() {
    // (thrift value, expected crate value)
    let cases = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];

    for (thrift, expected) in cases {
        assert_eq!(PageType::try_from(thrift).unwrap(), expected);
    }
}
2085
/// Checks the conversion from the crate's `PageType` into the thrift
/// `PageType`.
#[test]
fn test_into_page_type() {
    // (expected thrift value, crate value)
    let cases = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];

    for (thrift, internal) in cases {
        let converted: parquet::PageType = internal.into();
        assert_eq!(thrift, converted);
    }
}
2099
/// Checks that each `SortOrder` variant is displayed as its variant name.
#[test]
fn test_display_sort_order() {
    let cases = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];

    for (sort_order, expected) in cases {
        assert_eq!(sort_order.to_string(), expected);
    }
}
2106
/// Checks the `Display` output of `ColumnOrder`: the type-defined variant is
/// expected to include its sort order in parentheses.
#[test]
fn test_display_column_order() {
    let cases = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            "TYPE_DEFINED_ORDER(SIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            "TYPE_DEFINED_ORDER(UNSIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            "TYPE_DEFINED_ORDER(UNDEFINED)",
        ),
        (ColumnOrder::UNDEFINED, "UNDEFINED"),
    ];

    for (column_order, expected) in cases {
        assert_eq!(column_order.to_string(), expected);
    }
}
2123
/// Checks the sort order that `ColumnOrder::get_sort_order` reports for each
/// logical type.
#[test]
fn test_column_order_get_logical_type_sort_order() {
    // The converted type is always NONE and the physical type always
    // BYTE_ARRAY, so the logical type is the only input that varies.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        for tpe in types {
            assert_eq!(
                ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
                expected_order
            );
        }
    }

    // Logical types expected to compare as unsigned.
    let unsigned = vec![
        LogicalType::String,
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Enum,
        LogicalType::Uuid,
        LogicalType::Integer {
            bit_width: 8,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: false,
        },
    ];
    check_sort_order(unsigned, SortOrder::UNSIGNED);

    // Logical types expected to compare as signed. The signed integers cover
    // the same bit widths (8, 16, 32, 64) as the unsigned list above.
    let signed = vec![
        LogicalType::Integer {
            bit_width: 8,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: true,
        },
        LogicalType::Decimal {
            scale: 20,
            precision: 4,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Float16,
    ];
    check_sort_order(signed, SortOrder::SIGNED);

    // Nested logical types are expected to have no defined sort order.
    let undefined = vec![LogicalType::List, LogicalType::Map];
    check_sort_order(undefined, SortOrder::UNDEFINED);
}
2218
/// Checks the sort order that `ColumnOrder::get_sort_order` reports for each
/// converted type when no logical type is supplied.
#[test]
fn test_column_order_get_converted_type_sort_order() {
    // The logical type is always absent and the physical type always
    // BYTE_ARRAY, so the converted type is the only input that varies.
    fn assert_order(converted_types: &[ConvertedType], expected: SortOrder) {
        for &converted in converted_types {
            assert_eq!(
                ColumnOrder::get_sort_order(None, converted, Type::BYTE_ARRAY),
                expected
            );
        }
    }

    // Converted types expected to compare as unsigned.
    assert_order(
        &[
            ConvertedType::UTF8,
            ConvertedType::JSON,
            ConvertedType::BSON,
            ConvertedType::ENUM,
            ConvertedType::UINT_8,
            ConvertedType::UINT_16,
            ConvertedType::UINT_32,
            ConvertedType::UINT_64,
        ],
        SortOrder::UNSIGNED,
    );

    // Converted types expected to compare as signed.
    assert_order(
        &[
            ConvertedType::INT_8,
            ConvertedType::INT_16,
            ConvertedType::INT_32,
            ConvertedType::INT_64,
            ConvertedType::DECIMAL,
            ConvertedType::DATE,
            ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MICROS,
            ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MICROS,
        ],
        SortOrder::SIGNED,
    );

    // Converted types expected to have no defined sort order.
    assert_order(
        &[
            ConvertedType::LIST,
            ConvertedType::MAP,
            ConvertedType::MAP_KEY_VALUE,
            ConvertedType::INTERVAL,
        ],
        SortOrder::UNDEFINED,
    );

    // NONE yields UNSIGNED here; presumably the result falls back to the
    // order of the BYTE_ARRAY physical type passed by the helper.
    assert_order(&[ConvertedType::NONE], SortOrder::UNSIGNED);
}
2273
/// Checks the default sort order reported for every physical `Type`.
#[test]
fn test_column_order_get_default_sort_order() {
    // (physical type, expected default sort order)
    let cases = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];

    for (physical_type, expected) in cases {
        assert_eq!(ColumnOrder::get_default_sort_order(physical_type), expected);
    }
}
2310
/// Checks `ColumnOrder::sort_order`: a type-defined order is expected to
/// return the sort order it wraps, and `UNDEFINED` is expected to report
/// `SIGNED`.
#[test]
fn test_column_order_sort_order() {
    // (column order, expected sort order)
    let cases = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            SortOrder::SIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            SortOrder::UNSIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            SortOrder::UNDEFINED,
        ),
        (ColumnOrder::UNDEFINED, SortOrder::SIGNED),
    ];

    for (column_order, expected) in cases {
        assert_eq!(column_order.sort_order(), expected);
    }
}
2327
/// Checks parsing of `Encoding` from strings: the upper-case names, one
/// lower-case spelling, and the error message for an unknown name.
#[test]
fn test_parse_encoding() {
    // (input text, expected encoding)
    let cases = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
        // Lower-case spelling of a known encoding is accepted as well.
        ("byte_stream_split", Encoding::BYTE_STREAM_SPLIT),
    ];

    for (text, expected) in cases {
        let parsed: Encoding = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unknown name must fail and echo the offending input in the message.
    let err = match "plain_xxx".parse::<Encoding>() {
        Ok(e) => panic!("Should not be able to parse {:?}", e),
        Err(err) => err,
    };
    assert_eq!(err.to_string(), "Parquet error: unknown encoding: plain_xxx");
}
2363
/// Checks parsing of `Compression` from strings, including codecs that carry
/// a level in parentheses, and that invalid input is rejected.
#[test]
fn test_parse_compression() {
    // (input text, expected compression); each spelling is listed once.
    let cases = [
        ("uncompressed", Compression::UNCOMPRESSED),
        ("snappy", Compression::SNAPPY),
        (
            "gzip(9)",
            Compression::GZIP(GzipLevel::try_new(9).unwrap()),
        ),
        ("lzo", Compression::LZO),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
        (
            "zstd(3)",
            Compression::ZSTD(ZstdLevel::try_new(3).unwrap()),
        ),
        ("LZ4_RAW", Compression::LZ4_RAW),
    ];

    for (text, expected) in cases {
        let parsed: Compression = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // The Compression parser itself must reject an unknown codec name and a
    // negative level. Only failure is asserted; the exact error text is not
    // pinned here.
    assert!("plain_xxx".parse::<Compression>().is_err());
    assert!("gzip(-10)".parse::<Compression>().is_err());

    // Retained from the original test: the same inputs are not valid
    // encodings either, and the Encoding parser echoes them in its message.
    let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
    assert_eq!(
        err.to_string(),
        "Parquet error: unknown encoding: plain_xxx"
    );

    err = "gzip(-10)".parse::<Encoding>().unwrap_err();
    assert_eq!(
        err.to_string(),
        "Parquet error: unknown encoding: gzip(-10)"
    );
}
2404}