use parquet2::schema::{
types::{
FieldInfo, GroupConvertedType, GroupLogicalType, IntegerType, ParquetType, PhysicalType,
PrimitiveConvertedType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit,
},
Repetition,
};
use crate::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec<Field> {
fields.iter().filter_map(to_field).collect::<Vec<_>>()
}
fn from_int32(
logical_type: Option<PrimitiveLogicalType>,
converted_type: Option<PrimitiveConvertedType>,
) -> DataType {
use PrimitiveLogicalType::*;
match (logical_type, converted_type) {
(Some(Integer(t)), _) => match t {
IntegerType::Int8 => DataType::Int8,
IntegerType::Int16 => DataType::Int16,
IntegerType::Int32 => DataType::Int32,
IntegerType::UInt8 => DataType::UInt8,
IntegerType::UInt16 => DataType::UInt16,
IntegerType::UInt32 => DataType::UInt32,
_ => DataType::Int32,
},
(Some(Decimal(precision, scale)), _) => DataType::Decimal(precision, scale),
(Some(Date), _) => DataType::Date32,
(Some(Time { unit, .. }), _) => match unit {
ParquetTimeUnit::Milliseconds => DataType::Time32(TimeUnit::Millisecond),
_ => DataType::Int32,
},
(_, Some(PrimitiveConvertedType::Uint8)) => DataType::UInt8,
(_, Some(PrimitiveConvertedType::Uint16)) => DataType::UInt16,
(_, Some(PrimitiveConvertedType::Uint32)) => DataType::UInt32,
(_, Some(PrimitiveConvertedType::Int8)) => DataType::Int8,
(_, Some(PrimitiveConvertedType::Int16)) => DataType::Int16,
(_, Some(PrimitiveConvertedType::Int32)) => DataType::Int32,
(_, Some(PrimitiveConvertedType::Date)) => DataType::Date32,
(_, Some(PrimitiveConvertedType::TimeMillis)) => DataType::Time32(TimeUnit::Millisecond),
(_, Some(PrimitiveConvertedType::Decimal(precision, scale))) => {
DataType::Decimal(precision, scale)
}
(_, _) => DataType::Int32,
}
}
fn from_int64(
logical_type: Option<PrimitiveLogicalType>,
converted_type: Option<PrimitiveConvertedType>,
) -> DataType {
use PrimitiveLogicalType::*;
match (logical_type, converted_type) {
(Some(Integer(integer)), _) => match integer {
IntegerType::UInt64 => DataType::UInt64,
IntegerType::Int64 => DataType::Int64,
_ => DataType::Int64,
},
(
Some(Timestamp {
is_adjusted_to_utc,
unit,
}),
_,
) => {
let timezone = if is_adjusted_to_utc {
Some("+00:00".to_string())
} else {
None
};
match unit {
ParquetTimeUnit::Milliseconds => {
DataType::Timestamp(TimeUnit::Millisecond, timezone)
}
ParquetTimeUnit::Microseconds => {
DataType::Timestamp(TimeUnit::Microsecond, timezone)
}
ParquetTimeUnit::Nanoseconds => DataType::Timestamp(TimeUnit::Nanosecond, timezone),
}
}
(Some(Time { unit, .. }), _) => match unit {
ParquetTimeUnit::Microseconds => DataType::Time64(TimeUnit::Microsecond),
ParquetTimeUnit::Nanoseconds => DataType::Time64(TimeUnit::Nanosecond),
_ => DataType::Int64,
},
(Some(Decimal(precision, scale)), _) => DataType::Decimal(precision, scale),
(_, Some(PrimitiveConvertedType::TimeMicros)) => DataType::Time64(TimeUnit::Microsecond),
(_, Some(PrimitiveConvertedType::TimestampMillis)) => {
DataType::Timestamp(TimeUnit::Millisecond, None)
}
(_, Some(PrimitiveConvertedType::TimestampMicros)) => {
DataType::Timestamp(TimeUnit::Microsecond, None)
}
(_, Some(PrimitiveConvertedType::Int64)) => DataType::Int64,
(_, Some(PrimitiveConvertedType::Uint64)) => DataType::UInt64,
(_, Some(PrimitiveConvertedType::Decimal(precision, scale))) => {
DataType::Decimal(precision, scale)
}
(_, _) => DataType::Int64,
}
}
/// Maps a parquet `BYTE_ARRAY` column to an arrow [`DataType`].
///
/// The result is [`DataType::Utf8`] when the logical type is `String`, or when the
/// converted type is `Utf8` and the logical type is not `Json`, `Bson` or `Enum`.
/// Every other combination is [`DataType::Binary`].
fn from_byte_array(
    logical_type: &Option<PrimitiveLogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    let is_text = match logical_type {
        Some(PrimitiveLogicalType::String) => true,
        // These logical annotations pin the column to raw bytes regardless of the
        // converted type.
        Some(
            PrimitiveLogicalType::Json | PrimitiveLogicalType::Bson | PrimitiveLogicalType::Enum,
        ) => false,
        // No (or an unrelated) logical type: let the converted type decide.
        _ => matches!(converted_type, Some(PrimitiveConvertedType::Utf8)),
    };

    if is_text {
        DataType::Utf8
    } else {
        DataType::Binary
    }
}
/// Maps a parquet `FIXED_LEN_BYTE_ARRAY` column of `length` bytes to an arrow [`DataType`].
///
/// * a logical `Decimal` becomes [`DataType::Decimal`], whatever the converted type is;
/// * with no logical type, a converted `Decimal` becomes [`DataType::Decimal`] and a
///   converted `Interval` becomes [`DataType::Interval`] with [`IntervalUnit::DayTime`];
/// * everything else is [`DataType::FixedSizeBinary`] of the given `length`.
fn from_fixed_len_byte_array(
    length: usize,
    logical_type: Option<PrimitiveLogicalType>,
    converted_type: Option<PrimitiveConvertedType>,
) -> DataType {
    match logical_type {
        Some(PrimitiveLogicalType::Decimal(precision, scale)) => {
            DataType::Decimal(precision, scale)
        }
        // The converted type is only honoured when there is no logical type at all.
        None => match converted_type {
            Some(PrimitiveConvertedType::Decimal(precision, scale)) => {
                DataType::Decimal(precision, scale)
            }
            // The day-time unit is always chosen; nothing here distinguishes other units.
            Some(PrimitiveConvertedType::Interval) => DataType::Interval(IntervalUnit::DayTime),
            _ => DataType::FixedSizeBinary(length),
        },
        Some(_) => DataType::FixedSizeBinary(length),
    }
}
/// Derives the arrow [`DataType`] of a parquet primitive, ignoring its repetition.
///
/// Physical types with a single arrow counterpart are mapped directly (`INT96` is read
/// as a nanosecond timestamp without timezone). The integer and byte-array physical
/// types are refined by their logical/converted annotations through the `from_*` helpers.
fn to_primitive_type_inner(primitive_type: &PrimitiveType) -> DataType {
    let logical = primitive_type.logical_type;
    let converted = primitive_type.converted_type;

    match primitive_type.physical_type {
        // Physical types that need no annotation.
        PhysicalType::Boolean => DataType::Boolean,
        PhysicalType::Float => DataType::Float32,
        PhysicalType::Double => DataType::Float64,
        PhysicalType::Int96 => DataType::Timestamp(TimeUnit::Nanosecond, None),
        // Physical types whose meaning depends on the annotations.
        PhysicalType::Int32 => from_int32(logical, converted),
        PhysicalType::Int64 => from_int64(logical, converted),
        PhysicalType::ByteArray => from_byte_array(&logical, &converted),
        PhysicalType::FixedLenByteArray(length) => {
            from_fixed_len_byte_array(length, logical, converted)
        }
    }
}
/// Derives the arrow [`DataType`] of a parquet primitive, taking repetition into account.
///
/// A `REPEATED` primitive becomes a [`DataType::List`] whose item field carries the
/// primitive's own name and the nullability reported by `is_nullable`. Any other
/// repetition yields the element type unchanged.
fn to_primitive_type(primitive_type: &PrimitiveType) -> DataType {
    let info = &primitive_type.field_info;
    let element_type = to_primitive_type_inner(primitive_type);

    if info.repetition != Repetition::Repeated {
        return element_type;
    }

    let item = Field::new(&info.name, element_type, is_nullable(info));
    DataType::List(Box::new(item))
}
/// Resolves the arrow type of a group that is not itself `REPEATED`, based on its
/// annotation.
///
/// * logical `List`, or converted `List` with no logical type: `to_list`;
/// * logical `Map`: `to_list` as well;
/// * converted `Map` / `MapKeyValue` with no logical type: `to_map`;
/// * anything else: `to_struct`.
///
/// `fields` must be non-empty (checked in debug builds only).
fn non_repeated_group(
    logical_type: &Option<GroupLogicalType>,
    converted_type: &Option<GroupConvertedType>,
    fields: &[ParquetType],
    parent_name: &str,
) -> Option<DataType> {
    debug_assert!(!fields.is_empty());

    match (logical_type, converted_type) {
        // NOTE(review): the logical `Map` annotation is routed to `to_list` while the
        // converted `Map` annotations below go to `to_map` — confirm this asymmetry is
        // intended.
        (Some(GroupLogicalType::List | GroupLogicalType::Map), _)
        | (None, Some(GroupConvertedType::List)) => to_list(fields, parent_name),
        (None, Some(GroupConvertedType::Map | GroupConvertedType::MapKeyValue)) => to_map(fields),
        _ => to_struct(fields),
    }
}
/// Builds an arrow [`DataType::Struct`] from the children of a parquet group.
///
/// Children that cannot be converted are skipped. Returns `None` when no child
/// survives the conversion, so that no empty struct is produced.
fn to_struct(fields: &[ParquetType]) -> Option<DataType> {
    let children: Vec<Field> = fields.iter().filter_map(to_field).collect();
    if children.is_empty() {
        return None;
    }
    Some(DataType::Struct(children))
}
/// Builds an arrow [`DataType::Map`] from the children of a parquet group annotated as a map.
///
/// Only the first child is used; it is converted with `to_field` and becomes the map's
/// inner field. The second argument of [`DataType::Map`] is always `false`
/// (presumably the "keys sorted" flag — confirm against the `DataType` definition).
///
/// Returns `None` when `fields` is empty or when the first child cannot be converted.
fn to_map(fields: &[ParquetType]) -> Option<DataType> {
    // `first()?` instead of `fields[0]`: an empty group yields "no type" rather than an
    // out-of-bounds panic, matching how empty groups are treated elsewhere in this file.
    let inner = to_field(fields.first()?)?;
    Some(DataType::Map(Box::new(inner), false))
}
/// Derives the arrow [`DataType`] of a parquet group.
///
/// A `REPEATED` group becomes a [`DataType::List`] of structs: the item field carries the
/// group's own name, the struct built from `fields`, and the nullability reported by
/// `is_nullable`. Any other group is resolved from its annotations by
/// `non_repeated_group`.
///
/// Returns `None` when no arrow type can be derived. `fields` must be non-empty
/// (checked in debug builds only).
fn to_group_type(
    field_info: &FieldInfo,
    logical_type: &Option<GroupLogicalType>,
    converted_type: &Option<GroupConvertedType>,
    fields: &[ParquetType],
    parent_name: &str,
) -> Option<DataType> {
    debug_assert!(!fields.is_empty());

    if field_info.repetition != Repetition::Repeated {
        return non_repeated_group(logical_type, converted_type, fields, parent_name);
    }

    let element = Field::new(
        &field_info.name,
        to_struct(fields)?,
        is_nullable(field_info),
    );
    Some(DataType::List(Box::new(element)))
}
/// Returns whether a parquet field maps to a nullable arrow field.
///
/// `OPTIONAL` and `REPEATED` fields are nullable; only `REQUIRED` fields are not.
pub(crate) fn is_nullable(field_info: &FieldInfo) -> bool {
    field_info.repetition != Repetition::Required
}
/// Converts a parquet type into an arrow [`Field`] with the same name.
///
/// The field's nullability comes from `is_nullable`. Returns `None` when
/// `to_data_type` cannot derive an arrow type for `type_`.
fn to_field(type_: &ParquetType) -> Option<Field> {
    let info = type_.get_field_info();
    let data_type = to_data_type(type_)?;
    Some(Field::new(&info.name, data_type, is_nullable(info)))
}
/// Builds the arrow [`DataType::List`] for a parquet group annotated as a list.
///
/// `fields` are the children of the annotated group and `parent_name` is that group's
/// name. Only the first child (the repeated item) is inspected:
///
/// * a primitive item is the element itself;
/// * a group item with exactly one child is treated as a wrapper and the element is that
///   child, unless the group is named `array` or `<parent_name>_tuple`;
/// * any other group item is the element, as a struct of its children.
///
/// The list's item field takes its name and nullability from the wrapped child when the
/// item is a single-child group named `list`, and from the item itself otherwise.
///
/// Returns `None` when `fields` is empty or when no arrow type can be derived for the
/// element.
fn to_list(fields: &[ParquetType], parent_name: &str) -> Option<DataType> {
    // `?` rather than `unwrap`: an empty child list yields "no type" instead of a panic,
    // matching how empty groups are treated elsewhere in this file.
    let item = fields.first()?;

    let item_type = match item {
        ParquetType::PrimitiveType(primitive) => to_primitive_type_inner(primitive),
        ParquetType::GroupType { fields, .. } => match &fields[..] {
            // Single-child wrapper group: look through it to the actual element.
            [nested_item]
                if item.name() != "array" && item.name() != format!("{parent_name}_tuple") =>
            {
                to_data_type(nested_item)?
            }
            // Otherwise the repeated group itself is the element.
            _ => to_struct(fields)?,
        },
    };

    // With the standard `list` wrapper, name and nullability must come from the wrapped
    // child; the wrapper itself is always repeated and would otherwise make every element
    // look nullable.
    let element = match item {
        ParquetType::GroupType {
            field_info, fields, ..
        } if field_info.name == "list" => match &fields[..] {
            [only_child] => only_child,
            _ => item,
        },
        _ => item,
    };
    let element_info = element.get_field_info();

    Some(DataType::List(Box::new(Field::new(
        &element_info.name,
        item_type,
        element_info.repetition != Repetition::Required,
    ))))
}
/// Derives the arrow [`DataType`] of a parquet type, primitive or group.
///
/// Returns `None` for a group without children and for groups for which
/// `to_group_type` cannot derive a type. Primitives always yield a type.
pub(crate) fn to_data_type(type_: &ParquetType) -> Option<DataType> {
    match type_ {
        ParquetType::PrimitiveType(primitive) => Some(to_primitive_type(primitive)),
        // A group with no children has no arrow counterpart.
        ParquetType::GroupType { fields, .. } if fields.is_empty() => None,
        ParquetType::GroupType {
            field_info,
            logical_type,
            converted_type,
            fields,
        } => to_group_type(
            field_info,
            logical_type,
            converted_type,
            fields,
            &field_info.name,
        ),
    }
}
#[cfg(test)]
mod tests {
use parquet2::metadata::SchemaDescriptor;
use super::*;
use crate::datatypes::{DataType, Field, TimeUnit};
use crate::error::Result;
/// Flat schema of primitives: booleans, annotated and plain integers, floats and strings.
#[test]
fn test_flat_primitives() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 int16 (INT_16);
REQUIRED INT32 uint8 (INTEGER(8,false));
REQUIRED INT32 uint16 (INTEGER(16,false));
REQUIRED INT32 int32;
REQUIRED INT64 int64 ;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (UTF8);
OPTIONAL BINARY string_2 (STRING);
}
";
    // REQUIRED columns are non-nullable, OPTIONAL ones nullable.
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let optional = |name: &str, data_type: DataType| Field::new(name, data_type, true);
    let expected = vec![
        required("boolean", DataType::Boolean),
        required("int8", DataType::Int8),
        required("int16", DataType::Int16),
        required("uint8", DataType::UInt8),
        required("uint16", DataType::UInt16),
        required("int32", DataType::Int32),
        required("int64", DataType::Int64),
        optional("double", DataType::Float64),
        optional("float", DataType::Float32),
        optional("string", DataType::Utf8),
        optional("string_2", DataType::Utf8),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(converted, expected);
    Ok(())
}
/// Un-annotated byte arrays map to `Binary` / `FixedSizeBinary`.
#[test]
fn test_byte_array_fields() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED BYTE_ARRAY binary;
REQUIRED FIXED_LEN_BYTE_ARRAY (20) fixed_binary;
}
";
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let expected = vec![
        required("binary", DataType::Binary),
        required("fixed_binary", DataType::FixedSizeBinary(20)),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(converted, expected);
    Ok(())
}
/// Converts a two-column schema.
///
/// NOTE(review): despite the test's name, the schema below contains no duplicated
/// field names — confirm whether a duplicate was meant to be exercised here.
#[test]
fn test_duplicate_fields() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
}
";
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let expected = vec![
        required("boolean", DataType::Boolean),
        required("int8", DataType::Int8),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(converted, expected);
    Ok(())
}
/// The various ways a list can be spelled in a parquet schema, including the layouts
/// without the standard `list`/`element` wrapper and a bare repeated primitive.
#[test]
fn test_parquet_lists() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED GROUP my_list (LIST) {
REPEATED GROUP list {
OPTIONAL BINARY element (UTF8);
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP list {
REQUIRED BINARY element (UTF8);
}
}
OPTIONAL GROUP array_of_arrays (LIST) {
REPEATED GROUP list {
REQUIRED GROUP element (LIST) {
REPEATED GROUP list {
REQUIRED INT32 element;
}
}
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP element {
REQUIRED BINARY str (UTF8);
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED INT32 element;
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP element {
REQUIRED BINARY str (UTF8);
REQUIRED INT32 num;
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP array {
REQUIRED BINARY str (UTF8);
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP my_list_tuple {
REQUIRED BINARY str (UTF8);
}
}
REPEATED INT32 name;
}
";
    // Shorthand for `List<item_name: item_type>` with the given item nullability.
    let list_of = |item_name: &str, item_type: DataType, item_nullable: bool| {
        DataType::List(Box::new(Field::new(item_name, item_type, item_nullable)))
    };
    let str_struct = || DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
    let str_num_struct = DataType::Struct(vec![
        Field::new("str", DataType::Utf8, false),
        Field::new("num", DataType::Int32, false),
    ]);

    // One expected field per top-level entry of the schema, in the same order.
    let expected = vec![
        // required list of optional strings
        Field::new("my_list", list_of("element", DataType::Utf8, true), false),
        // optional list of required strings
        Field::new("my_list", list_of("element", DataType::Utf8, false), true),
        // list of lists
        Field::new(
            "array_of_arrays",
            list_of(
                "element",
                list_of("element", DataType::Int32, false),
                false,
            ),
            true,
        ),
        // repeated single-child group named `element`
        Field::new("my_list", list_of("element", DataType::Utf8, true), true),
        // repeated primitive directly under the list group
        Field::new("my_list", list_of("element", DataType::Int32, true), true),
        // repeated group with two children becomes a struct element
        Field::new("my_list", list_of("element", str_num_struct, true), true),
        // repeated single-child group named `array` stays a struct
        Field::new("my_list", list_of("array", str_struct(), true), true),
        // repeated single-child group named `<parent>_tuple` stays a struct
        Field::new(
            "my_list",
            list_of("my_list_tuple", str_struct(), true),
            true,
        ),
        // bare repeated primitive outside any list annotation
        Field::new("name", list_of("name", DataType::Int32, true), true),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(expected, converted);
    Ok(())
}
/// Nullability of a list and of its elements are tracked independently.
#[test]
fn test_parquet_list_nullable() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED GROUP my_list1 (LIST) {
REPEATED GROUP list {
OPTIONAL BINARY element (UTF8);
}
}
OPTIONAL GROUP my_list2 (LIST) {
REPEATED GROUP list {
REQUIRED BINARY element (UTF8);
}
}
REQUIRED GROUP my_list3 (LIST) {
REPEATED GROUP list {
REQUIRED BINARY element (UTF8);
}
}
}
";
    // `List<element: Utf8>` with the given element nullability.
    let utf8_list = |element_nullable: bool| {
        DataType::List(Box::new(Field::new(
            "element",
            DataType::Utf8,
            element_nullable,
        )))
    };
    let expected = vec![
        // required list, optional elements
        Field::new("my_list1", utf8_list(true), false),
        // optional list, required elements
        Field::new("my_list2", utf8_list(false), true),
        // required list, required elements
        Field::new("my_list3", utf8_list(false), false),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(expected, converted);
    Ok(())
}
/// An un-annotated group becomes a struct alongside its sibling leaf.
#[test]
fn test_nested_schema() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED GROUP group1 {
REQUIRED BOOLEAN leaf1;
REQUIRED INT32 leaf2;
}
REQUIRED INT64 leaf3;
}
";
    let group1 = DataType::Struct(vec![
        Field::new("leaf1", DataType::Boolean, false),
        Field::new("leaf2", DataType::Int32, false),
    ]);
    let expected = vec![
        Field::new("group1", group1, false),
        Field::new("leaf3", DataType::Int64, false),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(expected, converted);
    Ok(())
}
/// Repeated groups become lists of structs, including when nested in one another.
#[test]
fn test_repeated_nested_schema() -> Result<()> {
    let schema_text = "
message test_schema {
OPTIONAL INT32 leaf1;
REPEATED GROUP outerGroup {
OPTIONAL INT32 leaf2;
REPEATED GROUP innerGroup {
OPTIONAL INT32 leaf3;
}
}
}
";
    // A repeated group `name` maps to a nullable `List` whose nullable item is also called
    // `name` and holds the struct of the group's children.
    let repeated_group = |name: &str, children: Vec<Field>| {
        let item = Field::new(name, DataType::Struct(children), true);
        Field::new(name, DataType::List(Box::new(item)), true)
    };

    let inner_group = repeated_group(
        "innerGroup",
        vec![Field::new("leaf3", DataType::Int32, true)],
    );
    let outer_group = repeated_group(
        "outerGroup",
        vec![Field::new("leaf2", DataType::Int32, true), inner_group],
    );
    let expected = vec![Field::new("leaf1", DataType::Int32, true), outer_group];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(expected, converted);
    Ok(())
}
/// Primitive columns with integer, date, time and timestamp annotations, plus a bare
/// repeated primitive.
#[test]
fn test_column_desc_to_field() -> Result<()> {
    let schema_text = "
message test_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 uint8 (INTEGER(8,false));
REQUIRED INT32 int16 (INT_16);
REQUIRED INT32 uint16 (INTEGER(16,false));
REQUIRED INT32 int32;
REQUIRED INT64 int64;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (UTF8);
REPEATED BOOLEAN bools;
OPTIONAL INT32 date (DATE);
OPTIONAL INT32 time_milli (TIME_MILLIS);
OPTIONAL INT64 time_micro (TIME_MICROS);
OPTIONAL INT64 time_nano (TIME(NANOS,false));
OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
REQUIRED INT64 ts_micro (TIMESTAMP_MICROS);
REQUIRED INT64 ts_nano (TIMESTAMP(NANOS,true));
}
";
    // REQUIRED columns are non-nullable; OPTIONAL and REPEATED ones are nullable.
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let nullable = |name: &str, data_type: DataType| Field::new(name, data_type, true);
    let expected = vec![
        required("boolean", DataType::Boolean),
        required("int8", DataType::Int8),
        required("uint8", DataType::UInt8),
        required("int16", DataType::Int16),
        required("uint16", DataType::UInt16),
        required("int32", DataType::Int32),
        required("int64", DataType::Int64),
        nullable("double", DataType::Float64),
        nullable("float", DataType::Float32),
        nullable("string", DataType::Utf8),
        nullable(
            "bools",
            DataType::List(Box::new(nullable("bools", DataType::Boolean))),
        ),
        nullable("date", DataType::Date32),
        nullable("time_milli", DataType::Time32(TimeUnit::Millisecond)),
        nullable("time_micro", DataType::Time64(TimeUnit::Microsecond)),
        nullable("time_nano", DataType::Time64(TimeUnit::Nanosecond)),
        nullable(
            "ts_milli",
            DataType::Timestamp(TimeUnit::Millisecond, None),
        ),
        required(
            "ts_micro",
            DataType::Timestamp(TimeUnit::Microsecond, None),
        ),
        // Only the UTC-adjusted logical timestamp carries a timezone.
        required(
            "ts_nano",
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
        ),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(expected, converted);
    Ok(())
}
/// A mixed schema: annotated primitives, standard three-level lists and a struct that
/// itself contains a list.
#[test]
fn test_field_to_column_desc() -> Result<()> {
    let schema_text = "
message arrow_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 int16 (INTEGER(16,true));
REQUIRED INT32 int32;
REQUIRED INT64 int64;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (STRING);
OPTIONAL GROUP bools (LIST) {
REPEATED GROUP list {
OPTIONAL BOOLEAN element;
}
}
REQUIRED GROUP bools_non_null (LIST) {
REPEATED GROUP list {
REQUIRED BOOLEAN element;
}
}
OPTIONAL INT32 date (DATE);
OPTIONAL INT32 time_milli (TIME(MILLIS,false));
OPTIONAL INT64 time_micro (TIME_MICROS);
OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
REQUIRED INT64 ts_micro (TIMESTAMP(MICROS,false));
REQUIRED GROUP struct {
REQUIRED BOOLEAN bools;
REQUIRED INT32 uint32 (INTEGER(32,false));
REQUIRED GROUP int32 (LIST) {
REPEATED GROUP list {
OPTIONAL INT32 element;
}
}
}
REQUIRED BINARY dictionary_strings (STRING);
}
";
    // REQUIRED columns are non-nullable, OPTIONAL ones nullable.
    let required = |name: &str, data_type: DataType| Field::new(name, data_type, false);
    let optional = |name: &str, data_type: DataType| Field::new(name, data_type, true);
    // `List<element: item_type>` with the given element nullability.
    let list_of = |item_type: DataType, item_nullable: bool| {
        DataType::List(Box::new(Field::new("element", item_type, item_nullable)))
    };

    let struct_type = DataType::Struct(vec![
        required("bools", DataType::Boolean),
        required("uint32", DataType::UInt32),
        required("int32", list_of(DataType::Int32, true)),
    ]);
    let expected = vec![
        required("boolean", DataType::Boolean),
        required("int8", DataType::Int8),
        required("int16", DataType::Int16),
        required("int32", DataType::Int32),
        required("int64", DataType::Int64),
        optional("double", DataType::Float64),
        optional("float", DataType::Float32),
        optional("string", DataType::Utf8),
        optional("bools", list_of(DataType::Boolean, true)),
        required("bools_non_null", list_of(DataType::Boolean, false)),
        optional("date", DataType::Date32),
        optional("time_milli", DataType::Time32(TimeUnit::Millisecond)),
        optional("time_micro", DataType::Time64(TimeUnit::Microsecond)),
        optional(
            "ts_milli",
            DataType::Timestamp(TimeUnit::Millisecond, None),
        ),
        required(
            "ts_micro",
            DataType::Timestamp(TimeUnit::Microsecond, None),
        ),
        required("struct", struct_type),
        required("dictionary_strings", DataType::Utf8),
    ];

    let descriptor = SchemaDescriptor::try_from_message(schema_text)?;
    let converted = parquet_to_arrow_schema(descriptor.fields());
    assert_eq!(expected, converted);
    Ok(())
}
}