1use crate::cast::*;
19
20pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
25 array: &dyn Array,
26 to_type: &DataType,
27 cast_options: &CastOptions,
28) -> Result<ArrayRef, ArrowError> {
29 use DataType::*;
30
31 match to_type {
32 Dictionary(to_index_type, to_value_type) => {
33 let dict_array = array
34 .as_any()
35 .downcast_ref::<DictionaryArray<K>>()
36 .ok_or_else(|| {
37 ArrowError::ComputeError(
38 "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
39 )
40 })?;
41
42 let keys_array: ArrayRef =
43 Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
44 let values_array = dict_array.values();
45 let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
46 let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
47
48 if cast_keys.null_count() > keys_array.null_count() {
51 return Err(ArrowError::ComputeError(format!(
52 "Could not convert {} dictionary indexes from {:?} to {:?}",
53 cast_keys.null_count() - keys_array.null_count(),
54 keys_array.data_type(),
55 to_index_type
56 )));
57 }
58
59 let data = cast_keys.into_data();
60 let builder = data
61 .into_builder()
62 .data_type(to_type.clone())
63 .child_data(vec![cast_values.into_data()]);
64
65 let data = unsafe { builder.build_unchecked() };
68
69 let new_array: ArrayRef = match **to_index_type {
71 Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
72 Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
73 Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
74 Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
75 UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
76 UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
77 UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
78 UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
79 _ => {
80 return Err(ArrowError::CastError(format!(
81 "Unsupported type {to_index_type:?} for dictionary index"
82 )));
83 }
84 };
85
86 Ok(new_array)
87 }
88 Utf8View => {
89 let dict_array = array
92 .as_dictionary::<K>()
93 .downcast_dict::<StringArray>()
94 .ok_or_else(|| {
95 ArrowError::ComputeError(
96 "Internal Error: Cannot cast Utf8View to StringArray of expected type"
97 .to_string(),
98 )
99 })?;
100
101 let string_view = view_from_dict_values::<K, StringViewType, GenericStringType<i32>>(
102 dict_array.values(),
103 dict_array.keys(),
104 )?;
105 Ok(Arc::new(string_view))
106 }
107 BinaryView => {
108 let dict_array = array
111 .as_dictionary::<K>()
112 .downcast_dict::<BinaryArray>()
113 .ok_or_else(|| {
114 ArrowError::ComputeError(
115 "Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
116 .to_string(),
117 )
118 })?;
119
120 let binary_view = view_from_dict_values::<K, BinaryViewType, BinaryType>(
121 dict_array.values(),
122 dict_array.keys(),
123 )?;
124 Ok(Arc::new(binary_view))
125 }
126 _ => unpack_dictionary::<K>(array, to_type, cast_options),
127 }
128}
129
130fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArrayType>(
131 array: &GenericByteArray<V>,
132 keys: &PrimitiveArray<K>,
133) -> Result<GenericByteViewArray<T>, ArrowError> {
134 let value_buffer = array.values();
135 let value_offsets = array.value_offsets();
136 let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
137 builder.append_block(value_buffer.clone());
138 for i in keys.iter() {
139 match i {
140 Some(v) => {
141 let idx = v.to_usize().ok_or_else(|| {
142 ArrowError::ComputeError("Invalid dictionary index".to_string())
143 })?;
144
145 unsafe {
149 let offset = value_offsets.get_unchecked(idx).as_usize();
150 let end = value_offsets.get_unchecked(idx + 1).as_usize();
151 let length = end - offset;
152 builder.append_view_unchecked(0, offset as u32, length as u32)
153 }
154 }
155 None => {
156 builder.append_null();
157 }
158 }
159 }
160 Ok(builder.finish())
161}
162
163pub(crate) fn unpack_dictionary<K>(
165 array: &dyn Array,
166 to_type: &DataType,
167 cast_options: &CastOptions,
168) -> Result<ArrayRef, ArrowError>
169where
170 K: ArrowDictionaryKeyType,
171{
172 let dict_array = array.as_dictionary::<K>();
173 let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
174 take(cast_dict_values.as_ref(), dict_array.keys(), None)
175}
176
177pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
179 array: &dyn Array,
180 primitive_type: DataType,
181 dict_value_type: &DataType,
182 cast_options: &CastOptions,
183) -> Result<ArrayRef, ArrowError> {
184 let primitive = cast_with_options(array, &primitive_type, cast_options)?;
185 let dict = cast_with_options(
186 primitive.as_ref(),
187 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
188 cast_options,
189 )?;
190 cast_with_options(
191 dict.as_ref(),
192 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
193 cast_options,
194 )
195}
196
197pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
202 array: &dyn Array,
203 dict_value_type: &DataType,
204 cast_options: &CastOptions,
205) -> Result<ArrayRef, ArrowError> {
206 use DataType::*;
207
208 match *dict_value_type {
209 Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
210 Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
211 Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
212 Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
213 UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
214 UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
215 UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
216 UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
217 Decimal128(p, s) => {
218 let dict = pack_numeric_to_dictionary::<K, Decimal128Type>(
219 array,
220 dict_value_type,
221 cast_options,
222 )?;
223 let dict = dict
224 .as_dictionary::<K>()
225 .downcast_dict::<Decimal128Array>()
226 .ok_or_else(|| {
227 ArrowError::ComputeError(
228 "Internal Error: Cannot cast dict to Decimal128Array".to_string(),
229 )
230 })?;
231 let value = dict.values().clone();
232 let value = value.with_precision_and_scale(p, s)?;
234 Ok(Arc::new(DictionaryArray::<K>::try_new(
235 dict.keys().clone(),
236 Arc::new(value),
237 )?))
238 }
239 Decimal256(p, s) => {
240 let dict = pack_numeric_to_dictionary::<K, Decimal256Type>(
241 array,
242 dict_value_type,
243 cast_options,
244 )?;
245 let dict = dict
246 .as_dictionary::<K>()
247 .downcast_dict::<Decimal256Array>()
248 .ok_or_else(|| {
249 ArrowError::ComputeError(
250 "Internal Error: Cannot cast dict to Decimal256Array".to_string(),
251 )
252 })?;
253 let value = dict.values().clone();
254 let value = value.with_precision_and_scale(p, s)?;
256 Ok(Arc::new(DictionaryArray::<K>::try_new(
257 dict.keys().clone(),
258 Arc::new(value),
259 )?))
260 }
261 Float16 => {
262 pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
263 }
264 Float32 => {
265 pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
266 }
267 Float64 => {
268 pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
269 }
270 Date32 => pack_array_to_dictionary_via_primitive::<K>(
271 array,
272 DataType::Int32,
273 dict_value_type,
274 cast_options,
275 ),
276 Date64 => pack_array_to_dictionary_via_primitive::<K>(
277 array,
278 DataType::Int64,
279 dict_value_type,
280 cast_options,
281 ),
282 Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
283 array,
284 DataType::Int32,
285 dict_value_type,
286 cast_options,
287 ),
288 Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
289 array,
290 DataType::Int64,
291 dict_value_type,
292 cast_options,
293 ),
294 Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
295 array,
296 DataType::Int64,
297 dict_value_type,
298 cast_options,
299 ),
300 Utf8 => {
301 if array.data_type() == &DataType::Utf8View {
303 return string_view_to_dictionary::<K, i32>(array);
304 }
305 pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
306 }
307 LargeUtf8 => {
308 if array.data_type() == &DataType::Utf8View {
310 return string_view_to_dictionary::<K, i64>(array);
311 }
312 pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
313 }
314 Binary => {
315 if array.data_type() == &DataType::BinaryView {
317 return binary_view_to_dictionary::<K, i32>(array);
318 }
319 pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
320 }
321 LargeBinary => {
322 if array.data_type() == &DataType::BinaryView {
324 return binary_view_to_dictionary::<K, i64>(array);
325 }
326 pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
327 }
328 _ => Err(ArrowError::CastError(format!(
329 "Unsupported output type for dictionary packing: {dict_value_type:?}"
330 ))),
331 }
332}
333
334pub(crate) fn pack_numeric_to_dictionary<K, V>(
337 array: &dyn Array,
338 dict_value_type: &DataType,
339 cast_options: &CastOptions,
340) -> Result<ArrayRef, ArrowError>
341where
342 K: ArrowDictionaryKeyType,
343 V: ArrowPrimitiveType,
344{
345 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
347 let values = cast_values.as_primitive::<V>();
348
349 let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
350
351 for i in 0..values.len() {
353 if values.is_null(i) {
354 b.append_null();
355 } else {
356 b.append(values.value(i))?;
357 }
358 }
359 Ok(Arc::new(b.finish()))
360}
361
362pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
363 array: &dyn Array,
364) -> Result<ArrayRef, ArrowError>
365where
366 K: ArrowDictionaryKeyType,
367{
368 let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
369 array.len(),
370 1024,
371 1024,
372 );
373 let string_view = array
374 .as_any()
375 .downcast_ref::<StringViewArray>()
376 .ok_or_else(|| {
377 ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
378 })?;
379 for v in string_view.iter() {
380 match v {
381 Some(v) => {
382 b.append(v)?;
383 }
384 None => {
385 b.append_null();
386 }
387 }
388 }
389
390 Ok(Arc::new(b.finish()))
391}
392
393pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
394 array: &dyn Array,
395) -> Result<ArrayRef, ArrowError>
396where
397 K: ArrowDictionaryKeyType,
398{
399 let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
400 array.len(),
401 1024,
402 1024,
403 );
404 let binary_view = array
405 .as_any()
406 .downcast_ref::<BinaryViewArray>()
407 .ok_or_else(|| {
408 ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
409 })?;
410 for v in binary_view.iter() {
411 match v {
412 Some(v) => {
413 b.append(v)?;
414 }
415 None => {
416 b.append_null();
417 }
418 }
419 }
420
421 Ok(Arc::new(b.finish()))
422}
423
424pub(crate) fn pack_byte_to_dictionary<K, T>(
427 array: &dyn Array,
428 cast_options: &CastOptions,
429) -> Result<ArrayRef, ArrowError>
430where
431 K: ArrowDictionaryKeyType,
432 T: ByteArrayType,
433{
434 let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
435 let values = cast_values
436 .as_any()
437 .downcast_ref::<GenericByteArray<T>>()
438 .ok_or_else(|| {
439 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
440 })?;
441 let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
442
443 for i in 0..values.len() {
445 if values.is_null(i) {
446 b.append_null();
447 } else {
448 b.append(values.value(i))?;
449 }
450 }
451 Ok(Arc::new(b.finish()))
452}