1use crate::cast::*;
19use arrow_buffer::NullBuffer;
20
21pub(crate) fn value_to_string<O: OffsetSizeTrait>(
22 array: &dyn Array,
23 options: &CastOptions,
24) -> Result<ArrayRef, ArrowError> {
25 let mut builder = GenericStringBuilder::<O>::new();
26 let formatter = ArrayFormatter::try_new(array, &options.format_options)?;
27 let nulls = array.nulls();
28 for i in 0..array.len() {
29 match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
30 true => builder.append_null(),
31 false => {
32 formatter.value(i).write(&mut builder)?;
33 builder.append_value("");
35 }
36 }
37 }
38 Ok(Arc::new(builder.finish()))
39}
40
41pub(crate) fn parse_string<P: Parser, O: OffsetSizeTrait>(
43 array: &dyn Array,
44 cast_options: &CastOptions,
45) -> Result<ArrayRef, ArrowError> {
46 let string_array = array.as_string::<O>();
47 parse_string_iter::<P, _, _>(string_array.iter(), cast_options, || {
48 string_array.nulls().cloned()
49 })
50}
51
52pub(crate) fn parse_string_view<P: Parser>(
54 array: &dyn Array,
55 cast_options: &CastOptions,
56) -> Result<ArrayRef, ArrowError> {
57 let string_view_array = array.as_string_view();
58 parse_string_iter::<P, _, _>(string_view_array.iter(), cast_options, || {
59 string_view_array.nulls().cloned()
60 })
61}
62
63fn parse_string_iter<
64 'a,
65 P: Parser,
66 I: Iterator<Item = Option<&'a str>>,
67 F: FnOnce() -> Option<NullBuffer>,
68>(
69 iter: I,
70 cast_options: &CastOptions,
71 nulls: F,
72) -> Result<ArrayRef, ArrowError> {
73 let array = if cast_options.safe {
74 let iter = iter.map(|x| x.and_then(P::parse));
75
76 unsafe { PrimitiveArray::<P>::from_trusted_len_iter(iter) }
81 } else {
82 let v = iter
83 .map(|x| match x {
84 Some(v) => P::parse(v).ok_or_else(|| {
85 ArrowError::CastError(format!(
86 "Cannot cast string '{}' to value of {:?} type",
87 v,
88 P::DATA_TYPE
89 ))
90 }),
91 None => Ok(P::Native::default()),
92 })
93 .collect::<Result<Vec<_>, ArrowError>>()?;
94 PrimitiveArray::new(v.into(), nulls())
95 };
96
97 Ok(Arc::new(array) as ArrayRef)
98}
99
100pub(crate) fn cast_string_to_timestamp<O: OffsetSizeTrait, T: ArrowTimestampType>(
102 array: &dyn Array,
103 to_tz: &Option<Arc<str>>,
104 cast_options: &CastOptions,
105) -> Result<ArrayRef, ArrowError> {
106 let array = array.as_string::<O>();
107 let out: PrimitiveArray<T> = match to_tz {
108 Some(tz) => {
109 let tz: Tz = tz.as_ref().parse()?;
110 cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)?
111 }
112 None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?,
113 };
114 Ok(Arc::new(out.with_timezone_opt(to_tz.clone())))
115}
116
117pub(crate) fn cast_view_to_timestamp<T: ArrowTimestampType>(
119 array: &dyn Array,
120 to_tz: &Option<Arc<str>>,
121 cast_options: &CastOptions,
122) -> Result<ArrayRef, ArrowError> {
123 let array = array.as_string_view();
124 let out: PrimitiveArray<T> = match to_tz {
125 Some(tz) => {
126 let tz: Tz = tz.as_ref().parse()?;
127 cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)?
128 }
129 None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?,
130 };
131 Ok(Arc::new(out.with_timezone_opt(to_tz.clone())))
132}
133
134fn cast_string_to_timestamp_impl<
135 'a,
136 I: Iterator<Item = Option<&'a str>>,
137 T: ArrowTimestampType,
138 Tz: TimeZone,
139>(
140 iter: I,
141 tz: &Tz,
142 cast_options: &CastOptions,
143) -> Result<PrimitiveArray<T>, ArrowError> {
144 if cast_options.safe {
145 let iter = iter.map(|v| {
146 v.and_then(|v| {
147 let naive = string_to_datetime(tz, v).ok()?.naive_utc();
148 T::make_value(naive)
149 })
150 });
151 Ok(unsafe { PrimitiveArray::from_trusted_len_iter(iter) })
157 } else {
158 let vec = iter
159 .map(|v| {
160 v.map(|v| {
161 let naive = string_to_datetime(tz, v)?.naive_utc();
162 T::make_value(naive).ok_or_else(|| match T::UNIT {
163 TimeUnit::Nanosecond => ArrowError::CastError(format!(
164 "Overflow converting {naive} to Nanosecond. The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"
165 )),
166 _ => ArrowError::CastError(format!(
167 "Overflow converting {naive} to {:?}",
168 T::UNIT
169 ))
170 })
171 })
172 .transpose()
173 })
174 .collect::<Result<Vec<Option<i64>>, _>>()?;
175
176 Ok(unsafe { PrimitiveArray::from_trusted_len_iter(vec.iter()) })
181 }
182}
183
184pub(crate) fn cast_string_to_interval<Offset, F, ArrowType>(
185 array: &dyn Array,
186 cast_options: &CastOptions,
187 parse_function: F,
188) -> Result<ArrayRef, ArrowError>
189where
190 Offset: OffsetSizeTrait,
191 ArrowType: ArrowPrimitiveType,
192 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
193{
194 let string_array = array
195 .as_any()
196 .downcast_ref::<GenericStringArray<Offset>>()
197 .unwrap();
198 cast_string_to_interval_impl::<_, ArrowType, F>(
199 string_array.iter(),
200 cast_options,
201 parse_function,
202 )
203}
204
205pub(crate) fn cast_string_to_year_month_interval<Offset: OffsetSizeTrait>(
206 array: &dyn Array,
207 cast_options: &CastOptions,
208) -> Result<ArrayRef, ArrowError> {
209 cast_string_to_interval::<Offset, _, IntervalYearMonthType>(
210 array,
211 cast_options,
212 parse_interval_year_month,
213 )
214}
215
216pub(crate) fn cast_string_to_day_time_interval<Offset: OffsetSizeTrait>(
217 array: &dyn Array,
218 cast_options: &CastOptions,
219) -> Result<ArrayRef, ArrowError> {
220 cast_string_to_interval::<Offset, _, IntervalDayTimeType>(
221 array,
222 cast_options,
223 parse_interval_day_time,
224 )
225}
226
227pub(crate) fn cast_string_to_month_day_nano_interval<Offset: OffsetSizeTrait>(
228 array: &dyn Array,
229 cast_options: &CastOptions,
230) -> Result<ArrayRef, ArrowError> {
231 cast_string_to_interval::<Offset, _, IntervalMonthDayNanoType>(
232 array,
233 cast_options,
234 parse_interval_month_day_nano,
235 )
236}
237
238pub(crate) fn cast_view_to_interval<F, ArrowType>(
239 array: &dyn Array,
240 cast_options: &CastOptions,
241 parse_function: F,
242) -> Result<ArrayRef, ArrowError>
243where
244 ArrowType: ArrowPrimitiveType,
245 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
246{
247 let string_view_array = array.as_any().downcast_ref::<StringViewArray>().unwrap();
248 cast_string_to_interval_impl::<_, ArrowType, F>(
249 string_view_array.iter(),
250 cast_options,
251 parse_function,
252 )
253}
254
255pub(crate) fn cast_view_to_year_month_interval(
256 array: &dyn Array,
257 cast_options: &CastOptions,
258) -> Result<ArrayRef, ArrowError> {
259 cast_view_to_interval::<_, IntervalYearMonthType>(
260 array,
261 cast_options,
262 parse_interval_year_month,
263 )
264}
265
266pub(crate) fn cast_view_to_day_time_interval(
267 array: &dyn Array,
268 cast_options: &CastOptions,
269) -> Result<ArrayRef, ArrowError> {
270 cast_view_to_interval::<_, IntervalDayTimeType>(array, cast_options, parse_interval_day_time)
271}
272
273pub(crate) fn cast_view_to_month_day_nano_interval(
274 array: &dyn Array,
275 cast_options: &CastOptions,
276) -> Result<ArrayRef, ArrowError> {
277 cast_view_to_interval::<_, IntervalMonthDayNanoType>(
278 array,
279 cast_options,
280 parse_interval_month_day_nano,
281 )
282}
283
284fn cast_string_to_interval_impl<'a, I, ArrowType, F>(
285 iter: I,
286 cast_options: &CastOptions,
287 parse_function: F,
288) -> Result<ArrayRef, ArrowError>
289where
290 I: Iterator<Item = Option<&'a str>>,
291 ArrowType: ArrowPrimitiveType,
292 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
293{
294 let interval_array = if cast_options.safe {
295 let iter = iter.map(|v| v.and_then(|v| parse_function(v).ok()));
296
297 unsafe { PrimitiveArray::<ArrowType>::from_trusted_len_iter(iter) }
302 } else {
303 let vec = iter
304 .map(|v| v.map(parse_function).transpose())
305 .collect::<Result<Vec<_>, ArrowError>>()?;
306
307 unsafe { PrimitiveArray::<ArrowType>::from_trusted_len_iter(vec) }
312 };
313 Ok(Arc::new(interval_array) as ArrayRef)
314}
315
316pub(crate) fn cast_binary_to_string<O: OffsetSizeTrait>(
319 array: &dyn Array,
320 cast_options: &CastOptions,
321) -> Result<ArrayRef, ArrowError> {
322 let array = array
323 .as_any()
324 .downcast_ref::<GenericByteArray<GenericBinaryType<O>>>()
325 .unwrap();
326
327 match GenericStringArray::<O>::try_from_binary(array.clone()) {
328 Ok(a) => Ok(Arc::new(a)),
329 Err(e) => match cast_options.safe {
330 true => {
331 let mut builder =
333 GenericStringBuilder::<O>::with_capacity(array.len(), array.value_data().len());
334
335 let iter = array
336 .iter()
337 .map(|v| v.and_then(|v| std::str::from_utf8(v).ok()));
338
339 builder.extend(iter);
340 Ok(Arc::new(builder.finish()))
341 }
342 false => Err(e),
343 },
344 }
345}
346
347pub(crate) fn cast_utf8_to_boolean<OffsetSize>(
349 from: &dyn Array,
350 cast_options: &CastOptions,
351) -> Result<ArrayRef, ArrowError>
352where
353 OffsetSize: OffsetSizeTrait,
354{
355 let array = from
356 .as_any()
357 .downcast_ref::<GenericStringArray<OffsetSize>>()
358 .unwrap();
359
360 let output_array = array
361 .iter()
362 .map(|value| match value {
363 Some(value) => match value.to_ascii_lowercase().trim() {
364 "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)),
365 "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => {
366 Ok(Some(false))
367 }
368 invalid_value => match cast_options.safe {
369 true => Ok(None),
370 false => Err(ArrowError::CastError(format!(
371 "Cannot cast value '{invalid_value}' to value of Boolean type",
372 ))),
373 },
374 },
375 None => Ok(None),
376 })
377 .collect::<Result<BooleanArray, _>>()?;
378
379 Ok(Arc::new(output_array))
380}