mz_expr/scalar/func/impls/
string.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::fmt;
12use std::sync::LazyLock;
13
14use chrono::{DateTime, NaiveDateTime, NaiveTime, Utc};
15use mz_expr_derive::sqlfunc;
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_ore::result::ResultExt;
19use mz_ore::str::StrExt;
20use mz_repr::adt::char::{Char, format_str_trim};
21use mz_repr::adt::date::Date;
22use mz_repr::adt::interval::Interval;
23use mz_repr::adt::jsonb::Jsonb;
24use mz_repr::adt::numeric::{self, Numeric, NumericMaxScale};
25use mz_repr::adt::pg_legacy_name::PgLegacyName;
26use mz_repr::adt::regex::Regex;
27use mz_repr::adt::system::{Oid, PgLegacyChar};
28use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampPrecision};
29use mz_repr::adt::varchar::{VarChar, VarCharMaxLength};
30use mz_repr::{Datum, RowArena, SqlColumnType, SqlScalarType, strconv};
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use crate::func::regexp_match_static;
35use crate::scalar::func::{
36    EagerUnaryFunc, LazyUnaryFunc, array_create_scalar, regexp_split_to_array_re,
37};
38use crate::{EvalError, MirScalarExpr, UnaryFunc, like_pattern};
39
40#[sqlfunc(
41    sqlname = "text_to_boolean",
42    preserves_uniqueness = false,
43    inverse = to_unary!(super::CastBoolToString)
44)]
45fn cast_string_to_bool<'a>(a: &'a str) -> Result<bool, EvalError> {
46    strconv::parse_bool(a).err_into()
47}
48
49#[sqlfunc(
50    sqlname = "text_to_\"char\"",
51    preserves_uniqueness = true,
52    inverse = to_unary!(super::CastPgLegacyCharToString)
53)]
54fn cast_string_to_pg_legacy_char<'a>(a: &'a str) -> PgLegacyChar {
55    PgLegacyChar(a.as_bytes().get(0).copied().unwrap_or(0))
56}
57
58#[sqlfunc(sqlname = "text_to_name", preserves_uniqueness = true)]
59fn cast_string_to_pg_legacy_name<'a>(a: &'a str) -> PgLegacyName<String> {
60    PgLegacyName(strconv::parse_pg_legacy_name(a))
61}
62
63#[sqlfunc(
64    sqlname = "text_to_bytea",
65    preserves_uniqueness = true,
66    inverse = to_unary!(super::CastBytesToString)
67)]
68fn cast_string_to_bytes<'a>(a: &'a str) -> Result<Vec<u8>, EvalError> {
69    strconv::parse_bytes(a).err_into()
70}
71
72#[sqlfunc(
73    sqlname = "text_to_smallint",
74    preserves_uniqueness = false,
75    inverse = to_unary!(super::CastInt16ToString)
76)]
77fn cast_string_to_int16<'a>(a: &'a str) -> Result<i16, EvalError> {
78    strconv::parse_int16(a).err_into()
79}
80
81#[sqlfunc(
82    sqlname = "text_to_integer",
83    preserves_uniqueness = false,
84    inverse = to_unary!(super::CastInt32ToString)
85)]
86fn cast_string_to_int32<'a>(a: &'a str) -> Result<i32, EvalError> {
87    strconv::parse_int32(a).err_into()
88}
89
90#[sqlfunc(
91    sqlname = "text_to_bigint",
92    preserves_uniqueness = false,
93    inverse = to_unary!(super::CastInt64ToString)
94)]
95fn cast_string_to_int64<'a>(a: &'a str) -> Result<i64, EvalError> {
96    strconv::parse_int64(a).err_into()
97}
98
99#[sqlfunc(
100    sqlname = "text_to_real",
101    preserves_uniqueness = false,
102    inverse = to_unary!(super::CastFloat32ToString)
103)]
104fn cast_string_to_float32<'a>(a: &'a str) -> Result<f32, EvalError> {
105    strconv::parse_float32(a).err_into()
106}
107
108#[sqlfunc(
109    sqlname = "text_to_double",
110    preserves_uniqueness = false,
111    inverse = to_unary!(super::CastFloat64ToString)
112)]
113fn cast_string_to_float64<'a>(a: &'a str) -> Result<f64, EvalError> {
114    strconv::parse_float64(a).err_into()
115}
116
117#[sqlfunc(
118    sqlname = "text_to_oid",
119    preserves_uniqueness = false,
120    inverse = to_unary!(super::CastOidToString)
121)]
122fn cast_string_to_oid<'a>(a: &'a str) -> Result<Oid, EvalError> {
123    Ok(Oid(strconv::parse_oid(a)?))
124}
125
126#[sqlfunc(
127    sqlname = "text_to_uint2",
128    preserves_uniqueness = false,
129    inverse = to_unary!(super::CastUint16ToString)
130)]
131fn cast_string_to_uint16(a: &str) -> Result<u16, EvalError> {
132    strconv::parse_uint16(a).err_into()
133}
134
135#[sqlfunc(
136    sqlname = "text_to_uint4",
137    preserves_uniqueness = false,
138    inverse = to_unary!(super::CastUint32ToString)
139)]
140fn cast_string_to_uint32(a: &str) -> Result<u32, EvalError> {
141    strconv::parse_uint32(a).err_into()
142}
143
144#[sqlfunc(
145    sqlname = "text_to_uint8",
146    preserves_uniqueness = false,
147    inverse = to_unary!(super::CastUint64ToString)
148)]
149fn cast_string_to_uint64(a: &str) -> Result<u64, EvalError> {
150    strconv::parse_uint64(a).err_into()
151}
152
153#[sqlfunc(sqlname = "reverse")]
// Returns `a` with its `char`s (Unicode scalar values) in reverse order.
fn reverse<'a>(a: &'a str) -> String {
    // Reversing never changes the encoded length, so the input's byte length
    // is the exact capacity needed.
    let mut reversed = String::with_capacity(a.len());
    reversed.extend(a.chars().rev());
    reversed
}
157
158#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
159pub struct CastStringToNumeric(pub Option<NumericMaxScale>);
160
161impl<'a> EagerUnaryFunc<'a> for CastStringToNumeric {
162    type Input = &'a str;
163    type Output = Result<Numeric, EvalError>;
164
165    fn call(&self, a: &'a str) -> Result<Numeric, EvalError> {
166        let mut d = strconv::parse_numeric(a)?;
167        if let Some(scale) = self.0 {
168            if numeric::rescale(&mut d.0, scale.into_u8()).is_err() {
169                return Err(EvalError::NumericFieldOverflow);
170            }
171        }
172        Ok(d.into_inner())
173    }
174
175    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
176        SqlScalarType::Numeric { max_scale: self.0 }.nullable(input.nullable)
177    }
178
179    fn inverse(&self) -> Option<crate::UnaryFunc> {
180        to_unary!(super::CastNumericToString)
181    }
182}
183
184impl fmt::Display for CastStringToNumeric {
185    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
186        f.write_str("text_to_numeric")
187    }
188}
189
190#[sqlfunc(
191    sqlname = "text_to_date",
192    preserves_uniqueness = false,
193    inverse = to_unary!(super::CastDateToString)
194)]
195fn cast_string_to_date<'a>(a: &'a str) -> Result<Date, EvalError> {
196    strconv::parse_date(a).err_into()
197}
198
199#[sqlfunc(
200    sqlname = "text_to_time",
201    preserves_uniqueness = false,
202    inverse = to_unary!(super::CastTimeToString)
203)]
204fn cast_string_to_time<'a>(a: &'a str) -> Result<NaiveTime, EvalError> {
205    strconv::parse_time(a).err_into()
206}
207
208#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
209pub struct CastStringToTimestamp(pub Option<TimestampPrecision>);
210
211impl<'a> EagerUnaryFunc<'a> for CastStringToTimestamp {
212    type Input = &'a str;
213    type Output = Result<CheckedTimestamp<NaiveDateTime>, EvalError>;
214
215    fn call(&self, a: &'a str) -> Result<CheckedTimestamp<NaiveDateTime>, EvalError> {
216        let out = strconv::parse_timestamp(a)?;
217        let updated = out.round_to_precision(self.0)?;
218        Ok(updated)
219    }
220
221    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
222        SqlScalarType::Timestamp { precision: self.0 }.nullable(input.nullable)
223    }
224
225    fn inverse(&self) -> Option<crate::UnaryFunc> {
226        to_unary!(super::CastTimestampToString)
227    }
228}
229
230impl fmt::Display for CastStringToTimestamp {
231    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
232        f.write_str("text_to_timestamp")
233    }
234}
235
236#[sqlfunc(sqlname = "try_parse_monotonic_iso8601_timestamp")]
237// TODO: Pretty sure this preserves uniqueness, but not 100%.
238//
239// Ironically, even though this has "monotonic" in the name, it's not quite
240// eligible for `#[is_monotone = true]` because any input could also be
241// mapped to null. So, handle it via SpecialUnary in the interpreter.
242fn try_parse_monotonic_iso8601_timestamp<'a>(
243    a: &'a str,
244) -> Option<CheckedTimestamp<NaiveDateTime>> {
245    let ts = mz_persist_types::timestamp::try_parse_monotonic_iso8601_timestamp(a)?;
246    let ts = CheckedTimestamp::from_timestamplike(ts)
247        .expect("monotonic_iso8601 range is a subset of CheckedTimestamp domain");
248    Some(ts)
249}
250
251#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
252pub struct CastStringToTimestampTz(pub Option<TimestampPrecision>);
253
254impl<'a> EagerUnaryFunc<'a> for CastStringToTimestampTz {
255    type Input = &'a str;
256    type Output = Result<CheckedTimestamp<DateTime<Utc>>, EvalError>;
257
258    fn call(&self, a: &'a str) -> Result<CheckedTimestamp<DateTime<Utc>>, EvalError> {
259        let out = strconv::parse_timestamptz(a)?;
260        let updated = out.round_to_precision(self.0)?;
261        Ok(updated)
262    }
263
264    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
265        SqlScalarType::TimestampTz { precision: self.0 }.nullable(input.nullable)
266    }
267
268    fn inverse(&self) -> Option<crate::UnaryFunc> {
269        to_unary!(super::CastTimestampTzToString)
270    }
271}
272
273impl fmt::Display for CastStringToTimestampTz {
274    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
275        f.write_str("text_to_timestamp_with_time_zone")
276    }
277}
278
279#[sqlfunc(
280    sqlname = "text_to_interval",
281    preserves_uniqueness = false,
282    inverse = to_unary!(super::CastIntervalToString)
283)]
284fn cast_string_to_interval<'a>(a: &'a str) -> Result<Interval, EvalError> {
285    strconv::parse_interval(a).err_into()
286}
287
288#[sqlfunc(
289    sqlname = "text_to_uuid",
290    preserves_uniqueness = false,
291    inverse = to_unary!(super::CastUuidToString)
292)]
293fn cast_string_to_uuid<'a>(a: &'a str) -> Result<Uuid, EvalError> {
294    strconv::parse_uuid(a).err_into()
295}
296
297#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
298pub struct CastStringToArray {
299    // Target array's type.
300    pub return_ty: SqlScalarType,
301    // The expression to cast the discovered array elements to the array's
302    // element type.
303    pub cast_expr: Box<MirScalarExpr>,
304}
305
306impl LazyUnaryFunc for CastStringToArray {
307    fn eval<'a>(
308        &'a self,
309        datums: &[Datum<'a>],
310        temp_storage: &'a RowArena,
311        a: &'a MirScalarExpr,
312    ) -> Result<Datum<'a>, EvalError> {
313        let a = a.eval(datums, temp_storage)?;
314        if a.is_null() {
315            return Ok(Datum::Null);
316        }
317        let (datums, dims) = strconv::parse_array(
318            a.unwrap_str(),
319            || Datum::Null,
320            |elem_text| {
321                let elem_text = match elem_text {
322                    Cow::Owned(s) => temp_storage.push_string(s),
323                    Cow::Borrowed(s) => s,
324                };
325                self.cast_expr
326                    .eval(&[Datum::String(elem_text)], temp_storage)
327            },
328        )?;
329
330        Ok(temp_storage.try_make_datum(|packer| packer.try_push_array(&dims, datums))?)
331    }
332
333    /// The output SqlColumnType of this function
334    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
335        self.return_ty.clone().nullable(input_type.nullable)
336    }
337
338    /// Whether this function will produce NULL on NULL input
339    fn propagates_nulls(&self) -> bool {
340        true
341    }
342
343    /// Whether this function will produce NULL on non-NULL input
344    fn introduces_nulls(&self) -> bool {
345        false
346    }
347
348    /// Whether this function preserves uniqueness
349    fn preserves_uniqueness(&self) -> bool {
350        false
351    }
352
353    fn inverse(&self) -> Option<crate::UnaryFunc> {
354        to_unary!(super::CastArrayToString {
355            ty: self.return_ty.clone(),
356        })
357    }
358
359    fn is_monotone(&self) -> bool {
360        false
361    }
362}
363
364impl fmt::Display for CastStringToArray {
365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
366        f.write_str("strtoarray")
367    }
368}
369
370#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
371pub struct CastStringToList {
372    // Target list's type
373    pub return_ty: SqlScalarType,
374    // The expression to cast the discovered list elements to the list's
375    // element type.
376    pub cast_expr: Box<MirScalarExpr>,
377}
378
379impl LazyUnaryFunc for CastStringToList {
380    fn eval<'a>(
381        &'a self,
382        datums: &[Datum<'a>],
383        temp_storage: &'a RowArena,
384        a: &'a MirScalarExpr,
385    ) -> Result<Datum<'a>, EvalError> {
386        let a = a.eval(datums, temp_storage)?;
387        if a.is_null() {
388            return Ok(Datum::Null);
389        }
390        let parsed_datums = strconv::parse_list(
391            a.unwrap_str(),
392            matches!(
393                self.return_ty.unwrap_list_element_type(),
394                SqlScalarType::List { .. }
395            ),
396            || Datum::Null,
397            |elem_text| {
398                let elem_text = match elem_text {
399                    Cow::Owned(s) => temp_storage.push_string(s),
400                    Cow::Borrowed(s) => s,
401                };
402                self.cast_expr
403                    .eval(&[Datum::String(elem_text)], temp_storage)
404            },
405        )?;
406
407        Ok(temp_storage.make_datum(|packer| packer.push_list(parsed_datums)))
408    }
409
410    /// The output SqlColumnType of this function
411    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
412        self.return_ty
413            .without_modifiers()
414            .nullable(input_type.nullable)
415    }
416
417    /// Whether this function will produce NULL on NULL input
418    fn propagates_nulls(&self) -> bool {
419        true
420    }
421
422    /// Whether this function will produce NULL on non-NULL input
423    fn introduces_nulls(&self) -> bool {
424        false
425    }
426
427    /// Whether this function preserves uniqueness
428    fn preserves_uniqueness(&self) -> bool {
429        false
430    }
431
432    fn inverse(&self) -> Option<crate::UnaryFunc> {
433        to_unary!(super::CastListToString {
434            ty: self.return_ty.clone(),
435        })
436    }
437
438    fn is_monotone(&self) -> bool {
439        false
440    }
441}
442
443impl fmt::Display for CastStringToList {
444    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
445        f.write_str("strtolist")
446    }
447}
448
449#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
450pub struct CastStringToMap {
451    // Target map's value type
452    pub return_ty: SqlScalarType,
453    // The expression used to cast the discovered values to the map's value
454    // type.
455    pub cast_expr: Box<MirScalarExpr>,
456}
457
458impl LazyUnaryFunc for CastStringToMap {
459    fn eval<'a>(
460        &'a self,
461        datums: &[Datum<'a>],
462        temp_storage: &'a RowArena,
463        a: &'a MirScalarExpr,
464    ) -> Result<Datum<'a>, EvalError> {
465        let a = a.eval(datums, temp_storage)?;
466        if a.is_null() {
467            return Ok(Datum::Null);
468        }
469        let parsed_map = strconv::parse_map(
470            a.unwrap_str(),
471            matches!(
472                self.return_ty.unwrap_map_value_type(),
473                SqlScalarType::Map { .. }
474            ),
475            |value_text| -> Result<Datum, EvalError> {
476                let value_text = match value_text {
477                    Some(Cow::Owned(s)) => Datum::String(temp_storage.push_string(s)),
478                    Some(Cow::Borrowed(s)) => Datum::String(s),
479                    None => Datum::Null,
480                };
481                self.cast_expr.eval(&[value_text], temp_storage)
482            },
483        )?;
484        let mut pairs: Vec<(String, Datum)> = parsed_map.into_iter().map(|(k, v)| (k, v)).collect();
485        pairs.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2));
486        pairs.dedup_by(|(k1, _v1), (k2, _v2)| k1 == k2);
487        Ok(temp_storage.make_datum(|packer| {
488            packer.push_dict_with(|packer| {
489                for (k, v) in pairs {
490                    packer.push(Datum::String(&k));
491                    packer.push(v);
492                }
493            })
494        }))
495    }
496
497    /// The output SqlColumnType of this function
498    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
499        self.return_ty.clone().nullable(input_type.nullable)
500    }
501
502    /// Whether this function will produce NULL on NULL input
503    fn propagates_nulls(&self) -> bool {
504        true
505    }
506
507    /// Whether this function will produce NULL on non-NULL input
508    fn introduces_nulls(&self) -> bool {
509        false
510    }
511
512    /// Whether this function preserves uniqueness
513    fn preserves_uniqueness(&self) -> bool {
514        false
515    }
516
517    fn inverse(&self) -> Option<crate::UnaryFunc> {
518        to_unary!(super::CastMapToString {
519            ty: self.return_ty.clone(),
520        })
521    }
522
523    fn is_monotone(&self) -> bool {
524        false
525    }
526}
527
528impl fmt::Display for CastStringToMap {
529    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
530        f.write_str("strtomap")
531    }
532}
533
534#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
535pub struct CastStringToChar {
536    pub length: Option<mz_repr::adt::char::CharLength>,
537    pub fail_on_len: bool,
538}
539
540impl<'a> EagerUnaryFunc<'a> for CastStringToChar {
541    type Input = &'a str;
542    type Output = Result<Char<String>, EvalError>;
543
544    fn call(&self, a: &'a str) -> Result<Char<String>, EvalError> {
545        let s = format_str_trim(a, self.length, self.fail_on_len).map_err(|_| {
546            assert!(self.fail_on_len);
547            EvalError::StringValueTooLong {
548                target_type: "character".into(),
549                length: usize::cast_from(self.length.unwrap().into_u32()),
550            }
551        })?;
552
553        Ok(Char(s))
554    }
555
556    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
557        SqlScalarType::Char {
558            length: self.length,
559        }
560        .nullable(input.nullable)
561    }
562
563    fn could_error(&self) -> bool {
564        self.fail_on_len && self.length.is_some()
565    }
566
567    fn inverse(&self) -> Option<crate::UnaryFunc> {
568        to_unary!(super::CastCharToString)
569    }
570}
571
572impl fmt::Display for CastStringToChar {
573    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
574        match self.length {
575            Some(length) => {
576                write!(
577                    f,
578                    "text_to_char[len={}, fail_on_len={}]",
579                    length.into_u32(),
580                    self.fail_on_len
581                )
582            }
583            None => f.write_str("text_to_char[len=unbounded]"),
584        }
585    }
586}
587
588#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
589pub struct CastStringToRange {
590    // Target range's type
591    pub return_ty: SqlScalarType,
592    // The expression to cast the discovered range elements to the range's
593    // element type.
594    pub cast_expr: Box<MirScalarExpr>,
595}
596
597impl LazyUnaryFunc for CastStringToRange {
598    fn eval<'a>(
599        &'a self,
600        datums: &[Datum<'a>],
601        temp_storage: &'a RowArena,
602        a: &'a MirScalarExpr,
603    ) -> Result<Datum<'a>, EvalError> {
604        let a = a.eval(datums, temp_storage)?;
605        if a.is_null() {
606            return Ok(Datum::Null);
607        }
608        let mut range = strconv::parse_range(a.unwrap_str(), |elem_text| {
609            let elem_text = match elem_text {
610                Cow::Owned(s) => temp_storage.push_string(s),
611                Cow::Borrowed(s) => s,
612            };
613            self.cast_expr
614                .eval(&[Datum::String(elem_text)], temp_storage)
615        })?;
616
617        range.canonicalize()?;
618
619        Ok(temp_storage.make_datum(|packer| {
620            packer
621                .push_range(range)
622                .expect("must have already handled errors")
623        }))
624    }
625
626    /// The output SqlColumnType of this function
627    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
628        self.return_ty
629            .without_modifiers()
630            .nullable(input_type.nullable)
631    }
632
633    /// Whether this function will produce NULL on NULL input
634    fn propagates_nulls(&self) -> bool {
635        true
636    }
637
638    /// Whether this function will produce NULL on non-NULL input
639    fn introduces_nulls(&self) -> bool {
640        false
641    }
642
643    /// Whether this function preserves uniqueness
644    fn preserves_uniqueness(&self) -> bool {
645        false
646    }
647
648    fn inverse(&self) -> Option<crate::UnaryFunc> {
649        to_unary!(super::CastRangeToString {
650            ty: self.return_ty.clone(),
651        })
652    }
653
654    fn is_monotone(&self) -> bool {
655        false
656    }
657}
658
659impl fmt::Display for CastStringToRange {
660    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
661        f.write_str("strtorange")
662    }
663}
664
665#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
666pub struct CastStringToVarChar {
667    pub length: Option<VarCharMaxLength>,
668    pub fail_on_len: bool,
669}
670
671impl<'a> EagerUnaryFunc<'a> for CastStringToVarChar {
672    type Input = &'a str;
673    type Output = Result<VarChar<&'a str>, EvalError>;
674
675    fn call(&self, a: &'a str) -> Result<VarChar<&'a str>, EvalError> {
676        let s =
677            mz_repr::adt::varchar::format_str(a, self.length, self.fail_on_len).map_err(|_| {
678                assert!(self.fail_on_len);
679                EvalError::StringValueTooLong {
680                    target_type: "character varying".into(),
681                    length: usize::cast_from(self.length.unwrap().into_u32()),
682                }
683            })?;
684
685        Ok(VarChar(s))
686    }
687
688    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
689        SqlScalarType::VarChar {
690            max_length: self.length,
691        }
692        .nullable(input.nullable)
693    }
694
695    fn could_error(&self) -> bool {
696        self.fail_on_len && self.length.is_some()
697    }
698
699    fn preserves_uniqueness(&self) -> bool {
700        !self.fail_on_len || self.length.is_none()
701    }
702
703    fn inverse(&self) -> Option<crate::UnaryFunc> {
704        to_unary!(super::CastVarCharToString)
705    }
706}
707
708impl fmt::Display for CastStringToVarChar {
709    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
710        match self.length {
711            Some(length) => {
712                write!(
713                    f,
714                    "text_to_varchar[len={}, fail_on_len={}]",
715                    length.into_u32(),
716                    self.fail_on_len
717                )
718            }
719            None => f.write_str("text_to_varchar[len=unbounded]"),
720        }
721    }
722}
723
724// If we support another vector type, this should likely get hoisted into a
725// position akin to array parsing.
726static INT2VECTOR_CAST_EXPR: LazyLock<MirScalarExpr> = LazyLock::new(|| MirScalarExpr::CallUnary {
727    func: UnaryFunc::CastStringToInt16(CastStringToInt16),
728    expr: Box::new(MirScalarExpr::column(0)),
729});
730
731#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
732pub struct CastStringToInt2Vector;
733
734impl LazyUnaryFunc for CastStringToInt2Vector {
735    fn eval<'a>(
736        &'a self,
737        datums: &[Datum<'a>],
738        temp_storage: &'a RowArena,
739        a: &'a MirScalarExpr,
740    ) -> Result<Datum<'a>, EvalError> {
741        let a = a.eval(datums, temp_storage)?;
742        if a.is_null() {
743            return Ok(Datum::Null);
744        }
745
746        let datums = strconv::parse_legacy_vector(a.unwrap_str(), |elem_text| {
747            let elem_text = match elem_text {
748                Cow::Owned(s) => temp_storage.push_string(s),
749                Cow::Borrowed(s) => s,
750            };
751            INT2VECTOR_CAST_EXPR.eval(&[Datum::String(elem_text)], temp_storage)
752        })?;
753        array_create_scalar(&datums, temp_storage)
754    }
755
756    /// The output SqlColumnType of this function
757    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
758        SqlScalarType::Int2Vector.nullable(input_type.nullable)
759    }
760
761    /// Whether this function will produce NULL on NULL input
762    fn propagates_nulls(&self) -> bool {
763        true
764    }
765
766    /// Whether this function will produce NULL on non-NULL input
767    fn introduces_nulls(&self) -> bool {
768        false
769    }
770
771    /// Whether this function preserves uniqueness
772    fn preserves_uniqueness(&self) -> bool {
773        false
774    }
775
776    fn inverse(&self) -> Option<crate::UnaryFunc> {
777        to_unary!(super::CastInt2VectorToString)
778    }
779
780    fn is_monotone(&self) -> bool {
781        false
782    }
783}
784
785impl fmt::Display for CastStringToInt2Vector {
786    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
787        f.write_str("strtoint2vector")
788    }
789}
790
791#[sqlfunc(
792    sqlname = "text_to_jsonb",
793    preserves_uniqueness = false,
794    inverse = to_unary!(super::CastJsonbToString)
795)]
796// TODO(jamii): it would be much more efficient to skip the intermediate repr::jsonb::Jsonb.
797fn cast_string_to_jsonb<'a>(a: &'a str) -> Result<Jsonb, EvalError> {
798    Ok(strconv::parse_jsonb(a)?)
799}
800
801#[sqlfunc(sqlname = "btrim")]
// Strips every leading and trailing space character (U+0020) from `a`. No
// other whitespace character is removed.
fn trim_whitespace<'a>(a: &'a str) -> &'a str {
    // Trimming each side in turn is the same as a two-sided trim.
    let without_leading = a.trim_start_matches(' ');
    without_leading.trim_end_matches(' ')
}
805
806#[sqlfunc(sqlname = "ltrim")]
// Strips every leading space character (U+0020) from `a`. No other
// whitespace character is removed and the end of the string is untouched.
fn trim_leading_whitespace<'a>(a: &'a str) -> &'a str {
    const SPACE: char = ' ';
    a.trim_start_matches(SPACE)
}
810
811#[sqlfunc(sqlname = "rtrim")]
// Strips every trailing space character (U+0020) from `a`. No other
// whitespace character is removed and the start of the string is untouched.
fn trim_trailing_whitespace<'a>(a: &'a str) -> &'a str {
    const SPACE: char = ' ';
    a.trim_end_matches(SPACE)
}
815
816#[sqlfunc(sqlname = "initcap")]
// Upper-cases the first character of the string and every character that
// follows a non-alphanumeric character; all other characters are lower-cased.
fn initcap<'a>(a: &'a str) -> String {
    let mut result = String::new();
    // Tracks whether the previously seen character was alphanumeric, i.e.
    // whether the current character is in the middle of a word.
    let mut inside_word = false;
    for c in a.chars() {
        if inside_word {
            result.extend(c.to_lowercase());
        } else {
            result.extend(c.to_uppercase());
        }
        inside_word = c.is_alphanumeric();
    }
    result
}
830
831#[sqlfunc(sqlname = "ascii")]
// Returns the code point of the first character of `a` as an `i32`, or 0 for
// the empty string.
fn ascii<'a>(a: &'a str) -> i32 {
    match a.chars().next() {
        // A code point that does not fit in `i32` also falls back to 0.
        Some(first) => i32::try_from(u32::from(first)).unwrap_or(0),
        None => 0,
    }
}
838
839#[sqlfunc(sqlname = "char_length")]
840fn char_length<'a>(a: &'a str) -> Result<i32, EvalError> {
841    let length = a.chars().count();
842    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
843}
844
845#[sqlfunc(sqlname = "bit_length")]
846fn bit_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
847    let length = a.as_bytes().len() * 8;
848    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
849}
850
851#[sqlfunc(sqlname = "octet_length")]
852fn byte_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
853    let length = a.as_bytes().len();
854    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
855}
856
857#[sqlfunc]
// Returns the upper-case form of `a`, as produced by `str::to_uppercase`.
fn upper<'a>(a: &'a str) -> String {
    str::to_uppercase(a)
}
861
862#[sqlfunc]
// Returns the lower-case form of `a`, as produced by `str::to_lowercase`.
fn lower<'a>(a: &'a str) -> String {
    str::to_lowercase(a)
}
866
867pub fn normalize_with_form<'a>(
868    text: Datum<'a>,
869    form_str: Datum<'a>,
870    temp_storage: &'a RowArena,
871) -> Result<Datum<'a>, EvalError> {
872    use unicode_normalization::UnicodeNormalization;
873
874    let text = text.unwrap_str();
875    let form_str = form_str.unwrap_str();
876
877    let normalized = match form_str.to_uppercase().as_str() {
878        "NFC" => text.nfc().collect(),
879        "NFD" => text.nfd().collect(),
880        "NFKC" => text.nfkc().collect(),
881        "NFKD" => text.nfkd().collect(),
882        _ => {
883            return Err(EvalError::InvalidParameterValue(
884                format!("invalid normalization form: {}", form_str).into(),
885            ));
886        }
887    };
888
889    Ok(Datum::String(temp_storage.push_string(normalized)))
890}
891
892#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
893pub struct IsLikeMatch(pub like_pattern::Matcher);
894
895impl<'a> EagerUnaryFunc<'a> for IsLikeMatch {
896    type Input = &'a str;
897    type Output = bool;
898
899    fn call(&self, haystack: &'a str) -> bool {
900        self.0.is_match(haystack)
901    }
902
903    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
904        SqlScalarType::Bool.nullable(input.nullable)
905    }
906}
907
908impl fmt::Display for IsLikeMatch {
909    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
910        write!(
911            f,
912            "{}like[{}]",
913            if self.0.case_insensitive { "i" } else { "" },
914            self.0.pattern.escaped()
915        )
916    }
917}
918
919#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
920pub struct IsRegexpMatch(pub Regex);
921
922impl<'a> EagerUnaryFunc<'a> for IsRegexpMatch {
923    type Input = &'a str;
924    type Output = bool;
925
926    fn call(&self, haystack: &'a str) -> bool {
927        self.0.is_match(haystack)
928    }
929
930    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
931        SqlScalarType::Bool.nullable(input.nullable)
932    }
933}
934
935impl fmt::Display for IsRegexpMatch {
936    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
937        write!(
938            f,
939            "is_regexp_match[{}, case_insensitive={}]",
940            self.0.pattern().escaped(),
941            self.0.case_insensitive
942        )
943    }
944}
945
946#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
947pub struct RegexpMatch(pub Regex);
948
949impl LazyUnaryFunc for RegexpMatch {
950    fn eval<'a>(
951        &'a self,
952        datums: &[Datum<'a>],
953        temp_storage: &'a RowArena,
954        a: &'a MirScalarExpr,
955    ) -> Result<Datum<'a>, EvalError> {
956        let haystack = a.eval(datums, temp_storage)?;
957        if haystack.is_null() {
958            return Ok(Datum::Null);
959        }
960        regexp_match_static(haystack, temp_storage, &self.0)
961    }
962
963    /// The output SqlColumnType of this function
964    fn output_type(&self, _input_type: SqlColumnType) -> SqlColumnType {
965        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(true)
966    }
967
968    /// Whether this function will produce NULL on NULL input
969    fn propagates_nulls(&self) -> bool {
970        true
971    }
972
973    /// Whether this function will produce NULL on non-NULL input
974    fn introduces_nulls(&self) -> bool {
975        // Returns null if the regex did not match
976        true
977    }
978
979    /// Whether this function preserves uniqueness
980    fn preserves_uniqueness(&self) -> bool {
981        false
982    }
983
984    fn inverse(&self) -> Option<crate::UnaryFunc> {
985        None
986    }
987
988    fn is_monotone(&self) -> bool {
989        false
990    }
991}
992
993impl fmt::Display for RegexpMatch {
994    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
995        write!(
996            f,
997            "regexp_match[{}, case_insensitive={}]",
998            self.0.pattern().escaped(),
999            self.0.case_insensitive
1000        )
1001    }
1002}
1003
1004#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
1005pub struct RegexpSplitToArray(pub Regex);
1006
1007impl LazyUnaryFunc for RegexpSplitToArray {
1008    fn eval<'a>(
1009        &'a self,
1010        datums: &[Datum<'a>],
1011        temp_storage: &'a RowArena,
1012        a: &'a MirScalarExpr,
1013    ) -> Result<Datum<'a>, EvalError> {
1014        let haystack = a.eval(datums, temp_storage)?;
1015        if haystack.is_null() {
1016            return Ok(Datum::Null);
1017        }
1018        regexp_split_to_array_re(haystack.unwrap_str(), &self.0, temp_storage)
1019    }
1020
1021    /// The output SqlColumnType of this function
1022    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1023        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(input_type.nullable)
1024    }
1025
1026    /// Whether this function will produce NULL on NULL input
1027    fn propagates_nulls(&self) -> bool {
1028        true
1029    }
1030
1031    /// Whether this function will produce NULL on non-NULL input
1032    fn introduces_nulls(&self) -> bool {
1033        false
1034    }
1035
1036    /// Whether this function preserves uniqueness
1037    fn preserves_uniqueness(&self) -> bool {
1038        false
1039    }
1040
1041    fn inverse(&self) -> Option<crate::UnaryFunc> {
1042        None
1043    }
1044
1045    fn is_monotone(&self) -> bool {
1046        false
1047    }
1048}
1049
1050impl fmt::Display for RegexpSplitToArray {
1051    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1052        write!(
1053            f,
1054            "regexp_split_to_array[{}, case_insensitive={}]",
1055            self.0.pattern().escaped(),
1056            self.0.case_insensitive
1057        )
1058    }
1059}
1060
1061#[sqlfunc(sqlname = "mz_panic")]
1062fn panic<'a>(a: &'a str) -> String {
1063    print!("{}", a);
1064    panic!("{}", a)
1065}
1066
1067#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
1068pub struct QuoteIdent;
1069
1070impl LazyUnaryFunc for QuoteIdent {
1071    fn eval<'a>(
1072        &'a self,
1073        datums: &[Datum<'a>],
1074        temp_storage: &'a RowArena,
1075        a: &'a MirScalarExpr,
1076    ) -> Result<Datum<'a>, EvalError> {
1077        let d = a.eval(datums, temp_storage)?;
1078        if d.is_null() {
1079            return Ok(Datum::Null);
1080        }
1081        let v = d.unwrap_str();
1082        let i = mz_sql_parser::ast::Ident::new(v).map_err(|err| EvalError::InvalidIdentifier {
1083            ident: v.into(),
1084            detail: Some(err.to_string().into()),
1085        })?;
1086        let r = temp_storage.push_string(i.to_string());
1087
1088        Ok(Datum::String(r))
1089    }
1090
1091    /// The output SqlColumnType of this function
1092    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1093        SqlScalarType::String.nullable(input_type.nullable)
1094    }
1095
1096    /// Whether this function will produce NULL on NULL input
1097    fn propagates_nulls(&self) -> bool {
1098        true
1099    }
1100
1101    /// Whether this function will produce NULL on non-NULL input
1102    fn introduces_nulls(&self) -> bool {
1103        false
1104    }
1105
1106    /// Whether this function preserves uniqueness
1107    fn preserves_uniqueness(&self) -> bool {
1108        true
1109    }
1110
1111    fn inverse(&self) -> Option<crate::UnaryFunc> {
1112        None
1113    }
1114
1115    fn is_monotone(&self) -> bool {
1116        false
1117    }
1118}
1119
1120impl fmt::Display for QuoteIdent {
1121    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1122        write!(f, "quote_ident")
1123    }
1124}