Skip to main content

mz_expr/scalar/func/impls/
string.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::fmt;
12use std::sync::LazyLock;
13
14use chrono::{DateTime, NaiveDateTime, NaiveTime, Utc};
15use mz_expr_derive::sqlfunc;
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_ore::result::ResultExt;
19use mz_ore::str::StrExt;
20use mz_repr::adt::char::{Char, format_str_trim};
21use mz_repr::adt::date::Date;
22use mz_repr::adt::interval::Interval;
23use mz_repr::adt::jsonb::Jsonb;
24use mz_repr::adt::numeric::{self, Numeric, NumericMaxScale};
25use mz_repr::adt::pg_legacy_name::PgLegacyName;
26use mz_repr::adt::regex::Regex;
27use mz_repr::adt::system::{Oid, PgLegacyChar};
28use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampPrecision};
29use mz_repr::adt::varchar::{VarChar, VarCharMaxLength};
30use mz_repr::{Datum, RowArena, SqlColumnType, SqlScalarType, strconv};
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use crate::func::{binary, regexp_match_static};
35use crate::scalar::func::{
36    EagerUnaryFunc, LazyUnaryFunc, array_create_scalar, regexp_split_to_array_re,
37};
38use crate::{EvalError, MirScalarExpr, UnaryFunc, like_pattern};
39
40#[sqlfunc(
41    sqlname = "text_to_boolean",
42    preserves_uniqueness = false,
43    inverse = to_unary!(super::CastBoolToString)
44)]
45fn cast_string_to_bool<'a>(a: &'a str) -> Result<bool, EvalError> {
46    strconv::parse_bool(a).err_into()
47}
48
49#[sqlfunc(
50    sqlname = "text_to_\"char\"",
51    preserves_uniqueness = true,
52    inverse = to_unary!(super::CastPgLegacyCharToString)
53)]
54fn cast_string_to_pg_legacy_char<'a>(a: &'a str) -> PgLegacyChar {
55    PgLegacyChar(a.as_bytes().get(0).copied().unwrap_or(0))
56}
57
58#[sqlfunc(sqlname = "text_to_name", preserves_uniqueness = true)]
59fn cast_string_to_pg_legacy_name<'a>(a: &'a str) -> PgLegacyName<String> {
60    PgLegacyName(strconv::parse_pg_legacy_name(a))
61}
62
63#[sqlfunc(
64    sqlname = "text_to_bytea",
65    preserves_uniqueness = true,
66    inverse = to_unary!(super::CastBytesToString)
67)]
68fn cast_string_to_bytes<'a>(a: &'a str) -> Result<Vec<u8>, EvalError> {
69    strconv::parse_bytes(a).err_into()
70}
71
72#[sqlfunc(
73    sqlname = "text_to_smallint",
74    preserves_uniqueness = false,
75    inverse = to_unary!(super::CastInt16ToString)
76)]
77fn cast_string_to_int16<'a>(a: &'a str) -> Result<i16, EvalError> {
78    strconv::parse_int16(a).err_into()
79}
80
81#[sqlfunc(
82    sqlname = "text_to_integer",
83    preserves_uniqueness = false,
84    inverse = to_unary!(super::CastInt32ToString)
85)]
86fn cast_string_to_int32<'a>(a: &'a str) -> Result<i32, EvalError> {
87    strconv::parse_int32(a).err_into()
88}
89
90#[sqlfunc(
91    sqlname = "text_to_bigint",
92    preserves_uniqueness = false,
93    inverse = to_unary!(super::CastInt64ToString)
94)]
95fn cast_string_to_int64<'a>(a: &'a str) -> Result<i64, EvalError> {
96    strconv::parse_int64(a).err_into()
97}
98
99#[sqlfunc(
100    sqlname = "text_to_real",
101    preserves_uniqueness = false,
102    inverse = to_unary!(super::CastFloat32ToString)
103)]
104fn cast_string_to_float32<'a>(a: &'a str) -> Result<f32, EvalError> {
105    strconv::parse_float32(a).err_into()
106}
107
108#[sqlfunc(
109    sqlname = "text_to_double",
110    preserves_uniqueness = false,
111    inverse = to_unary!(super::CastFloat64ToString)
112)]
113fn cast_string_to_float64<'a>(a: &'a str) -> Result<f64, EvalError> {
114    strconv::parse_float64(a).err_into()
115}
116
117#[sqlfunc(
118    sqlname = "text_to_oid",
119    preserves_uniqueness = false,
120    inverse = to_unary!(super::CastOidToString)
121)]
122fn cast_string_to_oid<'a>(a: &'a str) -> Result<Oid, EvalError> {
123    Ok(Oid(strconv::parse_oid(a)?))
124}
125
126#[sqlfunc(
127    sqlname = "text_to_uint2",
128    preserves_uniqueness = false,
129    inverse = to_unary!(super::CastUint16ToString)
130)]
131fn cast_string_to_uint16(a: &str) -> Result<u16, EvalError> {
132    strconv::parse_uint16(a).err_into()
133}
134
135#[sqlfunc(
136    sqlname = "text_to_uint4",
137    preserves_uniqueness = false,
138    inverse = to_unary!(super::CastUint32ToString)
139)]
140fn cast_string_to_uint32(a: &str) -> Result<u32, EvalError> {
141    strconv::parse_uint32(a).err_into()
142}
143
144#[sqlfunc(
145    sqlname = "text_to_uint8",
146    preserves_uniqueness = false,
147    inverse = to_unary!(super::CastUint64ToString)
148)]
149fn cast_string_to_uint64(a: &str) -> Result<u64, EvalError> {
150    strconv::parse_uint64(a).err_into()
151}
152
153#[sqlfunc(sqlname = "reverse")]
154fn reverse<'a>(a: &'a str) -> String {
155    a.chars().rev().collect()
156}
157
158#[derive(
159    Ord,
160    PartialOrd,
161    Clone,
162    Debug,
163    Eq,
164    PartialEq,
165    Serialize,
166    Deserialize,
167    Hash,
168    MzReflect
169)]
170pub struct CastStringToNumeric(pub Option<NumericMaxScale>);
171
172impl EagerUnaryFunc for CastStringToNumeric {
173    type Input<'a> = &'a str;
174    type Output<'a> = Result<Numeric, EvalError>;
175
176    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
177        let mut d = strconv::parse_numeric(a)?;
178        if let Some(scale) = self.0 {
179            if numeric::rescale(&mut d.0, scale.into_u8()).is_err() {
180                return Err(EvalError::NumericFieldOverflow);
181            }
182        }
183        Ok(d.into_inner())
184    }
185
186    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
187        SqlScalarType::Numeric { max_scale: self.0 }.nullable(input.nullable)
188    }
189
190    fn inverse(&self) -> Option<crate::UnaryFunc> {
191        to_unary!(super::CastNumericToString)
192    }
193}
194
195impl fmt::Display for CastStringToNumeric {
196    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
197        f.write_str("text_to_numeric")
198    }
199}
200
201#[sqlfunc(
202    sqlname = "text_to_date",
203    preserves_uniqueness = false,
204    inverse = to_unary!(super::CastDateToString)
205)]
206fn cast_string_to_date<'a>(a: &'a str) -> Result<Date, EvalError> {
207    strconv::parse_date(a).err_into()
208}
209
210#[sqlfunc(
211    sqlname = "text_to_time",
212    preserves_uniqueness = false,
213    inverse = to_unary!(super::CastTimeToString)
214)]
215fn cast_string_to_time<'a>(a: &'a str) -> Result<NaiveTime, EvalError> {
216    strconv::parse_time(a).err_into()
217}
218
219#[derive(
220    Ord,
221    PartialOrd,
222    Clone,
223    Debug,
224    Eq,
225    PartialEq,
226    Serialize,
227    Deserialize,
228    Hash,
229    MzReflect
230)]
231pub struct CastStringToTimestamp(pub Option<TimestampPrecision>);
232
233impl EagerUnaryFunc for CastStringToTimestamp {
234    type Input<'a> = &'a str;
235    type Output<'a> = Result<CheckedTimestamp<NaiveDateTime>, EvalError>;
236
237    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
238        let out = strconv::parse_timestamp(a)?;
239        let updated = out.round_to_precision(self.0)?;
240        Ok(updated)
241    }
242
243    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
244        SqlScalarType::Timestamp { precision: self.0 }.nullable(input.nullable)
245    }
246
247    fn inverse(&self) -> Option<crate::UnaryFunc> {
248        to_unary!(super::CastTimestampToString)
249    }
250}
251
252impl fmt::Display for CastStringToTimestamp {
253    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254        f.write_str("text_to_timestamp")
255    }
256}
257
258#[sqlfunc(sqlname = "try_parse_monotonic_iso8601_timestamp")]
259// TODO: Pretty sure this preserves uniqueness, but not 100%.
260//
261// Ironically, even though this has "monotonic" in the name, it's not quite
262// eligible for `#[is_monotone = true]` because any input could also be
263// mapped to null. So, handle it via SpecialUnary in the interpreter.
264fn try_parse_monotonic_iso8601_timestamp<'a>(
265    a: &'a str,
266) -> Option<CheckedTimestamp<NaiveDateTime>> {
267    let ts = mz_persist_types::timestamp::try_parse_monotonic_iso8601_timestamp(a)?;
268    let ts = CheckedTimestamp::from_timestamplike(ts)
269        .expect("monotonic_iso8601 range is a subset of CheckedTimestamp domain");
270    Some(ts)
271}
272
273#[derive(
274    Ord,
275    PartialOrd,
276    Clone,
277    Debug,
278    Eq,
279    PartialEq,
280    Serialize,
281    Deserialize,
282    Hash,
283    MzReflect
284)]
285pub struct CastStringToTimestampTz(pub Option<TimestampPrecision>);
286
287impl EagerUnaryFunc for CastStringToTimestampTz {
288    type Input<'a> = &'a str;
289    type Output<'a> = Result<CheckedTimestamp<DateTime<Utc>>, EvalError>;
290
291    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
292        let out = strconv::parse_timestamptz(a)?;
293        let updated = out.round_to_precision(self.0)?;
294        Ok(updated)
295    }
296
297    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
298        SqlScalarType::TimestampTz { precision: self.0 }.nullable(input.nullable)
299    }
300
301    fn inverse(&self) -> Option<crate::UnaryFunc> {
302        to_unary!(super::CastTimestampTzToString)
303    }
304}
305
306impl fmt::Display for CastStringToTimestampTz {
307    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
308        f.write_str("text_to_timestamp_with_time_zone")
309    }
310}
311
312#[sqlfunc(
313    sqlname = "text_to_interval",
314    preserves_uniqueness = false,
315    inverse = to_unary!(super::CastIntervalToString)
316)]
317fn cast_string_to_interval<'a>(a: &'a str) -> Result<Interval, EvalError> {
318    strconv::parse_interval(a).err_into()
319}
320
321#[sqlfunc(
322    sqlname = "text_to_uuid",
323    preserves_uniqueness = false,
324    inverse = to_unary!(super::CastUuidToString)
325)]
326fn cast_string_to_uuid<'a>(a: &'a str) -> Result<Uuid, EvalError> {
327    strconv::parse_uuid(a).err_into()
328}
329
330#[derive(
331    Ord,
332    PartialOrd,
333    Clone,
334    Debug,
335    Eq,
336    PartialEq,
337    Serialize,
338    Deserialize,
339    Hash,
340    MzReflect
341)]
342pub struct CastStringToArray {
343    // Target array's type.
344    pub return_ty: SqlScalarType,
345    // The expression to cast the discovered array elements to the array's
346    // element type.
347    pub cast_expr: Box<MirScalarExpr>,
348}
349
350impl LazyUnaryFunc for CastStringToArray {
351    fn eval<'a>(
352        &'a self,
353        datums: &[Datum<'a>],
354        temp_storage: &'a RowArena,
355        a: &'a MirScalarExpr,
356    ) -> Result<Datum<'a>, EvalError> {
357        let a = a.eval(datums, temp_storage)?;
358        if a.is_null() {
359            return Ok(Datum::Null);
360        }
361        let (datums, dims) = strconv::parse_array(
362            a.unwrap_str(),
363            || Datum::Null,
364            |elem_text| {
365                let elem_text = match elem_text {
366                    Cow::Owned(s) => temp_storage.push_string(s),
367                    Cow::Borrowed(s) => s,
368                };
369                self.cast_expr
370                    .eval(&[Datum::String(elem_text)], temp_storage)
371            },
372        )?;
373
374        Ok(temp_storage.try_make_datum(|packer| packer.try_push_array(&dims, datums))?)
375    }
376
377    /// The output SqlColumnType of this function
378    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
379        self.return_ty.clone().nullable(input_type.nullable)
380    }
381
382    /// Whether this function will produce NULL on NULL input
383    fn propagates_nulls(&self) -> bool {
384        true
385    }
386
387    /// Whether this function will produce NULL on non-NULL input
388    fn introduces_nulls(&self) -> bool {
389        false
390    }
391
392    /// Whether this function preserves uniqueness
393    fn preserves_uniqueness(&self) -> bool {
394        false
395    }
396
397    fn inverse(&self) -> Option<crate::UnaryFunc> {
398        to_unary!(super::CastArrayToString {
399            ty: self.return_ty.clone(),
400        })
401    }
402
403    fn is_monotone(&self) -> bool {
404        false
405    }
406
407    fn is_eliminable_cast(&self) -> bool {
408        false
409    }
410}
411
412impl fmt::Display for CastStringToArray {
413    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
414        f.write_str("strtoarray")
415    }
416}
417
418#[derive(
419    Ord,
420    PartialOrd,
421    Clone,
422    Debug,
423    Eq,
424    PartialEq,
425    Serialize,
426    Deserialize,
427    Hash,
428    MzReflect
429)]
430pub struct CastStringToList {
431    // Target list's type
432    pub return_ty: SqlScalarType,
433    // The expression to cast the discovered list elements to the list's
434    // element type.
435    pub cast_expr: Box<MirScalarExpr>,
436}
437
438impl LazyUnaryFunc for CastStringToList {
439    fn eval<'a>(
440        &'a self,
441        datums: &[Datum<'a>],
442        temp_storage: &'a RowArena,
443        a: &'a MirScalarExpr,
444    ) -> Result<Datum<'a>, EvalError> {
445        let a = a.eval(datums, temp_storage)?;
446        if a.is_null() {
447            return Ok(Datum::Null);
448        }
449        let parsed_datums = strconv::parse_list(
450            a.unwrap_str(),
451            matches!(
452                self.return_ty.unwrap_list_element_type(),
453                SqlScalarType::List { .. }
454            ),
455            || Datum::Null,
456            |elem_text| {
457                let elem_text = match elem_text {
458                    Cow::Owned(s) => temp_storage.push_string(s),
459                    Cow::Borrowed(s) => s,
460                };
461                self.cast_expr
462                    .eval(&[Datum::String(elem_text)], temp_storage)
463            },
464        )?;
465
466        Ok(temp_storage.make_datum(|packer| packer.push_list(parsed_datums)))
467    }
468
469    /// The output SqlColumnType of this function
470    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
471        self.return_ty
472            .without_modifiers()
473            .nullable(input_type.nullable)
474    }
475
476    /// Whether this function will produce NULL on NULL input
477    fn propagates_nulls(&self) -> bool {
478        true
479    }
480
481    /// Whether this function will produce NULL on non-NULL input
482    fn introduces_nulls(&self) -> bool {
483        false
484    }
485
486    /// Whether this function preserves uniqueness
487    fn preserves_uniqueness(&self) -> bool {
488        false
489    }
490
491    fn inverse(&self) -> Option<crate::UnaryFunc> {
492        to_unary!(super::CastListToString {
493            ty: self.return_ty.clone(),
494        })
495    }
496
497    fn is_monotone(&self) -> bool {
498        false
499    }
500
501    fn is_eliminable_cast(&self) -> bool {
502        false
503    }
504}
505
506impl fmt::Display for CastStringToList {
507    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
508        f.write_str("strtolist")
509    }
510}
511
512#[derive(
513    Ord,
514    PartialOrd,
515    Clone,
516    Debug,
517    Eq,
518    PartialEq,
519    Serialize,
520    Deserialize,
521    Hash,
522    MzReflect
523)]
524pub struct CastStringToMap {
525    // Target map's value type
526    pub return_ty: SqlScalarType,
527    // The expression used to cast the discovered values to the map's value
528    // type.
529    pub cast_expr: Box<MirScalarExpr>,
530}
531
532impl LazyUnaryFunc for CastStringToMap {
533    fn eval<'a>(
534        &'a self,
535        datums: &[Datum<'a>],
536        temp_storage: &'a RowArena,
537        a: &'a MirScalarExpr,
538    ) -> Result<Datum<'a>, EvalError> {
539        let a = a.eval(datums, temp_storage)?;
540        if a.is_null() {
541            return Ok(Datum::Null);
542        }
543        let parsed_map = strconv::parse_map(
544            a.unwrap_str(),
545            matches!(
546                self.return_ty.unwrap_map_value_type(),
547                SqlScalarType::Map { .. }
548            ),
549            |value_text| -> Result<Datum, EvalError> {
550                let value_text = match value_text {
551                    Some(Cow::Owned(s)) => Datum::String(temp_storage.push_string(s)),
552                    Some(Cow::Borrowed(s)) => Datum::String(s),
553                    None => Datum::Null,
554                };
555                self.cast_expr.eval(&[value_text], temp_storage)
556            },
557        )?;
558        let mut pairs: Vec<(String, Datum)> = parsed_map.into_iter().map(|(k, v)| (k, v)).collect();
559        pairs.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2));
560        pairs.dedup_by(|(k1, _v1), (k2, _v2)| k1 == k2);
561        Ok(temp_storage.make_datum(|packer| {
562            packer.push_dict_with(|packer| {
563                for (k, v) in pairs {
564                    packer.push(Datum::String(&k));
565                    packer.push(v);
566                }
567            })
568        }))
569    }
570
571    /// The output SqlColumnType of this function
572    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
573        self.return_ty.clone().nullable(input_type.nullable)
574    }
575
576    /// Whether this function will produce NULL on NULL input
577    fn propagates_nulls(&self) -> bool {
578        true
579    }
580
581    /// Whether this function will produce NULL on non-NULL input
582    fn introduces_nulls(&self) -> bool {
583        false
584    }
585
586    /// Whether this function preserves uniqueness
587    fn preserves_uniqueness(&self) -> bool {
588        false
589    }
590
591    fn inverse(&self) -> Option<crate::UnaryFunc> {
592        to_unary!(super::CastMapToString {
593            ty: self.return_ty.clone(),
594        })
595    }
596
597    fn is_monotone(&self) -> bool {
598        false
599    }
600
601    fn is_eliminable_cast(&self) -> bool {
602        false
603    }
604}
605
606impl fmt::Display for CastStringToMap {
607    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
608        f.write_str("strtomap")
609    }
610}
611
612#[derive(
613    Ord,
614    PartialOrd,
615    Clone,
616    Debug,
617    Eq,
618    PartialEq,
619    Serialize,
620    Deserialize,
621    Hash,
622    MzReflect
623)]
624pub struct CastStringToChar {
625    pub length: Option<mz_repr::adt::char::CharLength>,
626    pub fail_on_len: bool,
627}
628
629impl EagerUnaryFunc for CastStringToChar {
630    type Input<'a> = &'a str;
631    type Output<'a> = Result<Char<String>, EvalError>;
632
633    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
634        let s = format_str_trim(a, self.length, self.fail_on_len).map_err(|_| {
635            assert!(self.fail_on_len);
636            EvalError::StringValueTooLong {
637                target_type: "character".into(),
638                length: usize::cast_from(self.length.unwrap().into_u32()),
639            }
640        })?;
641
642        Ok(Char(s))
643    }
644
645    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
646        SqlScalarType::Char {
647            length: self.length,
648        }
649        .nullable(input.nullable)
650    }
651
652    fn could_error(&self) -> bool {
653        self.fail_on_len && self.length.is_some()
654    }
655
656    fn inverse(&self) -> Option<crate::UnaryFunc> {
657        to_unary!(super::CastCharToString)
658    }
659
660    fn is_eliminable_cast(&self) -> bool {
661        // even when `length` is `None`, we'll trim whitespace at the end
662        false
663    }
664}
665
666impl fmt::Display for CastStringToChar {
667    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
668        match self.length {
669            Some(length) => {
670                write!(
671                    f,
672                    "text_to_char[len={}, fail_on_len={}]",
673                    length.into_u32(),
674                    self.fail_on_len
675                )
676            }
677            None => f.write_str("text_to_char[len=unbounded]"),
678        }
679    }
680}
681
682#[derive(
683    Ord,
684    PartialOrd,
685    Clone,
686    Debug,
687    Eq,
688    PartialEq,
689    Serialize,
690    Deserialize,
691    Hash,
692    MzReflect
693)]
694pub struct CastStringToRange {
695    // Target range's type
696    pub return_ty: SqlScalarType,
697    // The expression to cast the discovered range elements to the range's
698    // element type.
699    pub cast_expr: Box<MirScalarExpr>,
700}
701
702impl LazyUnaryFunc for CastStringToRange {
703    fn eval<'a>(
704        &'a self,
705        datums: &[Datum<'a>],
706        temp_storage: &'a RowArena,
707        a: &'a MirScalarExpr,
708    ) -> Result<Datum<'a>, EvalError> {
709        let a = a.eval(datums, temp_storage)?;
710        if a.is_null() {
711            return Ok(Datum::Null);
712        }
713        let mut range = strconv::parse_range(a.unwrap_str(), |elem_text| {
714            let elem_text = match elem_text {
715                Cow::Owned(s) => temp_storage.push_string(s),
716                Cow::Borrowed(s) => s,
717            };
718            self.cast_expr
719                .eval(&[Datum::String(elem_text)], temp_storage)
720        })?;
721
722        range.canonicalize()?;
723
724        Ok(temp_storage.make_datum(|packer| {
725            packer
726                .push_range(range)
727                .expect("must have already handled errors")
728        }))
729    }
730
731    /// The output SqlColumnType of this function
732    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
733        self.return_ty
734            .without_modifiers()
735            .nullable(input_type.nullable)
736    }
737
738    /// Whether this function will produce NULL on NULL input
739    fn propagates_nulls(&self) -> bool {
740        true
741    }
742
743    /// Whether this function will produce NULL on non-NULL input
744    fn introduces_nulls(&self) -> bool {
745        false
746    }
747
748    /// Whether this function preserves uniqueness
749    fn preserves_uniqueness(&self) -> bool {
750        false
751    }
752
753    fn inverse(&self) -> Option<crate::UnaryFunc> {
754        to_unary!(super::CastRangeToString {
755            ty: self.return_ty.clone(),
756        })
757    }
758
759    fn is_monotone(&self) -> bool {
760        false
761    }
762
763    fn is_eliminable_cast(&self) -> bool {
764        false
765    }
766}
767
768impl fmt::Display for CastStringToRange {
769    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
770        f.write_str("strtorange")
771    }
772}
773
774#[derive(
775    Ord,
776    PartialOrd,
777    Clone,
778    Debug,
779    Eq,
780    PartialEq,
781    Serialize,
782    Deserialize,
783    Hash,
784    MzReflect
785)]
786pub struct CastStringToVarChar {
787    pub length: Option<VarCharMaxLength>,
788    pub fail_on_len: bool,
789}
790
791impl EagerUnaryFunc for CastStringToVarChar {
792    type Input<'a> = &'a str;
793    type Output<'a> = Result<VarChar<&'a str>, EvalError>;
794
795    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
796        let s =
797            mz_repr::adt::varchar::format_str(a, self.length, self.fail_on_len).map_err(|_| {
798                assert!(self.fail_on_len);
799                EvalError::StringValueTooLong {
800                    target_type: "character varying".into(),
801                    length: usize::cast_from(self.length.unwrap().into_u32()),
802                }
803            })?;
804
805        Ok(VarChar(s))
806    }
807
808    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
809        SqlScalarType::VarChar {
810            max_length: self.length,
811        }
812        .nullable(input.nullable)
813    }
814
815    fn could_error(&self) -> bool {
816        self.fail_on_len && self.length.is_some()
817    }
818
819    fn preserves_uniqueness(&self) -> bool {
820        !self.fail_on_len || self.length.is_none()
821    }
822
823    fn inverse(&self) -> Option<crate::UnaryFunc> {
824        to_unary!(super::CastVarCharToString)
825    }
826
827    fn is_eliminable_cast(&self) -> bool {
828        self.length.is_none()
829    }
830}
831
832impl fmt::Display for CastStringToVarChar {
833    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
834        match self.length {
835            Some(length) => {
836                write!(
837                    f,
838                    "text_to_varchar[len={}, fail_on_len={}]",
839                    length.into_u32(),
840                    self.fail_on_len
841                )
842            }
843            None => f.write_str("text_to_varchar[len=unbounded]"),
844        }
845    }
846}
847
848// If we support another vector type, this should likely get hoisted into a
849// position akin to array parsing.
850static INT2VECTOR_CAST_EXPR: LazyLock<MirScalarExpr> = LazyLock::new(|| MirScalarExpr::CallUnary {
851    func: UnaryFunc::CastStringToInt16(CastStringToInt16),
852    expr: Box::new(MirScalarExpr::column(0)),
853});
854
855#[derive(
856    Ord,
857    PartialOrd,
858    Clone,
859    Debug,
860    Eq,
861    PartialEq,
862    Serialize,
863    Deserialize,
864    Hash,
865    MzReflect
866)]
867pub struct CastStringToInt2Vector;
868
869impl LazyUnaryFunc for CastStringToInt2Vector {
870    fn eval<'a>(
871        &'a self,
872        datums: &[Datum<'a>],
873        temp_storage: &'a RowArena,
874        a: &'a MirScalarExpr,
875    ) -> Result<Datum<'a>, EvalError> {
876        let a = a.eval(datums, temp_storage)?;
877        if a.is_null() {
878            return Ok(Datum::Null);
879        }
880
881        let datums = strconv::parse_legacy_vector(a.unwrap_str(), |elem_text| {
882            let elem_text = match elem_text {
883                Cow::Owned(s) => temp_storage.push_string(s),
884                Cow::Borrowed(s) => s,
885            };
886            INT2VECTOR_CAST_EXPR.eval(&[Datum::String(elem_text)], temp_storage)
887        })?;
888        array_create_scalar(&datums, temp_storage)
889    }
890
891    /// The output SqlColumnType of this function
892    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
893        SqlScalarType::Int2Vector.nullable(input_type.nullable)
894    }
895
896    /// Whether this function will produce NULL on NULL input
897    fn propagates_nulls(&self) -> bool {
898        true
899    }
900
901    /// Whether this function will produce NULL on non-NULL input
902    fn introduces_nulls(&self) -> bool {
903        false
904    }
905
906    /// Whether this function preserves uniqueness
907    fn preserves_uniqueness(&self) -> bool {
908        false
909    }
910
911    fn inverse(&self) -> Option<crate::UnaryFunc> {
912        to_unary!(super::CastInt2VectorToString)
913    }
914
915    fn is_monotone(&self) -> bool {
916        false
917    }
918
919    fn is_eliminable_cast(&self) -> bool {
920        false
921    }
922}
923
924impl fmt::Display for CastStringToInt2Vector {
925    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
926        f.write_str("strtoint2vector")
927    }
928}
929
930#[sqlfunc(
931    sqlname = "text_to_jsonb",
932    preserves_uniqueness = false,
933    inverse = to_unary!(super::CastJsonbToString)
934)]
935// TODO(jamii): it would be much more efficient to skip the intermediate repr::jsonb::Jsonb.
936fn cast_string_to_jsonb<'a>(a: &'a str) -> Result<Jsonb, EvalError> {
937    Ok(strconv::parse_jsonb(a)?)
938}
939
940#[sqlfunc(sqlname = "btrim")]
941fn trim_whitespace<'a>(a: &'a str) -> &'a str {
942    a.trim_matches(' ')
943}
944
945#[sqlfunc(sqlname = "ltrim")]
946fn trim_leading_whitespace<'a>(a: &'a str) -> &'a str {
947    a.trim_start_matches(' ')
948}
949
950#[sqlfunc(sqlname = "rtrim")]
951fn trim_trailing_whitespace<'a>(a: &'a str) -> &'a str {
952    a.trim_end_matches(' ')
953}
954
955#[sqlfunc(sqlname = "initcap")]
956fn initcap<'a>(a: &'a str) -> String {
957    let mut out = String::new();
958    let mut capitalize_next = true;
959    for ch in a.chars() {
960        if capitalize_next {
961            out.extend(ch.to_uppercase())
962        } else {
963            out.extend(ch.to_lowercase())
964        };
965        capitalize_next = !ch.is_alphanumeric();
966    }
967    out
968}
969
970#[sqlfunc(sqlname = "ascii")]
971fn ascii<'a>(a: &'a str) -> i32 {
972    a.chars()
973        .next()
974        .and_then(|c| i32::try_from(u32::from(c)).ok())
975        .unwrap_or(0)
976}
977
978#[sqlfunc(sqlname = "char_length")]
979fn char_length<'a>(a: &'a str) -> Result<i32, EvalError> {
980    let length = a.chars().count();
981    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
982}
983
984#[sqlfunc(sqlname = "bit_length")]
985fn bit_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
986    let length = a.as_bytes().len() * 8;
987    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
988}
989
990#[sqlfunc(sqlname = "octet_length")]
991fn byte_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
992    let length = a.as_bytes().len();
993    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
994}
995
996#[sqlfunc]
997fn upper<'a>(a: &'a str) -> String {
998    a.to_uppercase()
999}
1000
1001#[sqlfunc]
1002fn lower<'a>(a: &'a str) -> String {
1003    a.to_lowercase()
1004}
1005
1006#[sqlfunc]
1007fn normalize(text: &str, form_str: &str) -> Result<String, EvalError> {
1008    use unicode_normalization::UnicodeNormalization;
1009
1010    match form_str.to_uppercase().as_str() {
1011        "NFC" => Ok(text.nfc().collect()),
1012        "NFD" => Ok(text.nfd().collect()),
1013        "NFKC" => Ok(text.nfkc().collect()),
1014        "NFKD" => Ok(text.nfkd().collect()),
1015        _ => Err(EvalError::InvalidParameterValue(
1016            format!("invalid normalization form: {}", form_str).into(),
1017        )),
1018    }
1019}
1020
1021#[derive(
1022    Ord,
1023    PartialOrd,
1024    Clone,
1025    Debug,
1026    Eq,
1027    PartialEq,
1028    Serialize,
1029    Deserialize,
1030    Hash,
1031    MzReflect
1032)]
1033pub struct IsLikeMatch(pub like_pattern::Matcher);
1034
1035impl EagerUnaryFunc for IsLikeMatch {
1036    type Input<'a> = &'a str;
1037    type Output<'a> = bool;
1038
1039    fn call<'a>(&self, haystack: Self::Input<'a>) -> Self::Output<'a> {
1040        self.0.is_match(haystack)
1041    }
1042
1043    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
1044        SqlScalarType::Bool.nullable(input.nullable)
1045    }
1046}
1047
1048impl fmt::Display for IsLikeMatch {
1049    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1050        write!(
1051            f,
1052            "{}like[{}]",
1053            if self.0.case_insensitive { "i" } else { "" },
1054            self.0.pattern.escaped()
1055        )
1056    }
1057}
1058
1059#[derive(
1060    Ord,
1061    PartialOrd,
1062    Clone,
1063    Debug,
1064    Eq,
1065    PartialEq,
1066    Serialize,
1067    Deserialize,
1068    Hash,
1069    MzReflect
1070)]
1071pub struct IsRegexpMatch(pub Regex);
1072
1073impl EagerUnaryFunc for IsRegexpMatch {
1074    type Input<'a> = &'a str;
1075    type Output<'a> = bool;
1076
1077    fn call<'a>(&self, haystack: Self::Input<'a>) -> Self::Output<'a> {
1078        self.0.is_match(haystack)
1079    }
1080
1081    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
1082        SqlScalarType::Bool.nullable(input.nullable)
1083    }
1084}
1085
1086impl fmt::Display for IsRegexpMatch {
1087    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1088        write!(
1089            f,
1090            "is_regexp_match[{}, case_insensitive={}]",
1091            self.0.pattern().escaped(),
1092            self.0.case_insensitive
1093        )
1094    }
1095}
1096
1097#[derive(
1098    Ord,
1099    PartialOrd,
1100    Clone,
1101    Debug,
1102    Eq,
1103    PartialEq,
1104    Serialize,
1105    Deserialize,
1106    Hash,
1107    MzReflect
1108)]
1109pub struct RegexpMatch(pub Regex);
1110
1111impl LazyUnaryFunc for RegexpMatch {
1112    fn eval<'a>(
1113        &'a self,
1114        datums: &[Datum<'a>],
1115        temp_storage: &'a RowArena,
1116        a: &'a MirScalarExpr,
1117    ) -> Result<Datum<'a>, EvalError> {
1118        let haystack = a.eval(datums, temp_storage)?;
1119        if haystack.is_null() {
1120            return Ok(Datum::Null);
1121        }
1122        regexp_match_static(haystack, temp_storage, &self.0)
1123    }
1124
1125    /// The output SqlColumnType of this function
1126    fn output_sql_type(&self, _input_type: SqlColumnType) -> SqlColumnType {
1127        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(true)
1128    }
1129
1130    /// Whether this function will produce NULL on NULL input
1131    fn propagates_nulls(&self) -> bool {
1132        true
1133    }
1134
1135    /// Whether this function will produce NULL on non-NULL input
1136    fn introduces_nulls(&self) -> bool {
1137        // Returns null if the regex did not match
1138        true
1139    }
1140
1141    /// Whether this function preserves uniqueness
1142    fn preserves_uniqueness(&self) -> bool {
1143        false
1144    }
1145
1146    fn inverse(&self) -> Option<crate::UnaryFunc> {
1147        None
1148    }
1149
1150    fn is_monotone(&self) -> bool {
1151        false
1152    }
1153
1154    fn is_eliminable_cast(&self) -> bool {
1155        false
1156    }
1157}
1158
1159impl fmt::Display for RegexpMatch {
1160    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1161        write!(
1162            f,
1163            "regexp_match[{}, case_insensitive={}]",
1164            self.0.pattern().escaped(),
1165            self.0.case_insensitive
1166        )
1167    }
1168}
1169
1170#[derive(
1171    Ord,
1172    PartialOrd,
1173    Clone,
1174    Debug,
1175    Eq,
1176    PartialEq,
1177    Serialize,
1178    Deserialize,
1179    Hash,
1180    MzReflect
1181)]
1182pub struct RegexpSplitToArray(pub Regex);
1183
1184impl LazyUnaryFunc for RegexpSplitToArray {
1185    fn eval<'a>(
1186        &'a self,
1187        datums: &[Datum<'a>],
1188        temp_storage: &'a RowArena,
1189        a: &'a MirScalarExpr,
1190    ) -> Result<Datum<'a>, EvalError> {
1191        let haystack = a.eval(datums, temp_storage)?;
1192        if haystack.is_null() {
1193            return Ok(Datum::Null);
1194        }
1195        regexp_split_to_array_re(haystack.unwrap_str(), &self.0, temp_storage)
1196    }
1197
1198    /// The output SqlColumnType of this function
1199    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1200        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(input_type.nullable)
1201    }
1202
1203    /// Whether this function will produce NULL on NULL input
1204    fn propagates_nulls(&self) -> bool {
1205        true
1206    }
1207
1208    /// Whether this function will produce NULL on non-NULL input
1209    fn introduces_nulls(&self) -> bool {
1210        false
1211    }
1212
1213    /// Whether this function preserves uniqueness
1214    fn preserves_uniqueness(&self) -> bool {
1215        false
1216    }
1217
1218    fn inverse(&self) -> Option<crate::UnaryFunc> {
1219        None
1220    }
1221
1222    fn is_monotone(&self) -> bool {
1223        false
1224    }
1225
1226    fn is_eliminable_cast(&self) -> bool {
1227        false
1228    }
1229}
1230
1231impl fmt::Display for RegexpSplitToArray {
1232    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1233        write!(
1234            f,
1235            "regexp_split_to_array[{}, case_insensitive={}]",
1236            self.0.pattern().escaped(),
1237            self.0.case_insensitive
1238        )
1239    }
1240}
1241
1242#[sqlfunc(sqlname = "mz_panic")]
1243fn panic<'a>(a: &'a str) -> String {
1244    print!("{}", a);
1245    panic!("{}", a)
1246}
1247
1248#[sqlfunc(sqlname = "quote_ident", preserves_uniqueness = true)]
1249fn quote_ident<'a>(a: &'a str) -> Result<String, EvalError> {
1250    let i = mz_sql_parser::ast::Ident::new(a).map_err(|err| EvalError::InvalidIdentifier {
1251        ident: a.into(),
1252        detail: Some(err.to_string().into()),
1253    })?;
1254    Ok(i.to_string())
1255}
1256
1257#[derive(
1258    Ord,
1259    PartialOrd,
1260    Clone,
1261    Debug,
1262    Eq,
1263    PartialEq,
1264    Serialize,
1265    Deserialize,
1266    Hash,
1267    MzReflect
1268)]
1269pub struct RegexpReplace {
1270    pub regex: Regex,
1271    pub limit: usize,
1272}
1273
1274impl binary::EagerBinaryFunc for RegexpReplace {
1275    type Input<'a> = (&'a str, &'a str);
1276    type Output<'a> = Cow<'a, str>;
1277
1278    fn call<'a>(
1279        &self,
1280        (source, replacement): Self::Input<'a>,
1281        _temp_storage: &'a RowArena,
1282    ) -> Self::Output<'a> {
1283        // WARNING: This function has potential OOM risk if used with an inflationary
1284        // replacement pattern. It is very difficult to calculate the output size ahead
1285        // of time because the replacement pattern may depend on capture groups.
1286        self.regex.replacen(source, self.limit, replacement)
1287    }
1288
1289    fn output_sql_type(&self, input_types: &[SqlColumnType]) -> SqlColumnType {
1290        use mz_repr::AsColumnType;
1291        let output = <Self::Output<'_> as AsColumnType>::as_column_type();
1292        let propagates_nulls = binary::EagerBinaryFunc::propagates_nulls(self);
1293        let nullable = output.nullable;
1294        let input_nullable = input_types.iter().any(|t| t.nullable);
1295        output.nullable(nullable || (propagates_nulls && input_nullable))
1296    }
1297}
1298
1299impl fmt::Display for RegexpReplace {
1300    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1301        write!(
1302            f,
1303            "regexp_replace[{}, case_insensitive={}, limit={}]",
1304            self.regex.pattern().escaped(),
1305            self.regex.case_insensitive,
1306            self.limit
1307        )
1308    }
1309}