Skip to main content

mz_expr/scalar/func/impls/
string.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::fmt;
12use std::sync::LazyLock;
13
14use chrono::{DateTime, NaiveDateTime, NaiveTime, Utc};
15use mz_expr_derive::sqlfunc;
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_ore::result::ResultExt;
19use mz_ore::str::StrExt;
20use mz_repr::adt::char::{Char, format_str_trim};
21use mz_repr::adt::date::Date;
22use mz_repr::adt::interval::Interval;
23use mz_repr::adt::jsonb::Jsonb;
24use mz_repr::adt::numeric::{self, Numeric, NumericMaxScale};
25use mz_repr::adt::pg_legacy_name::PgLegacyName;
26use mz_repr::adt::regex::Regex;
27use mz_repr::adt::system::{Oid, PgLegacyChar};
28use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampPrecision};
29use mz_repr::adt::varchar::{VarChar, VarCharMaxLength};
30use mz_repr::{Datum, RowArena, SqlColumnType, SqlScalarType, strconv};
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use crate::func::{binary, regexp_match_static};
35use crate::scalar::func::{
36    EagerUnaryFunc, LazyUnaryFunc, array_create_scalar, regexp_split_to_array_re,
37};
38use crate::{EvalError, MirScalarExpr, UnaryFunc, like_pattern};
39
40#[sqlfunc(
41    sqlname = "text_to_boolean",
42    preserves_uniqueness = false,
43    inverse = to_unary!(super::CastBoolToString)
44)]
45fn cast_string_to_bool<'a>(a: &'a str) -> Result<bool, EvalError> {
46    strconv::parse_bool(a).err_into()
47}
48
49#[sqlfunc(
50    sqlname = "text_to_\"char\"",
51    preserves_uniqueness = true,
52    inverse = to_unary!(super::CastPgLegacyCharToString)
53)]
54fn cast_string_to_pg_legacy_char<'a>(a: &'a str) -> PgLegacyChar {
55    PgLegacyChar(a.as_bytes().get(0).copied().unwrap_or(0))
56}
57
58#[sqlfunc(sqlname = "text_to_name", preserves_uniqueness = true)]
59fn cast_string_to_pg_legacy_name<'a>(a: &'a str) -> PgLegacyName<String> {
60    PgLegacyName(strconv::parse_pg_legacy_name(a))
61}
62
63#[sqlfunc(
64    sqlname = "text_to_bytea",
65    preserves_uniqueness = true,
66    inverse = to_unary!(super::CastBytesToString)
67)]
68fn cast_string_to_bytes<'a>(a: &'a str) -> Result<Vec<u8>, EvalError> {
69    strconv::parse_bytes(a).err_into()
70}
71
72#[sqlfunc(
73    sqlname = "text_to_smallint",
74    preserves_uniqueness = false,
75    inverse = to_unary!(super::CastInt16ToString)
76)]
77fn cast_string_to_int16<'a>(a: &'a str) -> Result<i16, EvalError> {
78    strconv::parse_int16(a).err_into()
79}
80
81#[sqlfunc(
82    sqlname = "text_to_integer",
83    preserves_uniqueness = false,
84    inverse = to_unary!(super::CastInt32ToString)
85)]
86fn cast_string_to_int32<'a>(a: &'a str) -> Result<i32, EvalError> {
87    strconv::parse_int32(a).err_into()
88}
89
90#[sqlfunc(
91    sqlname = "text_to_bigint",
92    preserves_uniqueness = false,
93    inverse = to_unary!(super::CastInt64ToString)
94)]
95fn cast_string_to_int64<'a>(a: &'a str) -> Result<i64, EvalError> {
96    strconv::parse_int64(a).err_into()
97}
98
99#[sqlfunc(
100    sqlname = "text_to_real",
101    preserves_uniqueness = false,
102    inverse = to_unary!(super::CastFloat32ToString)
103)]
104fn cast_string_to_float32<'a>(a: &'a str) -> Result<f32, EvalError> {
105    strconv::parse_float32(a).err_into()
106}
107
108#[sqlfunc(
109    sqlname = "text_to_double",
110    preserves_uniqueness = false,
111    inverse = to_unary!(super::CastFloat64ToString)
112)]
113fn cast_string_to_float64<'a>(a: &'a str) -> Result<f64, EvalError> {
114    strconv::parse_float64(a).err_into()
115}
116
117#[sqlfunc(
118    sqlname = "text_to_oid",
119    preserves_uniqueness = false,
120    inverse = to_unary!(super::CastOidToString)
121)]
122fn cast_string_to_oid<'a>(a: &'a str) -> Result<Oid, EvalError> {
123    Ok(Oid(strconv::parse_oid(a)?))
124}
125
126#[sqlfunc(
127    sqlname = "text_to_uint2",
128    preserves_uniqueness = false,
129    inverse = to_unary!(super::CastUint16ToString)
130)]
131fn cast_string_to_uint16(a: &str) -> Result<u16, EvalError> {
132    strconv::parse_uint16(a).err_into()
133}
134
135#[sqlfunc(
136    sqlname = "text_to_uint4",
137    preserves_uniqueness = false,
138    inverse = to_unary!(super::CastUint32ToString)
139)]
140fn cast_string_to_uint32(a: &str) -> Result<u32, EvalError> {
141    strconv::parse_uint32(a).err_into()
142}
143
144#[sqlfunc(
145    sqlname = "text_to_uint8",
146    preserves_uniqueness = false,
147    inverse = to_unary!(super::CastUint64ToString)
148)]
149fn cast_string_to_uint64(a: &str) -> Result<u64, EvalError> {
150    strconv::parse_uint64(a).err_into()
151}
152
153#[sqlfunc(sqlname = "reverse")]
// Returns the input with its `char`s (Unicode scalar values) in reverse
// order. Multi-`char` grapheme clusters are not kept together.
fn reverse<'a>(a: &'a str) -> String {
    let mut reversed = String::new();
    reversed.extend(a.chars().rev());
    reversed
}
157
158#[derive(
159    Ord,
160    PartialOrd,
161    Clone,
162    Debug,
163    Eq,
164    PartialEq,
165    Serialize,
166    Deserialize,
167    Hash,
168    MzReflect
169)]
170pub struct CastStringToNumeric(pub Option<NumericMaxScale>);
171
172impl EagerUnaryFunc for CastStringToNumeric {
173    type Input<'a> = &'a str;
174    type Output<'a> = Result<Numeric, EvalError>;
175
176    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
177        let mut d = strconv::parse_numeric(a)?;
178        if let Some(scale) = self.0 {
179            if numeric::rescale(&mut d.0, scale.into_u8()).is_err() {
180                return Err(EvalError::NumericFieldOverflow);
181            }
182        }
183        Ok(d.into_inner())
184    }
185
186    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
187        SqlScalarType::Numeric { max_scale: self.0 }.nullable(input.nullable)
188    }
189
190    fn inverse(&self) -> Option<crate::UnaryFunc> {
191        to_unary!(super::CastNumericToString)
192    }
193}
194
195impl fmt::Display for CastStringToNumeric {
196    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
197        f.write_str("text_to_numeric")
198    }
199}
200
201#[sqlfunc(
202    sqlname = "text_to_date",
203    preserves_uniqueness = false,
204    inverse = to_unary!(super::CastDateToString)
205)]
206fn cast_string_to_date<'a>(a: &'a str) -> Result<Date, EvalError> {
207    strconv::parse_date(a).err_into()
208}
209
210#[sqlfunc(
211    sqlname = "text_to_time",
212    preserves_uniqueness = false,
213    inverse = to_unary!(super::CastTimeToString)
214)]
215fn cast_string_to_time<'a>(a: &'a str) -> Result<NaiveTime, EvalError> {
216    strconv::parse_time(a).err_into()
217}
218
219#[derive(
220    Ord,
221    PartialOrd,
222    Clone,
223    Debug,
224    Eq,
225    PartialEq,
226    Serialize,
227    Deserialize,
228    Hash,
229    MzReflect
230)]
231pub struct CastStringToTimestamp(pub Option<TimestampPrecision>);
232
233impl EagerUnaryFunc for CastStringToTimestamp {
234    type Input<'a> = &'a str;
235    type Output<'a> = Result<CheckedTimestamp<NaiveDateTime>, EvalError>;
236
237    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
238        let out = strconv::parse_timestamp(a)?;
239        let updated = out.round_to_precision(self.0)?;
240        Ok(updated)
241    }
242
243    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
244        SqlScalarType::Timestamp { precision: self.0 }.nullable(input.nullable)
245    }
246
247    fn inverse(&self) -> Option<crate::UnaryFunc> {
248        to_unary!(super::CastTimestampToString)
249    }
250}
251
252impl fmt::Display for CastStringToTimestamp {
253    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254        f.write_str("text_to_timestamp")
255    }
256}
257
258#[sqlfunc(sqlname = "try_parse_monotonic_iso8601_timestamp")]
259// TODO: Pretty sure this preserves uniqueness, but not 100%.
260//
261// Ironically, even though this has "monotonic" in the name, it's not quite
262// eligible for `#[is_monotone = true]` because any input could also be
263// mapped to null. So, handle it via SpecialUnary in the interpreter.
264fn try_parse_monotonic_iso8601_timestamp<'a>(
265    a: &'a str,
266) -> Option<CheckedTimestamp<NaiveDateTime>> {
267    let ts = mz_persist_types::timestamp::try_parse_monotonic_iso8601_timestamp(a)?;
268    let ts = CheckedTimestamp::from_timestamplike(ts)
269        .expect("monotonic_iso8601 range is a subset of CheckedTimestamp domain");
270    Some(ts)
271}
272
273#[derive(
274    Ord,
275    PartialOrd,
276    Clone,
277    Debug,
278    Eq,
279    PartialEq,
280    Serialize,
281    Deserialize,
282    Hash,
283    MzReflect
284)]
285pub struct CastStringToTimestampTz(pub Option<TimestampPrecision>);
286
287impl EagerUnaryFunc for CastStringToTimestampTz {
288    type Input<'a> = &'a str;
289    type Output<'a> = Result<CheckedTimestamp<DateTime<Utc>>, EvalError>;
290
291    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
292        let out = strconv::parse_timestamptz(a)?;
293        let updated = out.round_to_precision(self.0)?;
294        Ok(updated)
295    }
296
297    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
298        SqlScalarType::TimestampTz { precision: self.0 }.nullable(input.nullable)
299    }
300
301    fn inverse(&self) -> Option<crate::UnaryFunc> {
302        to_unary!(super::CastTimestampTzToString)
303    }
304}
305
306impl fmt::Display for CastStringToTimestampTz {
307    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
308        f.write_str("text_to_timestamp_with_time_zone")
309    }
310}
311
312#[sqlfunc(
313    sqlname = "text_to_interval",
314    preserves_uniqueness = false,
315    inverse = to_unary!(super::CastIntervalToString)
316)]
317fn cast_string_to_interval<'a>(a: &'a str) -> Result<Interval, EvalError> {
318    strconv::parse_interval(a).err_into()
319}
320
321#[sqlfunc(
322    sqlname = "text_to_uuid",
323    preserves_uniqueness = false,
324    inverse = to_unary!(super::CastUuidToString)
325)]
326fn cast_string_to_uuid<'a>(a: &'a str) -> Result<Uuid, EvalError> {
327    strconv::parse_uuid(a).err_into()
328}
329
330#[derive(
331    Ord,
332    PartialOrd,
333    Clone,
334    Debug,
335    Eq,
336    PartialEq,
337    Serialize,
338    Deserialize,
339    Hash,
340    MzReflect
341)]
342pub struct CastStringToArray {
343    // Target array's type.
344    pub return_ty: SqlScalarType,
345    // The expression to cast the discovered array elements to the array's
346    // element type.
347    pub cast_expr: Box<MirScalarExpr>,
348}
349
350impl LazyUnaryFunc for CastStringToArray {
351    fn eval<'a>(
352        &'a self,
353        datums: &[Datum<'a>],
354        temp_storage: &'a RowArena,
355        a: &'a MirScalarExpr,
356    ) -> Result<Datum<'a>, EvalError> {
357        let a = a.eval(datums, temp_storage)?;
358        if a.is_null() {
359            return Ok(Datum::Null);
360        }
361        let (datums, dims) = strconv::parse_array(
362            a.unwrap_str(),
363            || Datum::Null,
364            |elem_text| {
365                let elem_text = match elem_text {
366                    Cow::Owned(s) => temp_storage.push_string(s),
367                    Cow::Borrowed(s) => s,
368                };
369                self.cast_expr
370                    .eval(&[Datum::String(elem_text)], temp_storage)
371            },
372        )?;
373
374        Ok(temp_storage.try_make_datum(|packer| packer.try_push_array(&dims, datums))?)
375    }
376
377    /// The output SqlColumnType of this function
378    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
379        self.return_ty.clone().nullable(input_type.nullable)
380    }
381
382    /// Whether this function will produce NULL on NULL input
383    fn propagates_nulls(&self) -> bool {
384        true
385    }
386
387    /// Whether this function will produce NULL on non-NULL input
388    fn introduces_nulls(&self) -> bool {
389        false
390    }
391
392    /// Whether this function preserves uniqueness
393    fn preserves_uniqueness(&self) -> bool {
394        false
395    }
396
397    fn inverse(&self) -> Option<crate::UnaryFunc> {
398        to_unary!(super::CastArrayToString {
399            ty: self.return_ty.clone(),
400        })
401    }
402
403    fn is_monotone(&self) -> bool {
404        false
405    }
406}
407
408impl fmt::Display for CastStringToArray {
409    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
410        f.write_str("strtoarray")
411    }
412}
413
414#[derive(
415    Ord,
416    PartialOrd,
417    Clone,
418    Debug,
419    Eq,
420    PartialEq,
421    Serialize,
422    Deserialize,
423    Hash,
424    MzReflect
425)]
426pub struct CastStringToList {
427    // Target list's type
428    pub return_ty: SqlScalarType,
429    // The expression to cast the discovered list elements to the list's
430    // element type.
431    pub cast_expr: Box<MirScalarExpr>,
432}
433
434impl LazyUnaryFunc for CastStringToList {
435    fn eval<'a>(
436        &'a self,
437        datums: &[Datum<'a>],
438        temp_storage: &'a RowArena,
439        a: &'a MirScalarExpr,
440    ) -> Result<Datum<'a>, EvalError> {
441        let a = a.eval(datums, temp_storage)?;
442        if a.is_null() {
443            return Ok(Datum::Null);
444        }
445        let parsed_datums = strconv::parse_list(
446            a.unwrap_str(),
447            matches!(
448                self.return_ty.unwrap_list_element_type(),
449                SqlScalarType::List { .. }
450            ),
451            || Datum::Null,
452            |elem_text| {
453                let elem_text = match elem_text {
454                    Cow::Owned(s) => temp_storage.push_string(s),
455                    Cow::Borrowed(s) => s,
456                };
457                self.cast_expr
458                    .eval(&[Datum::String(elem_text)], temp_storage)
459            },
460        )?;
461
462        Ok(temp_storage.make_datum(|packer| packer.push_list(parsed_datums)))
463    }
464
465    /// The output SqlColumnType of this function
466    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
467        self.return_ty
468            .without_modifiers()
469            .nullable(input_type.nullable)
470    }
471
472    /// Whether this function will produce NULL on NULL input
473    fn propagates_nulls(&self) -> bool {
474        true
475    }
476
477    /// Whether this function will produce NULL on non-NULL input
478    fn introduces_nulls(&self) -> bool {
479        false
480    }
481
482    /// Whether this function preserves uniqueness
483    fn preserves_uniqueness(&self) -> bool {
484        false
485    }
486
487    fn inverse(&self) -> Option<crate::UnaryFunc> {
488        to_unary!(super::CastListToString {
489            ty: self.return_ty.clone(),
490        })
491    }
492
493    fn is_monotone(&self) -> bool {
494        false
495    }
496}
497
498impl fmt::Display for CastStringToList {
499    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
500        f.write_str("strtolist")
501    }
502}
503
504#[derive(
505    Ord,
506    PartialOrd,
507    Clone,
508    Debug,
509    Eq,
510    PartialEq,
511    Serialize,
512    Deserialize,
513    Hash,
514    MzReflect
515)]
516pub struct CastStringToMap {
517    // Target map's value type
518    pub return_ty: SqlScalarType,
519    // The expression used to cast the discovered values to the map's value
520    // type.
521    pub cast_expr: Box<MirScalarExpr>,
522}
523
524impl LazyUnaryFunc for CastStringToMap {
525    fn eval<'a>(
526        &'a self,
527        datums: &[Datum<'a>],
528        temp_storage: &'a RowArena,
529        a: &'a MirScalarExpr,
530    ) -> Result<Datum<'a>, EvalError> {
531        let a = a.eval(datums, temp_storage)?;
532        if a.is_null() {
533            return Ok(Datum::Null);
534        }
535        let parsed_map = strconv::parse_map(
536            a.unwrap_str(),
537            matches!(
538                self.return_ty.unwrap_map_value_type(),
539                SqlScalarType::Map { .. }
540            ),
541            |value_text| -> Result<Datum, EvalError> {
542                let value_text = match value_text {
543                    Some(Cow::Owned(s)) => Datum::String(temp_storage.push_string(s)),
544                    Some(Cow::Borrowed(s)) => Datum::String(s),
545                    None => Datum::Null,
546                };
547                self.cast_expr.eval(&[value_text], temp_storage)
548            },
549        )?;
550        let mut pairs: Vec<(String, Datum)> = parsed_map.into_iter().map(|(k, v)| (k, v)).collect();
551        pairs.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2));
552        pairs.dedup_by(|(k1, _v1), (k2, _v2)| k1 == k2);
553        Ok(temp_storage.make_datum(|packer| {
554            packer.push_dict_with(|packer| {
555                for (k, v) in pairs {
556                    packer.push(Datum::String(&k));
557                    packer.push(v);
558                }
559            })
560        }))
561    }
562
563    /// The output SqlColumnType of this function
564    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
565        self.return_ty.clone().nullable(input_type.nullable)
566    }
567
568    /// Whether this function will produce NULL on NULL input
569    fn propagates_nulls(&self) -> bool {
570        true
571    }
572
573    /// Whether this function will produce NULL on non-NULL input
574    fn introduces_nulls(&self) -> bool {
575        false
576    }
577
578    /// Whether this function preserves uniqueness
579    fn preserves_uniqueness(&self) -> bool {
580        false
581    }
582
583    fn inverse(&self) -> Option<crate::UnaryFunc> {
584        to_unary!(super::CastMapToString {
585            ty: self.return_ty.clone(),
586        })
587    }
588
589    fn is_monotone(&self) -> bool {
590        false
591    }
592}
593
594impl fmt::Display for CastStringToMap {
595    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
596        f.write_str("strtomap")
597    }
598}
599
600#[derive(
601    Ord,
602    PartialOrd,
603    Clone,
604    Debug,
605    Eq,
606    PartialEq,
607    Serialize,
608    Deserialize,
609    Hash,
610    MzReflect
611)]
612pub struct CastStringToChar {
613    pub length: Option<mz_repr::adt::char::CharLength>,
614    pub fail_on_len: bool,
615}
616
617impl EagerUnaryFunc for CastStringToChar {
618    type Input<'a> = &'a str;
619    type Output<'a> = Result<Char<String>, EvalError>;
620
621    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
622        let s = format_str_trim(a, self.length, self.fail_on_len).map_err(|_| {
623            assert!(self.fail_on_len);
624            EvalError::StringValueTooLong {
625                target_type: "character".into(),
626                length: usize::cast_from(self.length.unwrap().into_u32()),
627            }
628        })?;
629
630        Ok(Char(s))
631    }
632
633    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
634        SqlScalarType::Char {
635            length: self.length,
636        }
637        .nullable(input.nullable)
638    }
639
640    fn could_error(&self) -> bool {
641        self.fail_on_len && self.length.is_some()
642    }
643
644    fn inverse(&self) -> Option<crate::UnaryFunc> {
645        to_unary!(super::CastCharToString)
646    }
647}
648
649impl fmt::Display for CastStringToChar {
650    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
651        match self.length {
652            Some(length) => {
653                write!(
654                    f,
655                    "text_to_char[len={}, fail_on_len={}]",
656                    length.into_u32(),
657                    self.fail_on_len
658                )
659            }
660            None => f.write_str("text_to_char[len=unbounded]"),
661        }
662    }
663}
664
665#[derive(
666    Ord,
667    PartialOrd,
668    Clone,
669    Debug,
670    Eq,
671    PartialEq,
672    Serialize,
673    Deserialize,
674    Hash,
675    MzReflect
676)]
677pub struct CastStringToRange {
678    // Target range's type
679    pub return_ty: SqlScalarType,
680    // The expression to cast the discovered range elements to the range's
681    // element type.
682    pub cast_expr: Box<MirScalarExpr>,
683}
684
685impl LazyUnaryFunc for CastStringToRange {
686    fn eval<'a>(
687        &'a self,
688        datums: &[Datum<'a>],
689        temp_storage: &'a RowArena,
690        a: &'a MirScalarExpr,
691    ) -> Result<Datum<'a>, EvalError> {
692        let a = a.eval(datums, temp_storage)?;
693        if a.is_null() {
694            return Ok(Datum::Null);
695        }
696        let mut range = strconv::parse_range(a.unwrap_str(), |elem_text| {
697            let elem_text = match elem_text {
698                Cow::Owned(s) => temp_storage.push_string(s),
699                Cow::Borrowed(s) => s,
700            };
701            self.cast_expr
702                .eval(&[Datum::String(elem_text)], temp_storage)
703        })?;
704
705        range.canonicalize()?;
706
707        Ok(temp_storage.make_datum(|packer| {
708            packer
709                .push_range(range)
710                .expect("must have already handled errors")
711        }))
712    }
713
714    /// The output SqlColumnType of this function
715    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
716        self.return_ty
717            .without_modifiers()
718            .nullable(input_type.nullable)
719    }
720
721    /// Whether this function will produce NULL on NULL input
722    fn propagates_nulls(&self) -> bool {
723        true
724    }
725
726    /// Whether this function will produce NULL on non-NULL input
727    fn introduces_nulls(&self) -> bool {
728        false
729    }
730
731    /// Whether this function preserves uniqueness
732    fn preserves_uniqueness(&self) -> bool {
733        false
734    }
735
736    fn inverse(&self) -> Option<crate::UnaryFunc> {
737        to_unary!(super::CastRangeToString {
738            ty: self.return_ty.clone(),
739        })
740    }
741
742    fn is_monotone(&self) -> bool {
743        false
744    }
745}
746
747impl fmt::Display for CastStringToRange {
748    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
749        f.write_str("strtorange")
750    }
751}
752
753#[derive(
754    Ord,
755    PartialOrd,
756    Clone,
757    Debug,
758    Eq,
759    PartialEq,
760    Serialize,
761    Deserialize,
762    Hash,
763    MzReflect
764)]
765pub struct CastStringToVarChar {
766    pub length: Option<VarCharMaxLength>,
767    pub fail_on_len: bool,
768}
769
770impl EagerUnaryFunc for CastStringToVarChar {
771    type Input<'a> = &'a str;
772    type Output<'a> = Result<VarChar<&'a str>, EvalError>;
773
774    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
775        let s =
776            mz_repr::adt::varchar::format_str(a, self.length, self.fail_on_len).map_err(|_| {
777                assert!(self.fail_on_len);
778                EvalError::StringValueTooLong {
779                    target_type: "character varying".into(),
780                    length: usize::cast_from(self.length.unwrap().into_u32()),
781                }
782            })?;
783
784        Ok(VarChar(s))
785    }
786
787    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
788        SqlScalarType::VarChar {
789            max_length: self.length,
790        }
791        .nullable(input.nullable)
792    }
793
794    fn could_error(&self) -> bool {
795        self.fail_on_len && self.length.is_some()
796    }
797
798    fn preserves_uniqueness(&self) -> bool {
799        !self.fail_on_len || self.length.is_none()
800    }
801
802    fn inverse(&self) -> Option<crate::UnaryFunc> {
803        to_unary!(super::CastVarCharToString)
804    }
805}
806
807impl fmt::Display for CastStringToVarChar {
808    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
809        match self.length {
810            Some(length) => {
811                write!(
812                    f,
813                    "text_to_varchar[len={}, fail_on_len={}]",
814                    length.into_u32(),
815                    self.fail_on_len
816                )
817            }
818            None => f.write_str("text_to_varchar[len=unbounded]"),
819        }
820    }
821}
822
823// If we support another vector type, this should likely get hoisted into a
824// position akin to array parsing.
825static INT2VECTOR_CAST_EXPR: LazyLock<MirScalarExpr> = LazyLock::new(|| MirScalarExpr::CallUnary {
826    func: UnaryFunc::CastStringToInt16(CastStringToInt16),
827    expr: Box::new(MirScalarExpr::column(0)),
828});
829
830#[derive(
831    Ord,
832    PartialOrd,
833    Clone,
834    Debug,
835    Eq,
836    PartialEq,
837    Serialize,
838    Deserialize,
839    Hash,
840    MzReflect
841)]
842pub struct CastStringToInt2Vector;
843
844impl LazyUnaryFunc for CastStringToInt2Vector {
845    fn eval<'a>(
846        &'a self,
847        datums: &[Datum<'a>],
848        temp_storage: &'a RowArena,
849        a: &'a MirScalarExpr,
850    ) -> Result<Datum<'a>, EvalError> {
851        let a = a.eval(datums, temp_storage)?;
852        if a.is_null() {
853            return Ok(Datum::Null);
854        }
855
856        let datums = strconv::parse_legacy_vector(a.unwrap_str(), |elem_text| {
857            let elem_text = match elem_text {
858                Cow::Owned(s) => temp_storage.push_string(s),
859                Cow::Borrowed(s) => s,
860            };
861            INT2VECTOR_CAST_EXPR.eval(&[Datum::String(elem_text)], temp_storage)
862        })?;
863        array_create_scalar(&datums, temp_storage)
864    }
865
866    /// The output SqlColumnType of this function
867    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
868        SqlScalarType::Int2Vector.nullable(input_type.nullable)
869    }
870
871    /// Whether this function will produce NULL on NULL input
872    fn propagates_nulls(&self) -> bool {
873        true
874    }
875
876    /// Whether this function will produce NULL on non-NULL input
877    fn introduces_nulls(&self) -> bool {
878        false
879    }
880
881    /// Whether this function preserves uniqueness
882    fn preserves_uniqueness(&self) -> bool {
883        false
884    }
885
886    fn inverse(&self) -> Option<crate::UnaryFunc> {
887        to_unary!(super::CastInt2VectorToString)
888    }
889
890    fn is_monotone(&self) -> bool {
891        false
892    }
893}
894
895impl fmt::Display for CastStringToInt2Vector {
896    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
897        f.write_str("strtoint2vector")
898    }
899}
900
901#[sqlfunc(
902    sqlname = "text_to_jsonb",
903    preserves_uniqueness = false,
904    inverse = to_unary!(super::CastJsonbToString)
905)]
906// TODO(jamii): it would be much more efficient to skip the intermediate repr::jsonb::Jsonb.
907fn cast_string_to_jsonb<'a>(a: &'a str) -> Result<Jsonb, EvalError> {
908    Ok(strconv::parse_jsonb(a)?)
909}
910
911#[sqlfunc(sqlname = "btrim")]
// Strips every leading and trailing space character (U+0020) from `a`. Other
// whitespace, such as tabs and newlines, is left in place.
fn trim_whitespace<'a>(a: &'a str) -> &'a str {
    let without_leading = a.trim_start_matches(' ');
    without_leading.trim_end_matches(' ')
}
915
916#[sqlfunc(sqlname = "ltrim")]
// Strips every leading space character (U+0020) from `a`. Other whitespace,
// such as tabs and newlines, is left in place.
fn trim_leading_whitespace<'a>(a: &'a str) -> &'a str {
    let mut rest = a;
    while let Some(stripped) = rest.strip_prefix(' ') {
        rest = stripped;
    }
    rest
}
920
921#[sqlfunc(sqlname = "rtrim")]
// Strips every trailing space character (U+0020) from `a`. Other whitespace,
// such as tabs and newlines, is left in place.
fn trim_trailing_whitespace<'a>(a: &'a str) -> &'a str {
    let mut rest = a;
    while let Some(stripped) = rest.strip_suffix(' ') {
        rest = stripped;
    }
    rest
}
925
926#[sqlfunc(sqlname = "initcap")]
// Upper-cases the first character of every word and lower-cases the rest. A
// word starts at the beginning of the input and after any character that is
// not alphanumeric.
fn initcap<'a>(a: &'a str) -> String {
    let mut result = String::new();
    let mut at_word_start = true;
    for c in a.chars() {
        match at_word_start {
            true => result.extend(c.to_uppercase()),
            false => result.extend(c.to_lowercase()),
        }
        // The next character begins a new word only if this one is a separator.
        at_word_start = !c.is_alphanumeric();
    }
    result
}
940
941#[sqlfunc(sqlname = "ascii")]
// Returns the Unicode code point of the first character of `a`, or 0 when `a`
// is empty (or, defensively, if the code point does not fit in an `i32`).
fn ascii<'a>(a: &'a str) -> i32 {
    match a.chars().next() {
        Some(first) => i32::try_from(u32::from(first)).unwrap_or(0),
        None => 0,
    }
}
948
949#[sqlfunc(sqlname = "char_length")]
950fn char_length<'a>(a: &'a str) -> Result<i32, EvalError> {
951    let length = a.chars().count();
952    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
953}
954
955#[sqlfunc(sqlname = "bit_length")]
956fn bit_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
957    let length = a.as_bytes().len() * 8;
958    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
959}
960
961#[sqlfunc(sqlname = "octet_length")]
962fn byte_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
963    let length = a.as_bytes().len();
964    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
965}
966
967#[sqlfunc]
// Returns `a` converted to upper case using Rust's Unicode-aware
// `str::to_uppercase`.
fn upper<'a>(a: &'a str) -> String {
    str::to_uppercase(a)
}
971
972#[sqlfunc]
// Returns `a` converted to lower case using Rust's Unicode-aware
// `str::to_lowercase`.
fn lower<'a>(a: &'a str) -> String {
    str::to_lowercase(a)
}
976
977#[sqlfunc]
978fn normalize(text: &str, form_str: &str) -> Result<String, EvalError> {
979    use unicode_normalization::UnicodeNormalization;
980
981    match form_str.to_uppercase().as_str() {
982        "NFC" => Ok(text.nfc().collect()),
983        "NFD" => Ok(text.nfd().collect()),
984        "NFKC" => Ok(text.nfkc().collect()),
985        "NFKD" => Ok(text.nfkd().collect()),
986        _ => Err(EvalError::InvalidParameterValue(
987            format!("invalid normalization form: {}", form_str).into(),
988        )),
989    }
990}
991
992#[derive(
993    Ord,
994    PartialOrd,
995    Clone,
996    Debug,
997    Eq,
998    PartialEq,
999    Serialize,
1000    Deserialize,
1001    Hash,
1002    MzReflect
1003)]
1004pub struct IsLikeMatch(pub like_pattern::Matcher);
1005
1006impl EagerUnaryFunc for IsLikeMatch {
1007    type Input<'a> = &'a str;
1008    type Output<'a> = bool;
1009
1010    fn call<'a>(&self, haystack: Self::Input<'a>) -> Self::Output<'a> {
1011        self.0.is_match(haystack)
1012    }
1013
1014    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
1015        SqlScalarType::Bool.nullable(input.nullable)
1016    }
1017}
1018
1019impl fmt::Display for IsLikeMatch {
1020    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1021        write!(
1022            f,
1023            "{}like[{}]",
1024            if self.0.case_insensitive { "i" } else { "" },
1025            self.0.pattern.escaped()
1026        )
1027    }
1028}
1029
1030#[derive(
1031    Ord,
1032    PartialOrd,
1033    Clone,
1034    Debug,
1035    Eq,
1036    PartialEq,
1037    Serialize,
1038    Deserialize,
1039    Hash,
1040    MzReflect
1041)]
1042pub struct IsRegexpMatch(pub Regex);
1043
1044impl EagerUnaryFunc for IsRegexpMatch {
1045    type Input<'a> = &'a str;
1046    type Output<'a> = bool;
1047
1048    fn call<'a>(&self, haystack: Self::Input<'a>) -> Self::Output<'a> {
1049        self.0.is_match(haystack)
1050    }
1051
1052    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
1053        SqlScalarType::Bool.nullable(input.nullable)
1054    }
1055}
1056
1057impl fmt::Display for IsRegexpMatch {
1058    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1059        write!(
1060            f,
1061            "is_regexp_match[{}, case_insensitive={}]",
1062            self.0.pattern().escaped(),
1063            self.0.case_insensitive
1064        )
1065    }
1066}
1067
1068#[derive(
1069    Ord,
1070    PartialOrd,
1071    Clone,
1072    Debug,
1073    Eq,
1074    PartialEq,
1075    Serialize,
1076    Deserialize,
1077    Hash,
1078    MzReflect
1079)]
1080pub struct RegexpMatch(pub Regex);
1081
1082impl LazyUnaryFunc for RegexpMatch {
1083    fn eval<'a>(
1084        &'a self,
1085        datums: &[Datum<'a>],
1086        temp_storage: &'a RowArena,
1087        a: &'a MirScalarExpr,
1088    ) -> Result<Datum<'a>, EvalError> {
1089        let haystack = a.eval(datums, temp_storage)?;
1090        if haystack.is_null() {
1091            return Ok(Datum::Null);
1092        }
1093        regexp_match_static(haystack, temp_storage, &self.0)
1094    }
1095
1096    /// The output SqlColumnType of this function
1097    fn output_type(&self, _input_type: SqlColumnType) -> SqlColumnType {
1098        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(true)
1099    }
1100
1101    /// Whether this function will produce NULL on NULL input
1102    fn propagates_nulls(&self) -> bool {
1103        true
1104    }
1105
1106    /// Whether this function will produce NULL on non-NULL input
1107    fn introduces_nulls(&self) -> bool {
1108        // Returns null if the regex did not match
1109        true
1110    }
1111
1112    /// Whether this function preserves uniqueness
1113    fn preserves_uniqueness(&self) -> bool {
1114        false
1115    }
1116
1117    fn inverse(&self) -> Option<crate::UnaryFunc> {
1118        None
1119    }
1120
1121    fn is_monotone(&self) -> bool {
1122        false
1123    }
1124}
1125
1126impl fmt::Display for RegexpMatch {
1127    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1128        write!(
1129            f,
1130            "regexp_match[{}, case_insensitive={}]",
1131            self.0.pattern().escaped(),
1132            self.0.case_insensitive
1133        )
1134    }
1135}
1136
/// Unary function carrying a fixed [`Regex`].
///
/// Its `LazyUnaryFunc` implementation evaluates the argument expression and
/// passes a non-null string result to `regexp_split_to_array_re` together with
/// this regex; the declared output type is an array of strings.
#[derive(
    Ord,
    PartialOrd,
    Clone,
    Debug,
    Eq,
    PartialEq,
    Serialize,
    Deserialize,
    Hash,
    MzReflect
)]
pub struct RegexpSplitToArray(pub Regex);
1150
1151impl LazyUnaryFunc for RegexpSplitToArray {
1152    fn eval<'a>(
1153        &'a self,
1154        datums: &[Datum<'a>],
1155        temp_storage: &'a RowArena,
1156        a: &'a MirScalarExpr,
1157    ) -> Result<Datum<'a>, EvalError> {
1158        let haystack = a.eval(datums, temp_storage)?;
1159        if haystack.is_null() {
1160            return Ok(Datum::Null);
1161        }
1162        regexp_split_to_array_re(haystack.unwrap_str(), &self.0, temp_storage)
1163    }
1164
1165    /// The output SqlColumnType of this function
1166    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1167        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(input_type.nullable)
1168    }
1169
1170    /// Whether this function will produce NULL on NULL input
1171    fn propagates_nulls(&self) -> bool {
1172        true
1173    }
1174
1175    /// Whether this function will produce NULL on non-NULL input
1176    fn introduces_nulls(&self) -> bool {
1177        false
1178    }
1179
1180    /// Whether this function preserves uniqueness
1181    fn preserves_uniqueness(&self) -> bool {
1182        false
1183    }
1184
1185    fn inverse(&self) -> Option<crate::UnaryFunc> {
1186        None
1187    }
1188
1189    fn is_monotone(&self) -> bool {
1190        false
1191    }
1192}
1193
1194impl fmt::Display for RegexpSplitToArray {
1195    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1196        write!(
1197            f,
1198            "regexp_split_to_array[{}, case_insensitive={}]",
1199            self.0.pattern().escaped(),
1200            self.0.case_insensitive
1201        )
1202    }
1203}
1204
1205#[sqlfunc(sqlname = "mz_panic")]
1206fn panic<'a>(a: &'a str) -> String {
1207    print!("{}", a);
1208    panic!("{}", a)
1209}
1210
1211#[sqlfunc(sqlname = "quote_ident", preserves_uniqueness = true)]
1212fn quote_ident<'a>(a: &'a str) -> Result<String, EvalError> {
1213    let i = mz_sql_parser::ast::Ident::new(a).map_err(|err| EvalError::InvalidIdentifier {
1214        ident: a.into(),
1215        detail: Some(err.to_string().into()),
1216    })?;
1217    Ok(i.to_string())
1218}
1219
/// Binary function that performs regex replacement with a fixed [`Regex`].
///
/// Its `EagerBinaryFunc` implementation takes `(source, replacement)` strings
/// and calls `regex.replacen(source, limit, replacement)`.
#[derive(
    Ord,
    PartialOrd,
    Clone,
    Debug,
    Eq,
    PartialEq,
    Serialize,
    Deserialize,
    Hash,
    MzReflect
)]
pub struct RegexpReplace {
    /// The regex whose matches are replaced.
    pub regex: Regex,
    /// Passed as the limit argument of `Regex::replacen`.
    // NOTE(review): presumably follows the `regex` crate convention where 0 means
    // "replace all matches" — confirm against `mz_repr::adt::regex::Regex::replacen`.
    pub limit: usize,
}
1236
1237impl binary::EagerBinaryFunc for RegexpReplace {
1238    type Input<'a> = (&'a str, &'a str);
1239    type Output<'a> = Cow<'a, str>;
1240
1241    fn call<'a>(
1242        &self,
1243        (source, replacement): Self::Input<'a>,
1244        _temp_storage: &'a RowArena,
1245    ) -> Self::Output<'a> {
1246        // WARNING: This function has potential OOM risk if used with an inflationary
1247        // replacement pattern. It is very difficult to calculate the output size ahead
1248        // of time because the replacement pattern may depend on capture groups.
1249        self.regex.replacen(source, self.limit, replacement)
1250    }
1251
1252    fn output_type(&self, input_types: &[SqlColumnType]) -> SqlColumnType {
1253        use mz_repr::AsColumnType;
1254        let output = <Self::Output<'_> as AsColumnType>::as_column_type();
1255        let propagates_nulls = binary::EagerBinaryFunc::propagates_nulls(self);
1256        let nullable = output.nullable;
1257        let input_nullable = input_types.iter().any(|t| t.nullable);
1258        output.nullable(nullable || (propagates_nulls && input_nullable))
1259    }
1260}
1261
1262impl fmt::Display for RegexpReplace {
1263    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1264        write!(
1265            f,
1266            "regexp_replace[{}, case_insensitive={}, limit={}]",
1267            self.regex.pattern().escaped(),
1268            self.regex.case_insensitive,
1269            self.limit
1270        )
1271    }
1272}