mz_expr/scalar/func/impls/
string.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::fmt;
12use std::sync::LazyLock;
13
14use chrono::{DateTime, NaiveDateTime, NaiveTime, Utc};
15use mz_lowertest::MzReflect;
16use mz_ore::cast::CastFrom;
17use mz_ore::result::ResultExt;
18use mz_ore::str::StrExt;
19use mz_repr::adt::char::{Char, format_str_trim};
20use mz_repr::adt::date::Date;
21use mz_repr::adt::interval::Interval;
22use mz_repr::adt::jsonb::Jsonb;
23use mz_repr::adt::numeric::{self, Numeric, NumericMaxScale};
24use mz_repr::adt::pg_legacy_name::PgLegacyName;
25use mz_repr::adt::regex::Regex;
26use mz_repr::adt::system::{Oid, PgLegacyChar};
27use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampPrecision};
28use mz_repr::adt::varchar::{VarChar, VarCharMaxLength};
29use mz_repr::{Datum, RowArena, SqlColumnType, SqlScalarType, strconv};
30use serde::{Deserialize, Serialize};
31use uuid::Uuid;
32
33use crate::func::regexp_match_static;
34use crate::scalar::func::{
35    EagerUnaryFunc, LazyUnaryFunc, array_create_scalar, regexp_split_to_array_re,
36};
37use crate::{EvalError, MirScalarExpr, UnaryFunc, like_pattern};
38
39sqlfunc!(
40    #[sqlname = "text_to_boolean"]
41    #[preserves_uniqueness = false]
42    #[inverse = to_unary!(super::CastBoolToString)]
43    fn cast_string_to_bool<'a>(a: &'a str) -> Result<bool, EvalError> {
44        strconv::parse_bool(a).err_into()
45    }
46);
47
48sqlfunc!(
49    #[sqlname = "text_to_\"char\""]
50    #[preserves_uniqueness = true]
51    #[inverse = to_unary!(super::CastPgLegacyCharToString)]
52    fn cast_string_to_pg_legacy_char<'a>(a: &'a str) -> PgLegacyChar {
53        PgLegacyChar(a.as_bytes().get(0).copied().unwrap_or(0))
54    }
55);
56
57sqlfunc!(
58    #[sqlname = "text_to_name"]
59    #[preserves_uniqueness = true]
60    fn cast_string_to_pg_legacy_name<'a>(a: &'a str) -> PgLegacyName<String> {
61        PgLegacyName(strconv::parse_pg_legacy_name(a))
62    }
63);
64
65sqlfunc!(
66    #[sqlname = "text_to_bytea"]
67    #[preserves_uniqueness = true]
68    #[inverse = to_unary!(super::CastBytesToString)]
69    fn cast_string_to_bytes<'a>(a: &'a str) -> Result<Vec<u8>, EvalError> {
70        strconv::parse_bytes(a).err_into()
71    }
72);
73
74sqlfunc!(
75    #[sqlname = "text_to_smallint"]
76    #[preserves_uniqueness = false]
77    #[inverse = to_unary!(super::CastInt16ToString)]
78    fn cast_string_to_int16<'a>(a: &'a str) -> Result<i16, EvalError> {
79        strconv::parse_int16(a).err_into()
80    }
81);
82
83sqlfunc!(
84    #[sqlname = "text_to_integer"]
85    #[preserves_uniqueness = false]
86    #[inverse = to_unary!(super::CastInt32ToString)]
87    fn cast_string_to_int32<'a>(a: &'a str) -> Result<i32, EvalError> {
88        strconv::parse_int32(a).err_into()
89    }
90);
91
92sqlfunc!(
93    #[sqlname = "text_to_bigint"]
94    #[preserves_uniqueness = false]
95    #[inverse = to_unary!(super::CastInt64ToString)]
96    fn cast_string_to_int64<'a>(a: &'a str) -> Result<i64, EvalError> {
97        strconv::parse_int64(a).err_into()
98    }
99);
100
101sqlfunc!(
102    #[sqlname = "text_to_real"]
103    #[preserves_uniqueness = false]
104    #[inverse = to_unary!(super::CastFloat32ToString)]
105    fn cast_string_to_float32<'a>(a: &'a str) -> Result<f32, EvalError> {
106        strconv::parse_float32(a).err_into()
107    }
108);
109
110sqlfunc!(
111    #[sqlname = "text_to_double"]
112    #[preserves_uniqueness = false]
113    #[inverse = to_unary!(super::CastFloat64ToString)]
114    fn cast_string_to_float64<'a>(a: &'a str) -> Result<f64, EvalError> {
115        strconv::parse_float64(a).err_into()
116    }
117);
118
119sqlfunc!(
120    #[sqlname = "text_to_oid"]
121    #[preserves_uniqueness = false]
122    #[inverse = to_unary!(super::CastOidToString)]
123    fn cast_string_to_oid<'a>(a: &'a str) -> Result<Oid, EvalError> {
124        Ok(Oid(strconv::parse_oid(a)?))
125    }
126);
127
128sqlfunc!(
129    #[sqlname = "text_to_uint2"]
130    #[preserves_uniqueness = false]
131    #[inverse = to_unary!(super::CastUint16ToString)]
132    fn cast_string_to_uint16(a: &'a str) -> Result<u16, EvalError> {
133        strconv::parse_uint16(a).err_into()
134    }
135);
136
137sqlfunc!(
138    #[sqlname = "text_to_uint4"]
139    #[preserves_uniqueness = false]
140    #[inverse = to_unary!(super::CastUint32ToString)]
141    fn cast_string_to_uint32(a: &'a str) -> Result<u32, EvalError> {
142        strconv::parse_uint32(a).err_into()
143    }
144);
145
146sqlfunc!(
147    #[sqlname = "text_to_uint8"]
148    #[preserves_uniqueness = false]
149    #[inverse = to_unary!(super::CastUint64ToString)]
150    fn cast_string_to_uint64(a: &'a str) -> Result<u64, EvalError> {
151        strconv::parse_uint64(a).err_into()
152    }
153);
154
155sqlfunc!(
156    #[sqlname = "reverse"]
157    fn reverse<'a>(a: &'a str) -> String {
158        a.chars().rev().collect()
159    }
160);
161
162#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
163pub struct CastStringToNumeric(pub Option<NumericMaxScale>);
164
165impl<'a> EagerUnaryFunc<'a> for CastStringToNumeric {
166    type Input = &'a str;
167    type Output = Result<Numeric, EvalError>;
168
169    fn call(&self, a: &'a str) -> Result<Numeric, EvalError> {
170        let mut d = strconv::parse_numeric(a)?;
171        if let Some(scale) = self.0 {
172            if numeric::rescale(&mut d.0, scale.into_u8()).is_err() {
173                return Err(EvalError::NumericFieldOverflow);
174            }
175        }
176        Ok(d.into_inner())
177    }
178
179    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
180        SqlScalarType::Numeric { max_scale: self.0 }.nullable(input.nullable)
181    }
182
183    fn inverse(&self) -> Option<crate::UnaryFunc> {
184        to_unary!(super::CastNumericToString)
185    }
186}
187
188impl fmt::Display for CastStringToNumeric {
189    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
190        f.write_str("text_to_numeric")
191    }
192}
193
194sqlfunc!(
195    #[sqlname = "text_to_date"]
196    #[preserves_uniqueness = false]
197    #[inverse = to_unary!(super::CastDateToString)]
198    fn cast_string_to_date<'a>(a: &'a str) -> Result<Date, EvalError> {
199        strconv::parse_date(a).err_into()
200    }
201);
202
203sqlfunc!(
204    #[sqlname = "text_to_time"]
205    #[preserves_uniqueness = false]
206    #[inverse = to_unary!(super::CastTimeToString)]
207    fn cast_string_to_time<'a>(a: &'a str) -> Result<NaiveTime, EvalError> {
208        strconv::parse_time(a).err_into()
209    }
210);
211
212#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
213pub struct CastStringToTimestamp(pub Option<TimestampPrecision>);
214
215impl<'a> EagerUnaryFunc<'a> for CastStringToTimestamp {
216    type Input = &'a str;
217    type Output = Result<CheckedTimestamp<NaiveDateTime>, EvalError>;
218
219    fn call(&self, a: &'a str) -> Result<CheckedTimestamp<NaiveDateTime>, EvalError> {
220        let out = strconv::parse_timestamp(a)?;
221        let updated = out.round_to_precision(self.0)?;
222        Ok(updated)
223    }
224
225    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
226        SqlScalarType::Timestamp { precision: self.0 }.nullable(input.nullable)
227    }
228
229    fn inverse(&self) -> Option<crate::UnaryFunc> {
230        to_unary!(super::CastTimestampToString)
231    }
232}
233
234impl fmt::Display for CastStringToTimestamp {
235    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
236        f.write_str("text_to_timestamp")
237    }
238}
239
240sqlfunc!(
241    #[sqlname = "try_parse_monotonic_iso8601_timestamp"]
242    // TODO: Pretty sure this preserves uniqueness, but not 100%.
243    //
244    // Ironically, even though this has "monotonic" in the name, it's not quite
245    // eligible for `#[is_monotone = true]` because any input could also be
246    // mapped to null. So, handle it via SpecialUnary in the interpreter.
247    fn try_parse_monotonic_iso8601_timestamp<'a>(
248        a: &'a str,
249    ) -> Option<CheckedTimestamp<NaiveDateTime>> {
250        let ts = mz_persist_types::timestamp::try_parse_monotonic_iso8601_timestamp(a)?;
251        let ts = CheckedTimestamp::from_timestamplike(ts)
252            .expect("monotonic_iso8601 range is a subset of CheckedTimestamp domain");
253        Some(ts)
254    }
255);
256
257#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
258pub struct CastStringToTimestampTz(pub Option<TimestampPrecision>);
259
260impl<'a> EagerUnaryFunc<'a> for CastStringToTimestampTz {
261    type Input = &'a str;
262    type Output = Result<CheckedTimestamp<DateTime<Utc>>, EvalError>;
263
264    fn call(&self, a: &'a str) -> Result<CheckedTimestamp<DateTime<Utc>>, EvalError> {
265        let out = strconv::parse_timestamptz(a)?;
266        let updated = out.round_to_precision(self.0)?;
267        Ok(updated)
268    }
269
270    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
271        SqlScalarType::TimestampTz { precision: self.0 }.nullable(input.nullable)
272    }
273
274    fn inverse(&self) -> Option<crate::UnaryFunc> {
275        to_unary!(super::CastTimestampTzToString)
276    }
277}
278
279impl fmt::Display for CastStringToTimestampTz {
280    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
281        f.write_str("text_to_timestamp_with_time_zone")
282    }
283}
284
285sqlfunc!(
286    #[sqlname = "text_to_interval"]
287    #[preserves_uniqueness = false]
288    #[inverse = to_unary!(super::CastIntervalToString)]
289    fn cast_string_to_interval<'a>(a: &'a str) -> Result<Interval, EvalError> {
290        strconv::parse_interval(a).err_into()
291    }
292);
293
294sqlfunc!(
295    #[sqlname = "text_to_uuid"]
296    #[preserves_uniqueness = false]
297    #[inverse = to_unary!(super::CastUuidToString)]
298    fn cast_string_to_uuid<'a>(a: &'a str) -> Result<Uuid, EvalError> {
299        strconv::parse_uuid(a).err_into()
300    }
301);
302
303#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
304pub struct CastStringToArray {
305    // Target array's type.
306    pub return_ty: SqlScalarType,
307    // The expression to cast the discovered array elements to the array's
308    // element type.
309    pub cast_expr: Box<MirScalarExpr>,
310}
311
312impl LazyUnaryFunc for CastStringToArray {
313    fn eval<'a>(
314        &'a self,
315        datums: &[Datum<'a>],
316        temp_storage: &'a RowArena,
317        a: &'a MirScalarExpr,
318    ) -> Result<Datum<'a>, EvalError> {
319        let a = a.eval(datums, temp_storage)?;
320        if a.is_null() {
321            return Ok(Datum::Null);
322        }
323        let (datums, dims) = strconv::parse_array(
324            a.unwrap_str(),
325            || Datum::Null,
326            |elem_text| {
327                let elem_text = match elem_text {
328                    Cow::Owned(s) => temp_storage.push_string(s),
329                    Cow::Borrowed(s) => s,
330                };
331                self.cast_expr
332                    .eval(&[Datum::String(elem_text)], temp_storage)
333            },
334        )?;
335
336        Ok(temp_storage.try_make_datum(|packer| packer.try_push_array(&dims, datums))?)
337    }
338
339    /// The output SqlColumnType of this function
340    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
341        self.return_ty.clone().nullable(input_type.nullable)
342    }
343
344    /// Whether this function will produce NULL on NULL input
345    fn propagates_nulls(&self) -> bool {
346        true
347    }
348
349    /// Whether this function will produce NULL on non-NULL input
350    fn introduces_nulls(&self) -> bool {
351        false
352    }
353
354    /// Whether this function preserves uniqueness
355    fn preserves_uniqueness(&self) -> bool {
356        false
357    }
358
359    fn inverse(&self) -> Option<crate::UnaryFunc> {
360        to_unary!(super::CastArrayToString {
361            ty: self.return_ty.clone(),
362        })
363    }
364
365    fn is_monotone(&self) -> bool {
366        false
367    }
368}
369
370impl fmt::Display for CastStringToArray {
371    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
372        f.write_str("strtoarray")
373    }
374}
375
376#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
377pub struct CastStringToList {
378    // Target list's type
379    pub return_ty: SqlScalarType,
380    // The expression to cast the discovered list elements to the list's
381    // element type.
382    pub cast_expr: Box<MirScalarExpr>,
383}
384
385impl LazyUnaryFunc for CastStringToList {
386    fn eval<'a>(
387        &'a self,
388        datums: &[Datum<'a>],
389        temp_storage: &'a RowArena,
390        a: &'a MirScalarExpr,
391    ) -> Result<Datum<'a>, EvalError> {
392        let a = a.eval(datums, temp_storage)?;
393        if a.is_null() {
394            return Ok(Datum::Null);
395        }
396        let parsed_datums = strconv::parse_list(
397            a.unwrap_str(),
398            matches!(
399                self.return_ty.unwrap_list_element_type(),
400                SqlScalarType::List { .. }
401            ),
402            || Datum::Null,
403            |elem_text| {
404                let elem_text = match elem_text {
405                    Cow::Owned(s) => temp_storage.push_string(s),
406                    Cow::Borrowed(s) => s,
407                };
408                self.cast_expr
409                    .eval(&[Datum::String(elem_text)], temp_storage)
410            },
411        )?;
412
413        Ok(temp_storage.make_datum(|packer| packer.push_list(parsed_datums)))
414    }
415
416    /// The output SqlColumnType of this function
417    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
418        self.return_ty
419            .without_modifiers()
420            .nullable(input_type.nullable)
421    }
422
423    /// Whether this function will produce NULL on NULL input
424    fn propagates_nulls(&self) -> bool {
425        true
426    }
427
428    /// Whether this function will produce NULL on non-NULL input
429    fn introduces_nulls(&self) -> bool {
430        false
431    }
432
433    /// Whether this function preserves uniqueness
434    fn preserves_uniqueness(&self) -> bool {
435        false
436    }
437
438    fn inverse(&self) -> Option<crate::UnaryFunc> {
439        to_unary!(super::CastListToString {
440            ty: self.return_ty.clone(),
441        })
442    }
443
444    fn is_monotone(&self) -> bool {
445        false
446    }
447}
448
449impl fmt::Display for CastStringToList {
450    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
451        f.write_str("strtolist")
452    }
453}
454
455#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
456pub struct CastStringToMap {
457    // Target map's value type
458    pub return_ty: SqlScalarType,
459    // The expression used to cast the discovered values to the map's value
460    // type.
461    pub cast_expr: Box<MirScalarExpr>,
462}
463
464impl LazyUnaryFunc for CastStringToMap {
465    fn eval<'a>(
466        &'a self,
467        datums: &[Datum<'a>],
468        temp_storage: &'a RowArena,
469        a: &'a MirScalarExpr,
470    ) -> Result<Datum<'a>, EvalError> {
471        let a = a.eval(datums, temp_storage)?;
472        if a.is_null() {
473            return Ok(Datum::Null);
474        }
475        let parsed_map = strconv::parse_map(
476            a.unwrap_str(),
477            matches!(
478                self.return_ty.unwrap_map_value_type(),
479                SqlScalarType::Map { .. }
480            ),
481            |value_text| -> Result<Datum, EvalError> {
482                let value_text = match value_text {
483                    Some(Cow::Owned(s)) => Datum::String(temp_storage.push_string(s)),
484                    Some(Cow::Borrowed(s)) => Datum::String(s),
485                    None => Datum::Null,
486                };
487                self.cast_expr.eval(&[value_text], temp_storage)
488            },
489        )?;
490        let mut pairs: Vec<(String, Datum)> = parsed_map.into_iter().map(|(k, v)| (k, v)).collect();
491        pairs.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2));
492        pairs.dedup_by(|(k1, _v1), (k2, _v2)| k1 == k2);
493        Ok(temp_storage.make_datum(|packer| {
494            packer.push_dict_with(|packer| {
495                for (k, v) in pairs {
496                    packer.push(Datum::String(&k));
497                    packer.push(v);
498                }
499            })
500        }))
501    }
502
503    /// The output SqlColumnType of this function
504    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
505        self.return_ty.clone().nullable(input_type.nullable)
506    }
507
508    /// Whether this function will produce NULL on NULL input
509    fn propagates_nulls(&self) -> bool {
510        true
511    }
512
513    /// Whether this function will produce NULL on non-NULL input
514    fn introduces_nulls(&self) -> bool {
515        false
516    }
517
518    /// Whether this function preserves uniqueness
519    fn preserves_uniqueness(&self) -> bool {
520        false
521    }
522
523    fn inverse(&self) -> Option<crate::UnaryFunc> {
524        to_unary!(super::CastMapToString {
525            ty: self.return_ty.clone(),
526        })
527    }
528
529    fn is_monotone(&self) -> bool {
530        false
531    }
532}
533
534impl fmt::Display for CastStringToMap {
535    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
536        f.write_str("strtomap")
537    }
538}
539
540#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
541pub struct CastStringToChar {
542    pub length: Option<mz_repr::adt::char::CharLength>,
543    pub fail_on_len: bool,
544}
545
546impl<'a> EagerUnaryFunc<'a> for CastStringToChar {
547    type Input = &'a str;
548    type Output = Result<Char<String>, EvalError>;
549
550    fn call(&self, a: &'a str) -> Result<Char<String>, EvalError> {
551        let s = format_str_trim(a, self.length, self.fail_on_len).map_err(|_| {
552            assert!(self.fail_on_len);
553            EvalError::StringValueTooLong {
554                target_type: "character".into(),
555                length: usize::cast_from(self.length.unwrap().into_u32()),
556            }
557        })?;
558
559        Ok(Char(s))
560    }
561
562    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
563        SqlScalarType::Char {
564            length: self.length,
565        }
566        .nullable(input.nullable)
567    }
568
569    fn could_error(&self) -> bool {
570        self.fail_on_len && self.length.is_some()
571    }
572
573    fn inverse(&self) -> Option<crate::UnaryFunc> {
574        to_unary!(super::CastCharToString)
575    }
576}
577
578impl fmt::Display for CastStringToChar {
579    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
580        match self.length {
581            Some(length) => {
582                write!(
583                    f,
584                    "text_to_char[len={}, fail_on_len={}]",
585                    length.into_u32(),
586                    self.fail_on_len
587                )
588            }
589            None => f.write_str("text_to_char[len=unbounded]"),
590        }
591    }
592}
593
594#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
595pub struct CastStringToRange {
596    // Target range's type
597    pub return_ty: SqlScalarType,
598    // The expression to cast the discovered range elements to the range's
599    // element type.
600    pub cast_expr: Box<MirScalarExpr>,
601}
602
603impl LazyUnaryFunc for CastStringToRange {
604    fn eval<'a>(
605        &'a self,
606        datums: &[Datum<'a>],
607        temp_storage: &'a RowArena,
608        a: &'a MirScalarExpr,
609    ) -> Result<Datum<'a>, EvalError> {
610        let a = a.eval(datums, temp_storage)?;
611        if a.is_null() {
612            return Ok(Datum::Null);
613        }
614        let mut range = strconv::parse_range(a.unwrap_str(), |elem_text| {
615            let elem_text = match elem_text {
616                Cow::Owned(s) => temp_storage.push_string(s),
617                Cow::Borrowed(s) => s,
618            };
619            self.cast_expr
620                .eval(&[Datum::String(elem_text)], temp_storage)
621        })?;
622
623        range.canonicalize()?;
624
625        Ok(temp_storage.make_datum(|packer| {
626            packer
627                .push_range(range)
628                .expect("must have already handled errors")
629        }))
630    }
631
632    /// The output SqlColumnType of this function
633    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
634        self.return_ty
635            .without_modifiers()
636            .nullable(input_type.nullable)
637    }
638
639    /// Whether this function will produce NULL on NULL input
640    fn propagates_nulls(&self) -> bool {
641        true
642    }
643
644    /// Whether this function will produce NULL on non-NULL input
645    fn introduces_nulls(&self) -> bool {
646        false
647    }
648
649    /// Whether this function preserves uniqueness
650    fn preserves_uniqueness(&self) -> bool {
651        false
652    }
653
654    fn inverse(&self) -> Option<crate::UnaryFunc> {
655        to_unary!(super::CastRangeToString {
656            ty: self.return_ty.clone(),
657        })
658    }
659
660    fn is_monotone(&self) -> bool {
661        false
662    }
663}
664
665impl fmt::Display for CastStringToRange {
666    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
667        f.write_str("strtorange")
668    }
669}
670
671#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
672pub struct CastStringToVarChar {
673    pub length: Option<VarCharMaxLength>,
674    pub fail_on_len: bool,
675}
676
677impl<'a> EagerUnaryFunc<'a> for CastStringToVarChar {
678    type Input = &'a str;
679    type Output = Result<VarChar<&'a str>, EvalError>;
680
681    fn call(&self, a: &'a str) -> Result<VarChar<&'a str>, EvalError> {
682        let s =
683            mz_repr::adt::varchar::format_str(a, self.length, self.fail_on_len).map_err(|_| {
684                assert!(self.fail_on_len);
685                EvalError::StringValueTooLong {
686                    target_type: "character varying".into(),
687                    length: usize::cast_from(self.length.unwrap().into_u32()),
688                }
689            })?;
690
691        Ok(VarChar(s))
692    }
693
694    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
695        SqlScalarType::VarChar {
696            max_length: self.length,
697        }
698        .nullable(input.nullable)
699    }
700
701    fn could_error(&self) -> bool {
702        self.fail_on_len && self.length.is_some()
703    }
704
705    fn preserves_uniqueness(&self) -> bool {
706        !self.fail_on_len || self.length.is_none()
707    }
708
709    fn inverse(&self) -> Option<crate::UnaryFunc> {
710        to_unary!(super::CastVarCharToString)
711    }
712}
713
714impl fmt::Display for CastStringToVarChar {
715    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
716        match self.length {
717            Some(length) => {
718                write!(
719                    f,
720                    "text_to_varchar[len={}, fail_on_len={}]",
721                    length.into_u32(),
722                    self.fail_on_len
723                )
724            }
725            None => f.write_str("text_to_varchar[len=unbounded]"),
726        }
727    }
728}
729
730// If we support another vector type, this should likely get hoisted into a
731// position akin to array parsing.
732static INT2VECTOR_CAST_EXPR: LazyLock<MirScalarExpr> = LazyLock::new(|| MirScalarExpr::CallUnary {
733    func: UnaryFunc::CastStringToInt16(CastStringToInt16),
734    expr: Box::new(MirScalarExpr::column(0)),
735});
736
737#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
738pub struct CastStringToInt2Vector;
739
740impl LazyUnaryFunc for CastStringToInt2Vector {
741    fn eval<'a>(
742        &'a self,
743        datums: &[Datum<'a>],
744        temp_storage: &'a RowArena,
745        a: &'a MirScalarExpr,
746    ) -> Result<Datum<'a>, EvalError> {
747        let a = a.eval(datums, temp_storage)?;
748        if a.is_null() {
749            return Ok(Datum::Null);
750        }
751
752        let datums = strconv::parse_legacy_vector(a.unwrap_str(), |elem_text| {
753            let elem_text = match elem_text {
754                Cow::Owned(s) => temp_storage.push_string(s),
755                Cow::Borrowed(s) => s,
756            };
757            INT2VECTOR_CAST_EXPR.eval(&[Datum::String(elem_text)], temp_storage)
758        })?;
759        array_create_scalar(&datums, temp_storage)
760    }
761
762    /// The output SqlColumnType of this function
763    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
764        SqlScalarType::Int2Vector.nullable(input_type.nullable)
765    }
766
767    /// Whether this function will produce NULL on NULL input
768    fn propagates_nulls(&self) -> bool {
769        true
770    }
771
772    /// Whether this function will produce NULL on non-NULL input
773    fn introduces_nulls(&self) -> bool {
774        false
775    }
776
777    /// Whether this function preserves uniqueness
778    fn preserves_uniqueness(&self) -> bool {
779        false
780    }
781
782    fn inverse(&self) -> Option<crate::UnaryFunc> {
783        to_unary!(super::CastInt2VectorToString)
784    }
785
786    fn is_monotone(&self) -> bool {
787        false
788    }
789}
790
791impl fmt::Display for CastStringToInt2Vector {
792    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
793        f.write_str("strtoint2vector")
794    }
795}
796
797sqlfunc!(
798    #[sqlname = "text_to_jsonb"]
799    #[preserves_uniqueness = false]
800    #[inverse = to_unary!(super::CastJsonbToString)]
801    // TODO(jamii): it would be much more efficient to skip the intermediate repr::jsonb::Jsonb.
802    fn cast_string_to_jsonb<'a>(a: &'a str) -> Result<Jsonb, EvalError> {
803        Ok(strconv::parse_jsonb(a)?)
804    }
805);
806
807sqlfunc!(
808    #[sqlname = "btrim"]
809    fn trim_whitespace<'a>(a: &'a str) -> &'a str {
810        a.trim_matches(' ')
811    }
812);
813
814sqlfunc!(
815    #[sqlname = "ltrim"]
816    fn trim_leading_whitespace<'a>(a: &'a str) -> &'a str {
817        a.trim_start_matches(' ')
818    }
819);
820
821sqlfunc!(
822    #[sqlname = "rtrim"]
823    fn trim_trailing_whitespace<'a>(a: &'a str) -> &'a str {
824        a.trim_end_matches(' ')
825    }
826);
827
828sqlfunc!(
829    #[sqlname = "initcap"]
830    fn initcap<'a>(a: &'a str) -> String {
831        let mut out = String::new();
832        let mut capitalize_next = true;
833        for ch in a.chars() {
834            if capitalize_next {
835                out.extend(ch.to_uppercase())
836            } else {
837                out.extend(ch.to_lowercase())
838            };
839            capitalize_next = !ch.is_alphanumeric();
840        }
841        out
842    }
843);
844
845sqlfunc!(
846    #[sqlname = "ascii"]
847    fn ascii<'a>(a: &'a str) -> i32 {
848        a.chars()
849            .next()
850            .and_then(|c| i32::try_from(u32::from(c)).ok())
851            .unwrap_or(0)
852    }
853);
854
855sqlfunc!(
856    #[sqlname = "char_length"]
857    fn char_length<'a>(a: &'a str) -> Result<i32, EvalError> {
858        let length = a.chars().count();
859        i32::try_from(length)
860            .or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
861    }
862);
863
864sqlfunc!(
865    #[sqlname = "bit_length"]
866    fn bit_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
867        let length = a.as_bytes().len() * 8;
868        i32::try_from(length)
869            .or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
870    }
871);
872
873sqlfunc!(
874    #[sqlname = "octet_length"]
875    fn byte_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
876        let length = a.as_bytes().len();
877        i32::try_from(length)
878            .or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
879    }
880);
881
882sqlfunc!(
883    fn upper<'a>(a: &'a str) -> String {
884        a.to_uppercase()
885    }
886);
887
888sqlfunc!(
889    fn lower<'a>(a: &'a str) -> String {
890        a.to_lowercase()
891    }
892);
893
894pub fn normalize_with_form<'a>(
895    text: Datum<'a>,
896    form_str: Datum<'a>,
897    temp_storage: &'a RowArena,
898) -> Result<Datum<'a>, EvalError> {
899    use unicode_normalization::UnicodeNormalization;
900
901    let text = text.unwrap_str();
902    let form_str = form_str.unwrap_str();
903
904    let normalized = match form_str.to_uppercase().as_str() {
905        "NFC" => text.nfc().collect(),
906        "NFD" => text.nfd().collect(),
907        "NFKC" => text.nfkc().collect(),
908        "NFKD" => text.nfkd().collect(),
909        _ => {
910            return Err(EvalError::InvalidParameterValue(
911                format!("invalid normalization form: {}", form_str).into(),
912            ));
913        }
914    };
915
916    Ok(Datum::String(temp_storage.push_string(normalized)))
917}
918
919#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
920pub struct IsLikeMatch(pub like_pattern::Matcher);
921
922impl<'a> EagerUnaryFunc<'a> for IsLikeMatch {
923    type Input = &'a str;
924    type Output = bool;
925
926    fn call(&self, haystack: &'a str) -> bool {
927        self.0.is_match(haystack)
928    }
929
930    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
931        SqlScalarType::Bool.nullable(input.nullable)
932    }
933}
934
935impl fmt::Display for IsLikeMatch {
936    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
937        write!(
938            f,
939            "{}like[{}]",
940            if self.0.case_insensitive { "i" } else { "" },
941            self.0.pattern.escaped()
942        )
943    }
944}
945
946#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
947pub struct IsRegexpMatch(pub Regex);
948
949impl<'a> EagerUnaryFunc<'a> for IsRegexpMatch {
950    type Input = &'a str;
951    type Output = bool;
952
953    fn call(&self, haystack: &'a str) -> bool {
954        self.0.is_match(haystack)
955    }
956
957    fn output_type(&self, input: SqlColumnType) -> SqlColumnType {
958        SqlScalarType::Bool.nullable(input.nullable)
959    }
960}
961
962impl fmt::Display for IsRegexpMatch {
963    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
964        write!(
965            f,
966            "is_regexp_match[{}, case_insensitive={}]",
967            self.0.pattern().escaped(),
968            self.0.case_insensitive
969        )
970    }
971}
972
973#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
974pub struct RegexpMatch(pub Regex);
975
976impl LazyUnaryFunc for RegexpMatch {
977    fn eval<'a>(
978        &'a self,
979        datums: &[Datum<'a>],
980        temp_storage: &'a RowArena,
981        a: &'a MirScalarExpr,
982    ) -> Result<Datum<'a>, EvalError> {
983        let haystack = a.eval(datums, temp_storage)?;
984        if haystack.is_null() {
985            return Ok(Datum::Null);
986        }
987        regexp_match_static(haystack, temp_storage, &self.0)
988    }
989
990    /// The output SqlColumnType of this function
991    fn output_type(&self, _input_type: SqlColumnType) -> SqlColumnType {
992        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(true)
993    }
994
995    /// Whether this function will produce NULL on NULL input
996    fn propagates_nulls(&self) -> bool {
997        true
998    }
999
1000    /// Whether this function will produce NULL on non-NULL input
1001    fn introduces_nulls(&self) -> bool {
1002        // Returns null if the regex did not match
1003        true
1004    }
1005
1006    /// Whether this function preserves uniqueness
1007    fn preserves_uniqueness(&self) -> bool {
1008        false
1009    }
1010
1011    fn inverse(&self) -> Option<crate::UnaryFunc> {
1012        None
1013    }
1014
1015    fn is_monotone(&self) -> bool {
1016        false
1017    }
1018}
1019
1020impl fmt::Display for RegexpMatch {
1021    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1022        write!(
1023            f,
1024            "regexp_match[{}, case_insensitive={}]",
1025            self.0.pattern().escaped(),
1026            self.0.case_insensitive
1027        )
1028    }
1029}
1030
1031#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
1032pub struct RegexpSplitToArray(pub Regex);
1033
1034impl LazyUnaryFunc for RegexpSplitToArray {
1035    fn eval<'a>(
1036        &'a self,
1037        datums: &[Datum<'a>],
1038        temp_storage: &'a RowArena,
1039        a: &'a MirScalarExpr,
1040    ) -> Result<Datum<'a>, EvalError> {
1041        let haystack = a.eval(datums, temp_storage)?;
1042        if haystack.is_null() {
1043            return Ok(Datum::Null);
1044        }
1045        regexp_split_to_array_re(haystack.unwrap_str(), &self.0, temp_storage)
1046    }
1047
1048    /// The output SqlColumnType of this function
1049    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1050        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(input_type.nullable)
1051    }
1052
1053    /// Whether this function will produce NULL on NULL input
1054    fn propagates_nulls(&self) -> bool {
1055        true
1056    }
1057
1058    /// Whether this function will produce NULL on non-NULL input
1059    fn introduces_nulls(&self) -> bool {
1060        false
1061    }
1062
1063    /// Whether this function preserves uniqueness
1064    fn preserves_uniqueness(&self) -> bool {
1065        false
1066    }
1067
1068    fn inverse(&self) -> Option<crate::UnaryFunc> {
1069        None
1070    }
1071
1072    fn is_monotone(&self) -> bool {
1073        false
1074    }
1075}
1076
1077impl fmt::Display for RegexpSplitToArray {
1078    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1079        write!(
1080            f,
1081            "regexp_split_to_array[{}, case_insensitive={}]",
1082            self.0.pattern().escaped(),
1083            self.0.case_insensitive
1084        )
1085    }
1086}
1087
1088sqlfunc!(
1089    #[sqlname = "mz_panic"]
1090    fn panic<'a>(a: &'a str) -> String {
1091        print!("{}", a);
1092        panic!("{}", a)
1093    }
1094);
1095
1096#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
1097pub struct QuoteIdent;
1098
1099impl LazyUnaryFunc for QuoteIdent {
1100    fn eval<'a>(
1101        &'a self,
1102        datums: &[Datum<'a>],
1103        temp_storage: &'a RowArena,
1104        a: &'a MirScalarExpr,
1105    ) -> Result<Datum<'a>, EvalError> {
1106        let d = a.eval(datums, temp_storage)?;
1107        if d.is_null() {
1108            return Ok(Datum::Null);
1109        }
1110        let v = d.unwrap_str();
1111        let i = mz_sql_parser::ast::Ident::new(v).map_err(|err| EvalError::InvalidIdentifier {
1112            ident: v.into(),
1113            detail: Some(err.to_string().into()),
1114        })?;
1115        let r = temp_storage.push_string(i.to_string());
1116
1117        Ok(Datum::String(r))
1118    }
1119
1120    /// The output SqlColumnType of this function
1121    fn output_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1122        SqlScalarType::String.nullable(input_type.nullable)
1123    }
1124
1125    /// Whether this function will produce NULL on NULL input
1126    fn propagates_nulls(&self) -> bool {
1127        true
1128    }
1129
1130    /// Whether this function will produce NULL on non-NULL input
1131    fn introduces_nulls(&self) -> bool {
1132        false
1133    }
1134
1135    /// Whether this function preserves uniqueness
1136    fn preserves_uniqueness(&self) -> bool {
1137        true
1138    }
1139
1140    fn inverse(&self) -> Option<crate::UnaryFunc> {
1141        None
1142    }
1143
1144    fn is_monotone(&self) -> bool {
1145        false
1146    }
1147}
1148
1149impl fmt::Display for QuoteIdent {
1150    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1151        write!(f, "quote_ident")
1152    }
1153}