Skip to main content

mz_expr/scalar/func/impls/
string.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::fmt;
12use std::sync::LazyLock;
13
14use chrono::{DateTime, NaiveDateTime, NaiveTime, Utc};
15use mz_expr_derive::sqlfunc;
16use mz_lowertest::MzReflect;
17use mz_ore::cast::CastFrom;
18use mz_ore::result::ResultExt;
19use mz_ore::str::StrExt;
20use mz_repr::adt::char::{Char, format_str_trim};
21use mz_repr::adt::date::Date;
22use mz_repr::adt::interval::Interval;
23use mz_repr::adt::jsonb::Jsonb;
24use mz_repr::adt::numeric::{self, Numeric, NumericMaxScale};
25use mz_repr::adt::pg_legacy_name::PgLegacyName;
26use mz_repr::adt::regex::Regex;
27use mz_repr::adt::system::{Oid, PgLegacyChar};
28use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampPrecision};
29use mz_repr::adt::varchar::{VarChar, VarCharMaxLength};
30use mz_repr::{Datum, RowArena, SqlColumnType, SqlScalarType, strconv};
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use crate::func::{binary, regexp_match_static};
35use crate::scalar::func::{
36    EagerUnaryFunc, LazyUnaryFunc, array_create_scalar, regexp_split_to_array_re,
37};
38use crate::{EvalError, MirScalarExpr, UnaryFunc, like_pattern};
39
40#[sqlfunc(
41    sqlname = "text_to_boolean",
42    preserves_uniqueness = false,
43    inverse = to_unary!(super::CastBoolToString)
44)]
45fn cast_string_to_bool<'a>(a: &'a str) -> Result<bool, EvalError> {
46    strconv::parse_bool(a).err_into()
47}
48
49#[sqlfunc(
50    sqlname = "text_to_\"char\"",
51    // Not injective: only the first byte is kept (e.g. 'a' and 'abc' both
52    // collapse to 'a'::"char"), so inverse-cast canonicalization of
53    // `c::text = lit` would silently change results.
54    preserves_uniqueness = false,
55    inverse = to_unary!(super::CastPgLegacyCharToString)
56)]
57fn cast_string_to_pg_legacy_char<'a>(a: &'a str) -> PgLegacyChar {
58    PgLegacyChar(a.as_bytes().get(0).copied().unwrap_or(0))
59}
60
61#[sqlfunc(sqlname = "text_to_name", preserves_uniqueness = false)]
62fn cast_string_to_pg_legacy_name<'a>(a: &'a str) -> PgLegacyName<String> {
63    PgLegacyName(strconv::parse_pg_legacy_name(a))
64}
65
66#[sqlfunc(
67    sqlname = "text_to_bytea",
68    // Not injective: `parse_bytes` accepts both hex (`\x..`) and the
69    // traditional textual encoding for the same bytes, so distinct text
70    // literals can map to the same bytea. Inverse-cast canonicalization of
71    // `b::text = lit` would otherwise rewrite to a comparison that ignores
72    // the actual textual form.
73    preserves_uniqueness = false,
74    inverse = to_unary!(super::CastBytesToString)
75)]
76fn cast_string_to_bytes<'a>(a: &'a str) -> Result<Vec<u8>, EvalError> {
77    strconv::parse_bytes(a).err_into()
78}
79
80#[sqlfunc(
81    sqlname = "text_to_smallint",
82    preserves_uniqueness = false,
83    inverse = to_unary!(super::CastInt16ToString)
84)]
85fn cast_string_to_int16<'a>(a: &'a str) -> Result<i16, EvalError> {
86    strconv::parse_int16(a).err_into()
87}
88
89#[sqlfunc(
90    sqlname = "text_to_integer",
91    preserves_uniqueness = false,
92    inverse = to_unary!(super::CastInt32ToString)
93)]
94fn cast_string_to_int32<'a>(a: &'a str) -> Result<i32, EvalError> {
95    strconv::parse_int32(a).err_into()
96}
97
98#[sqlfunc(
99    sqlname = "text_to_bigint",
100    preserves_uniqueness = false,
101    inverse = to_unary!(super::CastInt64ToString)
102)]
103fn cast_string_to_int64<'a>(a: &'a str) -> Result<i64, EvalError> {
104    strconv::parse_int64(a).err_into()
105}
106
107#[sqlfunc(
108    sqlname = "text_to_real",
109    preserves_uniqueness = false,
110    inverse = to_unary!(super::CastFloat32ToString)
111)]
112fn cast_string_to_float32<'a>(a: &'a str) -> Result<f32, EvalError> {
113    strconv::parse_float32(a).err_into()
114}
115
116#[sqlfunc(
117    sqlname = "text_to_double",
118    preserves_uniqueness = false,
119    inverse = to_unary!(super::CastFloat64ToString)
120)]
121fn cast_string_to_float64<'a>(a: &'a str) -> Result<f64, EvalError> {
122    strconv::parse_float64(a).err_into()
123}
124
125#[sqlfunc(
126    sqlname = "text_to_oid",
127    preserves_uniqueness = false,
128    inverse = to_unary!(super::CastOidToString)
129)]
130fn cast_string_to_oid<'a>(a: &'a str) -> Result<Oid, EvalError> {
131    Ok(Oid(strconv::parse_oid(a)?))
132}
133
134#[sqlfunc(
135    sqlname = "text_to_uint2",
136    preserves_uniqueness = false,
137    inverse = to_unary!(super::CastUint16ToString)
138)]
139fn cast_string_to_uint16(a: &str) -> Result<u16, EvalError> {
140    strconv::parse_uint16(a).err_into()
141}
142
143#[sqlfunc(
144    sqlname = "text_to_uint4",
145    preserves_uniqueness = false,
146    inverse = to_unary!(super::CastUint32ToString)
147)]
148fn cast_string_to_uint32(a: &str) -> Result<u32, EvalError> {
149    strconv::parse_uint32(a).err_into()
150}
151
152#[sqlfunc(
153    sqlname = "text_to_uint8",
154    preserves_uniqueness = false,
155    inverse = to_unary!(super::CastUint64ToString)
156)]
157fn cast_string_to_uint64(a: &str) -> Result<u64, EvalError> {
158    strconv::parse_uint64(a).err_into()
159}
160
161#[sqlfunc(sqlname = "reverse")]
162fn reverse<'a>(a: &'a str) -> String {
163    a.chars().rev().collect()
164}
165
166#[derive(
167    Ord,
168    PartialOrd,
169    Clone,
170    Debug,
171    Eq,
172    PartialEq,
173    Serialize,
174    Deserialize,
175    Hash,
176    MzReflect
177)]
178pub struct CastStringToNumeric(pub Option<NumericMaxScale>);
179
180impl EagerUnaryFunc for CastStringToNumeric {
181    type Input<'a> = &'a str;
182    type Output<'a> = Result<Numeric, EvalError>;
183
184    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
185        let mut d = strconv::parse_numeric(a)?;
186        if let Some(scale) = self.0 {
187            if numeric::rescale(&mut d.0, scale.into_u8()).is_err() {
188                return Err(EvalError::NumericFieldOverflow);
189            }
190        }
191        Ok(d.into_inner())
192    }
193
194    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
195        SqlScalarType::Numeric { max_scale: self.0 }.nullable(input.nullable)
196    }
197
198    fn inverse(&self) -> Option<crate::UnaryFunc> {
199        to_unary!(super::CastNumericToString)
200    }
201}
202
203impl fmt::Display for CastStringToNumeric {
204    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
205        f.write_str("text_to_numeric")
206    }
207}
208
209#[sqlfunc(
210    sqlname = "text_to_date",
211    preserves_uniqueness = false,
212    inverse = to_unary!(super::CastDateToString)
213)]
214fn cast_string_to_date<'a>(a: &'a str) -> Result<Date, EvalError> {
215    strconv::parse_date(a).err_into()
216}
217
218#[sqlfunc(
219    sqlname = "text_to_time",
220    preserves_uniqueness = false,
221    inverse = to_unary!(super::CastTimeToString)
222)]
223fn cast_string_to_time<'a>(a: &'a str) -> Result<NaiveTime, EvalError> {
224    strconv::parse_time(a).err_into()
225}
226
227#[derive(
228    Ord,
229    PartialOrd,
230    Clone,
231    Debug,
232    Eq,
233    PartialEq,
234    Serialize,
235    Deserialize,
236    Hash,
237    MzReflect
238)]
239pub struct CastStringToTimestamp(pub Option<TimestampPrecision>);
240
241impl EagerUnaryFunc for CastStringToTimestamp {
242    type Input<'a> = &'a str;
243    type Output<'a> = Result<CheckedTimestamp<NaiveDateTime>, EvalError>;
244
245    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
246        let out = strconv::parse_timestamp(a)?;
247        let updated = out.round_to_precision(self.0)?;
248        Ok(updated)
249    }
250
251    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
252        SqlScalarType::Timestamp { precision: self.0 }.nullable(input.nullable)
253    }
254
255    fn inverse(&self) -> Option<crate::UnaryFunc> {
256        to_unary!(super::CastTimestampToString)
257    }
258}
259
260impl fmt::Display for CastStringToTimestamp {
261    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
262        f.write_str("text_to_timestamp")
263    }
264}
265
266#[sqlfunc(sqlname = "try_parse_monotonic_iso8601_timestamp")]
267// TODO: Pretty sure this preserves uniqueness, but not 100%.
268//
269// Ironically, even though this has "monotonic" in the name, it's not quite
270// eligible for `#[is_monotone = true]` because any input could also be
271// mapped to null. So, handle it via SpecialUnary in the interpreter.
272fn try_parse_monotonic_iso8601_timestamp<'a>(
273    a: &'a str,
274) -> Option<CheckedTimestamp<NaiveDateTime>> {
275    let ts = mz_persist_types::timestamp::try_parse_monotonic_iso8601_timestamp(a)?;
276    let ts = CheckedTimestamp::from_timestamplike(ts)
277        .expect("monotonic_iso8601 range is a subset of CheckedTimestamp domain");
278    Some(ts)
279}
280
281#[derive(
282    Ord,
283    PartialOrd,
284    Clone,
285    Debug,
286    Eq,
287    PartialEq,
288    Serialize,
289    Deserialize,
290    Hash,
291    MzReflect
292)]
293pub struct CastStringToTimestampTz(pub Option<TimestampPrecision>);
294
295impl EagerUnaryFunc for CastStringToTimestampTz {
296    type Input<'a> = &'a str;
297    type Output<'a> = Result<CheckedTimestamp<DateTime<Utc>>, EvalError>;
298
299    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
300        let out = strconv::parse_timestamptz(a)?;
301        let updated = out.round_to_precision(self.0)?;
302        Ok(updated)
303    }
304
305    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
306        SqlScalarType::TimestampTz { precision: self.0 }.nullable(input.nullable)
307    }
308
309    fn inverse(&self) -> Option<crate::UnaryFunc> {
310        to_unary!(super::CastTimestampTzToString)
311    }
312}
313
314impl fmt::Display for CastStringToTimestampTz {
315    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
316        f.write_str("text_to_timestamp_with_time_zone")
317    }
318}
319
320#[sqlfunc(
321    sqlname = "text_to_interval",
322    preserves_uniqueness = false,
323    inverse = to_unary!(super::CastIntervalToString)
324)]
325fn cast_string_to_interval<'a>(a: &'a str) -> Result<Interval, EvalError> {
326    strconv::parse_interval(a).err_into()
327}
328
329#[sqlfunc(
330    sqlname = "text_to_uuid",
331    preserves_uniqueness = false,
332    inverse = to_unary!(super::CastUuidToString)
333)]
334fn cast_string_to_uuid<'a>(a: &'a str) -> Result<Uuid, EvalError> {
335    strconv::parse_uuid(a).err_into()
336}
337
338#[derive(
339    Ord,
340    PartialOrd,
341    Clone,
342    Debug,
343    Eq,
344    PartialEq,
345    Serialize,
346    Deserialize,
347    Hash,
348    MzReflect
349)]
350pub struct CastStringToArray {
351    // Target array's type.
352    pub return_ty: SqlScalarType,
353    // The expression to cast the discovered array elements to the array's
354    // element type.
355    pub cast_expr: Box<MirScalarExpr>,
356}
357
358impl LazyUnaryFunc for CastStringToArray {
359    fn eval<'a>(
360        &'a self,
361        datums: &[Datum<'a>],
362        temp_storage: &'a RowArena,
363        a: &'a MirScalarExpr,
364    ) -> Result<Datum<'a>, EvalError> {
365        let a = a.eval(datums, temp_storage)?;
366        if a.is_null() {
367            return Ok(Datum::Null);
368        }
369        let (datums, dims) = strconv::parse_array(
370            a.unwrap_str(),
371            || Datum::Null,
372            |elem_text| {
373                let elem_text = match elem_text {
374                    Cow::Owned(s) => temp_storage.push_string(s),
375                    Cow::Borrowed(s) => s,
376                };
377                self.cast_expr
378                    .eval(&[Datum::String(elem_text)], temp_storage)
379            },
380        )?;
381
382        Ok(temp_storage.try_make_datum(|packer| packer.try_push_array(&dims, datums))?)
383    }
384
385    /// The output SqlColumnType of this function
386    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
387        self.return_ty.clone().nullable(input_type.nullable)
388    }
389
390    /// Whether this function will produce NULL on NULL input
391    fn propagates_nulls(&self) -> bool {
392        true
393    }
394
395    /// Whether this function will produce NULL on non-NULL input
396    fn introduces_nulls(&self) -> bool {
397        false
398    }
399
400    /// Whether this function preserves uniqueness
401    fn preserves_uniqueness(&self) -> bool {
402        false
403    }
404
405    fn inverse(&self) -> Option<crate::UnaryFunc> {
406        to_unary!(super::CastArrayToString {
407            ty: self.return_ty.clone(),
408        })
409    }
410
411    fn is_monotone(&self) -> bool {
412        false
413    }
414
415    fn is_eliminable_cast(&self) -> bool {
416        false
417    }
418}
419
420impl fmt::Display for CastStringToArray {
421    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
422        f.write_str("strtoarray")
423    }
424}
425
426#[derive(
427    Ord,
428    PartialOrd,
429    Clone,
430    Debug,
431    Eq,
432    PartialEq,
433    Serialize,
434    Deserialize,
435    Hash,
436    MzReflect
437)]
438pub struct CastStringToList {
439    // Target list's type
440    pub return_ty: SqlScalarType,
441    // The expression to cast the discovered list elements to the list's
442    // element type.
443    pub cast_expr: Box<MirScalarExpr>,
444}
445
446impl LazyUnaryFunc for CastStringToList {
447    fn eval<'a>(
448        &'a self,
449        datums: &[Datum<'a>],
450        temp_storage: &'a RowArena,
451        a: &'a MirScalarExpr,
452    ) -> Result<Datum<'a>, EvalError> {
453        let a = a.eval(datums, temp_storage)?;
454        if a.is_null() {
455            return Ok(Datum::Null);
456        }
457        let parsed_datums = strconv::parse_list(
458            a.unwrap_str(),
459            matches!(
460                self.return_ty.unwrap_list_element_type(),
461                SqlScalarType::List { .. }
462            ),
463            || Datum::Null,
464            |elem_text| {
465                let elem_text = match elem_text {
466                    Cow::Owned(s) => temp_storage.push_string(s),
467                    Cow::Borrowed(s) => s,
468                };
469                self.cast_expr
470                    .eval(&[Datum::String(elem_text)], temp_storage)
471            },
472        )?;
473
474        Ok(temp_storage.make_datum(|packer| packer.push_list(parsed_datums)))
475    }
476
477    /// The output SqlColumnType of this function
478    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
479        self.return_ty
480            .without_modifiers()
481            .nullable(input_type.nullable)
482    }
483
484    /// Whether this function will produce NULL on NULL input
485    fn propagates_nulls(&self) -> bool {
486        true
487    }
488
489    /// Whether this function will produce NULL on non-NULL input
490    fn introduces_nulls(&self) -> bool {
491        false
492    }
493
494    /// Whether this function preserves uniqueness
495    fn preserves_uniqueness(&self) -> bool {
496        false
497    }
498
499    fn inverse(&self) -> Option<crate::UnaryFunc> {
500        to_unary!(super::CastListToString {
501            ty: self.return_ty.clone(),
502        })
503    }
504
505    fn is_monotone(&self) -> bool {
506        false
507    }
508
509    fn is_eliminable_cast(&self) -> bool {
510        false
511    }
512}
513
514impl fmt::Display for CastStringToList {
515    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
516        f.write_str("strtolist")
517    }
518}
519
520#[derive(
521    Ord,
522    PartialOrd,
523    Clone,
524    Debug,
525    Eq,
526    PartialEq,
527    Serialize,
528    Deserialize,
529    Hash,
530    MzReflect
531)]
532pub struct CastStringToMap {
533    // Target map's value type
534    pub return_ty: SqlScalarType,
535    // The expression used to cast the discovered values to the map's value
536    // type.
537    pub cast_expr: Box<MirScalarExpr>,
538}
539
540impl LazyUnaryFunc for CastStringToMap {
541    fn eval<'a>(
542        &'a self,
543        datums: &[Datum<'a>],
544        temp_storage: &'a RowArena,
545        a: &'a MirScalarExpr,
546    ) -> Result<Datum<'a>, EvalError> {
547        let a = a.eval(datums, temp_storage)?;
548        if a.is_null() {
549            return Ok(Datum::Null);
550        }
551        let parsed_map = strconv::parse_map(
552            a.unwrap_str(),
553            matches!(
554                self.return_ty.unwrap_map_value_type(),
555                SqlScalarType::Map { .. }
556            ),
557            |value_text| -> Result<Datum, EvalError> {
558                let value_text = match value_text {
559                    Some(Cow::Owned(s)) => Datum::String(temp_storage.push_string(s)),
560                    Some(Cow::Borrowed(s)) => Datum::String(s),
561                    None => Datum::Null,
562                };
563                self.cast_expr.eval(&[value_text], temp_storage)
564            },
565        )?;
566        let mut pairs: Vec<(String, Datum)> = parsed_map.into_iter().map(|(k, v)| (k, v)).collect();
567        pairs.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2));
568        pairs.dedup_by(|(k1, _v1), (k2, _v2)| k1 == k2);
569        Ok(temp_storage.make_datum(|packer| {
570            packer.push_dict_with(|packer| {
571                for (k, v) in pairs {
572                    packer.push(Datum::String(&k));
573                    packer.push(v);
574                }
575            })
576        }))
577    }
578
579    /// The output SqlColumnType of this function
580    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
581        self.return_ty.clone().nullable(input_type.nullable)
582    }
583
584    /// Whether this function will produce NULL on NULL input
585    fn propagates_nulls(&self) -> bool {
586        true
587    }
588
589    /// Whether this function will produce NULL on non-NULL input
590    fn introduces_nulls(&self) -> bool {
591        false
592    }
593
594    /// Whether this function preserves uniqueness
595    fn preserves_uniqueness(&self) -> bool {
596        false
597    }
598
599    fn inverse(&self) -> Option<crate::UnaryFunc> {
600        to_unary!(super::CastMapToString {
601            ty: self.return_ty.clone(),
602        })
603    }
604
605    fn is_monotone(&self) -> bool {
606        false
607    }
608
609    fn is_eliminable_cast(&self) -> bool {
610        false
611    }
612}
613
614impl fmt::Display for CastStringToMap {
615    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
616        f.write_str("strtomap")
617    }
618}
619
620#[derive(
621    Ord,
622    PartialOrd,
623    Clone,
624    Debug,
625    Eq,
626    PartialEq,
627    Serialize,
628    Deserialize,
629    Hash,
630    MzReflect
631)]
632pub struct CastStringToChar {
633    pub length: Option<mz_repr::adt::char::CharLength>,
634    pub fail_on_len: bool,
635}
636
637impl EagerUnaryFunc for CastStringToChar {
638    type Input<'a> = &'a str;
639    type Output<'a> = Result<Char<String>, EvalError>;
640
641    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
642        let s = format_str_trim(a, self.length, self.fail_on_len).map_err(|_| {
643            assert!(self.fail_on_len);
644            EvalError::StringValueTooLong {
645                target_type: "character".into(),
646                length: usize::cast_from(self.length.unwrap().into_u32()),
647            }
648        })?;
649
650        Ok(Char(s))
651    }
652
653    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
654        SqlScalarType::Char {
655            length: self.length,
656        }
657        .nullable(input.nullable)
658    }
659
660    fn could_error(&self) -> bool {
661        self.fail_on_len && self.length.is_some()
662    }
663
664    fn inverse(&self) -> Option<crate::UnaryFunc> {
665        to_unary!(super::CastCharToString)
666    }
667
668    fn is_eliminable_cast(&self) -> bool {
669        // even when `length` is `None`, we'll trim whitespace at the end
670        false
671    }
672}
673
674impl fmt::Display for CastStringToChar {
675    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
676        match self.length {
677            Some(length) => {
678                write!(
679                    f,
680                    "text_to_char[len={}, fail_on_len={}]",
681                    length.into_u32(),
682                    self.fail_on_len
683                )
684            }
685            None => f.write_str("text_to_char[len=unbounded]"),
686        }
687    }
688}
689
690#[derive(
691    Ord,
692    PartialOrd,
693    Clone,
694    Debug,
695    Eq,
696    PartialEq,
697    Serialize,
698    Deserialize,
699    Hash,
700    MzReflect
701)]
702pub struct CastStringToRange {
703    // Target range's type
704    pub return_ty: SqlScalarType,
705    // The expression to cast the discovered range elements to the range's
706    // element type.
707    pub cast_expr: Box<MirScalarExpr>,
708}
709
710impl LazyUnaryFunc for CastStringToRange {
711    fn eval<'a>(
712        &'a self,
713        datums: &[Datum<'a>],
714        temp_storage: &'a RowArena,
715        a: &'a MirScalarExpr,
716    ) -> Result<Datum<'a>, EvalError> {
717        let a = a.eval(datums, temp_storage)?;
718        if a.is_null() {
719            return Ok(Datum::Null);
720        }
721        let mut range = strconv::parse_range(a.unwrap_str(), |elem_text| {
722            let elem_text = match elem_text {
723                Cow::Owned(s) => temp_storage.push_string(s),
724                Cow::Borrowed(s) => s,
725            };
726            self.cast_expr
727                .eval(&[Datum::String(elem_text)], temp_storage)
728        })?;
729
730        range.canonicalize()?;
731
732        Ok(temp_storage.make_datum(|packer| {
733            packer
734                .push_range(range)
735                .expect("must have already handled errors")
736        }))
737    }
738
739    /// The output SqlColumnType of this function
740    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
741        self.return_ty
742            .without_modifiers()
743            .nullable(input_type.nullable)
744    }
745
746    /// Whether this function will produce NULL on NULL input
747    fn propagates_nulls(&self) -> bool {
748        true
749    }
750
751    /// Whether this function will produce NULL on non-NULL input
752    fn introduces_nulls(&self) -> bool {
753        false
754    }
755
756    /// Whether this function preserves uniqueness
757    fn preserves_uniqueness(&self) -> bool {
758        false
759    }
760
761    fn inverse(&self) -> Option<crate::UnaryFunc> {
762        to_unary!(super::CastRangeToString {
763            ty: self.return_ty.clone(),
764        })
765    }
766
767    fn is_monotone(&self) -> bool {
768        false
769    }
770
771    fn is_eliminable_cast(&self) -> bool {
772        false
773    }
774}
775
776impl fmt::Display for CastStringToRange {
777    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
778        f.write_str("strtorange")
779    }
780}
781
782#[derive(
783    Ord,
784    PartialOrd,
785    Clone,
786    Debug,
787    Eq,
788    PartialEq,
789    Serialize,
790    Deserialize,
791    Hash,
792    MzReflect
793)]
794pub struct CastStringToVarChar {
795    pub length: Option<VarCharMaxLength>,
796    pub fail_on_len: bool,
797}
798
799impl EagerUnaryFunc for CastStringToVarChar {
800    type Input<'a> = &'a str;
801    type Output<'a> = Result<VarChar<&'a str>, EvalError>;
802
803    fn call<'a>(&self, a: Self::Input<'a>) -> Self::Output<'a> {
804        let s =
805            mz_repr::adt::varchar::format_str(a, self.length, self.fail_on_len).map_err(|_| {
806                assert!(self.fail_on_len);
807                EvalError::StringValueTooLong {
808                    target_type: "character varying".into(),
809                    length: usize::cast_from(self.length.unwrap().into_u32()),
810                }
811            })?;
812
813        Ok(VarChar(s))
814    }
815
816    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
817        SqlScalarType::VarChar {
818            max_length: self.length,
819        }
820        .nullable(input.nullable)
821    }
822
823    fn could_error(&self) -> bool {
824        self.fail_on_len && self.length.is_some()
825    }
826
827    fn preserves_uniqueness(&self) -> bool {
828        self.length.is_none()
829    }
830
831    fn inverse(&self) -> Option<crate::UnaryFunc> {
832        to_unary!(super::CastVarCharToString)
833    }
834
835    fn is_eliminable_cast(&self) -> bool {
836        self.length.is_none()
837    }
838}
839
840impl fmt::Display for CastStringToVarChar {
841    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
842        match self.length {
843            Some(length) => {
844                write!(
845                    f,
846                    "text_to_varchar[len={}, fail_on_len={}]",
847                    length.into_u32(),
848                    self.fail_on_len
849                )
850            }
851            None => f.write_str("text_to_varchar[len=unbounded]"),
852        }
853    }
854}
855
856// If we support another vector type, this should likely get hoisted into a
857// position akin to array parsing.
858static INT2VECTOR_CAST_EXPR: LazyLock<MirScalarExpr> = LazyLock::new(|| MirScalarExpr::CallUnary {
859    func: UnaryFunc::CastStringToInt16(CastStringToInt16),
860    expr: Box::new(MirScalarExpr::column(0)),
861});
862
863#[derive(
864    Ord,
865    PartialOrd,
866    Clone,
867    Debug,
868    Eq,
869    PartialEq,
870    Serialize,
871    Deserialize,
872    Hash,
873    MzReflect
874)]
875pub struct CastStringToInt2Vector;
876
877impl LazyUnaryFunc for CastStringToInt2Vector {
878    fn eval<'a>(
879        &'a self,
880        datums: &[Datum<'a>],
881        temp_storage: &'a RowArena,
882        a: &'a MirScalarExpr,
883    ) -> Result<Datum<'a>, EvalError> {
884        let a = a.eval(datums, temp_storage)?;
885        if a.is_null() {
886            return Ok(Datum::Null);
887        }
888
889        let datums = strconv::parse_legacy_vector(a.unwrap_str(), |elem_text| {
890            let elem_text = match elem_text {
891                Cow::Owned(s) => temp_storage.push_string(s),
892                Cow::Borrowed(s) => s,
893            };
894            INT2VECTOR_CAST_EXPR.eval(&[Datum::String(elem_text)], temp_storage)
895        })?;
896        array_create_scalar(&datums, temp_storage)
897    }
898
899    /// The output SqlColumnType of this function
900    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
901        SqlScalarType::Int2Vector.nullable(input_type.nullable)
902    }
903
904    /// Whether this function will produce NULL on NULL input
905    fn propagates_nulls(&self) -> bool {
906        true
907    }
908
909    /// Whether this function will produce NULL on non-NULL input
910    fn introduces_nulls(&self) -> bool {
911        false
912    }
913
914    /// Whether this function preserves uniqueness
915    fn preserves_uniqueness(&self) -> bool {
916        false
917    }
918
919    fn inverse(&self) -> Option<crate::UnaryFunc> {
920        to_unary!(super::CastInt2VectorToString)
921    }
922
923    fn is_monotone(&self) -> bool {
924        false
925    }
926
927    fn is_eliminable_cast(&self) -> bool {
928        false
929    }
930}
931
932impl fmt::Display for CastStringToInt2Vector {
933    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
934        f.write_str("strtoint2vector")
935    }
936}
937
938#[sqlfunc(
939    sqlname = "text_to_jsonb",
940    preserves_uniqueness = false,
941    inverse = to_unary!(super::CastJsonbToString)
942)]
943// TODO(jamii): it would be much more efficient to skip the intermediate repr::jsonb::Jsonb.
944fn cast_string_to_jsonb<'a>(a: &'a str) -> Result<Jsonb, EvalError> {
945    Ok(strconv::parse_jsonb(a)?)
946}
947
948#[sqlfunc(sqlname = "btrim")]
949fn trim_whitespace<'a>(a: &'a str) -> &'a str {
950    a.trim_matches(' ')
951}
952
953#[sqlfunc(sqlname = "ltrim")]
954fn trim_leading_whitespace<'a>(a: &'a str) -> &'a str {
955    a.trim_start_matches(' ')
956}
957
958#[sqlfunc(sqlname = "rtrim")]
959fn trim_trailing_whitespace<'a>(a: &'a str) -> &'a str {
960    a.trim_end_matches(' ')
961}
962
963#[sqlfunc(sqlname = "initcap")]
964fn initcap<'a>(a: &'a str) -> String {
965    let mut out = String::new();
966    let mut capitalize_next = true;
967    for ch in a.chars() {
968        if capitalize_next {
969            out.extend(ch.to_uppercase())
970        } else {
971            out.extend(ch.to_lowercase())
972        };
973        capitalize_next = !ch.is_alphanumeric();
974    }
975    out
976}
977
978#[sqlfunc(sqlname = "ascii")]
979fn ascii<'a>(a: &'a str) -> i32 {
980    a.chars()
981        .next()
982        .and_then(|c| i32::try_from(u32::from(c)).ok())
983        .unwrap_or(0)
984}
985
986#[sqlfunc(sqlname = "char_length")]
987fn char_length<'a>(a: &'a str) -> Result<i32, EvalError> {
988    let length = a.chars().count();
989    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
990}
991
992#[sqlfunc(sqlname = "bit_length")]
993fn bit_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
994    let length = a.as_bytes().len() * 8;
995    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
996}
997
998#[sqlfunc(sqlname = "octet_length")]
999fn byte_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
1000    let length = a.as_bytes().len();
1001    i32::try_from(length).or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
1002}
1003
1004#[sqlfunc]
1005fn upper<'a>(a: &'a str) -> String {
1006    a.to_uppercase()
1007}
1008
1009#[sqlfunc]
1010fn lower<'a>(a: &'a str) -> String {
1011    a.to_lowercase()
1012}
1013
1014#[sqlfunc]
1015fn normalize(text: &str, form_str: &str) -> Result<String, EvalError> {
1016    use unicode_normalization::UnicodeNormalization;
1017
1018    match form_str.to_uppercase().as_str() {
1019        "NFC" => Ok(text.nfc().collect()),
1020        "NFD" => Ok(text.nfd().collect()),
1021        "NFKC" => Ok(text.nfkc().collect()),
1022        "NFKD" => Ok(text.nfkd().collect()),
1023        _ => Err(EvalError::InvalidParameterValue(
1024            format!("invalid normalization form: {}", form_str).into(),
1025        )),
1026    }
1027}
1028
1029#[derive(
1030    Ord,
1031    PartialOrd,
1032    Clone,
1033    Debug,
1034    Eq,
1035    PartialEq,
1036    Serialize,
1037    Deserialize,
1038    Hash,
1039    MzReflect
1040)]
1041pub struct IsLikeMatch(pub like_pattern::Matcher);
1042
1043impl EagerUnaryFunc for IsLikeMatch {
1044    type Input<'a> = &'a str;
1045    type Output<'a> = bool;
1046
1047    fn call<'a>(&self, haystack: Self::Input<'a>) -> Self::Output<'a> {
1048        self.0.is_match(haystack)
1049    }
1050
1051    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
1052        SqlScalarType::Bool.nullable(input.nullable)
1053    }
1054}
1055
1056impl fmt::Display for IsLikeMatch {
1057    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1058        write!(
1059            f,
1060            "{}like[{}]",
1061            if self.0.case_insensitive { "i" } else { "" },
1062            self.0.pattern.escaped()
1063        )
1064    }
1065}
1066
1067#[derive(
1068    Ord,
1069    PartialOrd,
1070    Clone,
1071    Debug,
1072    Eq,
1073    PartialEq,
1074    Serialize,
1075    Deserialize,
1076    Hash,
1077    MzReflect
1078)]
1079pub struct IsRegexpMatch(pub Regex);
1080
1081impl EagerUnaryFunc for IsRegexpMatch {
1082    type Input<'a> = &'a str;
1083    type Output<'a> = bool;
1084
1085    fn call<'a>(&self, haystack: Self::Input<'a>) -> Self::Output<'a> {
1086        self.0.is_match(haystack)
1087    }
1088
1089    fn output_sql_type(&self, input: SqlColumnType) -> SqlColumnType {
1090        SqlScalarType::Bool.nullable(input.nullable)
1091    }
1092}
1093
1094impl fmt::Display for IsRegexpMatch {
1095    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1096        write!(
1097            f,
1098            "is_regexp_match[{}, case_insensitive={}]",
1099            self.0.pattern().escaped(),
1100            self.0.case_insensitive
1101        )
1102    }
1103}
1104
1105#[derive(
1106    Ord,
1107    PartialOrd,
1108    Clone,
1109    Debug,
1110    Eq,
1111    PartialEq,
1112    Serialize,
1113    Deserialize,
1114    Hash,
1115    MzReflect
1116)]
1117pub struct RegexpMatch(pub Regex);
1118
1119impl LazyUnaryFunc for RegexpMatch {
1120    fn eval<'a>(
1121        &'a self,
1122        datums: &[Datum<'a>],
1123        temp_storage: &'a RowArena,
1124        a: &'a MirScalarExpr,
1125    ) -> Result<Datum<'a>, EvalError> {
1126        let haystack = a.eval(datums, temp_storage)?;
1127        if haystack.is_null() {
1128            return Ok(Datum::Null);
1129        }
1130        regexp_match_static(haystack, temp_storage, &self.0)
1131    }
1132
1133    /// The output SqlColumnType of this function
1134    fn output_sql_type(&self, _input_type: SqlColumnType) -> SqlColumnType {
1135        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(true)
1136    }
1137
1138    /// Whether this function will produce NULL on NULL input
1139    fn propagates_nulls(&self) -> bool {
1140        true
1141    }
1142
1143    /// Whether this function will produce NULL on non-NULL input
1144    fn introduces_nulls(&self) -> bool {
1145        // Returns null if the regex did not match
1146        true
1147    }
1148
1149    /// Whether this function preserves uniqueness
1150    fn preserves_uniqueness(&self) -> bool {
1151        false
1152    }
1153
1154    fn inverse(&self) -> Option<crate::UnaryFunc> {
1155        None
1156    }
1157
1158    fn is_monotone(&self) -> bool {
1159        false
1160    }
1161
1162    fn is_eliminable_cast(&self) -> bool {
1163        false
1164    }
1165}
1166
1167impl fmt::Display for RegexpMatch {
1168    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1169        write!(
1170            f,
1171            "regexp_match[{}, case_insensitive={}]",
1172            self.0.pattern().escaped(),
1173            self.0.case_insensitive
1174        )
1175    }
1176}
1177
1178#[derive(
1179    Ord,
1180    PartialOrd,
1181    Clone,
1182    Debug,
1183    Eq,
1184    PartialEq,
1185    Serialize,
1186    Deserialize,
1187    Hash,
1188    MzReflect
1189)]
1190pub struct RegexpSplitToArray(pub Regex);
1191
1192impl LazyUnaryFunc for RegexpSplitToArray {
1193    fn eval<'a>(
1194        &'a self,
1195        datums: &[Datum<'a>],
1196        temp_storage: &'a RowArena,
1197        a: &'a MirScalarExpr,
1198    ) -> Result<Datum<'a>, EvalError> {
1199        let haystack = a.eval(datums, temp_storage)?;
1200        if haystack.is_null() {
1201            return Ok(Datum::Null);
1202        }
1203        regexp_split_to_array_re(haystack.unwrap_str(), &self.0, temp_storage)
1204    }
1205
1206    /// The output SqlColumnType of this function
1207    fn output_sql_type(&self, input_type: SqlColumnType) -> SqlColumnType {
1208        SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(input_type.nullable)
1209    }
1210
1211    /// Whether this function will produce NULL on NULL input
1212    fn propagates_nulls(&self) -> bool {
1213        true
1214    }
1215
1216    /// Whether this function will produce NULL on non-NULL input
1217    fn introduces_nulls(&self) -> bool {
1218        false
1219    }
1220
1221    /// Whether this function preserves uniqueness
1222    fn preserves_uniqueness(&self) -> bool {
1223        false
1224    }
1225
1226    fn inverse(&self) -> Option<crate::UnaryFunc> {
1227        None
1228    }
1229
1230    fn is_monotone(&self) -> bool {
1231        false
1232    }
1233
1234    fn is_eliminable_cast(&self) -> bool {
1235        false
1236    }
1237}
1238
1239impl fmt::Display for RegexpSplitToArray {
1240    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1241        write!(
1242            f,
1243            "regexp_split_to_array[{}, case_insensitive={}]",
1244            self.0.pattern().escaped(),
1245            self.0.case_insensitive
1246        )
1247    }
1248}
1249
1250#[sqlfunc(sqlname = "mz_panic")]
// Deliberately crashes: writes `a` to standard output and then panics with
// `a` as the panic message. It never returns normally, despite the `String`
// return type.
fn panic<'a>(a: &'a str) -> String {
    let message = a;
    print!("{}", message);
    panic!("{}", message)
}
1255
1256#[sqlfunc(sqlname = "quote_ident", preserves_uniqueness = true)]
1257fn quote_ident<'a>(a: &'a str) -> Result<String, EvalError> {
1258    let i = mz_sql_parser::ast::Ident::new(a).map_err(|err| EvalError::InvalidIdentifier {
1259        ident: a.into(),
1260        detail: Some(err.to_string().into()),
1261    })?;
1262    Ok(i.to_string())
1263}
1264
1265#[derive(
1266    Ord,
1267    PartialOrd,
1268    Clone,
1269    Debug,
1270    Eq,
1271    PartialEq,
1272    Serialize,
1273    Deserialize,
1274    Hash,
1275    MzReflect
1276)]
1277pub struct RegexpReplace {
1278    pub regex: Regex,
1279    pub limit: usize,
1280}
1281
1282impl binary::EagerBinaryFunc for RegexpReplace {
1283    type Input<'a> = (&'a str, &'a str);
1284    type Output<'a> = Cow<'a, str>;
1285
1286    fn call<'a>(
1287        &self,
1288        (source, replacement): Self::Input<'a>,
1289        _temp_storage: &'a RowArena,
1290    ) -> Self::Output<'a> {
1291        // WARNING: This function has potential OOM risk if used with an inflationary
1292        // replacement pattern. It is very difficult to calculate the output size ahead
1293        // of time because the replacement pattern may depend on capture groups.
1294        self.regex.replacen(source, self.limit, replacement)
1295    }
1296
1297    fn output_sql_type(&self, input_types: &[SqlColumnType]) -> SqlColumnType {
1298        use mz_repr::AsColumnType;
1299        let output = <Self::Output<'_> as AsColumnType>::as_column_type();
1300        let propagates_nulls = binary::EagerBinaryFunc::propagates_nulls(self);
1301        let nullable = output.nullable;
1302        let input_nullable = input_types.iter().any(|t| t.nullable);
1303        output.nullable(nullable || (propagates_nulls && input_nullable))
1304    }
1305}
1306
1307impl fmt::Display for RegexpReplace {
1308    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1309        write!(
1310            f,
1311            "regexp_replace[{}, case_insensitive={}, limit={}]",
1312            self.regex.pattern().escaped(),
1313            self.regex.case_insensitive,
1314            self.limit
1315        )
1316    }
1317}