mz_expr/scalar/func/impls/
string.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::fmt;
12use std::sync::LazyLock;
13
14use chrono::{DateTime, NaiveDateTime, NaiveTime, Utc};
15use mz_lowertest::MzReflect;
16use mz_ore::cast::CastFrom;
17use mz_ore::result::ResultExt;
18use mz_ore::str::StrExt;
19use mz_repr::adt::char::{Char, format_str_trim};
20use mz_repr::adt::date::Date;
21use mz_repr::adt::interval::Interval;
22use mz_repr::adt::jsonb::Jsonb;
23use mz_repr::adt::numeric::{self, Numeric, NumericMaxScale};
24use mz_repr::adt::pg_legacy_name::PgLegacyName;
25use mz_repr::adt::regex::Regex;
26use mz_repr::adt::system::{Oid, PgLegacyChar};
27use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampPrecision};
28use mz_repr::adt::varchar::{VarChar, VarCharMaxLength};
29use mz_repr::{ColumnType, Datum, RowArena, ScalarType, strconv};
30use proptest_derive::Arbitrary;
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use crate::func::regexp_match_static;
35use crate::scalar::func::{
36    EagerUnaryFunc, LazyUnaryFunc, array_create_scalar, regexp_split_to_array_re,
37};
38use crate::{EvalError, MirScalarExpr, UnaryFunc, like_pattern};
39
40sqlfunc!(
41    #[sqlname = "text_to_boolean"]
42    #[preserves_uniqueness = false]
43    #[inverse = to_unary!(super::CastBoolToString)]
44    fn cast_string_to_bool<'a>(a: &'a str) -> Result<bool, EvalError> {
45        strconv::parse_bool(a).err_into()
46    }
47);
48
49sqlfunc!(
50    #[sqlname = "text_to_\"char\""]
51    #[preserves_uniqueness = true]
52    #[inverse = to_unary!(super::CastPgLegacyCharToString)]
53    fn cast_string_to_pg_legacy_char<'a>(a: &'a str) -> PgLegacyChar {
54        PgLegacyChar(a.as_bytes().get(0).copied().unwrap_or(0))
55    }
56);
57
58sqlfunc!(
59    #[sqlname = "text_to_name"]
60    #[preserves_uniqueness = true]
61    fn cast_string_to_pg_legacy_name<'a>(a: &'a str) -> PgLegacyName<String> {
62        PgLegacyName(strconv::parse_pg_legacy_name(a))
63    }
64);
65
66sqlfunc!(
67    #[sqlname = "text_to_bytea"]
68    #[preserves_uniqueness = true]
69    #[inverse = to_unary!(super::CastBytesToString)]
70    fn cast_string_to_bytes<'a>(a: &'a str) -> Result<Vec<u8>, EvalError> {
71        strconv::parse_bytes(a).err_into()
72    }
73);
74
75sqlfunc!(
76    #[sqlname = "text_to_smallint"]
77    #[preserves_uniqueness = false]
78    #[inverse = to_unary!(super::CastInt16ToString)]
79    fn cast_string_to_int16<'a>(a: &'a str) -> Result<i16, EvalError> {
80        strconv::parse_int16(a).err_into()
81    }
82);
83
84sqlfunc!(
85    #[sqlname = "text_to_integer"]
86    #[preserves_uniqueness = false]
87    #[inverse = to_unary!(super::CastInt32ToString)]
88    fn cast_string_to_int32<'a>(a: &'a str) -> Result<i32, EvalError> {
89        strconv::parse_int32(a).err_into()
90    }
91);
92
93sqlfunc!(
94    #[sqlname = "text_to_bigint"]
95    #[preserves_uniqueness = false]
96    #[inverse = to_unary!(super::CastInt64ToString)]
97    fn cast_string_to_int64<'a>(a: &'a str) -> Result<i64, EvalError> {
98        strconv::parse_int64(a).err_into()
99    }
100);
101
102sqlfunc!(
103    #[sqlname = "text_to_real"]
104    #[preserves_uniqueness = false]
105    #[inverse = to_unary!(super::CastFloat32ToString)]
106    fn cast_string_to_float32<'a>(a: &'a str) -> Result<f32, EvalError> {
107        strconv::parse_float32(a).err_into()
108    }
109);
110
111sqlfunc!(
112    #[sqlname = "text_to_double"]
113    #[preserves_uniqueness = false]
114    #[inverse = to_unary!(super::CastFloat64ToString)]
115    fn cast_string_to_float64<'a>(a: &'a str) -> Result<f64, EvalError> {
116        strconv::parse_float64(a).err_into()
117    }
118);
119
120sqlfunc!(
121    #[sqlname = "text_to_oid"]
122    #[preserves_uniqueness = false]
123    #[inverse = to_unary!(super::CastOidToString)]
124    fn cast_string_to_oid<'a>(a: &'a str) -> Result<Oid, EvalError> {
125        Ok(Oid(strconv::parse_oid(a)?))
126    }
127);
128
129sqlfunc!(
130    #[sqlname = "text_to_uint2"]
131    #[preserves_uniqueness = false]
132    #[inverse = to_unary!(super::CastUint16ToString)]
133    fn cast_string_to_uint16(a: &'a str) -> Result<u16, EvalError> {
134        strconv::parse_uint16(a).err_into()
135    }
136);
137
138sqlfunc!(
139    #[sqlname = "text_to_uint4"]
140    #[preserves_uniqueness = false]
141    #[inverse = to_unary!(super::CastUint32ToString)]
142    fn cast_string_to_uint32(a: &'a str) -> Result<u32, EvalError> {
143        strconv::parse_uint32(a).err_into()
144    }
145);
146
147sqlfunc!(
148    #[sqlname = "text_to_uint8"]
149    #[preserves_uniqueness = false]
150    #[inverse = to_unary!(super::CastUint64ToString)]
151    fn cast_string_to_uint64(a: &'a str) -> Result<u64, EvalError> {
152        strconv::parse_uint64(a).err_into()
153    }
154);
155
156sqlfunc!(
157    #[sqlname = "reverse"]
158    fn reverse<'a>(a: &'a str) -> String {
159        a.chars().rev().collect()
160    }
161);
162
163#[derive(
164    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
165)]
166pub struct CastStringToNumeric(pub Option<NumericMaxScale>);
167
168impl<'a> EagerUnaryFunc<'a> for CastStringToNumeric {
169    type Input = &'a str;
170    type Output = Result<Numeric, EvalError>;
171
172    fn call(&self, a: &'a str) -> Result<Numeric, EvalError> {
173        let mut d = strconv::parse_numeric(a)?;
174        if let Some(scale) = self.0 {
175            if numeric::rescale(&mut d.0, scale.into_u8()).is_err() {
176                return Err(EvalError::NumericFieldOverflow);
177            }
178        }
179        Ok(d.into_inner())
180    }
181
182    fn output_type(&self, input: ColumnType) -> ColumnType {
183        ScalarType::Numeric { max_scale: self.0 }.nullable(input.nullable)
184    }
185
186    fn inverse(&self) -> Option<crate::UnaryFunc> {
187        to_unary!(super::CastNumericToString)
188    }
189}
190
191impl fmt::Display for CastStringToNumeric {
192    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193        f.write_str("text_to_numeric")
194    }
195}
196
197sqlfunc!(
198    #[sqlname = "text_to_date"]
199    #[preserves_uniqueness = false]
200    #[inverse = to_unary!(super::CastDateToString)]
201    fn cast_string_to_date<'a>(a: &'a str) -> Result<Date, EvalError> {
202        strconv::parse_date(a).err_into()
203    }
204);
205
206sqlfunc!(
207    #[sqlname = "text_to_time"]
208    #[preserves_uniqueness = false]
209    #[inverse = to_unary!(super::CastTimeToString)]
210    fn cast_string_to_time<'a>(a: &'a str) -> Result<NaiveTime, EvalError> {
211        strconv::parse_time(a).err_into()
212    }
213);
214
215#[derive(
216    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
217)]
218pub struct CastStringToTimestamp(pub Option<TimestampPrecision>);
219
220impl<'a> EagerUnaryFunc<'a> for CastStringToTimestamp {
221    type Input = &'a str;
222    type Output = Result<CheckedTimestamp<NaiveDateTime>, EvalError>;
223
224    fn call(&self, a: &'a str) -> Result<CheckedTimestamp<NaiveDateTime>, EvalError> {
225        let out = strconv::parse_timestamp(a)?;
226        let updated = out.round_to_precision(self.0)?;
227        Ok(updated)
228    }
229
230    fn output_type(&self, input: ColumnType) -> ColumnType {
231        ScalarType::Timestamp { precision: self.0 }.nullable(input.nullable)
232    }
233
234    fn inverse(&self) -> Option<crate::UnaryFunc> {
235        to_unary!(super::CastTimestampToString)
236    }
237}
238
239impl fmt::Display for CastStringToTimestamp {
240    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
241        f.write_str("text_to_timestamp")
242    }
243}
244
245sqlfunc!(
246    #[sqlname = "try_parse_monotonic_iso8601_timestamp"]
247    // TODO: Pretty sure this preserves uniqueness, but not 100%.
248    //
249    // Ironically, even though this has "monotonic" in the name, it's not quite
250    // eligible for `#[is_monotone = true]` because any input could also be
251    // mapped to null. So, handle it via SpecialUnary in the interpreter.
252    fn try_parse_monotonic_iso8601_timestamp<'a>(
253        a: &'a str,
254    ) -> Option<CheckedTimestamp<NaiveDateTime>> {
255        let ts = mz_persist_types::timestamp::try_parse_monotonic_iso8601_timestamp(a)?;
256        let ts = CheckedTimestamp::from_timestamplike(ts)
257            .expect("monotonic_iso8601 range is a subset of CheckedTimestamp domain");
258        Some(ts)
259    }
260);
261
262#[derive(
263    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
264)]
265pub struct CastStringToTimestampTz(pub Option<TimestampPrecision>);
266
267impl<'a> EagerUnaryFunc<'a> for CastStringToTimestampTz {
268    type Input = &'a str;
269    type Output = Result<CheckedTimestamp<DateTime<Utc>>, EvalError>;
270
271    fn call(&self, a: &'a str) -> Result<CheckedTimestamp<DateTime<Utc>>, EvalError> {
272        let out = strconv::parse_timestamptz(a)?;
273        let updated = out.round_to_precision(self.0)?;
274        Ok(updated)
275    }
276
277    fn output_type(&self, input: ColumnType) -> ColumnType {
278        ScalarType::TimestampTz { precision: self.0 }.nullable(input.nullable)
279    }
280
281    fn inverse(&self) -> Option<crate::UnaryFunc> {
282        to_unary!(super::CastTimestampTzToString)
283    }
284}
285
286impl fmt::Display for CastStringToTimestampTz {
287    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
288        f.write_str("text_to_timestamp_with_time_zone")
289    }
290}
291
292sqlfunc!(
293    #[sqlname = "text_to_interval"]
294    #[preserves_uniqueness = false]
295    #[inverse = to_unary!(super::CastIntervalToString)]
296    fn cast_string_to_interval<'a>(a: &'a str) -> Result<Interval, EvalError> {
297        strconv::parse_interval(a).err_into()
298    }
299);
300
301sqlfunc!(
302    #[sqlname = "text_to_uuid"]
303    #[preserves_uniqueness = false]
304    #[inverse = to_unary!(super::CastUuidToString)]
305    fn cast_string_to_uuid<'a>(a: &'a str) -> Result<Uuid, EvalError> {
306        strconv::parse_uuid(a).err_into()
307    }
308);
309
310#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
311pub struct CastStringToArray {
312    // Target array's type.
313    pub return_ty: ScalarType,
314    // The expression to cast the discovered array elements to the array's
315    // element type.
316    pub cast_expr: Box<MirScalarExpr>,
317}
318
319impl LazyUnaryFunc for CastStringToArray {
320    fn eval<'a>(
321        &'a self,
322        datums: &[Datum<'a>],
323        temp_storage: &'a RowArena,
324        a: &'a MirScalarExpr,
325    ) -> Result<Datum<'a>, EvalError> {
326        let a = a.eval(datums, temp_storage)?;
327        if a.is_null() {
328            return Ok(Datum::Null);
329        }
330        let (datums, dims) = strconv::parse_array(
331            a.unwrap_str(),
332            || Datum::Null,
333            |elem_text| {
334                let elem_text = match elem_text {
335                    Cow::Owned(s) => temp_storage.push_string(s),
336                    Cow::Borrowed(s) => s,
337                };
338                self.cast_expr
339                    .eval(&[Datum::String(elem_text)], temp_storage)
340            },
341        )?;
342
343        Ok(temp_storage.try_make_datum(|packer| packer.try_push_array(&dims, datums))?)
344    }
345
346    /// The output ColumnType of this function
347    fn output_type(&self, input_type: ColumnType) -> ColumnType {
348        self.return_ty.clone().nullable(input_type.nullable)
349    }
350
351    /// Whether this function will produce NULL on NULL input
352    fn propagates_nulls(&self) -> bool {
353        true
354    }
355
356    /// Whether this function will produce NULL on non-NULL input
357    fn introduces_nulls(&self) -> bool {
358        false
359    }
360
361    /// Whether this function preserves uniqueness
362    fn preserves_uniqueness(&self) -> bool {
363        false
364    }
365
366    fn inverse(&self) -> Option<crate::UnaryFunc> {
367        to_unary!(super::CastArrayToString {
368            ty: self.return_ty.clone(),
369        })
370    }
371
372    fn is_monotone(&self) -> bool {
373        false
374    }
375}
376
377impl fmt::Display for CastStringToArray {
378    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
379        f.write_str("strtoarray")
380    }
381}
382
383#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
384pub struct CastStringToList {
385    // Target list's type
386    pub return_ty: ScalarType,
387    // The expression to cast the discovered list elements to the list's
388    // element type.
389    pub cast_expr: Box<MirScalarExpr>,
390}
391
392impl LazyUnaryFunc for CastStringToList {
393    fn eval<'a>(
394        &'a self,
395        datums: &[Datum<'a>],
396        temp_storage: &'a RowArena,
397        a: &'a MirScalarExpr,
398    ) -> Result<Datum<'a>, EvalError> {
399        let a = a.eval(datums, temp_storage)?;
400        if a.is_null() {
401            return Ok(Datum::Null);
402        }
403        let parsed_datums = strconv::parse_list(
404            a.unwrap_str(),
405            matches!(
406                self.return_ty.unwrap_list_element_type(),
407                ScalarType::List { .. }
408            ),
409            || Datum::Null,
410            |elem_text| {
411                let elem_text = match elem_text {
412                    Cow::Owned(s) => temp_storage.push_string(s),
413                    Cow::Borrowed(s) => s,
414                };
415                self.cast_expr
416                    .eval(&[Datum::String(elem_text)], temp_storage)
417            },
418        )?;
419
420        Ok(temp_storage.make_datum(|packer| packer.push_list(parsed_datums)))
421    }
422
423    /// The output ColumnType of this function
424    fn output_type(&self, input_type: ColumnType) -> ColumnType {
425        self.return_ty
426            .without_modifiers()
427            .nullable(input_type.nullable)
428    }
429
430    /// Whether this function will produce NULL on NULL input
431    fn propagates_nulls(&self) -> bool {
432        true
433    }
434
435    /// Whether this function will produce NULL on non-NULL input
436    fn introduces_nulls(&self) -> bool {
437        false
438    }
439
440    /// Whether this function preserves uniqueness
441    fn preserves_uniqueness(&self) -> bool {
442        false
443    }
444
445    fn inverse(&self) -> Option<crate::UnaryFunc> {
446        to_unary!(super::CastListToString {
447            ty: self.return_ty.clone(),
448        })
449    }
450
451    fn is_monotone(&self) -> bool {
452        false
453    }
454}
455
456impl fmt::Display for CastStringToList {
457    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
458        f.write_str("strtolist")
459    }
460}
461
462#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
463pub struct CastStringToMap {
464    // Target map's value type
465    pub return_ty: ScalarType,
466    // The expression used to cast the discovered values to the map's value
467    // type.
468    pub cast_expr: Box<MirScalarExpr>,
469}
470
471impl LazyUnaryFunc for CastStringToMap {
472    fn eval<'a>(
473        &'a self,
474        datums: &[Datum<'a>],
475        temp_storage: &'a RowArena,
476        a: &'a MirScalarExpr,
477    ) -> Result<Datum<'a>, EvalError> {
478        let a = a.eval(datums, temp_storage)?;
479        if a.is_null() {
480            return Ok(Datum::Null);
481        }
482        let parsed_map = strconv::parse_map(
483            a.unwrap_str(),
484            matches!(
485                self.return_ty.unwrap_map_value_type(),
486                ScalarType::Map { .. }
487            ),
488            |value_text| -> Result<Datum, EvalError> {
489                let value_text = match value_text {
490                    Some(Cow::Owned(s)) => Datum::String(temp_storage.push_string(s)),
491                    Some(Cow::Borrowed(s)) => Datum::String(s),
492                    None => Datum::Null,
493                };
494                self.cast_expr.eval(&[value_text], temp_storage)
495            },
496        )?;
497        let mut pairs: Vec<(String, Datum)> = parsed_map.into_iter().map(|(k, v)| (k, v)).collect();
498        pairs.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2));
499        pairs.dedup_by(|(k1, _v1), (k2, _v2)| k1 == k2);
500        Ok(temp_storage.make_datum(|packer| {
501            packer.push_dict_with(|packer| {
502                for (k, v) in pairs {
503                    packer.push(Datum::String(&k));
504                    packer.push(v);
505                }
506            })
507        }))
508    }
509
510    /// The output ColumnType of this function
511    fn output_type(&self, input_type: ColumnType) -> ColumnType {
512        self.return_ty.clone().nullable(input_type.nullable)
513    }
514
515    /// Whether this function will produce NULL on NULL input
516    fn propagates_nulls(&self) -> bool {
517        true
518    }
519
520    /// Whether this function will produce NULL on non-NULL input
521    fn introduces_nulls(&self) -> bool {
522        false
523    }
524
525    /// Whether this function preserves uniqueness
526    fn preserves_uniqueness(&self) -> bool {
527        false
528    }
529
530    fn inverse(&self) -> Option<crate::UnaryFunc> {
531        to_unary!(super::CastMapToString {
532            ty: self.return_ty.clone(),
533        })
534    }
535
536    fn is_monotone(&self) -> bool {
537        false
538    }
539}
540
541impl fmt::Display for CastStringToMap {
542    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
543        f.write_str("strtomap")
544    }
545}
546
547#[derive(
548    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
549)]
550pub struct CastStringToChar {
551    pub length: Option<mz_repr::adt::char::CharLength>,
552    pub fail_on_len: bool,
553}
554
555impl<'a> EagerUnaryFunc<'a> for CastStringToChar {
556    type Input = &'a str;
557    type Output = Result<Char<String>, EvalError>;
558
559    fn call(&self, a: &'a str) -> Result<Char<String>, EvalError> {
560        let s = format_str_trim(a, self.length, self.fail_on_len).map_err(|_| {
561            assert!(self.fail_on_len);
562            EvalError::StringValueTooLong {
563                target_type: "character".into(),
564                length: usize::cast_from(self.length.unwrap().into_u32()),
565            }
566        })?;
567
568        Ok(Char(s))
569    }
570
571    fn output_type(&self, input: ColumnType) -> ColumnType {
572        ScalarType::Char {
573            length: self.length,
574        }
575        .nullable(input.nullable)
576    }
577
578    fn could_error(&self) -> bool {
579        self.fail_on_len && self.length.is_some()
580    }
581
582    fn inverse(&self) -> Option<crate::UnaryFunc> {
583        to_unary!(super::CastCharToString)
584    }
585}
586
587impl fmt::Display for CastStringToChar {
588    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
589        match self.length {
590            Some(length) => {
591                write!(
592                    f,
593                    "text_to_char[len={}, fail_on_len={}]",
594                    length.into_u32(),
595                    self.fail_on_len
596                )
597            }
598            None => f.write_str("text_to_char[len=unbounded]"),
599        }
600    }
601}
602
603#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
604pub struct CastStringToRange {
605    // Target range's type
606    pub return_ty: ScalarType,
607    // The expression to cast the discovered range elements to the range's
608    // element type.
609    pub cast_expr: Box<MirScalarExpr>,
610}
611
612impl LazyUnaryFunc for CastStringToRange {
613    fn eval<'a>(
614        &'a self,
615        datums: &[Datum<'a>],
616        temp_storage: &'a RowArena,
617        a: &'a MirScalarExpr,
618    ) -> Result<Datum<'a>, EvalError> {
619        let a = a.eval(datums, temp_storage)?;
620        if a.is_null() {
621            return Ok(Datum::Null);
622        }
623        let mut range = strconv::parse_range(a.unwrap_str(), |elem_text| {
624            let elem_text = match elem_text {
625                Cow::Owned(s) => temp_storage.push_string(s),
626                Cow::Borrowed(s) => s,
627            };
628            self.cast_expr
629                .eval(&[Datum::String(elem_text)], temp_storage)
630        })?;
631
632        range.canonicalize()?;
633
634        Ok(temp_storage.make_datum(|packer| {
635            packer
636                .push_range(range)
637                .expect("must have already handled errors")
638        }))
639    }
640
641    /// The output ColumnType of this function
642    fn output_type(&self, input_type: ColumnType) -> ColumnType {
643        self.return_ty
644            .without_modifiers()
645            .nullable(input_type.nullable)
646    }
647
648    /// Whether this function will produce NULL on NULL input
649    fn propagates_nulls(&self) -> bool {
650        true
651    }
652
653    /// Whether this function will produce NULL on non-NULL input
654    fn introduces_nulls(&self) -> bool {
655        false
656    }
657
658    /// Whether this function preserves uniqueness
659    fn preserves_uniqueness(&self) -> bool {
660        false
661    }
662
663    fn inverse(&self) -> Option<crate::UnaryFunc> {
664        to_unary!(super::CastRangeToString {
665            ty: self.return_ty.clone(),
666        })
667    }
668
669    fn is_monotone(&self) -> bool {
670        false
671    }
672}
673
674impl fmt::Display for CastStringToRange {
675    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
676        f.write_str("strtorange")
677    }
678}
679
680#[derive(
681    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
682)]
683pub struct CastStringToVarChar {
684    pub length: Option<VarCharMaxLength>,
685    pub fail_on_len: bool,
686}
687
688impl<'a> EagerUnaryFunc<'a> for CastStringToVarChar {
689    type Input = &'a str;
690    type Output = Result<VarChar<&'a str>, EvalError>;
691
692    fn call(&self, a: &'a str) -> Result<VarChar<&'a str>, EvalError> {
693        let s =
694            mz_repr::adt::varchar::format_str(a, self.length, self.fail_on_len).map_err(|_| {
695                assert!(self.fail_on_len);
696                EvalError::StringValueTooLong {
697                    target_type: "character varying".into(),
698                    length: usize::cast_from(self.length.unwrap().into_u32()),
699                }
700            })?;
701
702        Ok(VarChar(s))
703    }
704
705    fn output_type(&self, input: ColumnType) -> ColumnType {
706        ScalarType::VarChar {
707            max_length: self.length,
708        }
709        .nullable(input.nullable)
710    }
711
712    fn could_error(&self) -> bool {
713        self.fail_on_len && self.length.is_some()
714    }
715
716    fn preserves_uniqueness(&self) -> bool {
717        !self.fail_on_len || self.length.is_none()
718    }
719
720    fn inverse(&self) -> Option<crate::UnaryFunc> {
721        to_unary!(super::CastVarCharToString)
722    }
723}
724
725impl fmt::Display for CastStringToVarChar {
726    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
727        match self.length {
728            Some(length) => {
729                write!(
730                    f,
731                    "text_to_varchar[len={}, fail_on_len={}]",
732                    length.into_u32(),
733                    self.fail_on_len
734                )
735            }
736            None => f.write_str("text_to_varchar[len=unbounded]"),
737        }
738    }
739}
740
// Lazily constructed expression that applies `CastStringToInt16` to column 0.
// `CastStringToInt2Vector::eval` evaluates it against a one-element row
// holding the text of a single vector element.
//
// If we support another vector type, this should likely get hoisted into a
// position akin to array parsing.
static INT2VECTOR_CAST_EXPR: LazyLock<MirScalarExpr> = LazyLock::new(|| MirScalarExpr::CallUnary {
    func: UnaryFunc::CastStringToInt16(CastStringToInt16),
    expr: Box::new(MirScalarExpr::Column(0)),
});
747
748#[derive(
749    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
750)]
751pub struct CastStringToInt2Vector;
752
753impl LazyUnaryFunc for CastStringToInt2Vector {
754    fn eval<'a>(
755        &'a self,
756        datums: &[Datum<'a>],
757        temp_storage: &'a RowArena,
758        a: &'a MirScalarExpr,
759    ) -> Result<Datum<'a>, EvalError> {
760        let a = a.eval(datums, temp_storage)?;
761        if a.is_null() {
762            return Ok(Datum::Null);
763        }
764
765        let datums = strconv::parse_legacy_vector(a.unwrap_str(), |elem_text| {
766            let elem_text = match elem_text {
767                Cow::Owned(s) => temp_storage.push_string(s),
768                Cow::Borrowed(s) => s,
769            };
770            INT2VECTOR_CAST_EXPR.eval(&[Datum::String(elem_text)], temp_storage)
771        })?;
772        array_create_scalar(&datums, temp_storage)
773    }
774
775    /// The output ColumnType of this function
776    fn output_type(&self, input_type: ColumnType) -> ColumnType {
777        ScalarType::Int2Vector.nullable(input_type.nullable)
778    }
779
780    /// Whether this function will produce NULL on NULL input
781    fn propagates_nulls(&self) -> bool {
782        true
783    }
784
785    /// Whether this function will produce NULL on non-NULL input
786    fn introduces_nulls(&self) -> bool {
787        false
788    }
789
790    /// Whether this function preserves uniqueness
791    fn preserves_uniqueness(&self) -> bool {
792        false
793    }
794
795    fn inverse(&self) -> Option<crate::UnaryFunc> {
796        to_unary!(super::CastInt2VectorToString)
797    }
798
799    fn is_monotone(&self) -> bool {
800        false
801    }
802}
803
804impl fmt::Display for CastStringToInt2Vector {
805    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
806        f.write_str("strtoint2vector")
807    }
808}
809
810sqlfunc!(
811    #[sqlname = "text_to_jsonb"]
812    #[preserves_uniqueness = false]
813    #[inverse = to_unary!(super::CastJsonbToString)]
814    // TODO(jamii): it would be much more efficient to skip the intermediate repr::jsonb::Jsonb.
815    fn cast_string_to_jsonb<'a>(a: &'a str) -> Result<Jsonb, EvalError> {
816        Ok(strconv::parse_jsonb(a)?)
817    }
818);
819
820sqlfunc!(
821    #[sqlname = "btrim"]
822    fn trim_whitespace<'a>(a: &'a str) -> &'a str {
823        a.trim_matches(' ')
824    }
825);
826
827sqlfunc!(
828    #[sqlname = "ltrim"]
829    fn trim_leading_whitespace<'a>(a: &'a str) -> &'a str {
830        a.trim_start_matches(' ')
831    }
832);
833
834sqlfunc!(
835    #[sqlname = "rtrim"]
836    fn trim_trailing_whitespace<'a>(a: &'a str) -> &'a str {
837        a.trim_end_matches(' ')
838    }
839);
840
841sqlfunc!(
842    #[sqlname = "initcap"]
843    fn initcap<'a>(a: &'a str) -> String {
844        let mut out = String::new();
845        let mut capitalize_next = true;
846        for ch in a.chars() {
847            if capitalize_next {
848                out.extend(ch.to_uppercase())
849            } else {
850                out.extend(ch.to_lowercase())
851            };
852            capitalize_next = !ch.is_alphanumeric();
853        }
854        out
855    }
856);
857
858sqlfunc!(
859    #[sqlname = "ascii"]
860    fn ascii<'a>(a: &'a str) -> i32 {
861        a.chars()
862            .next()
863            .and_then(|c| i32::try_from(u32::from(c)).ok())
864            .unwrap_or(0)
865    }
866);
867
868sqlfunc!(
869    #[sqlname = "char_length"]
870    fn char_length<'a>(a: &'a str) -> Result<i32, EvalError> {
871        let length = a.chars().count();
872        i32::try_from(length)
873            .or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
874    }
875);
876
877sqlfunc!(
878    #[sqlname = "bit_length"]
879    fn bit_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
880        let length = a.as_bytes().len() * 8;
881        i32::try_from(length)
882            .or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
883    }
884);
885
886sqlfunc!(
887    #[sqlname = "octet_length"]
888    fn byte_length_string<'a>(a: &'a str) -> Result<i32, EvalError> {
889        let length = a.as_bytes().len();
890        i32::try_from(length)
891            .or_else(|_| Err(EvalError::Int32OutOfRange(length.to_string().into())))
892    }
893);
894
895sqlfunc!(
896    fn upper<'a>(a: &'a str) -> String {
897        a.to_uppercase()
898    }
899);
900
901sqlfunc!(
902    fn lower<'a>(a: &'a str) -> String {
903        a.to_lowercase()
904    }
905);
906
907#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
908pub struct IsLikeMatch(pub like_pattern::Matcher);
909
910impl<'a> EagerUnaryFunc<'a> for IsLikeMatch {
911    type Input = &'a str;
912    type Output = bool;
913
914    fn call(&self, haystack: &'a str) -> bool {
915        self.0.is_match(haystack)
916    }
917
918    fn output_type(&self, input: ColumnType) -> ColumnType {
919        ScalarType::Bool.nullable(input.nullable)
920    }
921}
922
923impl fmt::Display for IsLikeMatch {
924    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
925        write!(
926            f,
927            "{}like[{}]",
928            if self.0.case_insensitive { "i" } else { "" },
929            self.0.pattern.escaped()
930        )
931    }
932}
933
934#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
935pub struct IsRegexpMatch(pub Regex);
936
937impl<'a> EagerUnaryFunc<'a> for IsRegexpMatch {
938    type Input = &'a str;
939    type Output = bool;
940
941    fn call(&self, haystack: &'a str) -> bool {
942        self.0.is_match(haystack)
943    }
944
945    fn output_type(&self, input: ColumnType) -> ColumnType {
946        ScalarType::Bool.nullable(input.nullable)
947    }
948}
949
950impl fmt::Display for IsRegexpMatch {
951    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
952        write!(
953            f,
954            "is_regexp_match[{}, case_insensitive={}]",
955            self.0.pattern().escaped(),
956            self.0.case_insensitive
957        )
958    }
959}
960
961#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
962pub struct RegexpMatch(pub Regex);
963
964impl LazyUnaryFunc for RegexpMatch {
965    fn eval<'a>(
966        &'a self,
967        datums: &[Datum<'a>],
968        temp_storage: &'a RowArena,
969        a: &'a MirScalarExpr,
970    ) -> Result<Datum<'a>, EvalError> {
971        let haystack = a.eval(datums, temp_storage)?;
972        if haystack.is_null() {
973            return Ok(Datum::Null);
974        }
975        regexp_match_static(haystack, temp_storage, &self.0)
976    }
977
978    /// The output ColumnType of this function
979    fn output_type(&self, _input_type: ColumnType) -> ColumnType {
980        ScalarType::Array(Box::new(ScalarType::String)).nullable(true)
981    }
982
983    /// Whether this function will produce NULL on NULL input
984    fn propagates_nulls(&self) -> bool {
985        true
986    }
987
988    /// Whether this function will produce NULL on non-NULL input
989    fn introduces_nulls(&self) -> bool {
990        // Returns null if the regex did not match
991        true
992    }
993
994    /// Whether this function preserves uniqueness
995    fn preserves_uniqueness(&self) -> bool {
996        false
997    }
998
999    fn inverse(&self) -> Option<crate::UnaryFunc> {
1000        None
1001    }
1002
1003    fn is_monotone(&self) -> bool {
1004        false
1005    }
1006}
1007
1008impl fmt::Display for RegexpMatch {
1009    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1010        write!(
1011            f,
1012            "regexp_match[{}, case_insensitive={}]",
1013            self.0.pattern().escaped(),
1014            self.0.case_insensitive
1015        )
1016    }
1017}
1018
/// Unary function carrying a [`Regex`]; evaluation passes the non-NULL input
/// string and this regex to `regexp_split_to_array_re`.
#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect)]
pub struct RegexpSplitToArray(pub Regex);
1021
1022impl LazyUnaryFunc for RegexpSplitToArray {
1023    fn eval<'a>(
1024        &'a self,
1025        datums: &[Datum<'a>],
1026        temp_storage: &'a RowArena,
1027        a: &'a MirScalarExpr,
1028    ) -> Result<Datum<'a>, EvalError> {
1029        let haystack = a.eval(datums, temp_storage)?;
1030        if haystack.is_null() {
1031            return Ok(Datum::Null);
1032        }
1033        regexp_split_to_array_re(haystack.unwrap_str(), &self.0, temp_storage)
1034    }
1035
1036    /// The output ColumnType of this function
1037    fn output_type(&self, input_type: ColumnType) -> ColumnType {
1038        ScalarType::Array(Box::new(ScalarType::String)).nullable(input_type.nullable)
1039    }
1040
1041    /// Whether this function will produce NULL on NULL input
1042    fn propagates_nulls(&self) -> bool {
1043        true
1044    }
1045
1046    /// Whether this function will produce NULL on non-NULL input
1047    fn introduces_nulls(&self) -> bool {
1048        false
1049    }
1050
1051    /// Whether this function preserves uniqueness
1052    fn preserves_uniqueness(&self) -> bool {
1053        false
1054    }
1055
1056    fn inverse(&self) -> Option<crate::UnaryFunc> {
1057        None
1058    }
1059
1060    fn is_monotone(&self) -> bool {
1061        false
1062    }
1063}
1064
1065impl fmt::Display for RegexpSplitToArray {
1066    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1067        write!(
1068            f,
1069            "regexp_split_to_array[{}, case_insensitive={}]",
1070            self.0.pattern().escaped(),
1071            self.0.case_insensitive
1072        )
1073    }
1074}
1075
1076sqlfunc!(
1077    #[sqlname = "mz_panic"]
1078    fn panic<'a>(a: &'a str) -> String {
1079        print!("{}", a);
1080        panic!("{}", a)
1081    }
1082);
1083
/// Unary function displayed as `quote_ident`: evaluation builds an
/// `mz_sql_parser::ast::Ident` from the input string and returns its string
/// rendering, or an `EvalError::InvalidIdentifier` if construction fails.
#[derive(
    Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect, Arbitrary,
)]
pub struct QuoteIdent;
1088
1089impl LazyUnaryFunc for QuoteIdent {
1090    fn eval<'a>(
1091        &'a self,
1092        datums: &[Datum<'a>],
1093        temp_storage: &'a RowArena,
1094        a: &'a MirScalarExpr,
1095    ) -> Result<Datum<'a>, EvalError> {
1096        let d = a.eval(datums, temp_storage)?;
1097        if d.is_null() {
1098            return Ok(Datum::Null);
1099        }
1100        let v = d.unwrap_str();
1101        let i = mz_sql_parser::ast::Ident::new(v).map_err(|err| EvalError::InvalidIdentifier {
1102            ident: v.into(),
1103            detail: Some(err.to_string().into()),
1104        })?;
1105        let r = temp_storage.push_string(i.to_string());
1106
1107        Ok(Datum::String(r))
1108    }
1109
1110    /// The output ColumnType of this function
1111    fn output_type(&self, input_type: ColumnType) -> ColumnType {
1112        ScalarType::String.nullable(input_type.nullable)
1113    }
1114
1115    /// Whether this function will produce NULL on NULL input
1116    fn propagates_nulls(&self) -> bool {
1117        true
1118    }
1119
1120    /// Whether this function will produce NULL on non-NULL input
1121    fn introduces_nulls(&self) -> bool {
1122        false
1123    }
1124
1125    /// Whether this function preserves uniqueness
1126    fn preserves_uniqueness(&self) -> bool {
1127        true
1128    }
1129
1130    fn inverse(&self) -> Option<crate::UnaryFunc> {
1131        None
1132    }
1133
1134    fn is_monotone(&self) -> bool {
1135        false
1136    }
1137}
1138
1139impl fmt::Display for QuoteIdent {
1140    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1141        write!(f, "quote_ident")
1142    }
1143}