Skip to main content

mz_repr/
strconv.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Routines for converting datum values to and from their string
11//! representation.
12//!
13//! The functions in this module are tightly related to the variants of
14//! [`SqlScalarType`](crate::SqlScalarType). Each variant has a pair of functions in
15//! this module named `parse_VARIANT` and `format_VARIANT`. The type returned
16//! by `parse` functions, and the type accepted by `format` functions, will
17//! be a type that is easily converted into the [`Datum`](crate::Datum) variant
18//! for that type. The functions do not directly convert from `Datum`s to
19//! `String`s so that the logic can be reused when `Datum`s are not available or
20//! desired, as in the pgrepr crate.
21//!
22//! The string representations used are exactly the same as the PostgreSQL
23//! string representations for the corresponding PostgreSQL type. Deviations
24//! should be considered a bug.
25
26use std::borrow::Cow;
27use std::collections::BTreeMap;
28use std::error::Error;
29use std::fmt;
30use std::num::FpCategory;
31use std::str::FromStr;
32use std::sync::LazyLock;
33
34use chrono::offset::{Offset, TimeZone};
35use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
36use dec::OrderedDecimal;
37use mz_lowertest::MzReflect;
38use mz_ore::cast::ReinterpretCast;
39use mz_ore::error::ErrorExt;
40use mz_ore::fmt::FormatBuffer;
41use mz_ore::lex::LexBuf;
42use mz_ore::str::StrExt;
43use mz_pgtz::timezone::{Timezone, TimezoneSpec};
44use mz_proto::{ProtoType, RustType, TryFromProtoError};
45use num_traits::Float as NumFloat;
46#[cfg(any(test, feature = "proptest"))]
47use proptest_derive::Arbitrary;
48use regex::bytes::Regex;
49use ryu::Float as RyuFloat;
50use serde::{Deserialize, Serialize};
51use uuid::Uuid;
52
53use crate::adt::array::ArrayDimension;
54use crate::adt::date::Date;
55use crate::adt::datetime::{self, DateTimeField, ParsedDateTime};
56use crate::adt::interval::Interval;
57use crate::adt::jsonb::{Jsonb, JsonbRef};
58use crate::adt::mz_acl_item::{AclItem, MzAclItem};
59use crate::adt::numeric::{self, NUMERIC_DATUM_MAX_PRECISION, Numeric};
60use crate::adt::pg_legacy_name::NAME_MAX_BYTES;
61use crate::adt::range::{Range, RangeBound, RangeInner};
62use crate::adt::timestamp::CheckedTimestamp;
63
64include!(concat!(env!("OUT_DIR"), "/mz_repr.strconv.rs"));
65
/// Returns early from the enclosing function with an `Err` whose payload is
/// the `String` produced by handing the macro's arguments to `format!`.
macro_rules! bail {
    ($($arg:tt)*) => { return Err(format!($($arg)*)) };
}
69
/// Value returned by the `format_*` functions in this module.
///
/// `Yes` should be provided for types that will *never* return true for
/// [`ElementEscaper::needs_escaping`].
#[derive(Debug)]
pub enum Nestable {
    /// The formatted text never needs escaping.
    Yes,
    /// The formatted text might need escaping.
    // NOTE(review): presumably callers consult `ElementEscaper::needs_escaping`
    // to decide; confirm against the escaper implementation.
    MayNeedEscaping,
}
76
77/// Parses a [`bool`] from `s`.
78///
79/// The accepted values are "true", "false", "yes", "no", "on", "off", "1", and
80/// "0", or any unambiguous prefix of one of those values. Leading or trailing
81/// whitespace is permissible.
82pub fn parse_bool(s: &str) -> Result<bool, ParseError> {
83    match s.trim().to_lowercase().as_str() {
84        "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(true),
85        "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => Ok(false),
86        _ => Err(ParseError::invalid_input_syntax("boolean", s)),
87    }
88}
89
/// Returns the textual form of `b` as a string with a static lifetime:
/// `"t"` for `true` and `"f"` for `false`.
///
/// Prefer this over `format_bool` when applicable, as it avoids an
/// allocation.
pub fn format_bool_static(b: bool) -> &'static str {
    if b { "t" } else { "f" }
}
100
101/// Writes a boolean value into `buf`.
102///
103/// `true` is encoded as the char `'t'` and `false` is encoded as the char
104/// `'f'`.
105pub fn format_bool<F>(buf: &mut F, b: bool) -> Nestable
106where
107    F: FormatBuffer,
108{
109    buf.write_str(format_bool_static(b));
110    Nestable::Yes
111}
112
113/// Parses an [`i16`] from `s`.
114///
115/// Valid values are whatever the [`std::str::FromStr`] implementation on `i16` accepts,
116/// plus leading and trailing whitespace.
117pub fn parse_int16(s: &str) -> Result<i16, ParseError> {
118    s.trim()
119        .parse()
120        .map_err(|e| ParseError::invalid_input_syntax("smallint", s).with_details(e))
121}
122
123/// Writes an [`i16`] to `buf`.
124pub fn format_int16<F>(buf: &mut F, i: i16) -> Nestable
125where
126    F: FormatBuffer,
127{
128    write!(buf, "{}", i);
129    Nestable::Yes
130}
131
132/// Parses an [`i32`] from `s`.
133///
134/// Valid values are whatever the [`std::str::FromStr`] implementation on `i32` accepts,
135/// plus leading and trailing whitespace.
136pub fn parse_int32(s: &str) -> Result<i32, ParseError> {
137    s.trim()
138        .parse()
139        .map_err(|e| ParseError::invalid_input_syntax("integer", s).with_details(e))
140}
141
142/// Writes an [`i32`] to `buf`.
143pub fn format_int32<F>(buf: &mut F, i: i32) -> Nestable
144where
145    F: FormatBuffer,
146{
147    write!(buf, "{}", i);
148    Nestable::Yes
149}
150
151/// Parses an `i64` from `s`.
152pub fn parse_int64(s: &str) -> Result<i64, ParseError> {
153    s.trim()
154        .parse()
155        .map_err(|e| ParseError::invalid_input_syntax("bigint", s).with_details(e))
156}
157
158/// Writes an `i64` to `buf`.
159pub fn format_int64<F>(buf: &mut F, i: i64) -> Nestable
160where
161    F: FormatBuffer,
162{
163    write!(buf, "{}", i);
164    Nestable::Yes
165}
166
167/// Parses an [`u16`] from `s`.
168///
169/// Valid values are whatever the [`std::str::FromStr`] implementation on `u16` accepts,
170/// plus leading and trailing whitespace.
171pub fn parse_uint16(s: &str) -> Result<u16, ParseError> {
172    s.trim()
173        .parse()
174        .map_err(|e| ParseError::invalid_input_syntax("uint2", s).with_details(e))
175}
176
177/// Writes an `u16` to `buf`.
178pub fn format_uint16<F>(buf: &mut F, u: u16) -> Nestable
179where
180    F: FormatBuffer,
181{
182    write!(buf, "{}", u);
183    Nestable::Yes
184}
185
186/// Parses an [`u32`] from `s`.
187///
188/// Valid values are whatever the [`std::str::FromStr`] implementation on `u32` accepts,
189/// plus leading and trailing whitespace.
190pub fn parse_uint32(s: &str) -> Result<u32, ParseError> {
191    s.trim()
192        .parse()
193        .map_err(|e| ParseError::invalid_input_syntax("uint4", s).with_details(e))
194}
195
196/// Writes an `u32` to `buf`.
197pub fn format_uint32<F>(buf: &mut F, u: u32) -> Nestable
198where
199    F: FormatBuffer,
200{
201    write!(buf, "{}", u);
202    Nestable::Yes
203}
204
205/// Parses an `u64` from `s`.
206pub fn parse_uint64(s: &str) -> Result<u64, ParseError> {
207    s.trim()
208        .parse()
209        .map_err(|e| ParseError::invalid_input_syntax("uint8", s).with_details(e))
210}
211
212/// Writes an `u64` to `buf`.
213pub fn format_uint64<F>(buf: &mut F, u: u64) -> Nestable
214where
215    F: FormatBuffer,
216{
217    write!(buf, "{}", u);
218    Nestable::Yes
219}
220
221/// Parses an `mz_timestamp` from `s`.
222pub fn parse_mz_timestamp(s: &str) -> Result<crate::Timestamp, ParseError> {
223    s.trim()
224        .parse()
225        .map_err(|e| ParseError::invalid_input_syntax("mz_timestamp", s).with_details(e))
226}
227
228/// Writes an `mz_timestamp` to `buf`.
229pub fn format_mz_timestamp<F>(buf: &mut F, u: crate::Timestamp) -> Nestable
230where
231    F: FormatBuffer,
232{
233    write!(buf, "{}", u);
234    Nestable::Yes
235}
236
237/// Parses an OID from `s`.
238pub fn parse_oid(s: &str) -> Result<u32, ParseError> {
239    // For historical reasons in PostgreSQL, OIDs are parsed as `i32`s and then
240    // reinterpreted as `u32`s.
241    //
242    // Do not use this as a model for behavior in other contexts. OIDs should
243    // not in general be thought of as freely convertible from `i32`s.
244    let oid: i32 = s
245        .trim()
246        .parse()
247        .map_err(|e| ParseError::invalid_input_syntax("oid", s).with_details(e))?;
248    Ok(u32::reinterpret_cast(oid))
249}
250
251fn parse_float<Fl>(type_name: &'static str, s: &str) -> Result<Fl, ParseError>
252where
253    Fl: NumFloat + FromStr,
254{
255    // Matching PostgreSQL's float parsing behavior is tricky. PostgreSQL's
256    // implementation delegates almost entirely to strtof(3)/strtod(3), which
257    // will report an out-of-range error if a number was rounded to zero or
258    // infinity. For example, parsing "1e70" as a 32-bit float will yield an
259    // out-of-range error because it is rounded to infinity, but parsing an
260    // explicitly-specified "inf" will yield infinity without an error.
261    //
262    // To @benesch's knowledge, there is no Rust implementation of float parsing
263    // that reports whether underflow or overflow occurred. So we figure it out
264    // ourselves after the fact. If parsing the float returns infinity and the input
265    // was not an explicitly-specified infinity, then we know overflow occurred.
266    // If parsing the float returns zero and the input was not an explicitly-specified
267    // zero, then we know underflow occurred.
268
269    // Matches `0`, `-0`, `+0`, `000000.00000`, `0.0e10`, 0., .0, et al.
270    static ZERO_RE: LazyLock<Regex> =
271        LazyLock::new(|| Regex::new(r#"(?i-u)^[-+]?(0+(\.0*)?|\.0+)(e|$)"#).unwrap());
272    // Matches `inf`, `-inf`, `+inf`, `infinity`, et al.
273    static INF_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i-u)^[-+]?inf").unwrap());
274
275    let buf = s.trim();
276    let f: Fl = buf
277        .parse()
278        .map_err(|_| ParseError::invalid_input_syntax(type_name, s))?;
279    match f.classify() {
280        FpCategory::Infinite if !INF_RE.is_match(buf.as_bytes()) => {
281            Err(ParseError::out_of_range(type_name, s))
282        }
283        FpCategory::Zero if !ZERO_RE.is_match(buf.as_bytes()) => {
284            Err(ParseError::out_of_range(type_name, s))
285        }
286        _ => Ok(f),
287    }
288}
289
290fn format_float<F, Fl>(buf: &mut F, f: Fl) -> Nestable
291where
292    F: FormatBuffer,
293    Fl: NumFloat + RyuFloat,
294{
295    // Use ryu rather than the standard library. ryu uses scientific notation
296    // when possible, which better matches PostgreSQL. The standard library's
297    // `ToString` implementations print all available digits, which is rather
298    // verbose.
299    //
300    // Note that we have to fix up ryu's formatting in a few cases to match
301    // PostgreSQL. PostgreSQL spells out "Infinity" in full, never emits a
302    // trailing ".0", formats positive exponents as e.g. "1e+10" rather than
303    // "1e10", and emits a negative sign for negative zero. If we need to speed
304    // up float formatting, we can look into forking ryu and making these edits
305    // directly, but for now it doesn't seem worth it.
306
307    match f.classify() {
308        FpCategory::Infinite if f.is_sign_negative() => buf.write_str("-Infinity"),
309        FpCategory::Infinite => buf.write_str("Infinity"),
310        FpCategory::Nan => buf.write_str("NaN"),
311        FpCategory::Zero if f.is_sign_negative() => buf.write_str("-0"),
312        _ => {
313            debug_assert!(f.is_finite());
314            let mut ryu_buf = ryu::Buffer::new();
315            let mut s = ryu_buf.format_finite(f);
316            if let Some(trimmed) = s.strip_suffix(".0") {
317                s = trimmed;
318            }
319            let mut chars = s.chars().peekable();
320            while let Some(ch) = chars.next() {
321                buf.write_char(ch);
322                if ch == 'e' && chars.peek() != Some(&'-') {
323                    buf.write_char('+');
324                }
325            }
326        }
327    }
328
329    Nestable::Yes
330}
331
332/// Parses an `f32` from `s`.
333pub fn parse_float32(s: &str) -> Result<f32, ParseError> {
334    parse_float("real", s)
335}
336
337/// Writes an `f32` to `buf`.
338pub fn format_float32<F>(buf: &mut F, f: f32) -> Nestable
339where
340    F: FormatBuffer,
341{
342    format_float(buf, f)
343}
344
345/// Parses an `f64` from `s`.
346pub fn parse_float64(s: &str) -> Result<f64, ParseError> {
347    parse_float("double precision", s)
348}
349
350/// Writes an `f64` to `buf`.
351pub fn format_float64<F>(buf: &mut F, f: f64) -> Nestable
352where
353    F: FormatBuffer,
354{
355    format_float(buf, f)
356}
357
358/// Use the following grammar to parse `s` into:
359///
360/// - `NaiveDate`
361/// - `NaiveTime`
362/// - Timezone string
363///
364/// `NaiveDate` and `NaiveTime` are appropriate to compute a `NaiveDateTime`,
365/// which can be used in conjunction with a timezone string to generate a
366/// `DateTime<Utc>`.
367///
368/// ```text
369/// <unquoted timestamp string> ::=
370///     <date value> <space> <time value> [ <time zone interval> ]
371/// <date value> ::=
372///     <years value> <minus sign> <months value> <minus sign> <days value>
373/// <time zone interval> ::=
374///     <sign> <hours value> <colon> <minutes value>
375/// ```
376fn parse_timestamp_string(s: &str) -> Result<(NaiveDate, NaiveTime, Timezone), String> {
377    if s.is_empty() {
378        return Err("timestamp string is empty".into());
379    }
380
381    // PostgreSQL special date-time inputs
382    // https://www.postgresql.org/docs/12/datatype-datetime.html#id-1.5.7.13.18.8
383    // We should add support for other values here, e.g. infinity
384    // which @quodlibetor is willing to add to the chrono package.
385    if s == "epoch" {
386        return Ok((
387            NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
388            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
389            Default::default(),
390        ));
391    }
392
393    let (ts_string, tz_string, era) = datetime::split_timestamp_string(s);
394
395    let pdt = ParsedDateTime::build_parsed_datetime_timestamp(ts_string, era)?;
396    let d: NaiveDate = pdt.compute_date()?;
397    let t: NaiveTime = pdt.compute_time()?;
398
399    let offset = if tz_string.is_empty() {
400        Default::default()
401    } else {
402        Timezone::parse(tz_string, TimezoneSpec::Iso)?
403    };
404
405    Ok((d, t, offset))
406}
407
408/// Parses a [`Date`] from `s`.
409pub fn parse_date(s: &str) -> Result<Date, ParseError> {
410    match parse_timestamp_string(s) {
411        Ok((date, _, _)) => Date::try_from(date).map_err(|_| ParseError::out_of_range("date", s)),
412        Err(e) => Err(ParseError::invalid_input_syntax("date", s).with_details(e)),
413    }
414}
415
416/// Writes a [`Date`] to `buf`.
417pub fn format_date<F>(buf: &mut F, d: Date) -> Nestable
418where
419    F: FormatBuffer,
420{
421    let d: NaiveDate = d.into();
422    let (year_ad, year) = d.year_ce();
423    write!(buf, "{:04}-{}", year, d.format("%m-%d"));
424    if !year_ad {
425        write!(buf, " BC");
426    }
427    Nestable::Yes
428}
429
430/// Parses a `NaiveTime` from `s`, using the following grammar.
431///
432/// ```text
433/// <time value> ::=
434///     <hours value> <colon> <minutes value> <colon> <seconds integer value>
435///     [ <period> [ <seconds fraction> ] ]
436/// ```
437pub fn parse_time(s: &str) -> Result<NaiveTime, ParseError> {
438    ParsedDateTime::build_parsed_datetime_time(s)
439        .and_then(|pdt| pdt.compute_time())
440        .map_err(|e| ParseError::invalid_input_syntax("time", s).with_details(e))
441}
442
443/// Writes a [`NaiveDateTime`] timestamp to `buf`.
444pub fn format_time<F>(buf: &mut F, t: NaiveTime) -> Nestable
445where
446    F: FormatBuffer,
447{
448    write!(buf, "{}", t.format("%H:%M:%S"));
449    format_nanos_to_micros(buf, t.nanosecond());
450    Nestable::Yes
451}
452
453/// Parses a `NaiveDateTime` from `s`.
454pub fn parse_timestamp(s: &str) -> Result<CheckedTimestamp<NaiveDateTime>, ParseError> {
455    match parse_timestamp_string(s) {
456        Ok((date, time, _)) => CheckedTimestamp::from_timestamplike(date.and_time(time))
457            .map_err(|_| ParseError::out_of_range("timestamp", s)),
458        Err(e) => Err(ParseError::invalid_input_syntax("timestamp", s).with_details(e)),
459    }
460}
461
462/// Writes a [`NaiveDateTime`] timestamp to `buf`.
463pub fn format_timestamp<F>(buf: &mut F, ts: &NaiveDateTime) -> Nestable
464where
465    F: FormatBuffer,
466{
467    let (year_ad, year) = ts.year_ce();
468    write!(buf, "{:04}-{}", year, ts.format("%m-%d %H:%M:%S"));
469    format_nanos_to_micros(buf, ts.and_utc().timestamp_subsec_nanos());
470    if !year_ad {
471        write!(buf, " BC");
472    }
473    // This always needs escaping because of the whitespace
474    Nestable::MayNeedEscaping
475}
476
477/// Parses a `DateTime<Utc>` from `s`. See `mz_expr::scalar::func::timezone_timestamp` for timezone anomaly considerations.
478pub fn parse_timestamptz(s: &str) -> Result<CheckedTimestamp<DateTime<Utc>>, ParseError> {
479    parse_timestamp_string(s)
480        .and_then(|(date, time, timezone)| {
481            use Timezone::*;
482            let mut dt = date.and_time(time);
483            let offset = match timezone {
484                FixedOffset(offset) => offset,
485                Tz(tz) => match tz.offset_from_local_datetime(&dt).latest() {
486                    Some(offset) => offset.fix(),
487                    None => {
488                        dt += Duration::try_hours(1).unwrap();
489                        tz.offset_from_local_datetime(&dt)
490                            .latest()
491                            .ok_or_else(|| "invalid timezone conversion".to_owned())?
492                            .fix()
493                    }
494                },
495            };
496            Ok(DateTime::from_naive_utc_and_offset(dt - offset, Utc))
497        })
498        .map_err(|e| {
499            ParseError::invalid_input_syntax("timestamp with time zone", s).with_details(e)
500        })
501        .and_then(|ts| {
502            CheckedTimestamp::from_timestamplike(ts)
503                .map_err(|_| ParseError::out_of_range("timestamp with time zone", s))
504        })
505}
506
507/// Writes a [`DateTime<Utc>`] timestamp to `buf`.
508pub fn format_timestamptz<F>(buf: &mut F, ts: &DateTime<Utc>) -> Nestable
509where
510    F: FormatBuffer,
511{
512    let (year_ad, year) = ts.year_ce();
513    write!(buf, "{:04}-{}", year, ts.format("%m-%d %H:%M:%S"));
514    format_nanos_to_micros(buf, ts.timestamp_subsec_nanos());
515    write!(buf, "+00");
516    if !year_ad {
517        write!(buf, " BC");
518    }
519    // This always needs escaping because of the whitespace
520    Nestable::MayNeedEscaping
521}
522
523/// parse
524///
525/// ```text
526/// <unquoted interval string> ::=
527///   [ <sign> ] { <year-month literal> | <day-time literal> }
528/// <year-month literal> ::=
529///     <years value> [ <minus sign> <months value> ]
530///   | <months value>
531/// <day-time literal> ::=
532///     <day-time interval>
533///   | <time interval>
534/// <day-time interval> ::=
535///   <days value> [ <space> <hours value> [ <colon> <minutes value>
536///       [ <colon> <seconds value> ] ] ]
537/// <time interval> ::=
538///     <hours value> [ <colon> <minutes value> [ <colon> <seconds value> ] ]
539///   | <minutes value> [ <colon> <seconds value> ]
540///   | <seconds value>
541/// ```
542pub fn parse_interval(s: &str) -> Result<Interval, ParseError> {
543    parse_interval_w_disambiguator(s, None, DateTimeField::Second)
544}
545
546/// Parse an interval string, using an optional leading precision for time (H:M:S)
547/// and a specific mz_sql_parser::ast::DateTimeField to identify ambiguous elements.
548/// For more information about this operation, see the documentation on
549/// ParsedDateTime::build_parsed_datetime_interval.
550pub fn parse_interval_w_disambiguator(
551    s: &str,
552    leading_time_precision: Option<DateTimeField>,
553    d: DateTimeField,
554) -> Result<Interval, ParseError> {
555    ParsedDateTime::build_parsed_datetime_interval(s, leading_time_precision, d)
556        .and_then(|pdt| pdt.compute_interval())
557        .map_err(|e| ParseError::invalid_input_syntax("interval", s).with_details(e))
558}
559
560pub fn format_interval<F>(buf: &mut F, iv: Interval) -> Nestable
561where
562    F: FormatBuffer,
563{
564    write!(buf, "{}", iv);
565    Nestable::MayNeedEscaping
566}
567
568pub fn parse_numeric(s: &str) -> Result<OrderedDecimal<Numeric>, ParseError> {
569    let mut cx = numeric::cx_datum();
570    let mut n = match cx.parse(s.trim()) {
571        Ok(n) => n,
572        Err(..) => {
573            return Err(ParseError::invalid_input_syntax("numeric", s));
574        }
575    };
576
577    let cx_status = cx.status();
578
579    // Check for values that can only be generated by invalid syntax.
580    if (n.is_infinite() && !cx_status.overflow())
581        || (n.is_nan() && n.is_negative())
582        || n.is_signaling_nan()
583    {
584        return Err(ParseError::invalid_input_syntax("numeric", s));
585    }
586
587    // Process value; only errors if value is out of range of numeric's max precision.
588    let out_of_range = numeric::munge_numeric(&mut n).is_err();
589
590    if cx_status.overflow() || cx_status.subnormal() || out_of_range {
591        Err(ParseError::out_of_range("numeric", s).with_details(format!(
592            "exceeds maximum precision {}",
593            NUMERIC_DATUM_MAX_PRECISION
594        )))
595    } else {
596        Ok(OrderedDecimal(n))
597    }
598}
599
600pub fn format_numeric<F>(buf: &mut F, n: &OrderedDecimal<Numeric>) -> Nestable
601where
602    F: FormatBuffer,
603{
604    write!(buf, "{}", n.0.to_standard_notation_string());
605    Nestable::Yes
606}
607
608pub fn format_string<F>(buf: &mut F, s: &str) -> Nestable
609where
610    F: FormatBuffer,
611{
612    buf.write_str(s);
613    Nestable::MayNeedEscaping
614}
615
616pub fn parse_pg_legacy_name(s: &str) -> String {
617    // To match PostgreSQL, we truncate the string to 64 bytes, while being
618    // careful not to truncate in the middle of any multibyte characters.
619    let mut out = String::new();
620    let mut len = 0;
621    for c in s.chars() {
622        len += c.len_utf8();
623        if len > NAME_MAX_BYTES {
624            break;
625        }
626        out.push(c);
627    }
628    out
629}
630
631pub fn parse_bytes(s: &str) -> Result<Vec<u8>, ParseError> {
632    // If the input starts with "\x", then the remaining bytes are hex encoded
633    // [0]. Otherwise the bytes use the traditional "escape" format. [1]
634    //
635    // [0]: https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.9
636    // [1]: https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10
637    if let Some(remainder) = s.strip_prefix(r"\x") {
638        parse_bytes_hex(remainder).map_err(|e| {
639            ParseError::invalid_input_syntax("bytea", s).with_details(e.to_string_with_causes())
640        })
641    } else {
642        parse_bytes_traditional(s)
643    }
644}
645
646pub fn parse_bytes_hex(s: &str) -> Result<Vec<u8>, ParseHexError> {
647    // Can't use `hex::decode` here, as it doesn't tolerate whitespace
648    // between encoded bytes.
649
650    let decode_nibble = |b| match b {
651        b'a'..=b'f' => Ok(b - b'a' + 10),
652        b'A'..=b'F' => Ok(b - b'A' + 10),
653        b'0'..=b'9' => Ok(b - b'0'),
654        _ => Err(ParseHexError::InvalidHexDigit(char::from(b))),
655    };
656
657    let mut buf = vec![];
658    let mut nibbles = s.as_bytes().iter().copied();
659    while let Some(n) = nibbles.next() {
660        if let b' ' | b'\n' | b'\t' | b'\r' = n {
661            continue;
662        }
663        let n = decode_nibble(n)?;
664        let n2 = match nibbles.next() {
665            None => return Err(ParseHexError::OddLength),
666            Some(n2) => decode_nibble(n2)?,
667        };
668        buf.push((n << 4) | n2);
669    }
670    Ok(buf)
671}
672
673pub fn parse_bytes_traditional(s: &str) -> Result<Vec<u8>, ParseError> {
674    // Bytes are interpreted literally, save for the special escape sequences
675    // "\\", which represents a single backslash, and "\NNN", where each N
676    // is an octal digit, which represents the byte whose octal value is NNN.
677    let mut out = Vec::with_capacity(s.len());
678    let mut bytes = s.as_bytes().iter().fuse();
679    while let Some(&b) = bytes.next() {
680        if b != b'\\' {
681            out.push(b);
682            continue;
683        }
684        match bytes.next() {
685            None => {
686                return Err(ParseError::invalid_input_syntax("bytea", s)
687                    .with_details("ends with escape character"));
688            }
689            Some(b'\\') => out.push(b'\\'),
690            b => match (b, bytes.next(), bytes.next()) {
691                (Some(d2 @ b'0'..=b'3'), Some(d1 @ b'0'..=b'7'), Some(d0 @ b'0'..=b'7')) => {
692                    out.push(((d2 - b'0') << 6) + ((d1 - b'0') << 3) + (d0 - b'0'));
693                }
694                _ => {
695                    return Err(ParseError::invalid_input_syntax("bytea", s)
696                        .with_details("invalid escape sequence"));
697                }
698            },
699        }
700    }
701    Ok(out)
702}
703
704pub fn format_bytes<F>(buf: &mut F, bytes: &[u8]) -> Nestable
705where
706    F: FormatBuffer,
707{
708    write!(buf, "\\x{}", hex::encode(bytes));
709    Nestable::MayNeedEscaping
710}
711
712pub fn parse_jsonb(s: &str) -> Result<Jsonb, ParseError> {
713    s.trim()
714        .parse()
715        .map_err(|e| ParseError::invalid_input_syntax("jsonb", s).with_details(e))
716}
717
718pub fn format_jsonb<F>(buf: &mut F, jsonb: JsonbRef) -> Nestable
719where
720    F: FormatBuffer,
721{
722    write!(buf, "{}", jsonb);
723    Nestable::MayNeedEscaping
724}
725
726pub fn format_jsonb_pretty<F>(buf: &mut F, jsonb: JsonbRef)
727where
728    F: FormatBuffer,
729{
730    write!(buf, "{:#}", jsonb)
731}
732
733pub fn parse_uuid(s: &str) -> Result<Uuid, ParseError> {
734    s.trim()
735        .parse()
736        .map_err(|e| ParseError::invalid_input_syntax("uuid", s).with_details(e))
737}
738
739pub fn format_uuid<F>(buf: &mut F, uuid: Uuid) -> Nestable
740where
741    F: FormatBuffer,
742{
743    write!(buf, "{}", uuid);
744    Nestable::Yes
745}
746
747fn format_nanos_to_micros<F>(buf: &mut F, nanos: u32)
748where
749    F: FormatBuffer,
750{
751    if nanos >= 500 {
752        let mut micros = nanos / 1000;
753        let rem = nanos % 1000;
754        if rem >= 500 {
755            micros += 1;
756        }
757        // strip trailing zeros
758        let mut width = 6;
759        while micros % 10 == 0 {
760            width -= 1;
761            micros /= 10;
762        }
763        write!(buf, ".{:0width$}", micros, width = width);
764    }
765}
766
/// Reasons why parsing the textual form of an array can fail.
///
/// The `#[error]` attribute on each variant supplies the message rendered by
/// the variant's `Display` implementation.
#[derive(Debug, thiserror::Error)]
enum ArrayParsingError {
    /// The input, after optional leading whitespace, does not begin with `{`.
    #[error("Array value must start with \"{{\"")]
    OpeningBraceMissing,
    /// The input begins with `[`, i.e. it specifies array dimensions.
    #[error("Specifying array lower bounds is not supported")]
    DimsUnsupported,
    /// An arbitrary message; this is what a `String` converts into.
    #[error("{0}")]
    Generic(String),
    /// The contained character appeared where it is not permitted.
    #[error("Unexpected \"{0}\" character.")]
    UnexpectedChar(char),
    /// Sub-arrays of a multidimensional array do not have matching dimensions.
    #[error("Multidimensional arrays must have sub-arrays with matching dimensions.")]
    NonRectilinearDims,
    /// An array element appeared where none was expected.
    #[error("Unexpected array element.")]
    UnexpectedElement,
    /// Non-whitespace input follows the array's final closing brace.
    #[error("Junk after closing right brace.")]
    Junk,
    /// The input ended before the array was closed.
    #[error("Unexpected end of input.")]
    EarlyTerm,
}
786
787impl From<String> for ArrayParsingError {
788    fn from(value: String) -> Self {
789        ArrayParsingError::Generic(value)
790    }
791}
792
793pub fn parse_array<'a, T, E>(
794    s: &'a str,
795    make_null: impl FnMut() -> T,
796    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
797) -> Result<(Vec<T>, Vec<ArrayDimension>), ParseError>
798where
799    E: ToString,
800{
801    parse_array_inner(s, make_null, gen_elem)
802        .map_err(|details| ParseError::invalid_input_syntax("array", s).with_details(details))
803}
804
805fn parse_array_inner<'a, T, E>(
806    s: &'a str,
807    mut make_null: impl FnMut() -> T,
808    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
809) -> Result<(Vec<T>, Vec<ArrayDimension>), ArrayParsingError>
810where
811    E: ToString,
812{
813    use ArrayParsingError::*;
814
815    #[derive(Clone, Debug, Default)]
816    struct Dimension {
817        // If None, still discovering this dimension's permitted width;
818        // otherwise only permits `length` elements per dimension.
819        length: Option<usize>,
820        // Whether this dimension has a staged element that can be committed.
821        // This prevents us from accepting "empty" elements, e.g. `{1,}` or
822        // `{1,,2}`.
823        staged_element: bool,
824        // The total number of elements committed in this dimension since it was
825        // last entered. Zeroed out when exited.
826        committed_element_count: usize,
827    }
828
829    #[derive(Clone, Debug, Default)]
830    struct ArrayBuilder<'a> {
831        // The current character we're operating from.
832        current_command_char: char,
833        // The dimension information, which will get turned into
834        // `ArrayDimensions`.
835        dimensions: Vec<Dimension>,
836        // The current dimension we're operating on.
837        current_dim: usize,
838        // Whether or not this array may be modified any further.
839        sealed: bool,
840        // The elements extracted from the input str. This is on the array
841        // builder to necessitate using `insert_element` so we understand when
842        // elements are staged.
843        elements: Vec<Option<Cow<'a, str>>>,
844    }
845
846    impl<'a> ArrayBuilder<'a> {
847        fn build(
848            s: &'a str,
849        ) -> Result<(Vec<Option<Cow<'a, str>>>, Vec<ArrayDimension>), ArrayParsingError> {
850            let buf = &mut LexBuf::new(s);
851
852            // TODO: support parsing array dimensions
853            if buf.consume('[') {
854                Err(DimsUnsupported)?;
855            }
856
857            buf.take_while(|ch| ch.is_ascii_whitespace());
858
859            if !buf.consume('{') {
860                Err(OpeningBraceMissing)?;
861            }
862
863            let mut dimensions = 1;
864
865            loop {
866                buf.take_while(|ch| ch.is_ascii_whitespace());
867                if buf.consume('{') {
868                    dimensions += 1;
869                } else {
870                    break;
871                }
872            }
873
874            let mut builder = ArrayBuilder {
875                current_command_char: '{',
876                dimensions: vec![Dimension::default(); dimensions],
877                // We enter the builder at the element-bearing dimension, which is the last
878                // dimension.
879                current_dim: dimensions - 1,
880                sealed: false,
881                elements: vec![],
882            };
883
884            let is_special_char = |c| matches!(c, '{' | '}' | ',' | '\\' | '"');
885            let is_end_of_literal = |c| matches!(c, ',' | '}');
886
887            loop {
888                buf.take_while(|ch| ch.is_ascii_whitespace());
889
890                // Filter command state from terminal states.
891                match buf.next() {
892                    None if builder.sealed => {
893                        break;
894                    }
895                    None => Err(EarlyTerm)?,
896                    Some(_) if builder.sealed => Err(Junk)?,
897                    Some(c) => builder.current_command_char = c,
898                }
899
900                // Run command char
901                match builder.current_command_char {
902                    '{' => builder.enter_dim()?,
903                    '}' => builder.exit_dim()?,
904                    ',' => builder.commit_element(true)?,
905                    c => {
906                        buf.prev();
907                        let s = match c {
908                            '"' => Some(lex_quoted_element(buf)?),
909                            _ => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
910                        };
911                        builder.insert_element(s)?;
912                    }
913                }
914            }
915
916            if builder.elements.is_empty() {
917                // Per PG, empty arrays are represented by empty dimensions
918                // rather than one dimension with 0 length.
919                return Ok((vec![], vec![]));
920            }
921
922            let dims = builder
923                .dimensions
924                .into_iter()
925                .map(|dim| ArrayDimension {
926                    length: dim
927                        .length
928                        .expect("every dimension must have its length discovered"),
929                    lower_bound: 1,
930                })
931                .collect();
932
933            Ok((builder.elements, dims))
934        }
935
936        /// Descend into another dimension of the array.
937        fn enter_dim(&mut self) -> Result<(), ArrayParsingError> {
938            let d = &mut self.dimensions[self.current_dim];
939            // Cannot enter a new dimension with an uncommitted element.
940            if d.staged_element {
941                return Err(UnexpectedChar(self.current_command_char));
942            }
943
944            self.current_dim += 1;
945
946            // You have exceeded the maximum dimensions.
947            if self.current_dim >= self.dimensions.len() {
948                return Err(NonRectilinearDims);
949            }
950
951            Ok(())
952        }
953
954        /// Insert a new element into the array, ensuring it is in the proper dimension.
955        fn insert_element(&mut self, s: Option<Cow<'a, str>>) -> Result<(), ArrayParsingError> {
956            // Can only insert elements into data-bearing dimension, which is
957            // the last one.
958            if self.current_dim != self.dimensions.len() - 1 {
959                return Err(UnexpectedElement);
960            }
961
962            self.stage_element()?;
963
964            self.elements.push(s);
965
966            Ok(())
967        }
968
969        /// Stage an element to be committed. Only one element can be staged at
970        /// a time and staged elements must be committed before moving onto the
971        /// next element or leaving the dimension.
972        fn stage_element(&mut self) -> Result<(), ArrayParsingError> {
973            let d = &mut self.dimensions[self.current_dim];
974            // Cannot stage two elements at once, i.e. previous element wasn't
975            // followed by committing token (`,` or `}`).
976            if d.staged_element {
977                return Err(UnexpectedElement);
978            }
979            d.staged_element = true;
980            Ok(())
981        }
982
983        /// Commit the currently staged element, which can be made optional.
984        /// This ensures that each element has an appropriate terminal character
985        /// after it.
986        fn commit_element(&mut self, require_staged: bool) -> Result<(), ArrayParsingError> {
987            let d = &mut self.dimensions[self.current_dim];
988            if !d.staged_element {
989                // - , requires a preceding staged element
990                // - } does not require a preceding staged element only when
991                //   it's the close of an empty dimension.
992                return if require_staged || d.committed_element_count > 0 {
993                    Err(UnexpectedChar(self.current_command_char))
994                } else {
995                    // This indicates that we have an empty value in this
996                    // dimension and want to exit before incrementing the
997                    // committed element count.
998                    Ok(())
999                };
1000            }
1001            d.staged_element = false;
1002            d.committed_element_count += 1;
1003
1004            Ok(())
1005        }
1006
1007        /// Exit the current dimension, committing any currently staged element
1008        /// in this dimension, and marking the interior array that this is part
1009        /// of as staged itself. If this is the 0th dimension, i.e. the closed
1010        /// brace matching the first open brace, seal the builder from further
1011        /// modification.
1012        fn exit_dim(&mut self) -> Result<(), ArrayParsingError> {
1013            // Commit an element of this dimension
1014            self.commit_element(false)?;
1015
1016            let d = &mut self.dimensions[self.current_dim];
1017
1018            // Ensure that the elements in this dimension conform to the expected shape.
1019            match d.length {
1020                None => d.length = Some(d.committed_element_count),
1021                Some(l) => {
1022                    if l != d.committed_element_count {
1023                        return Err(NonRectilinearDims);
1024                    }
1025                }
1026            }
1027
1028            // Reset this dimension's counter in case it's re-entered.
1029            d.committed_element_count = 0;
1030
1031            // If we closed the last dimension, this array may not be modified
1032            // any longer.
1033            if self.current_dim == 0 {
1034                self.sealed = true;
1035            } else {
1036                self.current_dim -= 1;
1037                // This object is an element of a higher dimension.
1038                self.stage_element()?;
1039            }
1040
1041            Ok(())
1042        }
1043    }
1044
1045    let (raw_elems, dims) = ArrayBuilder::build(s)?;
1046
1047    let mut elems = Vec::with_capacity(raw_elems.len());
1048
1049    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1050
1051    for elem in raw_elems.into_iter() {
1052        elems.push(match elem {
1053            Some(elem) => generated(elem)?,
1054            None => make_null(),
1055        });
1056    }
1057
1058    Ok((elems, dims))
1059}
1060
1061pub fn parse_list<'a, T, E>(
1062    s: &'a str,
1063    is_element_type_list: bool,
1064    make_null: impl FnMut() -> T,
1065    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1066) -> Result<Vec<T>, ParseError>
1067where
1068    E: ToString,
1069{
1070    parse_list_inner(s, is_element_type_list, make_null, gen_elem)
1071        .map_err(|details| ParseError::invalid_input_syntax("list", s).with_details(details))
1072}
1073
1074// `parse_list_inner`'s separation from `parse_list` simplifies error handling
1075// by allowing subprocedures to return `String` errors.
1076fn parse_list_inner<'a, T, E>(
1077    s: &'a str,
1078    is_element_type_list: bool,
1079    mut make_null: impl FnMut() -> T,
1080    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1081) -> Result<Vec<T>, String>
1082where
1083    E: ToString,
1084{
1085    let mut elems = vec![];
1086    let buf = &mut LexBuf::new(s);
1087
1088    // Consume opening paren.
1089    if !buf.consume('{') {
1090        bail!(
1091            "expected '{{', found {}",
1092            match buf.next() {
1093                Some(c) => format!("{}", c),
1094                None => "empty string".to_string(),
1095            }
1096        )
1097    }
1098
1099    // Simplifies calls to `gen_elem` by handling errors
1100    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1101    let is_special_char = |c| matches!(c, '{' | '}' | ',' | '\\' | '"');
1102    let is_end_of_literal = |c| matches!(c, ',' | '}');
1103
1104    // Consume elements.
1105    loop {
1106        buf.take_while(|ch| ch.is_ascii_whitespace());
1107        // Check for terminals.
1108        match buf.next() {
1109            Some('}') => {
1110                break;
1111            }
1112            _ if elems.len() == 0 => {
1113                buf.prev();
1114            }
1115            Some(',') => {}
1116            Some(c) => bail!("expected ',' or '}}', got '{}'", c),
1117            None => bail!("unexpected end of input"),
1118        }
1119
1120        buf.take_while(|ch| ch.is_ascii_whitespace());
1121        // Get elements.
1122        let elem = match buf.peek() {
1123            Some('"') => generated(lex_quoted_element(buf)?)?,
1124            Some('{') => {
1125                if !is_element_type_list {
1126                    bail!(
1127                        "unescaped '{{' at beginning of element; perhaps you \
1128                        want a nested list, e.g. '{{a}}'::text list list"
1129                    )
1130                }
1131                generated(lex_embedded_element(buf)?)?
1132            }
1133            Some(_) => match lex_unquoted_element(buf, is_special_char, is_end_of_literal)? {
1134                Some(elem) => generated(elem)?,
1135                None => make_null(),
1136            },
1137            None => bail!("unexpected end of input"),
1138        };
1139        elems.push(elem);
1140    }
1141
1142    buf.take_while(|ch| ch.is_ascii_whitespace());
1143    if let Some(c) = buf.next() {
1144        bail!(
1145            "malformed array literal; contains '{}' after terminal '}}'",
1146            c
1147        )
1148    }
1149
1150    Ok(elems)
1151}
1152
1153pub fn parse_legacy_vector<'a, T, E>(
1154    s: &'a str,
1155    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1156) -> Result<Vec<T>, ParseError>
1157where
1158    E: ToString,
1159{
1160    parse_legacy_vector_inner(s, gen_elem)
1161        .map_err(|details| ParseError::invalid_input_syntax("int2vector", s).with_details(details))
1162}
1163
1164/// Parses PostgreSQL's legacy whitespace-separated vector syntax (used in
1165/// Materialize for `int2vector`). Unlike [`parse_array`], this grammar has
1166/// no token for `NULL`, which is why `int2vector` cannot represent `NULL`
1167/// elements. See [`crate::scalar::Int2Vector`].
1168pub fn parse_legacy_vector_inner<'a, T, E>(
1169    s: &'a str,
1170    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1171) -> Result<Vec<T>, String>
1172where
1173    E: ToString,
1174{
1175    let mut elems = vec![];
1176    let buf = &mut LexBuf::new(s);
1177
1178    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1179
1180    loop {
1181        buf.take_while(|ch| ch.is_ascii_whitespace());
1182        match buf.peek() {
1183            Some(_) => {
1184                let elem = buf.take_while(|ch| !ch.is_ascii_whitespace());
1185                elems.push(generated(elem.into())?);
1186            }
1187            None => break,
1188        }
1189    }
1190
1191    Ok(elems)
1192}
1193
1194fn lex_quoted_element<'a>(buf: &mut LexBuf<'a>) -> Result<Cow<'a, str>, String> {
1195    assert!(buf.consume('"'));
1196    let s = buf.take_while(|ch| !matches!(ch, '"' | '\\'));
1197
1198    // `Cow::Borrowed` optimization for quoted strings without escapes
1199    if let Some('"') = buf.peek() {
1200        buf.next();
1201        return Ok(s.into());
1202    }
1203
1204    let mut s = s.to_string();
1205    loop {
1206        match buf.next() {
1207            Some('\\') => match buf.next() {
1208                Some(c) => s.push(c),
1209                None => bail!("unterminated quoted string"),
1210            },
1211            Some('"') => break,
1212            Some(c) => s.push(c),
1213            None => bail!("unterminated quoted string"),
1214        }
1215    }
1216    Ok(s.into())
1217}
1218
1219fn lex_embedded_element<'a>(buf: &mut LexBuf<'a>) -> Result<Cow<'a, str>, String> {
1220    let pos = buf.pos();
1221    assert!(matches!(buf.next(), Some('{')));
1222    let mut depth = 1;
1223    let mut in_escape = false;
1224    while depth > 0 {
1225        match buf.next() {
1226            Some('\\') => {
1227                buf.next(); // Next character is escaped, so ignore it
1228            }
1229            Some('"') => in_escape = !in_escape, // Begin or end escape
1230            Some('{') if !in_escape => depth += 1,
1231            Some('}') if !in_escape => depth -= 1,
1232            Some(_) => (),
1233            None => bail!("unterminated embedded element"),
1234        }
1235    }
1236    let s = &buf.inner()[pos..buf.pos()];
1237    Ok(Cow::Borrowed(s))
1238}
1239
1240// Result of `None` indicates element is NULL.
1241fn lex_unquoted_element<'a>(
1242    buf: &mut LexBuf<'a>,
1243    is_special_char: impl Fn(char) -> bool,
1244    is_end_of_literal: impl Fn(char) -> bool,
1245) -> Result<Option<Cow<'a, str>>, String> {
1246    // first char is guaranteed to be non-whitespace
1247    assert!(!buf.peek().unwrap().is_ascii_whitespace());
1248
1249    let s = buf.take_while(|ch| !is_special_char(ch) && !ch.is_ascii_whitespace());
1250
1251    // `Cow::Borrowed` optimization for elements without special characters.
1252    match buf.peek() {
1253        Some(',') | Some('}') if !s.is_empty() => {
1254            return Ok(if s.to_uppercase() == "NULL" {
1255                None
1256            } else {
1257                Some(s.into())
1258            });
1259        }
1260        _ => {}
1261    }
1262
1263    // Track whether there are any escaped characters to determine if the string
1264    // "NULL" should be treated as a NULL, or if it had any escaped characters
1265    // and should be treated as the string "NULL".
1266    let mut escaped_char = false;
1267
1268    let mut s = s.to_string();
1269    // As we go, we keep track of where to truncate to in order to remove any
1270    // trailing whitespace.
1271    let mut trimmed_len = s.len();
1272    loop {
1273        match buf.next() {
1274            Some('\\') => match buf.next() {
1275                Some(c) => {
1276                    escaped_char = true;
1277                    s.push(c);
1278                    trimmed_len = s.len();
1279                }
1280                None => return Err("unterminated element".into()),
1281            },
1282            Some(c) if is_end_of_literal(c) => {
1283                // End of literal characters as the first character indicates
1284                // a missing element definition.
1285                if s.is_empty() {
1286                    bail!("malformed literal; missing element")
1287                }
1288                buf.prev();
1289                break;
1290            }
1291            Some(c) if is_special_char(c) => {
1292                bail!("malformed literal; must escape special character '{}'", c)
1293            }
1294            Some(c) => {
1295                s.push(c);
1296                if !c.is_ascii_whitespace() {
1297                    trimmed_len = s.len();
1298                }
1299            }
1300            None => bail!("unterminated element"),
1301        }
1302    }
1303    s.truncate(trimmed_len);
1304    Ok(if s.to_uppercase() == "NULL" && !escaped_char {
1305        None
1306    } else {
1307        Some(Cow::Owned(s))
1308    })
1309}
1310
1311pub fn parse_map<'a, V, E>(
1312    s: &'a str,
1313    is_value_type_map: bool,
1314    gen_elem: impl FnMut(Option<Cow<'a, str>>) -> Result<V, E>,
1315) -> Result<BTreeMap<String, V>, ParseError>
1316where
1317    E: ToString,
1318{
1319    parse_map_inner(s, is_value_type_map, gen_elem)
1320        .map_err(|details| ParseError::invalid_input_syntax("map", s).with_details(details))
1321}
1322
1323fn parse_map_inner<'a, V, E>(
1324    s: &'a str,
1325    is_value_type_map: bool,
1326    mut gen_elem: impl FnMut(Option<Cow<'a, str>>) -> Result<V, E>,
1327) -> Result<BTreeMap<String, V>, String>
1328where
1329    E: ToString,
1330{
1331    let mut map = BTreeMap::new();
1332    let buf = &mut LexBuf::new(s);
1333
1334    // Consume opening paren.
1335    if !buf.consume('{') {
1336        bail!(
1337            "expected '{{', found {}",
1338            match buf.next() {
1339                Some(c) => format!("{}", c),
1340                None => "empty string".to_string(),
1341            }
1342        )
1343    }
1344
1345    // Simplifies calls to generators by handling errors
1346    let gen_key = |key: Option<Cow<'a, str>>| -> Result<String, String> {
1347        match key {
1348            Some(Cow::Owned(s)) => Ok(s),
1349            Some(Cow::Borrowed(s)) => Ok(s.to_owned()),
1350            None => Err("expected key".to_owned()),
1351        }
1352    };
1353    let mut gen_value = |elem| gen_elem(elem).map_err(|e| e.to_string());
1354    let is_special_char = |c| matches!(c, '{' | '}' | ',' | '"' | '=' | '>' | '\\');
1355    let is_end_of_literal = |c| matches!(c, ',' | '}' | '=');
1356
1357    loop {
1358        // Check for terminals.
1359        buf.take_while(|ch| ch.is_ascii_whitespace());
1360        match buf.next() {
1361            Some('}') => break,
1362            _ if map.len() == 0 => {
1363                buf.prev();
1364            }
1365            Some(',') => {}
1366            Some(c) => bail!("expected ',' or end of input, got '{}'", c),
1367            None => bail!("unexpected end of input"),
1368        }
1369
1370        // Get key.
1371        buf.take_while(|ch| ch.is_ascii_whitespace());
1372        let key = match buf.peek() {
1373            Some('"') => Some(lex_quoted_element(buf)?),
1374            Some(_) => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
1375            None => bail!("unexpected end of input"),
1376        };
1377        let key = gen_key(key)?;
1378
1379        // Assert mapping arrow (=>) is present.
1380        buf.take_while(|ch| ch.is_ascii_whitespace());
1381        if !buf.consume('=') || !buf.consume('>') {
1382            bail!("expected =>")
1383        }
1384
1385        // Get value.
1386        buf.take_while(|ch| ch.is_ascii_whitespace());
1387        let value = match buf.peek() {
1388            Some('"') => Some(lex_quoted_element(buf)?),
1389            Some('{') => {
1390                if !is_value_type_map {
1391                    bail!(
1392                        "unescaped '{{' at beginning of value; perhaps you \
1393                           want a nested map, e.g. '{{a=>{{a=>1}}}}'::map[text=>map[text=>int]]"
1394                    )
1395                }
1396                Some(lex_embedded_element(buf)?)
1397            }
1398            Some(_) => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
1399            None => bail!("unexpected end of input"),
1400        };
1401        let value = gen_value(value)?;
1402
1403        // Insert elements.
1404        map.insert(key, value);
1405    }
1406    Ok(map)
1407}
1408
1409pub fn format_map<F, T, E>(
1410    buf: &mut F,
1411    elems: impl IntoIterator<Item = (impl AsRef<str>, T)>,
1412    mut format_elem: impl FnMut(MapValueWriter<F>, T) -> Result<Nestable, E>,
1413) -> Result<Nestable, E>
1414where
1415    F: FormatBuffer,
1416{
1417    buf.write_char('{');
1418    let mut elems = elems.into_iter().peekable();
1419    while let Some((key, value)) = elems.next() {
1420        // Map key values are always Strings, which always evaluate to
1421        // Nestable::MayNeedEscaping.
1422        let key_start = buf.len();
1423        buf.write_str(key.as_ref());
1424        escape_elem::<_, MapElementEscaper>(buf, key_start);
1425
1426        buf.write_str("=>");
1427
1428        let value_start = buf.len();
1429        if let Nestable::MayNeedEscaping = format_elem(MapValueWriter(buf), value)? {
1430            escape_elem::<_, MapElementEscaper>(buf, value_start);
1431        }
1432
1433        if elems.peek().is_some() {
1434            buf.write_char(',');
1435        }
1436    }
1437    buf.write_char('}');
1438    Ok(Nestable::Yes)
1439}
1440
1441pub fn parse_range<'a, V, E>(
1442    s: &'a str,
1443    gen_elem: impl FnMut(Cow<'a, str>) -> Result<V, E>,
1444) -> Result<Range<V>, ParseError>
1445where
1446    E: ToString,
1447{
1448    Ok(Range {
1449        inner: parse_range_inner(s, gen_elem).map_err(|details| {
1450            ParseError::invalid_input_syntax("range", s).with_details(details)
1451        })?,
1452    })
1453}
1454
1455fn parse_range_inner<'a, V, E>(
1456    s: &'a str,
1457    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<V, E>,
1458) -> Result<Option<RangeInner<V>>, String>
1459where
1460    E: ToString,
1461{
1462    let buf = &mut LexBuf::new(s);
1463
1464    buf.take_while(|ch| ch.is_ascii_whitespace());
1465
1466    if buf.consume_str("empty") {
1467        buf.take_while(|ch| ch.is_ascii_whitespace());
1468        if buf.next().is_none() {
1469            return Ok(None);
1470        } else {
1471            bail!("Junk after \"empty\" key word.")
1472        }
1473    }
1474
1475    let lower_inclusive = match buf.next() {
1476        Some('[') => true,
1477        Some('(') => false,
1478        _ => bail!("Missing left parenthesis or bracket."),
1479    };
1480
1481    let lower_bound = match buf.peek() {
1482        Some(',') => None,
1483        Some(_) => {
1484            let v = buf.take_while(|c| !matches!(c, ','));
1485            let v = gen_elem(Cow::from(v)).map_err(|e| e.to_string())?;
1486            Some(v)
1487        }
1488        None => bail!("Unexpected end of input."),
1489    };
1490
1491    buf.take_while(|ch| ch.is_ascii_whitespace());
1492
1493    if buf.next() != Some(',') {
1494        bail!("Missing comma after lower bound.")
1495    }
1496
1497    let upper_bound = match buf.peek() {
1498        Some(']' | ')') => None,
1499        Some(_) => {
1500            let v = buf.take_while(|c| !matches!(c, ')' | ']'));
1501            let v = gen_elem(Cow::from(v)).map_err(|e| e.to_string())?;
1502            Some(v)
1503        }
1504        None => bail!("Unexpected end of input."),
1505    };
1506
1507    let upper_inclusive = match buf.next() {
1508        Some(']') => true,
1509        Some(')') => false,
1510        _ => bail!("Missing left parenthesis or bracket."),
1511    };
1512
1513    buf.take_while(|ch| ch.is_ascii_whitespace());
1514
1515    if buf.next().is_some() {
1516        bail!("Junk after right parenthesis or bracket.")
1517    }
1518
1519    let range = Some(RangeInner {
1520        lower: RangeBound {
1521            inclusive: lower_inclusive,
1522            bound: lower_bound,
1523        },
1524        upper: RangeBound {
1525            inclusive: upper_inclusive,
1526            bound: upper_bound,
1527        },
1528    });
1529
1530    Ok(range)
1531}
1532
1533/// Writes a [`Range`] to `buf`.
1534pub fn format_range<F, V, E>(
1535    buf: &mut F,
1536    r: &Range<V>,
1537    mut format_elem: impl FnMut(RangeElementWriter<F>, Option<&V>) -> Result<Nestable, E>,
1538) -> Result<Nestable, E>
1539where
1540    F: FormatBuffer,
1541{
1542    let range = match &r.inner {
1543        None => {
1544            buf.write_str("empty");
1545            return Ok(Nestable::MayNeedEscaping);
1546        }
1547        Some(i) => i,
1548    };
1549
1550    if range.lower.inclusive {
1551        buf.write_char('[');
1552    } else {
1553        buf.write_char('(');
1554    }
1555
1556    let start = buf.len();
1557    if let Nestable::MayNeedEscaping =
1558        format_elem(RangeElementWriter(buf), range.lower.bound.as_ref())?
1559    {
1560        escape_elem::<_, ListElementEscaper>(buf, start);
1561    }
1562
1563    buf.write_char(',');
1564
1565    let start = buf.len();
1566    if let Nestable::MayNeedEscaping =
1567        format_elem(RangeElementWriter(buf), range.upper.bound.as_ref())?
1568    {
1569        escape_elem::<_, ListElementEscaper>(buf, start);
1570    }
1571
1572    if range.upper.inclusive {
1573        buf.write_char(']');
1574    } else {
1575        buf.write_char(')');
1576    }
1577
1578    Ok(Nestable::MayNeedEscaping)
1579}
1580
1581/// A helper for `format_range` that formats a single record element.
1582#[derive(Debug)]
1583pub struct RangeElementWriter<'a, F>(&'a mut F);
1584
1585impl<'a, F> RangeElementWriter<'a, F>
1586where
1587    F: FormatBuffer,
1588{
1589    /// Marks this record element as null.
1590    pub fn write_null(self) -> Nestable {
1591        // In ranges these "null" values represent infinite bounds, which are
1592        // not represented as values, but rather the absence of a value.
1593        Nestable::Yes
1594    }
1595
1596    /// Returns a [`FormatBuffer`] into which a non-null element can be
1597    /// written.
1598    pub fn nonnull_buffer(self) -> &'a mut F {
1599        self.0
1600    }
1601}
1602
1603pub fn format_array<F, T, E>(
1604    buf: &mut F,
1605    dims: &[ArrayDimension],
1606    elems: impl IntoIterator<Item = T>,
1607    mut format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1608) -> Result<Nestable, E>
1609where
1610    F: FormatBuffer,
1611{
1612    if dims.iter().any(|dim| dim.lower_bound != 1) {
1613        for d in dims.iter() {
1614            let (lower, upper) = d.dimension_bounds();
1615            write!(buf, "[{}:{}]", lower, upper);
1616        }
1617        buf.write_char('=');
1618    }
1619
1620    format_array_inner(buf, dims, &mut elems.into_iter(), &mut format_elem)?;
1621    Ok(Nestable::Yes)
1622}
1623
1624pub fn format_array_inner<F, T, E>(
1625    buf: &mut F,
1626    dims: &[ArrayDimension],
1627    elems: &mut impl Iterator<Item = T>,
1628    format_elem: &mut impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1629) -> Result<(), E>
1630where
1631    F: FormatBuffer,
1632{
1633    if dims.is_empty() {
1634        buf.write_str("{}");
1635        return Ok(());
1636    }
1637
1638    buf.write_char('{');
1639    for j in 0..dims[0].length {
1640        if j > 0 {
1641            buf.write_char(',');
1642        }
1643        if dims.len() == 1 {
1644            let start = buf.len();
1645            let elem = elems.next().unwrap();
1646            if let Nestable::MayNeedEscaping = format_elem(ListElementWriter(buf), elem)? {
1647                escape_elem::<_, ListElementEscaper>(buf, start);
1648            }
1649        } else {
1650            format_array_inner(buf, &dims[1..], elems, format_elem)?;
1651        }
1652    }
1653    buf.write_char('}');
1654
1655    Ok(())
1656}
1657
1658pub fn format_legacy_vector<F, T, E>(
1659    buf: &mut F,
1660    elems: impl IntoIterator<Item = T>,
1661    format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1662) -> Result<Nestable, E>
1663where
1664    F: FormatBuffer,
1665{
1666    format_elems(buf, elems, format_elem, ' ')?;
1667    Ok(Nestable::MayNeedEscaping)
1668}
1669
1670pub fn format_list<F, T, E>(
1671    buf: &mut F,
1672    elems: impl IntoIterator<Item = T>,
1673    format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1674) -> Result<Nestable, E>
1675where
1676    F: FormatBuffer,
1677{
1678    buf.write_char('{');
1679    format_elems(buf, elems, format_elem, ',')?;
1680    buf.write_char('}');
1681    Ok(Nestable::Yes)
1682}
1683
1684/// Writes each `elem` into `buf`, separating the elems with `sep`.
1685pub fn format_elems<F, T, E>(
1686    buf: &mut F,
1687    elems: impl IntoIterator<Item = T>,
1688    mut format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1689    sep: char,
1690) -> Result<(), E>
1691where
1692    F: FormatBuffer,
1693{
1694    let mut elems = elems.into_iter().peekable();
1695    while let Some(elem) = elems.next() {
1696        let start = buf.len();
1697        if let Nestable::MayNeedEscaping = format_elem(ListElementWriter(buf), elem)? {
1698            escape_elem::<_, ListElementEscaper>(buf, start);
1699        }
1700        if elems.peek().is_some() {
1701            buf.write_char(sep)
1702        }
1703    }
1704    Ok(())
1705}
1706
1707/// Writes an `mz_acl_item` to `buf`.
1708pub fn format_mz_acl_item<F>(buf: &mut F, mz_acl_item: MzAclItem) -> Nestable
1709where
1710    F: FormatBuffer,
1711{
1712    write!(buf, "{mz_acl_item}");
1713    Nestable::Yes
1714}
1715
1716/// Parses an MzAclItem from `s`.
1717pub fn parse_mz_acl_item(s: &str) -> Result<MzAclItem, ParseError> {
1718    s.trim()
1719        .parse()
1720        .map_err(|e| ParseError::invalid_input_syntax("mz_aclitem", s).with_details(e))
1721}
1722
1723/// Writes an `acl_item` to `buf`.
1724pub fn format_acl_item<F>(buf: &mut F, acl_item: AclItem) -> Nestable
1725where
1726    F: FormatBuffer,
1727{
1728    write!(buf, "{acl_item}");
1729    Nestable::Yes
1730}
1731
1732/// Parses an AclItem from `s`.
1733pub fn parse_acl_item(s: &str) -> Result<AclItem, ParseError> {
1734    s.trim()
1735        .parse()
1736        .map_err(|e| ParseError::invalid_input_syntax("aclitem", s).with_details(e))
1737}
1738
/// The escaping rules of one container syntax (list, map, or record).
pub trait ElementEscaper {
    /// Reports whether the already formatted element `elem` has to be quoted
    /// before it can be embedded in the container.
    fn needs_escaping(elem: &[u8]) -> bool;

    /// Returns the byte to insert in front of `c` when `c` is a quote or a
    /// backslash inside a quoted element.
    fn escape_char(c: u8) -> u8;
}
1743
1744struct ListElementEscaper;
1745
1746impl ElementEscaper for ListElementEscaper {
1747    fn needs_escaping(elem: &[u8]) -> bool {
1748        elem.is_empty()
1749            || elem == b"NULL"
1750            || elem
1751                .iter()
1752                .any(|c| matches!(c, b'{' | b'}' | b',' | b'"' | b'\\') || c.is_ascii_whitespace())
1753    }
1754
1755    fn escape_char(_: u8) -> u8 {
1756        b'\\'
1757    }
1758}
1759
1760struct MapElementEscaper;
1761
1762impl ElementEscaper for MapElementEscaper {
1763    fn needs_escaping(elem: &[u8]) -> bool {
1764        elem.is_empty()
1765            || elem == b"NULL"
1766            || elem.iter().any(|c| {
1767                matches!(c, b'{' | b'}' | b',' | b'"' | b'=' | b'>' | b'\\')
1768                    || c.is_ascii_whitespace()
1769            })
1770    }
1771
1772    fn escape_char(_: u8) -> u8 {
1773        b'\\'
1774    }
1775}
1776
1777struct RecordElementEscaper;
1778
1779impl ElementEscaper for RecordElementEscaper {
1780    fn needs_escaping(elem: &[u8]) -> bool {
1781        elem.is_empty()
1782            || elem
1783                .iter()
1784                .any(|c| matches!(c, b'(' | b')' | b',' | b'"' | b'\\') || c.is_ascii_whitespace())
1785    }
1786
1787    fn escape_char(c: u8) -> u8 {
1788        if c == b'"' { b'"' } else { b'\\' }
1789    }
1790}
1791
1792/// Escapes a list, record, or map element in place.
1793///
1794/// The element must start at `start` and extend to the end of the buffer. The
1795/// buffer will be resized if escaping is necessary to account for the
1796/// additional escape characters.
1797///
1798/// The `needs_escaping` function is used to determine whether an element needs
1799/// to be escaped. It is provided with the bytes of each element and should
1800/// return whether the element needs to be escaped.
1801fn escape_elem<F, E>(buf: &mut F, start: usize)
1802where
1803    F: FormatBuffer,
1804    E: ElementEscaper,
1805{
1806    let elem = &buf.as_ref()[start..];
1807    if !E::needs_escaping(elem) {
1808        return;
1809    }
1810
1811    // We'll need two extra bytes for the quotes at the start and end of the
1812    // element, plus an extra byte for each quote and backslash.
1813    let extras = 2 + elem.iter().filter(|b| matches!(b, b'"' | b'\\')).count();
1814    let orig_end = buf.len();
1815    let new_end = buf.len() + extras;
1816
1817    // Pad the buffer to the new length. These characters will all be
1818    // overwritten.
1819    //
1820    // NOTE(benesch): we never read these characters, so we could instead use
1821    // uninitialized memory, but that's a level of unsafety I'm currently
1822    // uncomfortable with. The performance gain is negligible anyway.
1823    for _ in 0..extras {
1824        buf.write_char('\0');
1825    }
1826
1827    // SAFETY: inserting ASCII characters before other ASCII characters
1828    // preserves UTF-8 encoding.
1829    let elem = unsafe { buf.as_bytes_mut() };
1830
1831    // Walk the string backwards, writing characters at the new end index while
1832    // reading from the old end index, adding quotes at the beginning and end,
1833    // and adding a backslash before every backslash or quote.
1834    let mut wi = new_end - 1;
1835    elem[wi] = b'"';
1836    wi -= 1;
1837    for ri in (start..orig_end).rev() {
1838        elem[wi] = elem[ri];
1839        wi -= 1;
1840        if let b'\\' | b'"' = elem[ri] {
1841            elem[wi] = E::escape_char(elem[ri]);
1842            wi -= 1;
1843        }
1844    }
1845    elem[wi] = b'"';
1846
1847    assert!(wi == start);
1848}
1849
1850/// A helper for `format_list` that formats a single list element.
1851#[derive(Debug)]
1852pub struct ListElementWriter<'a, F>(&'a mut F);
1853
1854impl<'a, F> ListElementWriter<'a, F>
1855where
1856    F: FormatBuffer,
1857{
1858    /// Marks this list element as null.
1859    pub fn write_null(self) -> Nestable {
1860        self.0.write_str("NULL");
1861        Nestable::Yes
1862    }
1863
1864    /// Returns a [`FormatBuffer`] into which a non-null element can be
1865    /// written.
1866    pub fn nonnull_buffer(self) -> &'a mut F {
1867        self.0
1868    }
1869}
1870
1871/// A helper for `format_map` that formats a single map value.
1872#[derive(Debug)]
1873pub struct MapValueWriter<'a, F>(&'a mut F);
1874
1875impl<'a, F> MapValueWriter<'a, F>
1876where
1877    F: FormatBuffer,
1878{
1879    /// Marks this value element as null.
1880    pub fn write_null(self) -> Nestable {
1881        self.0.write_str("NULL");
1882        Nestable::Yes
1883    }
1884
1885    /// Returns a [`FormatBuffer`] into which a non-null element can be
1886    /// written.
1887    pub fn nonnull_buffer(self) -> &'a mut F {
1888        self.0
1889    }
1890}
1891
1892pub fn format_record<F, T, E>(
1893    buf: &mut F,
1894    elems: impl IntoIterator<Item = T>,
1895    mut format_elem: impl FnMut(RecordElementWriter<F>, T) -> Result<Nestable, E>,
1896) -> Result<Nestable, E>
1897where
1898    F: FormatBuffer,
1899{
1900    buf.write_char('(');
1901    let mut elems = elems.into_iter().peekable();
1902    while let Some(elem) = elems.next() {
1903        let start = buf.len();
1904        if let Nestable::MayNeedEscaping = format_elem(RecordElementWriter(buf), elem)? {
1905            escape_elem::<_, RecordElementEscaper>(buf, start);
1906        }
1907        if elems.peek().is_some() {
1908            buf.write_char(',')
1909        }
1910    }
1911    buf.write_char(')');
1912    Ok(Nestable::MayNeedEscaping)
1913}
1914
1915/// A helper for `format_record` that formats a single record element.
1916#[derive(Debug)]
1917pub struct RecordElementWriter<'a, F>(&'a mut F);
1918
1919impl<'a, F> RecordElementWriter<'a, F>
1920where
1921    F: FormatBuffer,
1922{
1923    /// Marks this record element as null.
1924    pub fn write_null(self) -> Nestable {
1925        Nestable::Yes
1926    }
1927
1928    /// Returns a [`FormatBuffer`] into which a non-null element can be
1929    /// written.
1930    pub fn nonnull_buffer(self) -> &'a mut F {
1931        self.0
1932    }
1933}
1934
/// An error while parsing an input as a type.
///
/// The user-facing message is assembled from these fields by the
/// `fmt::Display` implementation for this type.
#[derive(
    Ord,
    PartialOrd,
    Clone,
    Debug,
    Eq,
    PartialEq,
    Serialize,
    Deserialize,
    Hash,
    MzReflect
)]
#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
pub struct ParseError {
    /// The category of failure; selects which message template is used.
    pub kind: ParseErrorKind,
    /// The name of the type the input was being parsed as.
    pub type_name: Box<str>,
    /// The input that failed to parse.
    pub input: Box<str>,
    /// Optional additional detail that is included in the message when
    /// present.
    pub details: Option<Box<str>>,
}
1955
/// The category of a [`ParseError`].
#[derive(
    Ord,
    PartialOrd,
    Clone,
    Copy,
    Debug,
    Eq,
    PartialEq,
    Serialize,
    Deserialize,
    Hash,
    MzReflect
)]
#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
pub enum ParseErrorKind {
    /// Reported as "... is out of range for type ...".
    OutOfRange,
    /// Reported as "invalid input syntax for type ...".
    InvalidInputSyntax,
}
1974
1975impl ParseError {
1976    // To ensure that reversing the parameters causes a compile-time error, we
1977    // require that `type_name` be a string literal, even though `ParseError`
1978    // itself stores the type name as a `String`.
1979    fn new<S>(kind: ParseErrorKind, type_name: &'static str, input: S) -> ParseError
1980    where
1981        S: Into<Box<str>>,
1982    {
1983        ParseError {
1984            kind,
1985            type_name: type_name.into(),
1986            input: input.into(),
1987            details: None,
1988        }
1989    }
1990
1991    fn out_of_range<S>(type_name: &'static str, input: S) -> ParseError
1992    where
1993        S: Into<Box<str>>,
1994    {
1995        ParseError::new(ParseErrorKind::OutOfRange, type_name, input)
1996    }
1997
1998    fn invalid_input_syntax<S>(type_name: &'static str, input: S) -> ParseError
1999    where
2000        S: Into<Box<str>>,
2001    {
2002        ParseError::new(ParseErrorKind::InvalidInputSyntax, type_name, input)
2003    }
2004
2005    fn with_details<D>(mut self, details: D) -> ParseError
2006    where
2007        D: fmt::Display,
2008    {
2009        self.details = Some(details.to_string().into());
2010        self
2011    }
2012}
2013
2014impl fmt::Display for ParseError {
2015    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2016        match self.kind {
2017            ParseErrorKind::OutOfRange => {
2018                write!(
2019                    f,
2020                    "{} is out of range for type {}",
2021                    self.input.quoted(),
2022                    self.type_name
2023                )?;
2024                if let Some(details) = &self.details {
2025                    write!(f, ": {}", details)?;
2026                }
2027                Ok(())
2028            }
2029            ParseErrorKind::InvalidInputSyntax => {
2030                write!(f, "invalid input syntax for type {}: ", self.type_name)?;
2031                if let Some(details) = &self.details {
2032                    write!(f, "{}: ", details)?;
2033                }
2034                write!(f, "{}", self.input.quoted())
2035            }
2036        }
2037    }
2038}
2039
2040impl Error for ParseError {}
2041
2042impl RustType<ProtoParseError> for ParseError {
2043    fn into_proto(&self) -> ProtoParseError {
2044        use Kind::*;
2045        use proto_parse_error::*;
2046        let kind = match self.kind {
2047            ParseErrorKind::OutOfRange => OutOfRange(()),
2048            ParseErrorKind::InvalidInputSyntax => InvalidInputSyntax(()),
2049        };
2050        ProtoParseError {
2051            kind: Some(kind),
2052            type_name: self.type_name.into_proto(),
2053            input: self.input.into_proto(),
2054            details: self.details.into_proto(),
2055        }
2056    }
2057
2058    fn from_proto(proto: ProtoParseError) -> Result<Self, TryFromProtoError> {
2059        use proto_parse_error::Kind::*;
2060
2061        if let Some(kind) = proto.kind {
2062            Ok(ParseError {
2063                kind: match kind {
2064                    OutOfRange(()) => ParseErrorKind::OutOfRange,
2065                    InvalidInputSyntax(()) => ParseErrorKind::InvalidInputSyntax,
2066                },
2067                type_name: proto.type_name.into(),
2068                input: proto.input.into(),
2069                details: proto.details.into_rust()?,
2070            })
2071        } else {
2072            Err(TryFromProtoError::missing_field("ProtoParseError::kind"))
2073        }
2074    }
2075}
2076
/// An error describing why a string is not valid hexadecimal data.
#[derive(
    Ord,
    PartialOrd,
    Copy,
    Clone,
    Debug,
    Eq,
    PartialEq,
    Serialize,
    Deserialize,
    Hash,
    MzReflect
)]
#[cfg_attr(any(test, feature = "proptest"), derive(Arbitrary))]
pub enum ParseHexError {
    /// Reported as "invalid hexadecimal digit: ..." followed by the carried
    /// character.
    InvalidHexDigit(char),
    /// Reported as "invalid hexadecimal data: odd number of digits".
    OddLength,
}
impl Error for ParseHexError {}
2096
2097impl fmt::Display for ParseHexError {
2098    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2099        match self {
2100            ParseHexError::InvalidHexDigit(c) => {
2101                write!(f, "invalid hexadecimal digit: \"{}\"", c.escape_default())
2102            }
2103            ParseHexError::OddLength => {
2104                f.write_str("invalid hexadecimal data: odd number of digits")
2105            }
2106        }
2107    }
2108}
2109
2110impl RustType<ProtoParseHexError> for ParseHexError {
2111    fn into_proto(&self) -> ProtoParseHexError {
2112        use Kind::*;
2113        use proto_parse_hex_error::*;
2114        let kind = match self {
2115            ParseHexError::InvalidHexDigit(v) => InvalidHexDigit(v.into_proto()),
2116            ParseHexError::OddLength => OddLength(()),
2117        };
2118        ProtoParseHexError { kind: Some(kind) }
2119    }
2120
2121    fn from_proto(error: ProtoParseHexError) -> Result<Self, TryFromProtoError> {
2122        use proto_parse_hex_error::Kind::*;
2123        match error.kind {
2124            Some(kind) => match kind {
2125                InvalidHexDigit(v) => Ok(ParseHexError::InvalidHexDigit(char::from_proto(v)?)),
2126                OddLength(()) => Ok(ParseHexError::OddLength),
2127            },
2128            None => Err(TryFromProtoError::missing_field(
2129                "`ProtoParseHexError::kind`",
2130            )),
2131        }
2132    }
2133}
2134
#[cfg(test)]
mod tests {
    use mz_ore::assert_ok;
    use mz_proto::protobuf_roundtrip;
    use proptest::prelude::*;

    use super::*;

    proptest! {
        // Any `ParseError` must survive a trip through its protobuf encoding.
        #[mz_ore::test]
        #[cfg_attr(miri, ignore)] // too slow
        fn parse_error_protobuf_roundtrip(expect in any::<ParseError>()) {
            let roundtripped = protobuf_roundtrip::<_, ProtoParseError>(&expect);
            assert_ok!(roundtripped);
            assert_eq!(roundtripped.unwrap(), expect);
        }
    }

    proptest! {
        // Any `ParseHexError` must survive a trip through its protobuf encoding.
        #[mz_ore::test]
        #[cfg_attr(miri, ignore)] // too slow
        fn parse_hex_error_protobuf_roundtrip(expect in any::<ParseHexError>()) {
            let roundtripped = protobuf_roundtrip::<_, ProtoParseHexError>(&expect);
            assert_ok!(roundtripped);
            assert_eq!(roundtripped.unwrap(), expect);
        }
    }

    #[mz_ore::test]
    fn test_format_nanos_to_micros() {
        // Pairs of (nanoseconds, expected text written to the buffer).
        const CASES: &[(u32, &str)] = &[
            (0, ""),
            (1, ""),
            (499, ""),
            (500, ".000001"),
            (500_000, ".0005"),
            (5_000_000, ".005"),
            // Leap second. This is possibly wrong and should maybe be reduced
            // (nanosecond % 1_000_000_000), but this case at least pins down
            // what currently happens.
            (1_999_999_999, ".2"),
        ];
        for &(nanos, expect) in CASES {
            let mut buf = String::new();
            format_nanos_to_micros(&mut buf, nanos);
            assert_eq!(&buf, expect);
        }
    }

    #[mz_ore::test]
    fn test_parse_pg_legacy_name() {
        // Short names pass through untouched.
        let short = "hello world";
        assert_eq!(short, parse_pg_legacy_name(short));

        // 63 bytes is kept in full.
        let at_limit = "x".repeat(63);
        assert_eq!(at_limit, parse_pg_legacy_name(&at_limit));

        // 64 bytes is cut back to 63.
        let over_limit = "x".repeat(64);
        assert_eq!("x".repeat(63), parse_pg_legacy_name(&over_limit));

        // The Hebrew character Aleph (א) has a length of 2 bytes, so 61 + 2
        // bytes is kept in full.
        let multibyte_fits = format!("{}{}", "x".repeat(61), "א");
        assert_eq!(multibyte_fits, parse_pg_legacy_name(&multibyte_fits));

        // With 62 + 2 bytes the whole Aleph is dropped; the result keeps only
        // the 62 leading bytes.
        let multibyte_split = format!("{}{}", "x".repeat(62), "א");
        assert_eq!("x".repeat(62), parse_pg_legacy_name(&multibyte_split));
    }
}