Skip to main content

mz_repr/
strconv.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Routines for converting datum values to and from their string
11//! representation.
12//!
13//! The functions in this module are tightly related to the variants of
14//! [`SqlScalarType`](crate::SqlScalarType). Each variant has a pair of functions in
15//! this module named `parse_VARIANT` and `format_VARIANT`. The type returned
16//! by `parse` functions, and the type accepted by `format` functions, will
17//! be a type that is easily converted into the [`Datum`](crate::Datum) variant
18//! for that type. The functions do not directly convert from `Datum`s to
19//! `String`s so that the logic can be reused when `Datum`s are not available or
20//! desired, as in the pgrepr crate.
21//!
22//! The string representations used are exactly the same as the PostgreSQL
23//! string representations for the corresponding PostgreSQL type. Deviations
24//! should be considered a bug.
25
26use std::borrow::Cow;
27use std::collections::BTreeMap;
28use std::error::Error;
29use std::fmt;
30use std::num::FpCategory;
31use std::str::FromStr;
32use std::sync::LazyLock;
33
34use chrono::offset::{Offset, TimeZone};
35use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
36use dec::OrderedDecimal;
37use mz_lowertest::MzReflect;
38use mz_ore::cast::ReinterpretCast;
39use mz_ore::error::ErrorExt;
40use mz_ore::fmt::FormatBuffer;
41use mz_ore::lex::LexBuf;
42use mz_ore::str::StrExt;
43use mz_pgtz::timezone::{Timezone, TimezoneSpec};
44use mz_proto::{ProtoType, RustType, TryFromProtoError};
45use num_traits::Float as NumFloat;
46use proptest_derive::Arbitrary;
47use regex::bytes::Regex;
48use ryu::Float as RyuFloat;
49use serde::{Deserialize, Serialize};
50use uuid::Uuid;
51
52use crate::adt::array::ArrayDimension;
53use crate::adt::date::Date;
54use crate::adt::datetime::{self, DateTimeField, ParsedDateTime};
55use crate::adt::interval::Interval;
56use crate::adt::jsonb::{Jsonb, JsonbRef};
57use crate::adt::mz_acl_item::{AclItem, MzAclItem};
58use crate::adt::numeric::{self, NUMERIC_DATUM_MAX_PRECISION, Numeric};
59use crate::adt::pg_legacy_name::NAME_MAX_BYTES;
60use crate::adt::range::{Range, RangeBound, RangeInner};
61use crate::adt::timestamp::CheckedTimestamp;
62
63include!(concat!(env!("OUT_DIR"), "/mz_repr.strconv.rs"));
64
// Returns early from the enclosing function with `Err(format!(...))`.
//
// All arguments are forwarded verbatim to `format!`. No conversion is applied
// to the resulting `String`, so the enclosing function's error type must be
// `String`.
macro_rules! bail {
    ($($arg:tt)*) => { return Err(format!($($arg)*)) };
}
68
/// Returned by the `format_*` functions in this module to report whether the
/// text they wrote might need escaping.
///
/// `Yes` should be provided for types that will *never* return true for
/// [`ElementEscaper::needs_escaping`].
#[derive(Debug)]
pub enum Nestable {
    /// The formatted text never needs escaping.
    Yes,
    /// The formatted text might need escaping (for example, because it can
    /// contain whitespace).
    MayNeedEscaping,
}
75
76/// Parses a [`bool`] from `s`.
77///
78/// The accepted values are "true", "false", "yes", "no", "on", "off", "1", and
79/// "0", or any unambiguous prefix of one of those values. Leading or trailing
80/// whitespace is permissible.
81pub fn parse_bool(s: &str) -> Result<bool, ParseError> {
82    match s.trim().to_lowercase().as_str() {
83        "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(true),
84        "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => Ok(false),
85        _ => Err(ParseError::invalid_input_syntax("boolean", s)),
86    }
87}
88
/// Returns the textual form of `b` as a string with a static lifetime.
///
/// `true` maps to `"t"` and `false` maps to `"f"`. Prefer this over
/// `format_bool` when a buffer is not needed, since no allocation or buffer
/// write is involved.
pub fn format_bool_static(b: bool) -> &'static str {
    if b { "t" } else { "f" }
}
99
100/// Writes a boolean value into `buf`.
101///
102/// `true` is encoded as the char `'t'` and `false` is encoded as the char
103/// `'f'`.
104pub fn format_bool<F>(buf: &mut F, b: bool) -> Nestable
105where
106    F: FormatBuffer,
107{
108    buf.write_str(format_bool_static(b));
109    Nestable::Yes
110}
111
112/// Parses an [`i16`] from `s`.
113///
114/// Valid values are whatever the [`std::str::FromStr`] implementation on `i16` accepts,
115/// plus leading and trailing whitespace.
116pub fn parse_int16(s: &str) -> Result<i16, ParseError> {
117    s.trim()
118        .parse()
119        .map_err(|e| ParseError::invalid_input_syntax("smallint", s).with_details(e))
120}
121
122/// Writes an [`i16`] to `buf`.
123pub fn format_int16<F>(buf: &mut F, i: i16) -> Nestable
124where
125    F: FormatBuffer,
126{
127    write!(buf, "{}", i);
128    Nestable::Yes
129}
130
131/// Parses an [`i32`] from `s`.
132///
133/// Valid values are whatever the [`std::str::FromStr`] implementation on `i32` accepts,
134/// plus leading and trailing whitespace.
135pub fn parse_int32(s: &str) -> Result<i32, ParseError> {
136    s.trim()
137        .parse()
138        .map_err(|e| ParseError::invalid_input_syntax("integer", s).with_details(e))
139}
140
141/// Writes an [`i32`] to `buf`.
142pub fn format_int32<F>(buf: &mut F, i: i32) -> Nestable
143where
144    F: FormatBuffer,
145{
146    write!(buf, "{}", i);
147    Nestable::Yes
148}
149
150/// Parses an `i64` from `s`.
151pub fn parse_int64(s: &str) -> Result<i64, ParseError> {
152    s.trim()
153        .parse()
154        .map_err(|e| ParseError::invalid_input_syntax("bigint", s).with_details(e))
155}
156
157/// Writes an `i64` to `buf`.
158pub fn format_int64<F>(buf: &mut F, i: i64) -> Nestable
159where
160    F: FormatBuffer,
161{
162    write!(buf, "{}", i);
163    Nestable::Yes
164}
165
166/// Parses an [`u16`] from `s`.
167///
168/// Valid values are whatever the [`std::str::FromStr`] implementation on `u16` accepts,
169/// plus leading and trailing whitespace.
170pub fn parse_uint16(s: &str) -> Result<u16, ParseError> {
171    s.trim()
172        .parse()
173        .map_err(|e| ParseError::invalid_input_syntax("uint2", s).with_details(e))
174}
175
176/// Writes an `u16` to `buf`.
177pub fn format_uint16<F>(buf: &mut F, u: u16) -> Nestable
178where
179    F: FormatBuffer,
180{
181    write!(buf, "{}", u);
182    Nestable::Yes
183}
184
185/// Parses an [`u32`] from `s`.
186///
187/// Valid values are whatever the [`std::str::FromStr`] implementation on `u32` accepts,
188/// plus leading and trailing whitespace.
189pub fn parse_uint32(s: &str) -> Result<u32, ParseError> {
190    s.trim()
191        .parse()
192        .map_err(|e| ParseError::invalid_input_syntax("uint4", s).with_details(e))
193}
194
195/// Writes an `u32` to `buf`.
196pub fn format_uint32<F>(buf: &mut F, u: u32) -> Nestable
197where
198    F: FormatBuffer,
199{
200    write!(buf, "{}", u);
201    Nestable::Yes
202}
203
204/// Parses an `u64` from `s`.
205pub fn parse_uint64(s: &str) -> Result<u64, ParseError> {
206    s.trim()
207        .parse()
208        .map_err(|e| ParseError::invalid_input_syntax("uint8", s).with_details(e))
209}
210
211/// Writes an `u64` to `buf`.
212pub fn format_uint64<F>(buf: &mut F, u: u64) -> Nestable
213where
214    F: FormatBuffer,
215{
216    write!(buf, "{}", u);
217    Nestable::Yes
218}
219
220/// Parses an `mz_timestamp` from `s`.
221pub fn parse_mz_timestamp(s: &str) -> Result<crate::Timestamp, ParseError> {
222    s.trim()
223        .parse()
224        .map_err(|e| ParseError::invalid_input_syntax("mz_timestamp", s).with_details(e))
225}
226
227/// Writes an `mz_timestamp` to `buf`.
228pub fn format_mz_timestamp<F>(buf: &mut F, u: crate::Timestamp) -> Nestable
229where
230    F: FormatBuffer,
231{
232    write!(buf, "{}", u);
233    Nestable::Yes
234}
235
236/// Parses an OID from `s`.
237pub fn parse_oid(s: &str) -> Result<u32, ParseError> {
238    // For historical reasons in PostgreSQL, OIDs are parsed as `i32`s and then
239    // reinterpreted as `u32`s.
240    //
241    // Do not use this as a model for behavior in other contexts. OIDs should
242    // not in general be thought of as freely convertible from `i32`s.
243    let oid: i32 = s
244        .trim()
245        .parse()
246        .map_err(|e| ParseError::invalid_input_syntax("oid", s).with_details(e))?;
247    Ok(u32::reinterpret_cast(oid))
248}
249
250fn parse_float<Fl>(type_name: &'static str, s: &str) -> Result<Fl, ParseError>
251where
252    Fl: NumFloat + FromStr,
253{
254    // Matching PostgreSQL's float parsing behavior is tricky. PostgreSQL's
255    // implementation delegates almost entirely to strtof(3)/strtod(3), which
256    // will report an out-of-range error if a number was rounded to zero or
257    // infinity. For example, parsing "1e70" as a 32-bit float will yield an
258    // out-of-range error because it is rounded to infinity, but parsing an
259    // explicitly-specified "inf" will yield infinity without an error.
260    //
261    // To @benesch's knowledge, there is no Rust implementation of float parsing
262    // that reports whether underflow or overflow occurred. So we figure it out
263    // ourselves after the fact. If parsing the float returns infinity and the input
264    // was not an explicitly-specified infinity, then we know overflow occurred.
265    // If parsing the float returns zero and the input was not an explicitly-specified
266    // zero, then we know underflow occurred.
267
268    // Matches `0`, `-0`, `+0`, `000000.00000`, `0.0e10`, 0., .0, et al.
269    static ZERO_RE: LazyLock<Regex> =
270        LazyLock::new(|| Regex::new(r#"(?i-u)^[-+]?(0+(\.0*)?|\.0+)(e|$)"#).unwrap());
271    // Matches `inf`, `-inf`, `+inf`, `infinity`, et al.
272    static INF_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i-u)^[-+]?inf").unwrap());
273
274    let buf = s.trim();
275    let f: Fl = buf
276        .parse()
277        .map_err(|_| ParseError::invalid_input_syntax(type_name, s))?;
278    match f.classify() {
279        FpCategory::Infinite if !INF_RE.is_match(buf.as_bytes()) => {
280            Err(ParseError::out_of_range(type_name, s))
281        }
282        FpCategory::Zero if !ZERO_RE.is_match(buf.as_bytes()) => {
283            Err(ParseError::out_of_range(type_name, s))
284        }
285        _ => Ok(f),
286    }
287}
288
289fn format_float<F, Fl>(buf: &mut F, f: Fl) -> Nestable
290where
291    F: FormatBuffer,
292    Fl: NumFloat + RyuFloat,
293{
294    // Use ryu rather than the standard library. ryu uses scientific notation
295    // when possible, which better matches PostgreSQL. The standard library's
296    // `ToString` implementations print all available digits, which is rather
297    // verbose.
298    //
299    // Note that we have to fix up ryu's formatting in a few cases to match
300    // PostgreSQL. PostgreSQL spells out "Infinity" in full, never emits a
301    // trailing ".0", formats positive exponents as e.g. "1e+10" rather than
302    // "1e10", and emits a negative sign for negative zero. If we need to speed
303    // up float formatting, we can look into forking ryu and making these edits
304    // directly, but for now it doesn't seem worth it.
305
306    match f.classify() {
307        FpCategory::Infinite if f.is_sign_negative() => buf.write_str("-Infinity"),
308        FpCategory::Infinite => buf.write_str("Infinity"),
309        FpCategory::Nan => buf.write_str("NaN"),
310        FpCategory::Zero if f.is_sign_negative() => buf.write_str("-0"),
311        _ => {
312            debug_assert!(f.is_finite());
313            let mut ryu_buf = ryu::Buffer::new();
314            let mut s = ryu_buf.format_finite(f);
315            if let Some(trimmed) = s.strip_suffix(".0") {
316                s = trimmed;
317            }
318            let mut chars = s.chars().peekable();
319            while let Some(ch) = chars.next() {
320                buf.write_char(ch);
321                if ch == 'e' && chars.peek() != Some(&'-') {
322                    buf.write_char('+');
323                }
324            }
325        }
326    }
327
328    Nestable::Yes
329}
330
331/// Parses an `f32` from `s`.
332pub fn parse_float32(s: &str) -> Result<f32, ParseError> {
333    parse_float("real", s)
334}
335
336/// Writes an `f32` to `buf`.
337pub fn format_float32<F>(buf: &mut F, f: f32) -> Nestable
338where
339    F: FormatBuffer,
340{
341    format_float(buf, f)
342}
343
344/// Parses an `f64` from `s`.
345pub fn parse_float64(s: &str) -> Result<f64, ParseError> {
346    parse_float("double precision", s)
347}
348
349/// Writes an `f64` to `buf`.
350pub fn format_float64<F>(buf: &mut F, f: f64) -> Nestable
351where
352    F: FormatBuffer,
353{
354    format_float(buf, f)
355}
356
357/// Use the following grammar to parse `s` into:
358///
359/// - `NaiveDate`
360/// - `NaiveTime`
361/// - Timezone string
362///
363/// `NaiveDate` and `NaiveTime` are appropriate to compute a `NaiveDateTime`,
364/// which can be used in conjunction with a timezone string to generate a
365/// `DateTime<Utc>`.
366///
367/// ```text
368/// <unquoted timestamp string> ::=
369///     <date value> <space> <time value> [ <time zone interval> ]
370/// <date value> ::=
371///     <years value> <minus sign> <months value> <minus sign> <days value>
372/// <time zone interval> ::=
373///     <sign> <hours value> <colon> <minutes value>
374/// ```
375fn parse_timestamp_string(s: &str) -> Result<(NaiveDate, NaiveTime, Timezone), String> {
376    if s.is_empty() {
377        return Err("timestamp string is empty".into());
378    }
379
380    // PostgreSQL special date-time inputs
381    // https://www.postgresql.org/docs/12/datatype-datetime.html#id-1.5.7.13.18.8
382    // We should add support for other values here, e.g. infinity
383    // which @quodlibetor is willing to add to the chrono package.
384    if s == "epoch" {
385        return Ok((
386            NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
387            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
388            Default::default(),
389        ));
390    }
391
392    let (ts_string, tz_string, era) = datetime::split_timestamp_string(s);
393
394    let pdt = ParsedDateTime::build_parsed_datetime_timestamp(ts_string, era)?;
395    let d: NaiveDate = pdt.compute_date()?;
396    let t: NaiveTime = pdt.compute_time()?;
397
398    let offset = if tz_string.is_empty() {
399        Default::default()
400    } else {
401        Timezone::parse(tz_string, TimezoneSpec::Iso)?
402    };
403
404    Ok((d, t, offset))
405}
406
407/// Parses a [`Date`] from `s`.
408pub fn parse_date(s: &str) -> Result<Date, ParseError> {
409    match parse_timestamp_string(s) {
410        Ok((date, _, _)) => Date::try_from(date).map_err(|_| ParseError::out_of_range("date", s)),
411        Err(e) => Err(ParseError::invalid_input_syntax("date", s).with_details(e)),
412    }
413}
414
415/// Writes a [`Date`] to `buf`.
416pub fn format_date<F>(buf: &mut F, d: Date) -> Nestable
417where
418    F: FormatBuffer,
419{
420    let d: NaiveDate = d.into();
421    let (year_ad, year) = d.year_ce();
422    write!(buf, "{:04}-{}", year, d.format("%m-%d"));
423    if !year_ad {
424        write!(buf, " BC");
425    }
426    Nestable::Yes
427}
428
429/// Parses a `NaiveTime` from `s`, using the following grammar.
430///
431/// ```text
432/// <time value> ::=
433///     <hours value> <colon> <minutes value> <colon> <seconds integer value>
434///     [ <period> [ <seconds fraction> ] ]
435/// ```
436pub fn parse_time(s: &str) -> Result<NaiveTime, ParseError> {
437    ParsedDateTime::build_parsed_datetime_time(s)
438        .and_then(|pdt| pdt.compute_time())
439        .map_err(|e| ParseError::invalid_input_syntax("time", s).with_details(e))
440}
441
442/// Writes a [`NaiveDateTime`] timestamp to `buf`.
443pub fn format_time<F>(buf: &mut F, t: NaiveTime) -> Nestable
444where
445    F: FormatBuffer,
446{
447    write!(buf, "{}", t.format("%H:%M:%S"));
448    format_nanos_to_micros(buf, t.nanosecond());
449    Nestable::Yes
450}
451
452/// Parses a `NaiveDateTime` from `s`.
453pub fn parse_timestamp(s: &str) -> Result<CheckedTimestamp<NaiveDateTime>, ParseError> {
454    match parse_timestamp_string(s) {
455        Ok((date, time, _)) => CheckedTimestamp::from_timestamplike(date.and_time(time))
456            .map_err(|_| ParseError::out_of_range("timestamp", s)),
457        Err(e) => Err(ParseError::invalid_input_syntax("timestamp", s).with_details(e)),
458    }
459}
460
461/// Writes a [`NaiveDateTime`] timestamp to `buf`.
462pub fn format_timestamp<F>(buf: &mut F, ts: &NaiveDateTime) -> Nestable
463where
464    F: FormatBuffer,
465{
466    let (year_ad, year) = ts.year_ce();
467    write!(buf, "{:04}-{}", year, ts.format("%m-%d %H:%M:%S"));
468    format_nanos_to_micros(buf, ts.and_utc().timestamp_subsec_nanos());
469    if !year_ad {
470        write!(buf, " BC");
471    }
472    // This always needs escaping because of the whitespace
473    Nestable::MayNeedEscaping
474}
475
476/// Parses a `DateTime<Utc>` from `s`. See `mz_expr::scalar::func::timezone_timestamp` for timezone anomaly considerations.
477pub fn parse_timestamptz(s: &str) -> Result<CheckedTimestamp<DateTime<Utc>>, ParseError> {
478    parse_timestamp_string(s)
479        .and_then(|(date, time, timezone)| {
480            use Timezone::*;
481            let mut dt = date.and_time(time);
482            let offset = match timezone {
483                FixedOffset(offset) => offset,
484                Tz(tz) => match tz.offset_from_local_datetime(&dt).latest() {
485                    Some(offset) => offset.fix(),
486                    None => {
487                        dt += Duration::try_hours(1).unwrap();
488                        tz.offset_from_local_datetime(&dt)
489                            .latest()
490                            .ok_or_else(|| "invalid timezone conversion".to_owned())?
491                            .fix()
492                    }
493                },
494            };
495            Ok(DateTime::from_naive_utc_and_offset(dt - offset, Utc))
496        })
497        .map_err(|e| {
498            ParseError::invalid_input_syntax("timestamp with time zone", s).with_details(e)
499        })
500        .and_then(|ts| {
501            CheckedTimestamp::from_timestamplike(ts)
502                .map_err(|_| ParseError::out_of_range("timestamp with time zone", s))
503        })
504}
505
506/// Writes a [`DateTime<Utc>`] timestamp to `buf`.
507pub fn format_timestamptz<F>(buf: &mut F, ts: &DateTime<Utc>) -> Nestable
508where
509    F: FormatBuffer,
510{
511    let (year_ad, year) = ts.year_ce();
512    write!(buf, "{:04}-{}", year, ts.format("%m-%d %H:%M:%S"));
513    format_nanos_to_micros(buf, ts.timestamp_subsec_nanos());
514    write!(buf, "+00");
515    if !year_ad {
516        write!(buf, " BC");
517    }
518    // This always needs escaping because of the whitespace
519    Nestable::MayNeedEscaping
520}
521
522/// parse
523///
524/// ```text
525/// <unquoted interval string> ::=
526///   [ <sign> ] { <year-month literal> | <day-time literal> }
527/// <year-month literal> ::=
528///     <years value> [ <minus sign> <months value> ]
529///   | <months value>
530/// <day-time literal> ::=
531///     <day-time interval>
532///   | <time interval>
533/// <day-time interval> ::=
534///   <days value> [ <space> <hours value> [ <colon> <minutes value>
535///       [ <colon> <seconds value> ] ] ]
536/// <time interval> ::=
537///     <hours value> [ <colon> <minutes value> [ <colon> <seconds value> ] ]
538///   | <minutes value> [ <colon> <seconds value> ]
539///   | <seconds value>
540/// ```
541pub fn parse_interval(s: &str) -> Result<Interval, ParseError> {
542    parse_interval_w_disambiguator(s, None, DateTimeField::Second)
543}
544
545/// Parse an interval string, using an optional leading precision for time (H:M:S)
546/// and a specific mz_sql_parser::ast::DateTimeField to identify ambiguous elements.
547/// For more information about this operation, see the documentation on
548/// ParsedDateTime::build_parsed_datetime_interval.
549pub fn parse_interval_w_disambiguator(
550    s: &str,
551    leading_time_precision: Option<DateTimeField>,
552    d: DateTimeField,
553) -> Result<Interval, ParseError> {
554    ParsedDateTime::build_parsed_datetime_interval(s, leading_time_precision, d)
555        .and_then(|pdt| pdt.compute_interval())
556        .map_err(|e| ParseError::invalid_input_syntax("interval", s).with_details(e))
557}
558
559pub fn format_interval<F>(buf: &mut F, iv: Interval) -> Nestable
560where
561    F: FormatBuffer,
562{
563    write!(buf, "{}", iv);
564    Nestable::MayNeedEscaping
565}
566
567pub fn parse_numeric(s: &str) -> Result<OrderedDecimal<Numeric>, ParseError> {
568    let mut cx = numeric::cx_datum();
569    let mut n = match cx.parse(s.trim()) {
570        Ok(n) => n,
571        Err(..) => {
572            return Err(ParseError::invalid_input_syntax("numeric", s));
573        }
574    };
575
576    let cx_status = cx.status();
577
578    // Check for values that can only be generated by invalid syntax.
579    if (n.is_infinite() && !cx_status.overflow())
580        || (n.is_nan() && n.is_negative())
581        || n.is_signaling_nan()
582    {
583        return Err(ParseError::invalid_input_syntax("numeric", s));
584    }
585
586    // Process value; only errors if value is out of range of numeric's max precision.
587    let out_of_range = numeric::munge_numeric(&mut n).is_err();
588
589    if cx_status.overflow() || cx_status.subnormal() || out_of_range {
590        Err(ParseError::out_of_range("numeric", s).with_details(format!(
591            "exceeds maximum precision {}",
592            NUMERIC_DATUM_MAX_PRECISION
593        )))
594    } else {
595        Ok(OrderedDecimal(n))
596    }
597}
598
599pub fn format_numeric<F>(buf: &mut F, n: &OrderedDecimal<Numeric>) -> Nestable
600where
601    F: FormatBuffer,
602{
603    write!(buf, "{}", n.0.to_standard_notation_string());
604    Nestable::Yes
605}
606
607pub fn format_string<F>(buf: &mut F, s: &str) -> Nestable
608where
609    F: FormatBuffer,
610{
611    buf.write_str(s);
612    Nestable::MayNeedEscaping
613}
614
615pub fn parse_pg_legacy_name(s: &str) -> String {
616    // To match PostgreSQL, we truncate the string to 64 bytes, while being
617    // careful not to truncate in the middle of any multibyte characters.
618    let mut out = String::new();
619    let mut len = 0;
620    for c in s.chars() {
621        len += c.len_utf8();
622        if len > NAME_MAX_BYTES {
623            break;
624        }
625        out.push(c);
626    }
627    out
628}
629
630pub fn parse_bytes(s: &str) -> Result<Vec<u8>, ParseError> {
631    // If the input starts with "\x", then the remaining bytes are hex encoded
632    // [0]. Otherwise the bytes use the traditional "escape" format. [1]
633    //
634    // [0]: https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.9
635    // [1]: https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10
636    if let Some(remainder) = s.strip_prefix(r"\x") {
637        parse_bytes_hex(remainder).map_err(|e| {
638            ParseError::invalid_input_syntax("bytea", s).with_details(e.to_string_with_causes())
639        })
640    } else {
641        parse_bytes_traditional(s)
642    }
643}
644
645pub fn parse_bytes_hex(s: &str) -> Result<Vec<u8>, ParseHexError> {
646    // Can't use `hex::decode` here, as it doesn't tolerate whitespace
647    // between encoded bytes.
648
649    let decode_nibble = |b| match b {
650        b'a'..=b'f' => Ok(b - b'a' + 10),
651        b'A'..=b'F' => Ok(b - b'A' + 10),
652        b'0'..=b'9' => Ok(b - b'0'),
653        _ => Err(ParseHexError::InvalidHexDigit(char::from(b))),
654    };
655
656    let mut buf = vec![];
657    let mut nibbles = s.as_bytes().iter().copied();
658    while let Some(n) = nibbles.next() {
659        if let b' ' | b'\n' | b'\t' | b'\r' = n {
660            continue;
661        }
662        let n = decode_nibble(n)?;
663        let n2 = match nibbles.next() {
664            None => return Err(ParseHexError::OddLength),
665            Some(n2) => decode_nibble(n2)?,
666        };
667        buf.push((n << 4) | n2);
668    }
669    Ok(buf)
670}
671
672pub fn parse_bytes_traditional(s: &str) -> Result<Vec<u8>, ParseError> {
673    // Bytes are interpreted literally, save for the special escape sequences
674    // "\\", which represents a single backslash, and "\NNN", where each N
675    // is an octal digit, which represents the byte whose octal value is NNN.
676    let mut out = Vec::with_capacity(s.len());
677    let mut bytes = s.as_bytes().iter().fuse();
678    while let Some(&b) = bytes.next() {
679        if b != b'\\' {
680            out.push(b);
681            continue;
682        }
683        match bytes.next() {
684            None => {
685                return Err(ParseError::invalid_input_syntax("bytea", s)
686                    .with_details("ends with escape character"));
687            }
688            Some(b'\\') => out.push(b'\\'),
689            b => match (b, bytes.next(), bytes.next()) {
690                (Some(d2 @ b'0'..=b'3'), Some(d1 @ b'0'..=b'7'), Some(d0 @ b'0'..=b'7')) => {
691                    out.push(((d2 - b'0') << 6) + ((d1 - b'0') << 3) + (d0 - b'0'));
692                }
693                _ => {
694                    return Err(ParseError::invalid_input_syntax("bytea", s)
695                        .with_details("invalid escape sequence"));
696                }
697            },
698        }
699    }
700    Ok(out)
701}
702
703pub fn format_bytes<F>(buf: &mut F, bytes: &[u8]) -> Nestable
704where
705    F: FormatBuffer,
706{
707    write!(buf, "\\x{}", hex::encode(bytes));
708    Nestable::MayNeedEscaping
709}
710
711pub fn parse_jsonb(s: &str) -> Result<Jsonb, ParseError> {
712    s.trim()
713        .parse()
714        .map_err(|e| ParseError::invalid_input_syntax("jsonb", s).with_details(e))
715}
716
717pub fn format_jsonb<F>(buf: &mut F, jsonb: JsonbRef) -> Nestable
718where
719    F: FormatBuffer,
720{
721    write!(buf, "{}", jsonb);
722    Nestable::MayNeedEscaping
723}
724
725pub fn format_jsonb_pretty<F>(buf: &mut F, jsonb: JsonbRef)
726where
727    F: FormatBuffer,
728{
729    write!(buf, "{:#}", jsonb)
730}
731
732pub fn parse_uuid(s: &str) -> Result<Uuid, ParseError> {
733    s.trim()
734        .parse()
735        .map_err(|e| ParseError::invalid_input_syntax("uuid", s).with_details(e))
736}
737
738pub fn format_uuid<F>(buf: &mut F, uuid: Uuid) -> Nestable
739where
740    F: FormatBuffer,
741{
742    write!(buf, "{}", uuid);
743    Nestable::Yes
744}
745
746fn format_nanos_to_micros<F>(buf: &mut F, nanos: u32)
747where
748    F: FormatBuffer,
749{
750    if nanos >= 500 {
751        let mut micros = nanos / 1000;
752        let rem = nanos % 1000;
753        if rem >= 500 {
754            micros += 1;
755        }
756        // strip trailing zeros
757        let mut width = 6;
758        while micros % 10 == 0 {
759            width -= 1;
760            micros /= 10;
761        }
762        write!(buf, ".{:0width$}", micros, width = width);
763    }
764}
765
/// The ways in which parsing the text representation of an array can fail.
///
/// Each variant's `Display` text is used as the details of the `ParseError`
/// returned by `parse_array`.
#[derive(Debug, thiserror::Error)]
enum ArrayParsingError {
    /// The input, after optional leading whitespace, did not begin with `{`.
    #[error("Array value must start with \"{{\"")]
    OpeningBraceMissing,
    /// The input began with `[`, i.e. explicit array dimensions, which are not
    /// supported.
    #[error("Specifying array lower bounds is not supported")]
    DimsUnsupported,
    /// A free-form error message; produced by the `From<String>` conversion.
    #[error("{0}")]
    Generic(String),
    /// A structural character appeared where it is not permitted.
    #[error("Unexpected \"{0}\" character.")]
    UnexpectedChar(char),
    /// Sub-arrays of a multidimensional array had mismatched dimensions.
    #[error("Multidimensional arrays must have sub-arrays with matching dimensions.")]
    NonRectilinearDims,
    /// An element appeared where none was expected.
    #[error("Unexpected array element.")]
    UnexpectedElement,
    /// Non-whitespace input followed the array's final closing brace.
    #[error("Junk after closing right brace.")]
    Junk,
    /// The input ended before the array was closed.
    #[error("Unexpected end of input.")]
    EarlyTerm,
}
785
786impl From<String> for ArrayParsingError {
787    fn from(value: String) -> Self {
788        ArrayParsingError::Generic(value)
789    }
790}
791
792pub fn parse_array<'a, T, E>(
793    s: &'a str,
794    make_null: impl FnMut() -> T,
795    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
796) -> Result<(Vec<T>, Vec<ArrayDimension>), ParseError>
797where
798    E: ToString,
799{
800    parse_array_inner(s, make_null, gen_elem)
801        .map_err(|details| ParseError::invalid_input_syntax("array", s).with_details(details))
802}
803
804fn parse_array_inner<'a, T, E>(
805    s: &'a str,
806    mut make_null: impl FnMut() -> T,
807    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
808) -> Result<(Vec<T>, Vec<ArrayDimension>), ArrayParsingError>
809where
810    E: ToString,
811{
812    use ArrayParsingError::*;
813
814    #[derive(Clone, Debug, Default)]
815    struct Dimension {
816        // If None, still discovering this dimension's permitted width;
817        // otherwise only permits `length` elements per dimension.
818        length: Option<usize>,
819        // Whether this dimension has a staged element that can be committed.
820        // This prevents us from accepting "empty" elements, e.g. `{1,}` or
821        // `{1,,2}`.
822        staged_element: bool,
823        // The total number of elements committed in this dimension since it was
824        // last entered. Zeroed out when exited.
825        committed_element_count: usize,
826    }
827
828    #[derive(Clone, Debug, Default)]
829    struct ArrayBuilder<'a> {
830        // The current character we're operating from.
831        current_command_char: char,
832        // The dimension information, which will get turned into
833        // `ArrayDimensions`.
834        dimensions: Vec<Dimension>,
835        // THe current dimension we're operating on.
836        current_dim: usize,
837        // Whether or not this array may be modified any further.
838        sealed: bool,
839        // The elements extracted from the input str. This is on the array
840        // builder to necessitate using `insert_element` so we understand when
841        // elements are staged.
842        elements: Vec<Option<Cow<'a, str>>>,
843    }
844
845    impl<'a> ArrayBuilder<'a> {
846        fn build(
847            s: &'a str,
848        ) -> Result<(Vec<Option<Cow<'a, str>>>, Vec<ArrayDimension>), ArrayParsingError> {
849            let buf = &mut LexBuf::new(s);
850
851            // TODO: support parsing array dimensions
852            if buf.consume('[') {
853                Err(DimsUnsupported)?;
854            }
855
856            buf.take_while(|ch| ch.is_ascii_whitespace());
857
858            if !buf.consume('{') {
859                Err(OpeningBraceMissing)?;
860            }
861
862            let mut dimensions = 1;
863
864            loop {
865                buf.take_while(|ch| ch.is_ascii_whitespace());
866                if buf.consume('{') {
867                    dimensions += 1;
868                } else {
869                    break;
870                }
871            }
872
873            let mut builder = ArrayBuilder {
874                current_command_char: '{',
875                dimensions: vec![Dimension::default(); dimensions],
876                // We enter the builder at the element-bearing dimension, which is the last
877                // dimension.
878                current_dim: dimensions - 1,
879                sealed: false,
880                elements: vec![],
881            };
882
883            let is_special_char = |c| matches!(c, '{' | '}' | ',' | '\\' | '"');
884            let is_end_of_literal = |c| matches!(c, ',' | '}');
885
886            loop {
887                buf.take_while(|ch| ch.is_ascii_whitespace());
888
889                // Filter command state from terminal states.
890                match buf.next() {
891                    None if builder.sealed => {
892                        break;
893                    }
894                    None => Err(EarlyTerm)?,
895                    Some(_) if builder.sealed => Err(Junk)?,
896                    Some(c) => builder.current_command_char = c,
897                }
898
899                // Run command char
900                match builder.current_command_char {
901                    '{' => builder.enter_dim()?,
902                    '}' => builder.exit_dim()?,
903                    ',' => builder.commit_element(true)?,
904                    c => {
905                        buf.prev();
906                        let s = match c {
907                            '"' => Some(lex_quoted_element(buf)?),
908                            _ => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
909                        };
910                        builder.insert_element(s)?;
911                    }
912                }
913            }
914
915            if builder.elements.is_empty() {
916                // Per PG, empty arrays are represented by empty dimensions
917                // rather than one dimension with 0 length.
918                return Ok((vec![], vec![]));
919            }
920
921            let dims = builder
922                .dimensions
923                .into_iter()
924                .map(|dim| ArrayDimension {
925                    length: dim
926                        .length
927                        .expect("every dimension must have its length discovered"),
928                    lower_bound: 1,
929                })
930                .collect();
931
932            Ok((builder.elements, dims))
933        }
934
935        /// Descend into another dimension of the array.
936        fn enter_dim(&mut self) -> Result<(), ArrayParsingError> {
937            let d = &mut self.dimensions[self.current_dim];
938            // Cannot enter a new dimension with an uncommitted element.
939            if d.staged_element {
940                return Err(UnexpectedChar(self.current_command_char));
941            }
942
943            self.current_dim += 1;
944
945            // You have exceeded the maximum dimensions.
946            if self.current_dim >= self.dimensions.len() {
947                return Err(NonRectilinearDims);
948            }
949
950            Ok(())
951        }
952
953        /// Insert a new element into the array, ensuring it is in the proper dimension.
954        fn insert_element(&mut self, s: Option<Cow<'a, str>>) -> Result<(), ArrayParsingError> {
955            // Can only insert elements into data-bearing dimension, which is
956            // the last one.
957            if self.current_dim != self.dimensions.len() - 1 {
958                return Err(UnexpectedElement);
959            }
960
961            self.stage_element()?;
962
963            self.elements.push(s);
964
965            Ok(())
966        }
967
968        /// Stage an element to be committed. Only one element can be staged at
969        /// a time and staged elements must be committed before moving onto the
970        /// next element or leaving the dimension.
971        fn stage_element(&mut self) -> Result<(), ArrayParsingError> {
972            let d = &mut self.dimensions[self.current_dim];
973            // Cannot stage two elements at once, i.e. previous element wasn't
974            // followed by committing token (`,` or `}`).
975            if d.staged_element {
976                return Err(UnexpectedElement);
977            }
978            d.staged_element = true;
979            Ok(())
980        }
981
982        /// Commit the currently staged element, which can be made optional.
983        /// This ensures that each element has an appropriate terminal character
984        /// after it.
985        fn commit_element(&mut self, require_staged: bool) -> Result<(), ArrayParsingError> {
986            let d = &mut self.dimensions[self.current_dim];
987            if !d.staged_element {
988                // - , requires a preceding staged element
989                // - } does not require a preceding staged element only when
990                //   it's the close of an empty dimension.
991                return if require_staged || d.committed_element_count > 0 {
992                    Err(UnexpectedChar(self.current_command_char))
993                } else {
994                    // This indicates that we have an empty value in this
995                    // dimension and want to exit before incrementing the
996                    // committed element count.
997                    Ok(())
998                };
999            }
1000            d.staged_element = false;
1001            d.committed_element_count += 1;
1002
1003            Ok(())
1004        }
1005
1006        /// Exit the current dimension, committing any currently staged element
1007        /// in this dimension, and marking the interior array that this is part
1008        /// of as staged itself. If this is the 0th dimension, i.e. the closed
1009        /// brace matching the first open brace, seal the builder from further
1010        /// modification.
1011        fn exit_dim(&mut self) -> Result<(), ArrayParsingError> {
1012            // Commit an element of this dimension
1013            self.commit_element(false)?;
1014
1015            let d = &mut self.dimensions[self.current_dim];
1016
1017            // Ensure that the elements in this dimension conform to the expected shape.
1018            match d.length {
1019                None => d.length = Some(d.committed_element_count),
1020                Some(l) => {
1021                    if l != d.committed_element_count {
1022                        return Err(NonRectilinearDims);
1023                    }
1024                }
1025            }
1026
1027            // Reset this dimension's counter in case it's re-entered.
1028            d.committed_element_count = 0;
1029
1030            // If we closed the last dimension, this array may not be modified
1031            // any longer.
1032            if self.current_dim == 0 {
1033                self.sealed = true;
1034            } else {
1035                self.current_dim -= 1;
1036                // This object is an element of a higher dimension.
1037                self.stage_element()?;
1038            }
1039
1040            Ok(())
1041        }
1042    }
1043
1044    let (raw_elems, dims) = ArrayBuilder::build(s)?;
1045
1046    let mut elems = Vec::with_capacity(raw_elems.len());
1047
1048    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1049
1050    for elem in raw_elems.into_iter() {
1051        elems.push(match elem {
1052            Some(elem) => generated(elem)?,
1053            None => make_null(),
1054        });
1055    }
1056
1057    Ok((elems, dims))
1058}
1059
1060pub fn parse_list<'a, T, E>(
1061    s: &'a str,
1062    is_element_type_list: bool,
1063    make_null: impl FnMut() -> T,
1064    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1065) -> Result<Vec<T>, ParseError>
1066where
1067    E: ToString,
1068{
1069    parse_list_inner(s, is_element_type_list, make_null, gen_elem)
1070        .map_err(|details| ParseError::invalid_input_syntax("list", s).with_details(details))
1071}
1072
1073// `parse_list_inner`'s separation from `parse_list` simplifies error handling
1074// by allowing subprocedures to return `String` errors.
1075fn parse_list_inner<'a, T, E>(
1076    s: &'a str,
1077    is_element_type_list: bool,
1078    mut make_null: impl FnMut() -> T,
1079    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1080) -> Result<Vec<T>, String>
1081where
1082    E: ToString,
1083{
1084    let mut elems = vec![];
1085    let buf = &mut LexBuf::new(s);
1086
1087    // Consume opening paren.
1088    if !buf.consume('{') {
1089        bail!(
1090            "expected '{{', found {}",
1091            match buf.next() {
1092                Some(c) => format!("{}", c),
1093                None => "empty string".to_string(),
1094            }
1095        )
1096    }
1097
1098    // Simplifies calls to `gen_elem` by handling errors
1099    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1100    let is_special_char = |c| matches!(c, '{' | '}' | ',' | '\\' | '"');
1101    let is_end_of_literal = |c| matches!(c, ',' | '}');
1102
1103    // Consume elements.
1104    loop {
1105        buf.take_while(|ch| ch.is_ascii_whitespace());
1106        // Check for terminals.
1107        match buf.next() {
1108            Some('}') => {
1109                break;
1110            }
1111            _ if elems.len() == 0 => {
1112                buf.prev();
1113            }
1114            Some(',') => {}
1115            Some(c) => bail!("expected ',' or '}}', got '{}'", c),
1116            None => bail!("unexpected end of input"),
1117        }
1118
1119        buf.take_while(|ch| ch.is_ascii_whitespace());
1120        // Get elements.
1121        let elem = match buf.peek() {
1122            Some('"') => generated(lex_quoted_element(buf)?)?,
1123            Some('{') => {
1124                if !is_element_type_list {
1125                    bail!(
1126                        "unescaped '{{' at beginning of element; perhaps you \
1127                        want a nested list, e.g. '{{a}}'::text list list"
1128                    )
1129                }
1130                generated(lex_embedded_element(buf)?)?
1131            }
1132            Some(_) => match lex_unquoted_element(buf, is_special_char, is_end_of_literal)? {
1133                Some(elem) => generated(elem)?,
1134                None => make_null(),
1135            },
1136            None => bail!("unexpected end of input"),
1137        };
1138        elems.push(elem);
1139    }
1140
1141    buf.take_while(|ch| ch.is_ascii_whitespace());
1142    if let Some(c) = buf.next() {
1143        bail!(
1144            "malformed array literal; contains '{}' after terminal '}}'",
1145            c
1146        )
1147    }
1148
1149    Ok(elems)
1150}
1151
1152pub fn parse_legacy_vector<'a, T, E>(
1153    s: &'a str,
1154    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1155) -> Result<Vec<T>, ParseError>
1156where
1157    E: ToString,
1158{
1159    parse_legacy_vector_inner(s, gen_elem)
1160        .map_err(|details| ParseError::invalid_input_syntax("int2vector", s).with_details(details))
1161}
1162
1163/// Parses PostgreSQL's legacy whitespace-separated vector syntax (used in
1164/// Materialize for `int2vector`). Unlike [`parse_array`], this grammar has
1165/// no token for `NULL`, which is why `int2vector` cannot represent `NULL`
1166/// elements. See [`crate::scalar::Int2Vector`].
1167pub fn parse_legacy_vector_inner<'a, T, E>(
1168    s: &'a str,
1169    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1170) -> Result<Vec<T>, String>
1171where
1172    E: ToString,
1173{
1174    let mut elems = vec![];
1175    let buf = &mut LexBuf::new(s);
1176
1177    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1178
1179    loop {
1180        buf.take_while(|ch| ch.is_ascii_whitespace());
1181        match buf.peek() {
1182            Some(_) => {
1183                let elem = buf.take_while(|ch| !ch.is_ascii_whitespace());
1184                elems.push(generated(elem.into())?);
1185            }
1186            None => break,
1187        }
1188    }
1189
1190    Ok(elems)
1191}
1192
1193fn lex_quoted_element<'a>(buf: &mut LexBuf<'a>) -> Result<Cow<'a, str>, String> {
1194    assert!(buf.consume('"'));
1195    let s = buf.take_while(|ch| !matches!(ch, '"' | '\\'));
1196
1197    // `Cow::Borrowed` optimization for quoted strings without escapes
1198    if let Some('"') = buf.peek() {
1199        buf.next();
1200        return Ok(s.into());
1201    }
1202
1203    let mut s = s.to_string();
1204    loop {
1205        match buf.next() {
1206            Some('\\') => match buf.next() {
1207                Some(c) => s.push(c),
1208                None => bail!("unterminated quoted string"),
1209            },
1210            Some('"') => break,
1211            Some(c) => s.push(c),
1212            None => bail!("unterminated quoted string"),
1213        }
1214    }
1215    Ok(s.into())
1216}
1217
1218fn lex_embedded_element<'a>(buf: &mut LexBuf<'a>) -> Result<Cow<'a, str>, String> {
1219    let pos = buf.pos();
1220    assert!(matches!(buf.next(), Some('{')));
1221    let mut depth = 1;
1222    let mut in_escape = false;
1223    while depth > 0 {
1224        match buf.next() {
1225            Some('\\') => {
1226                buf.next(); // Next character is escaped, so ignore it
1227            }
1228            Some('"') => in_escape = !in_escape, // Begin or end escape
1229            Some('{') if !in_escape => depth += 1,
1230            Some('}') if !in_escape => depth -= 1,
1231            Some(_) => (),
1232            None => bail!("unterminated embedded element"),
1233        }
1234    }
1235    let s = &buf.inner()[pos..buf.pos()];
1236    Ok(Cow::Borrowed(s))
1237}
1238
1239// Result of `None` indicates element is NULL.
1240fn lex_unquoted_element<'a>(
1241    buf: &mut LexBuf<'a>,
1242    is_special_char: impl Fn(char) -> bool,
1243    is_end_of_literal: impl Fn(char) -> bool,
1244) -> Result<Option<Cow<'a, str>>, String> {
1245    // first char is guaranteed to be non-whitespace
1246    assert!(!buf.peek().unwrap().is_ascii_whitespace());
1247
1248    let s = buf.take_while(|ch| !is_special_char(ch) && !ch.is_ascii_whitespace());
1249
1250    // `Cow::Borrowed` optimization for elements without special characters.
1251    match buf.peek() {
1252        Some(',') | Some('}') if !s.is_empty() => {
1253            return Ok(if s.to_uppercase() == "NULL" {
1254                None
1255            } else {
1256                Some(s.into())
1257            });
1258        }
1259        _ => {}
1260    }
1261
1262    // Track whether there are any escaped characters to determine if the string
1263    // "NULL" should be treated as a NULL, or if it had any escaped characters
1264    // and should be treated as the string "NULL".
1265    let mut escaped_char = false;
1266
1267    let mut s = s.to_string();
1268    // As we go, we keep track of where to truncate to in order to remove any
1269    // trailing whitespace.
1270    let mut trimmed_len = s.len();
1271    loop {
1272        match buf.next() {
1273            Some('\\') => match buf.next() {
1274                Some(c) => {
1275                    escaped_char = true;
1276                    s.push(c);
1277                    trimmed_len = s.len();
1278                }
1279                None => return Err("unterminated element".into()),
1280            },
1281            Some(c) if is_end_of_literal(c) => {
1282                // End of literal characters as the first character indicates
1283                // a missing element definition.
1284                if s.is_empty() {
1285                    bail!("malformed literal; missing element")
1286                }
1287                buf.prev();
1288                break;
1289            }
1290            Some(c) if is_special_char(c) => {
1291                bail!("malformed literal; must escape special character '{}'", c)
1292            }
1293            Some(c) => {
1294                s.push(c);
1295                if !c.is_ascii_whitespace() {
1296                    trimmed_len = s.len();
1297                }
1298            }
1299            None => bail!("unterminated element"),
1300        }
1301    }
1302    s.truncate(trimmed_len);
1303    Ok(if s.to_uppercase() == "NULL" && !escaped_char {
1304        None
1305    } else {
1306        Some(Cow::Owned(s))
1307    })
1308}
1309
1310pub fn parse_map<'a, V, E>(
1311    s: &'a str,
1312    is_value_type_map: bool,
1313    gen_elem: impl FnMut(Option<Cow<'a, str>>) -> Result<V, E>,
1314) -> Result<BTreeMap<String, V>, ParseError>
1315where
1316    E: ToString,
1317{
1318    parse_map_inner(s, is_value_type_map, gen_elem)
1319        .map_err(|details| ParseError::invalid_input_syntax("map", s).with_details(details))
1320}
1321
1322fn parse_map_inner<'a, V, E>(
1323    s: &'a str,
1324    is_value_type_map: bool,
1325    mut gen_elem: impl FnMut(Option<Cow<'a, str>>) -> Result<V, E>,
1326) -> Result<BTreeMap<String, V>, String>
1327where
1328    E: ToString,
1329{
1330    let mut map = BTreeMap::new();
1331    let buf = &mut LexBuf::new(s);
1332
1333    // Consume opening paren.
1334    if !buf.consume('{') {
1335        bail!(
1336            "expected '{{', found {}",
1337            match buf.next() {
1338                Some(c) => format!("{}", c),
1339                None => "empty string".to_string(),
1340            }
1341        )
1342    }
1343
1344    // Simplifies calls to generators by handling errors
1345    let gen_key = |key: Option<Cow<'a, str>>| -> Result<String, String> {
1346        match key {
1347            Some(Cow::Owned(s)) => Ok(s),
1348            Some(Cow::Borrowed(s)) => Ok(s.to_owned()),
1349            None => Err("expected key".to_owned()),
1350        }
1351    };
1352    let mut gen_value = |elem| gen_elem(elem).map_err(|e| e.to_string());
1353    let is_special_char = |c| matches!(c, '{' | '}' | ',' | '"' | '=' | '>' | '\\');
1354    let is_end_of_literal = |c| matches!(c, ',' | '}' | '=');
1355
1356    loop {
1357        // Check for terminals.
1358        buf.take_while(|ch| ch.is_ascii_whitespace());
1359        match buf.next() {
1360            Some('}') => break,
1361            _ if map.len() == 0 => {
1362                buf.prev();
1363            }
1364            Some(',') => {}
1365            Some(c) => bail!("expected ',' or end of input, got '{}'", c),
1366            None => bail!("unexpected end of input"),
1367        }
1368
1369        // Get key.
1370        buf.take_while(|ch| ch.is_ascii_whitespace());
1371        let key = match buf.peek() {
1372            Some('"') => Some(lex_quoted_element(buf)?),
1373            Some(_) => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
1374            None => bail!("unexpected end of input"),
1375        };
1376        let key = gen_key(key)?;
1377
1378        // Assert mapping arrow (=>) is present.
1379        buf.take_while(|ch| ch.is_ascii_whitespace());
1380        if !buf.consume('=') || !buf.consume('>') {
1381            bail!("expected =>")
1382        }
1383
1384        // Get value.
1385        buf.take_while(|ch| ch.is_ascii_whitespace());
1386        let value = match buf.peek() {
1387            Some('"') => Some(lex_quoted_element(buf)?),
1388            Some('{') => {
1389                if !is_value_type_map {
1390                    bail!(
1391                        "unescaped '{{' at beginning of value; perhaps you \
1392                           want a nested map, e.g. '{{a=>{{a=>1}}}}'::map[text=>map[text=>int]]"
1393                    )
1394                }
1395                Some(lex_embedded_element(buf)?)
1396            }
1397            Some(_) => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
1398            None => bail!("unexpected end of input"),
1399        };
1400        let value = gen_value(value)?;
1401
1402        // Insert elements.
1403        map.insert(key, value);
1404    }
1405    Ok(map)
1406}
1407
1408pub fn format_map<F, T, E>(
1409    buf: &mut F,
1410    elems: impl IntoIterator<Item = (impl AsRef<str>, T)>,
1411    mut format_elem: impl FnMut(MapValueWriter<F>, T) -> Result<Nestable, E>,
1412) -> Result<Nestable, E>
1413where
1414    F: FormatBuffer,
1415{
1416    buf.write_char('{');
1417    let mut elems = elems.into_iter().peekable();
1418    while let Some((key, value)) = elems.next() {
1419        // Map key values are always Strings, which always evaluate to
1420        // Nestable::MayNeedEscaping.
1421        let key_start = buf.len();
1422        buf.write_str(key.as_ref());
1423        escape_elem::<_, MapElementEscaper>(buf, key_start);
1424
1425        buf.write_str("=>");
1426
1427        let value_start = buf.len();
1428        if let Nestable::MayNeedEscaping = format_elem(MapValueWriter(buf), value)? {
1429            escape_elem::<_, MapElementEscaper>(buf, value_start);
1430        }
1431
1432        if elems.peek().is_some() {
1433            buf.write_char(',');
1434        }
1435    }
1436    buf.write_char('}');
1437    Ok(Nestable::Yes)
1438}
1439
1440pub fn parse_range<'a, V, E>(
1441    s: &'a str,
1442    gen_elem: impl FnMut(Cow<'a, str>) -> Result<V, E>,
1443) -> Result<Range<V>, ParseError>
1444where
1445    E: ToString,
1446{
1447    Ok(Range {
1448        inner: parse_range_inner(s, gen_elem).map_err(|details| {
1449            ParseError::invalid_input_syntax("range", s).with_details(details)
1450        })?,
1451    })
1452}
1453
1454fn parse_range_inner<'a, V, E>(
1455    s: &'a str,
1456    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<V, E>,
1457) -> Result<Option<RangeInner<V>>, String>
1458where
1459    E: ToString,
1460{
1461    let buf = &mut LexBuf::new(s);
1462
1463    buf.take_while(|ch| ch.is_ascii_whitespace());
1464
1465    if buf.consume_str("empty") {
1466        buf.take_while(|ch| ch.is_ascii_whitespace());
1467        if buf.next().is_none() {
1468            return Ok(None);
1469        } else {
1470            bail!("Junk after \"empty\" key word.")
1471        }
1472    }
1473
1474    let lower_inclusive = match buf.next() {
1475        Some('[') => true,
1476        Some('(') => false,
1477        _ => bail!("Missing left parenthesis or bracket."),
1478    };
1479
1480    let lower_bound = match buf.peek() {
1481        Some(',') => None,
1482        Some(_) => {
1483            let v = buf.take_while(|c| !matches!(c, ','));
1484            let v = gen_elem(Cow::from(v)).map_err(|e| e.to_string())?;
1485            Some(v)
1486        }
1487        None => bail!("Unexpected end of input."),
1488    };
1489
1490    buf.take_while(|ch| ch.is_ascii_whitespace());
1491
1492    if buf.next() != Some(',') {
1493        bail!("Missing comma after lower bound.")
1494    }
1495
1496    let upper_bound = match buf.peek() {
1497        Some(']' | ')') => None,
1498        Some(_) => {
1499            let v = buf.take_while(|c| !matches!(c, ')' | ']'));
1500            let v = gen_elem(Cow::from(v)).map_err(|e| e.to_string())?;
1501            Some(v)
1502        }
1503        None => bail!("Unexpected end of input."),
1504    };
1505
1506    let upper_inclusive = match buf.next() {
1507        Some(']') => true,
1508        Some(')') => false,
1509        _ => bail!("Missing left parenthesis or bracket."),
1510    };
1511
1512    buf.take_while(|ch| ch.is_ascii_whitespace());
1513
1514    if buf.next().is_some() {
1515        bail!("Junk after right parenthesis or bracket.")
1516    }
1517
1518    let range = Some(RangeInner {
1519        lower: RangeBound {
1520            inclusive: lower_inclusive,
1521            bound: lower_bound,
1522        },
1523        upper: RangeBound {
1524            inclusive: upper_inclusive,
1525            bound: upper_bound,
1526        },
1527    });
1528
1529    Ok(range)
1530}
1531
1532/// Writes a [`Range`] to `buf`.
1533pub fn format_range<F, V, E>(
1534    buf: &mut F,
1535    r: &Range<V>,
1536    mut format_elem: impl FnMut(RangeElementWriter<F>, Option<&V>) -> Result<Nestable, E>,
1537) -> Result<Nestable, E>
1538where
1539    F: FormatBuffer,
1540{
1541    let range = match &r.inner {
1542        None => {
1543            buf.write_str("empty");
1544            return Ok(Nestable::MayNeedEscaping);
1545        }
1546        Some(i) => i,
1547    };
1548
1549    if range.lower.inclusive {
1550        buf.write_char('[');
1551    } else {
1552        buf.write_char('(');
1553    }
1554
1555    let start = buf.len();
1556    if let Nestable::MayNeedEscaping =
1557        format_elem(RangeElementWriter(buf), range.lower.bound.as_ref())?
1558    {
1559        escape_elem::<_, ListElementEscaper>(buf, start);
1560    }
1561
1562    buf.write_char(',');
1563
1564    let start = buf.len();
1565    if let Nestable::MayNeedEscaping =
1566        format_elem(RangeElementWriter(buf), range.upper.bound.as_ref())?
1567    {
1568        escape_elem::<_, ListElementEscaper>(buf, start);
1569    }
1570
1571    if range.upper.inclusive {
1572        buf.write_char(']');
1573    } else {
1574        buf.write_char(')');
1575    }
1576
1577    Ok(Nestable::MayNeedEscaping)
1578}
1579
/// A helper for `format_range` that formats a single range bound.
///
/// Wraps the output buffer so that the caller must choose between writing
/// nothing (an absent bound) and obtaining the buffer to write a value.
#[derive(Debug)]
pub struct RangeElementWriter<'a, F>(&'a mut F);

impl<'a, F> RangeElementWriter<'a, F>
where
    F: FormatBuffer,
{
    /// Marks this range bound as null, writing nothing to the buffer.
    pub fn write_null(self) -> Nestable {
        // In ranges these "null" values represent infinite bounds, which are
        // not represented as values, but rather the absence of a value.
        Nestable::Yes
    }

    /// Returns a [`FormatBuffer`] into which a non-null element can be
    /// written.
    pub fn nonnull_buffer(self) -> &'a mut F {
        self.0
    }
}
1601
1602pub fn format_array<F, T, E>(
1603    buf: &mut F,
1604    dims: &[ArrayDimension],
1605    elems: impl IntoIterator<Item = T>,
1606    mut format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1607) -> Result<Nestable, E>
1608where
1609    F: FormatBuffer,
1610{
1611    if dims.iter().any(|dim| dim.lower_bound != 1) {
1612        for d in dims.iter() {
1613            let (lower, upper) = d.dimension_bounds();
1614            write!(buf, "[{}:{}]", lower, upper);
1615        }
1616        buf.write_char('=');
1617    }
1618
1619    format_array_inner(buf, dims, &mut elems.into_iter(), &mut format_elem)?;
1620    Ok(Nestable::Yes)
1621}
1622
1623pub fn format_array_inner<F, T, E>(
1624    buf: &mut F,
1625    dims: &[ArrayDimension],
1626    elems: &mut impl Iterator<Item = T>,
1627    format_elem: &mut impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1628) -> Result<(), E>
1629where
1630    F: FormatBuffer,
1631{
1632    if dims.is_empty() {
1633        buf.write_str("{}");
1634        return Ok(());
1635    }
1636
1637    buf.write_char('{');
1638    for j in 0..dims[0].length {
1639        if j > 0 {
1640            buf.write_char(',');
1641        }
1642        if dims.len() == 1 {
1643            let start = buf.len();
1644            let elem = elems.next().unwrap();
1645            if let Nestable::MayNeedEscaping = format_elem(ListElementWriter(buf), elem)? {
1646                escape_elem::<_, ListElementEscaper>(buf, start);
1647            }
1648        } else {
1649            format_array_inner(buf, &dims[1..], elems, format_elem)?;
1650        }
1651    }
1652    buf.write_char('}');
1653
1654    Ok(())
1655}
1656
/// Writes `elems` to `buf` separated by single spaces, with no surrounding
/// delimiters.
///
/// Each element is written by `format_elem` (see [`format_elems`]). Reports
/// [`Nestable::MayNeedEscaping`] on success.
pub fn format_legacy_vector<F, T, E>(
    buf: &mut F,
    elems: impl IntoIterator<Item = T>,
    format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
) -> Result<Nestable, E>
where
    F: FormatBuffer,
{
    format_elems(buf, elems, format_elem, ' ')?;
    Ok(Nestable::MayNeedEscaping)
}
1668
/// Writes `elems` to `buf` as a brace-delimited, comma-separated list, e.g.
/// `{a,b,c}`.
///
/// Each element is written by `format_elem` (see [`format_elems`]). Reports
/// [`Nestable::Yes`] on success.
pub fn format_list<F, T, E>(
    buf: &mut F,
    elems: impl IntoIterator<Item = T>,
    format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
) -> Result<Nestable, E>
where
    F: FormatBuffer,
{
    buf.write_char('{');
    format_elems(buf, elems, format_elem, ',')?;
    buf.write_char('}');
    Ok(Nestable::Yes)
}
1682
1683/// Writes each `elem` into `buf`, separating the elems with `sep`.
1684pub fn format_elems<F, T, E>(
1685    buf: &mut F,
1686    elems: impl IntoIterator<Item = T>,
1687    mut format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1688    sep: char,
1689) -> Result<(), E>
1690where
1691    F: FormatBuffer,
1692{
1693    let mut elems = elems.into_iter().peekable();
1694    while let Some(elem) = elems.next() {
1695        let start = buf.len();
1696        if let Nestable::MayNeedEscaping = format_elem(ListElementWriter(buf), elem)? {
1697            escape_elem::<_, ListElementEscaper>(buf, start);
1698        }
1699        if elems.peek().is_some() {
1700            buf.write_char(sep)
1701        }
1702    }
1703    Ok(())
1704}
1705
/// Writes an `mz_acl_item` to `buf`.
///
/// The text is whatever the `{}` formatting of [`MzAclItem`] produces.
/// Reports [`Nestable::Yes`].
pub fn format_mz_acl_item<F>(buf: &mut F, mz_acl_item: MzAclItem) -> Nestable
where
    F: FormatBuffer,
{
    write!(buf, "{mz_acl_item}");
    Nestable::Yes
}
1714
/// Parses an MzAclItem from `s`.
///
/// Leading and trailing whitespace is trimmed before parsing. On failure the
/// error is an "invalid input syntax" [`ParseError`] for type `mz_aclitem`
/// that quotes the original, untrimmed input and carries the underlying
/// error as details.
pub fn parse_mz_acl_item(s: &str) -> Result<MzAclItem, ParseError> {
    s.trim()
        .parse()
        .map_err(|e| ParseError::invalid_input_syntax("mz_aclitem", s).with_details(e))
}
1721
/// Writes an `acl_item` to `buf`.
///
/// The text is whatever the `{}` formatting of [`AclItem`] produces. Reports
/// [`Nestable::Yes`].
pub fn format_acl_item<F>(buf: &mut F, acl_item: AclItem) -> Nestable
where
    F: FormatBuffer,
{
    write!(buf, "{acl_item}");
    Nestable::Yes
}
1730
/// Parses an AclItem from `s`.
///
/// Leading and trailing whitespace is trimmed before parsing. On failure the
/// error is an "invalid input syntax" [`ParseError`] for type `aclitem` that
/// quotes the original, untrimmed input and carries the underlying error as
/// details.
pub fn parse_acl_item(s: &str) -> Result<AclItem, ParseError> {
    s.trim()
        .parse()
        .map_err(|e| ParseError::invalid_input_syntax("aclitem", s).with_details(e))
}
1737
/// Escaping rules for one kind of container element (list, map, or record),
/// as consumed by `escape_elem`.
pub trait ElementEscaper {
    /// Returns whether the element with the given bytes must be quoted and
    /// escaped.
    fn needs_escaping(elem: &[u8]) -> bool;
    /// Returns the byte to insert in front of `c` when escaping it.
    /// `escape_elem` calls this for `"` and `\` bytes.
    fn escape_char(c: u8) -> u8;
}
1742
1743struct ListElementEscaper;
1744
1745impl ElementEscaper for ListElementEscaper {
1746    fn needs_escaping(elem: &[u8]) -> bool {
1747        elem.is_empty()
1748            || elem == b"NULL"
1749            || elem
1750                .iter()
1751                .any(|c| matches!(c, b'{' | b'}' | b',' | b'"' | b'\\') || c.is_ascii_whitespace())
1752    }
1753
1754    fn escape_char(_: u8) -> u8 {
1755        b'\\'
1756    }
1757}
1758
1759struct MapElementEscaper;
1760
1761impl ElementEscaper for MapElementEscaper {
1762    fn needs_escaping(elem: &[u8]) -> bool {
1763        elem.is_empty()
1764            || elem == b"NULL"
1765            || elem.iter().any(|c| {
1766                matches!(c, b'{' | b'}' | b',' | b'"' | b'=' | b'>' | b'\\')
1767                    || c.is_ascii_whitespace()
1768            })
1769    }
1770
1771    fn escape_char(_: u8) -> u8 {
1772        b'\\'
1773    }
1774}
1775
1776struct RecordElementEscaper;
1777
1778impl ElementEscaper for RecordElementEscaper {
1779    fn needs_escaping(elem: &[u8]) -> bool {
1780        elem.is_empty()
1781            || elem
1782                .iter()
1783                .any(|c| matches!(c, b'(' | b')' | b',' | b'"' | b'\\') || c.is_ascii_whitespace())
1784    }
1785
1786    fn escape_char(c: u8) -> u8 {
1787        if c == b'"' { b'"' } else { b'\\' }
1788    }
1789}
1790
/// Escapes a list, record, or map element in place.
///
/// The element must start at `start` and extend to the end of the buffer. The
/// buffer will be resized if escaping is necessary to account for the
/// additional escape characters.
///
/// The escaping rules come from the [`ElementEscaper`] type parameter `E`:
/// `E::needs_escaping` is given the bytes of the element and decides whether
/// anything is done at all, and `E::escape_char` supplies the byte inserted
/// in front of each `"` or `\` in the element. An escaped element is also
/// wrapped in double quotes.
///
/// # Panics
///
/// Panics if the write cursor does not land exactly on `start` after the
/// rewrite.
fn escape_elem<F, E>(buf: &mut F, start: usize)
where
    F: FormatBuffer,
    E: ElementEscaper,
{
    let elem = &buf.as_ref()[start..];
    if !E::needs_escaping(elem) {
        return;
    }

    // We'll need two extra bytes for the quotes at the start and end of the
    // element, plus an extra byte for each quote and backslash.
    let extras = 2 + elem.iter().filter(|b| matches!(b, b'"' | b'\\')).count();
    let orig_end = buf.len();
    let new_end = buf.len() + extras;

    // Pad the buffer to the new length. These characters will all be
    // overwritten.
    //
    // NOTE(benesch): we never read these characters, so we could instead use
    // uninitialized memory, but that's a level of unsafety I'm currently
    // uncomfortable with. The performance gain is negligible anyway.
    for _ in 0..extras {
        buf.write_char('\0');
    }

    // SAFETY: inserting ASCII characters before other ASCII characters
    // preserves UTF-8 encoding.
    let elem = unsafe { buf.as_bytes_mut() };

    // Walk the string backwards, writing characters at the new end index while
    // reading from the old end index, adding quotes at the beginning and end,
    // and adding a backslash before every backslash or quote.
    //
    // Going backwards keeps the write index ahead of (or equal to) the read
    // index, so no byte is overwritten before it has been copied.
    let mut wi = new_end - 1;
    elem[wi] = b'"';
    wi -= 1;
    for ri in (start..orig_end).rev() {
        elem[wi] = elem[ri];
        wi -= 1;
        if let b'\\' | b'"' = elem[ri] {
            // The escape byte goes *before* the escaped byte, which in a
            // backwards walk means it is written second.
            elem[wi] = E::escape_char(elem[ri]);
            wi -= 1;
        }
    }
    elem[wi] = b'"';

    // The opening quote must sit exactly where the element used to begin;
    // anything else means `extras` was miscounted.
    assert!(wi == start);
}
1848
1849/// A helper for `format_list` that formats a single list element.
1850#[derive(Debug)]
1851pub struct ListElementWriter<'a, F>(&'a mut F);
1852
1853impl<'a, F> ListElementWriter<'a, F>
1854where
1855    F: FormatBuffer,
1856{
1857    /// Marks this list element as null.
1858    pub fn write_null(self) -> Nestable {
1859        self.0.write_str("NULL");
1860        Nestable::Yes
1861    }
1862
1863    /// Returns a [`FormatBuffer`] into which a non-null element can be
1864    /// written.
1865    pub fn nonnull_buffer(self) -> &'a mut F {
1866        self.0
1867    }
1868}
1869
1870/// A helper for `format_map` that formats a single map value.
1871#[derive(Debug)]
1872pub struct MapValueWriter<'a, F>(&'a mut F);
1873
1874impl<'a, F> MapValueWriter<'a, F>
1875where
1876    F: FormatBuffer,
1877{
1878    /// Marks this value element as null.
1879    pub fn write_null(self) -> Nestable {
1880        self.0.write_str("NULL");
1881        Nestable::Yes
1882    }
1883
1884    /// Returns a [`FormatBuffer`] into which a non-null element can be
1885    /// written.
1886    pub fn nonnull_buffer(self) -> &'a mut F {
1887        self.0
1888    }
1889}
1890
1891pub fn format_record<F, T, E>(
1892    buf: &mut F,
1893    elems: impl IntoIterator<Item = T>,
1894    mut format_elem: impl FnMut(RecordElementWriter<F>, T) -> Result<Nestable, E>,
1895) -> Result<Nestable, E>
1896where
1897    F: FormatBuffer,
1898{
1899    buf.write_char('(');
1900    let mut elems = elems.into_iter().peekable();
1901    while let Some(elem) = elems.next() {
1902        let start = buf.len();
1903        if let Nestable::MayNeedEscaping = format_elem(RecordElementWriter(buf), elem)? {
1904            escape_elem::<_, RecordElementEscaper>(buf, start);
1905        }
1906        if elems.peek().is_some() {
1907            buf.write_char(',')
1908        }
1909    }
1910    buf.write_char(')');
1911    Ok(Nestable::MayNeedEscaping)
1912}
1913
1914/// A helper for `format_record` that formats a single record element.
1915#[derive(Debug)]
1916pub struct RecordElementWriter<'a, F>(&'a mut F);
1917
1918impl<'a, F> RecordElementWriter<'a, F>
1919where
1920    F: FormatBuffer,
1921{
1922    /// Marks this record element as null.
1923    pub fn write_null(self) -> Nestable {
1924        Nestable::Yes
1925    }
1926
1927    /// Returns a [`FormatBuffer`] into which a non-null element can be
1928    /// written.
1929    pub fn nonnull_buffer(self) -> &'a mut F {
1930        self.0
1931    }
1932}
1933
1934/// An error while parsing an input as a type.
1935#[derive(
1936    Arbitrary,
1937    Ord,
1938    PartialOrd,
1939    Clone,
1940    Debug,
1941    Eq,
1942    PartialEq,
1943    Serialize,
1944    Deserialize,
1945    Hash,
1946    MzReflect
1947)]
1948pub struct ParseError {
1949    pub kind: ParseErrorKind,
1950    pub type_name: Box<str>,
1951    pub input: Box<str>,
1952    pub details: Option<Box<str>>,
1953}
1954
1955#[derive(
1956    Arbitrary,
1957    Ord,
1958    PartialOrd,
1959    Clone,
1960    Copy,
1961    Debug,
1962    Eq,
1963    PartialEq,
1964    Serialize,
1965    Deserialize,
1966    Hash,
1967    MzReflect
1968)]
1969pub enum ParseErrorKind {
1970    OutOfRange,
1971    InvalidInputSyntax,
1972}
1973
1974impl ParseError {
1975    // To ensure that reversing the parameters causes a compile-time error, we
1976    // require that `type_name` be a string literal, even though `ParseError`
1977    // itself stores the type name as a `String`.
1978    fn new<S>(kind: ParseErrorKind, type_name: &'static str, input: S) -> ParseError
1979    where
1980        S: Into<Box<str>>,
1981    {
1982        ParseError {
1983            kind,
1984            type_name: type_name.into(),
1985            input: input.into(),
1986            details: None,
1987        }
1988    }
1989
1990    fn out_of_range<S>(type_name: &'static str, input: S) -> ParseError
1991    where
1992        S: Into<Box<str>>,
1993    {
1994        ParseError::new(ParseErrorKind::OutOfRange, type_name, input)
1995    }
1996
1997    fn invalid_input_syntax<S>(type_name: &'static str, input: S) -> ParseError
1998    where
1999        S: Into<Box<str>>,
2000    {
2001        ParseError::new(ParseErrorKind::InvalidInputSyntax, type_name, input)
2002    }
2003
2004    fn with_details<D>(mut self, details: D) -> ParseError
2005    where
2006        D: fmt::Display,
2007    {
2008        self.details = Some(details.to_string().into());
2009        self
2010    }
2011}
2012
2013impl fmt::Display for ParseError {
2014    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2015        match self.kind {
2016            ParseErrorKind::OutOfRange => {
2017                write!(
2018                    f,
2019                    "{} is out of range for type {}",
2020                    self.input.quoted(),
2021                    self.type_name
2022                )?;
2023                if let Some(details) = &self.details {
2024                    write!(f, ": {}", details)?;
2025                }
2026                Ok(())
2027            }
2028            ParseErrorKind::InvalidInputSyntax => {
2029                write!(f, "invalid input syntax for type {}: ", self.type_name)?;
2030                if let Some(details) = &self.details {
2031                    write!(f, "{}: ", details)?;
2032                }
2033                write!(f, "{}", self.input.quoted())
2034            }
2035        }
2036    }
2037}
2038
2039impl Error for ParseError {}
2040
2041impl RustType<ProtoParseError> for ParseError {
2042    fn into_proto(&self) -> ProtoParseError {
2043        use Kind::*;
2044        use proto_parse_error::*;
2045        let kind = match self.kind {
2046            ParseErrorKind::OutOfRange => OutOfRange(()),
2047            ParseErrorKind::InvalidInputSyntax => InvalidInputSyntax(()),
2048        };
2049        ProtoParseError {
2050            kind: Some(kind),
2051            type_name: self.type_name.into_proto(),
2052            input: self.input.into_proto(),
2053            details: self.details.into_proto(),
2054        }
2055    }
2056
2057    fn from_proto(proto: ProtoParseError) -> Result<Self, TryFromProtoError> {
2058        use proto_parse_error::Kind::*;
2059
2060        if let Some(kind) = proto.kind {
2061            Ok(ParseError {
2062                kind: match kind {
2063                    OutOfRange(()) => ParseErrorKind::OutOfRange,
2064                    InvalidInputSyntax(()) => ParseErrorKind::InvalidInputSyntax,
2065                },
2066                type_name: proto.type_name.into(),
2067                input: proto.input.into(),
2068                details: proto.details.into_rust()?,
2069            })
2070        } else {
2071            Err(TryFromProtoError::missing_field("ProtoParseError::kind"))
2072        }
2073    }
2074}
2075
2076#[derive(
2077    Arbitrary,
2078    Ord,
2079    PartialOrd,
2080    Copy,
2081    Clone,
2082    Debug,
2083    Eq,
2084    PartialEq,
2085    Serialize,
2086    Deserialize,
2087    Hash,
2088    MzReflect
2089)]
2090pub enum ParseHexError {
2091    InvalidHexDigit(char),
2092    OddLength,
2093}
2094impl Error for ParseHexError {}
2095
2096impl fmt::Display for ParseHexError {
2097    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2098        match self {
2099            ParseHexError::InvalidHexDigit(c) => {
2100                write!(f, "invalid hexadecimal digit: \"{}\"", c.escape_default())
2101            }
2102            ParseHexError::OddLength => {
2103                f.write_str("invalid hexadecimal data: odd number of digits")
2104            }
2105        }
2106    }
2107}
2108
2109impl RustType<ProtoParseHexError> for ParseHexError {
2110    fn into_proto(&self) -> ProtoParseHexError {
2111        use Kind::*;
2112        use proto_parse_hex_error::*;
2113        let kind = match self {
2114            ParseHexError::InvalidHexDigit(v) => InvalidHexDigit(v.into_proto()),
2115            ParseHexError::OddLength => OddLength(()),
2116        };
2117        ProtoParseHexError { kind: Some(kind) }
2118    }
2119
2120    fn from_proto(error: ProtoParseHexError) -> Result<Self, TryFromProtoError> {
2121        use proto_parse_hex_error::Kind::*;
2122        match error.kind {
2123            Some(kind) => match kind {
2124                InvalidHexDigit(v) => Ok(ParseHexError::InvalidHexDigit(char::from_proto(v)?)),
2125                OddLength(()) => Ok(ParseHexError::OddLength),
2126            },
2127            None => Err(TryFromProtoError::missing_field(
2128                "`ProtoParseHexError::kind`",
2129            )),
2130        }
2131    }
2132}
2133
#[cfg(test)]
mod tests {
    use mz_ore::assert_ok;
    use mz_proto::protobuf_roundtrip;
    use proptest::prelude::*;

    use super::*;

    // Any `ParseError` must survive a trip through its protobuf form.
    proptest! {
        #[mz_ore::test]
        #[cfg_attr(miri, ignore)] // too slow
        fn parse_error_protobuf_roundtrip(expect in any::<ParseError>()) {
            let roundtripped = protobuf_roundtrip::<_, ProtoParseError>(&expect);
            assert_ok!(roundtripped);
            assert_eq!(roundtripped.unwrap(), expect);
        }
    }

    // Any `ParseHexError` must survive a trip through its protobuf form.
    proptest! {
        #[mz_ore::test]
        #[cfg_attr(miri, ignore)] // too slow
        fn parse_hex_error_protobuf_roundtrip(expect in any::<ParseHexError>()) {
            let roundtripped = protobuf_roundtrip::<_, ProtoParseHexError>(&expect);
            assert_ok!(roundtripped);
            assert_eq!(roundtripped.unwrap(), expect);
        }
    }

    #[mz_ore::test]
    fn test_format_nanos_to_micros() {
        // Pairs of (nanoseconds, expected formatted output).
        let cases: [(u32, &str); 7] = [
            (0, ""),
            (1, ""),
            (499, ""),
            (500, ".000001"),
            (500_000, ".0005"),
            (5_000_000, ".005"),
            // Leap second. This is possibly wrong and should maybe be reduced (nanosecond
            // % 1_000_000_000), but we are at least now aware it does this.
            (1_999_999_999, ".2"),
        ];
        for &(nanos, expected) in cases.iter() {
            let mut formatted = String::new();
            format_nanos_to_micros(&mut formatted, nanos);
            assert_eq!(formatted, expected);
        }
    }

    #[mz_ore::test]
    fn test_parse_pg_legacy_name() {
        // Short input is returned unchanged.
        let short = "hello world";
        assert_eq!(short, parse_pg_legacy_name(short));

        // 63 bytes is returned unchanged.
        let at_limit = "x".repeat(63);
        assert_eq!(at_limit, parse_pg_legacy_name(&at_limit));

        // 64 bytes is cut down to 63.
        let over_limit = "x".repeat(64);
        assert_eq!("x".repeat(63), parse_pg_legacy_name(&over_limit));

        // The Hebrew character Aleph (א) has a length of 2 bytes.
        let aleph_fits = format!("{}{}", "x".repeat(61), "א");
        assert_eq!(aleph_fits, parse_pg_legacy_name(&aleph_fits));

        // Here the Aleph would straddle the 63-byte mark, so it is dropped
        // whole.
        let aleph_straddles = format!("{}{}", "x".repeat(62), "א");
        assert_eq!("x".repeat(62), parse_pg_legacy_name(&aleph_straddles));
    }
}