mz_repr/
strconv.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Routines for converting datum values to and from their string
11//! representation.
12//!
13//! The functions in this module are tightly related to the variants of
14//! [`ScalarType`](crate::ScalarType). Each variant has a pair of functions in
15//! this module named `parse_VARIANT` and `format_VARIANT`. The type returned
16//! by `parse` functions, and the type accepted by `format` functions, will
17//! be a type that is easily converted into the [`Datum`](crate::Datum) variant
18//! for that type. The functions do not directly convert from `Datum`s to
19//! `String`s so that the logic can be reused when `Datum`s are not available or
20//! desired, as in the pgrepr crate.
21//!
22//! The string representations used are exactly the same as the PostgreSQL
23//! string representations for the corresponding PostgreSQL type. Deviations
24//! should be considered a bug.
25
26use std::borrow::Cow;
27use std::collections::BTreeMap;
28use std::error::Error;
29use std::fmt;
30use std::num::FpCategory;
31use std::str::FromStr;
32use std::sync::LazyLock;
33
34use chrono::offset::{Offset, TimeZone};
35use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
36use dec::OrderedDecimal;
37use mz_lowertest::MzReflect;
38use mz_ore::cast::ReinterpretCast;
39use mz_ore::error::ErrorExt;
40use mz_ore::fmt::FormatBuffer;
41use mz_ore::lex::LexBuf;
42use mz_ore::str::StrExt;
43use mz_pgtz::timezone::{Timezone, TimezoneSpec};
44use mz_proto::{ProtoType, RustType, TryFromProtoError};
45use num_traits::Float as NumFloat;
46use proptest_derive::Arbitrary;
47use regex::bytes::Regex;
48use ryu::Float as RyuFloat;
49use serde::{Deserialize, Serialize};
50use uuid::Uuid;
51
52use crate::adt::array::ArrayDimension;
53use crate::adt::date::Date;
54use crate::adt::datetime::{self, DateTimeField, ParsedDateTime};
55use crate::adt::interval::Interval;
56use crate::adt::jsonb::{Jsonb, JsonbRef};
57use crate::adt::mz_acl_item::{AclItem, MzAclItem};
58use crate::adt::numeric::{self, NUMERIC_DATUM_MAX_PRECISION, Numeric};
59use crate::adt::pg_legacy_name::NAME_MAX_BYTES;
60use crate::adt::range::{Range, RangeBound, RangeInner};
61use crate::adt::timestamp::CheckedTimestamp;
62
63include!(concat!(env!("OUT_DIR"), "/mz_repr.strconv.rs"));
64
/// Returns early from the enclosing function with an `Err` whose payload is a
/// `String` built from `format!`-style arguments.
macro_rules! bail {
    ($($fmt_args:tt)*) => {
        return Err(format!($($fmt_args)*))
    };
}
68
/// Whether a formatted value may need escaping when it is nested inside
/// another textual representation.
///
/// `Yes` should be provided only for types that will *never* return true for
/// [`ElementEscaper::needs_escaping`]; everything else reports
/// `MayNeedEscaping`.
#[derive(Debug)]
pub enum Nestable {
    /// The formatted text never needs escaping.
    Yes,
    /// The formatted text might need escaping.
    MayNeedEscaping,
}
75
76/// Parses a [`bool`] from `s`.
77///
78/// The accepted values are "true", "false", "yes", "no", "on", "off", "1", and
79/// "0", or any unambiguous prefix of one of those values. Leading or trailing
80/// whitespace is permissible.
81pub fn parse_bool(s: &str) -> Result<bool, ParseError> {
82    match s.trim().to_lowercase().as_str() {
83        "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(true),
84        "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => Ok(false),
85        _ => Err(ParseError::invalid_input_syntax("boolean", s)),
86    }
87}
88
/// Returns the textual form of `b` as a string with a static lifetime.
///
/// Produces the same text as `format_bool` (`"t"` or `"f"`) but needs no
/// buffer, so prefer it whenever a `&'static str` is sufficient.
pub fn format_bool_static(b: bool) -> &'static str {
    if b { "t" } else { "f" }
}
99
100/// Writes a boolean value into `buf`.
101///
102/// `true` is encoded as the char `'t'` and `false` is encoded as the char
103/// `'f'`.
104pub fn format_bool<F>(buf: &mut F, b: bool) -> Nestable
105where
106    F: FormatBuffer,
107{
108    buf.write_str(format_bool_static(b));
109    Nestable::Yes
110}
111
112/// Parses an [`i16`] from `s`.
113///
114/// Valid values are whatever the [`std::str::FromStr`] implementation on `i16` accepts,
115/// plus leading and trailing whitespace.
116pub fn parse_int16(s: &str) -> Result<i16, ParseError> {
117    s.trim()
118        .parse()
119        .map_err(|e| ParseError::invalid_input_syntax("smallint", s).with_details(e))
120}
121
122/// Writes an [`i16`] to `buf`.
123pub fn format_int16<F>(buf: &mut F, i: i16) -> Nestable
124where
125    F: FormatBuffer,
126{
127    write!(buf, "{}", i);
128    Nestable::Yes
129}
130
131/// Parses an [`i32`] from `s`.
132///
133/// Valid values are whatever the [`std::str::FromStr`] implementation on `i32` accepts,
134/// plus leading and trailing whitespace.
135pub fn parse_int32(s: &str) -> Result<i32, ParseError> {
136    s.trim()
137        .parse()
138        .map_err(|e| ParseError::invalid_input_syntax("integer", s).with_details(e))
139}
140
141/// Writes an [`i32`] to `buf`.
142pub fn format_int32<F>(buf: &mut F, i: i32) -> Nestable
143where
144    F: FormatBuffer,
145{
146    write!(buf, "{}", i);
147    Nestable::Yes
148}
149
150/// Parses an `i64` from `s`.
151pub fn parse_int64(s: &str) -> Result<i64, ParseError> {
152    s.trim()
153        .parse()
154        .map_err(|e| ParseError::invalid_input_syntax("bigint", s).with_details(e))
155}
156
157/// Writes an `i64` to `buf`.
158pub fn format_int64<F>(buf: &mut F, i: i64) -> Nestable
159where
160    F: FormatBuffer,
161{
162    write!(buf, "{}", i);
163    Nestable::Yes
164}
165
166/// Parses an [`u16`] from `s`.
167///
168/// Valid values are whatever the [`std::str::FromStr`] implementation on `u16` accepts,
169/// plus leading and trailing whitespace.
170pub fn parse_uint16(s: &str) -> Result<u16, ParseError> {
171    s.trim()
172        .parse()
173        .map_err(|e| ParseError::invalid_input_syntax("uint2", s).with_details(e))
174}
175
176/// Writes an `u16` to `buf`.
177pub fn format_uint16<F>(buf: &mut F, u: u16) -> Nestable
178where
179    F: FormatBuffer,
180{
181    write!(buf, "{}", u);
182    Nestable::Yes
183}
184
185/// Parses an [`u32`] from `s`.
186///
187/// Valid values are whatever the [`std::str::FromStr`] implementation on `u32` accepts,
188/// plus leading and trailing whitespace.
189pub fn parse_uint32(s: &str) -> Result<u32, ParseError> {
190    s.trim()
191        .parse()
192        .map_err(|e| ParseError::invalid_input_syntax("uint4", s).with_details(e))
193}
194
195/// Writes an `u32` to `buf`.
196pub fn format_uint32<F>(buf: &mut F, u: u32) -> Nestable
197where
198    F: FormatBuffer,
199{
200    write!(buf, "{}", u);
201    Nestable::Yes
202}
203
204/// Parses an `u64` from `s`.
205pub fn parse_uint64(s: &str) -> Result<u64, ParseError> {
206    s.trim()
207        .parse()
208        .map_err(|e| ParseError::invalid_input_syntax("uint8", s).with_details(e))
209}
210
211/// Writes an `u64` to `buf`.
212pub fn format_uint64<F>(buf: &mut F, u: u64) -> Nestable
213where
214    F: FormatBuffer,
215{
216    write!(buf, "{}", u);
217    Nestable::Yes
218}
219
220/// Parses an `mz_timestamp` from `s`.
221pub fn parse_mz_timestamp(s: &str) -> Result<crate::Timestamp, ParseError> {
222    s.trim()
223        .parse()
224        .map_err(|e| ParseError::invalid_input_syntax("mz_timestamp", s).with_details(e))
225}
226
227/// Writes an `mz_timestamp` to `buf`.
228pub fn format_mz_timestamp<F>(buf: &mut F, u: crate::Timestamp) -> Nestable
229where
230    F: FormatBuffer,
231{
232    write!(buf, "{}", u);
233    Nestable::Yes
234}
235
236/// Parses an OID from `s`.
237pub fn parse_oid(s: &str) -> Result<u32, ParseError> {
238    // For historical reasons in PostgreSQL, OIDs are parsed as `i32`s and then
239    // reinterpreted as `u32`s.
240    //
241    // Do not use this as a model for behavior in other contexts. OIDs should
242    // not in general be thought of as freely convertible from `i32`s.
243    let oid: i32 = s
244        .trim()
245        .parse()
246        .map_err(|e| ParseError::invalid_input_syntax("oid", s).with_details(e))?;
247    Ok(u32::reinterpret_cast(oid))
248}
249
250fn parse_float<Fl>(type_name: &'static str, s: &str) -> Result<Fl, ParseError>
251where
252    Fl: NumFloat + FromStr,
253{
254    // Matching PostgreSQL's float parsing behavior is tricky. PostgreSQL's
255    // implementation delegates almost entirely to strtof(3)/strtod(3), which
256    // will report an out-of-range error if a number was rounded to zero or
257    // infinity. For example, parsing "1e70" as a 32-bit float will yield an
258    // out-of-range error because it is rounded to infinity, but parsing an
259    // explicitly-specified "inf" will yield infinity without an error.
260    //
261    // To @benesch's knowledge, there is no Rust implementation of float parsing
262    // that reports whether underflow or overflow occurred. So we figure it out
263    // ourselves after the fact. If parsing the float returns infinity and the input
264    // was not an explicitly-specified infinity, then we know overflow occurred.
265    // If parsing the float returns zero and the input was not an explicitly-specified
266    // zero, then we know underflow occurred.
267
268    // Matches `0`, `-0`, `+0`, `000000.00000`, `0.0e10`, 0., .0, et al.
269    static ZERO_RE: LazyLock<Regex> =
270        LazyLock::new(|| Regex::new(r#"(?i-u)^[-+]?(0+(\.0*)?|\.0+)(e|$)"#).unwrap());
271    // Matches `inf`, `-inf`, `+inf`, `infinity`, et al.
272    static INF_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i-u)^[-+]?inf").unwrap());
273
274    let buf = s.trim();
275    let f: Fl = buf
276        .parse()
277        .map_err(|_| ParseError::invalid_input_syntax(type_name, s))?;
278    match f.classify() {
279        FpCategory::Infinite if !INF_RE.is_match(buf.as_bytes()) => {
280            Err(ParseError::out_of_range(type_name, s))
281        }
282        FpCategory::Zero if !ZERO_RE.is_match(buf.as_bytes()) => {
283            Err(ParseError::out_of_range(type_name, s))
284        }
285        _ => Ok(f),
286    }
287}
288
289fn format_float<F, Fl>(buf: &mut F, f: Fl) -> Nestable
290where
291    F: FormatBuffer,
292    Fl: NumFloat + RyuFloat,
293{
294    // Use ryu rather than the standard library. ryu uses scientific notation
295    // when possible, which better matches PostgreSQL. The standard library's
296    // `ToString` implementations print all available digits, which is rather
297    // verbose.
298    //
299    // Note that we have to fix up ryu's formatting in a few cases to match
300    // PostgreSQL. PostgreSQL spells out "Infinity" in full, never emits a
301    // trailing ".0", formats positive exponents as e.g. "1e+10" rather than
302    // "1e10", and emits a negative sign for negative zero. If we need to speed
303    // up float formatting, we can look into forking ryu and making these edits
304    // directly, but for now it doesn't seem worth it.
305
306    match f.classify() {
307        FpCategory::Infinite if f.is_sign_negative() => buf.write_str("-Infinity"),
308        FpCategory::Infinite => buf.write_str("Infinity"),
309        FpCategory::Nan => buf.write_str("NaN"),
310        FpCategory::Zero if f.is_sign_negative() => buf.write_str("-0"),
311        _ => {
312            debug_assert!(f.is_finite());
313            let mut ryu_buf = ryu::Buffer::new();
314            let mut s = ryu_buf.format_finite(f);
315            if let Some(trimmed) = s.strip_suffix(".0") {
316                s = trimmed;
317            }
318            let mut chars = s.chars().peekable();
319            while let Some(ch) = chars.next() {
320                buf.write_char(ch);
321                if ch == 'e' && chars.peek() != Some(&'-') {
322                    buf.write_char('+');
323                }
324            }
325        }
326    }
327
328    Nestable::Yes
329}
330
331/// Parses an `f32` from `s`.
332pub fn parse_float32(s: &str) -> Result<f32, ParseError> {
333    parse_float("real", s)
334}
335
336/// Writes an `f32` to `buf`.
337pub fn format_float32<F>(buf: &mut F, f: f32) -> Nestable
338where
339    F: FormatBuffer,
340{
341    format_float(buf, f)
342}
343
344/// Parses an `f64` from `s`.
345pub fn parse_float64(s: &str) -> Result<f64, ParseError> {
346    parse_float("double precision", s)
347}
348
349/// Writes an `f64` to `buf`.
350pub fn format_float64<F>(buf: &mut F, f: f64) -> Nestable
351where
352    F: FormatBuffer,
353{
354    format_float(buf, f)
355}
356
357/// Use the following grammar to parse `s` into:
358///
359/// - `NaiveDate`
360/// - `NaiveTime`
361/// - Timezone string
362///
363/// `NaiveDate` and `NaiveTime` are appropriate to compute a `NaiveDateTime`,
364/// which can be used in conjunction with a timezone string to generate a
365/// `DateTime<Utc>`.
366///
367/// ```text
368/// <unquoted timestamp string> ::=
369///     <date value> <space> <time value> [ <time zone interval> ]
370/// <date value> ::=
371///     <years value> <minus sign> <months value> <minus sign> <days value>
372/// <time zone interval> ::=
373///     <sign> <hours value> <colon> <minutes value>
374/// ```
375fn parse_timestamp_string(s: &str) -> Result<(NaiveDate, NaiveTime, Timezone), String> {
376    if s.is_empty() {
377        return Err("timestamp string is empty".into());
378    }
379
380    // PostgreSQL special date-time inputs
381    // https://www.postgresql.org/docs/12/datatype-datetime.html#id-1.5.7.13.18.8
382    // We should add support for other values here, e.g. infinity
383    // which @quodlibetor is willing to add to the chrono package.
384    if s == "epoch" {
385        return Ok((
386            NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
387            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
388            Default::default(),
389        ));
390    }
391
392    let (ts_string, tz_string, era) = datetime::split_timestamp_string(s);
393
394    let pdt = ParsedDateTime::build_parsed_datetime_timestamp(ts_string, era)?;
395    let d: NaiveDate = pdt.compute_date()?;
396    let t: NaiveTime = pdt.compute_time()?;
397
398    let offset = if tz_string.is_empty() {
399        Default::default()
400    } else {
401        Timezone::parse(tz_string, TimezoneSpec::Iso)?
402    };
403
404    Ok((d, t, offset))
405}
406
407/// Parses a [`Date`] from `s`.
408pub fn parse_date(s: &str) -> Result<Date, ParseError> {
409    match parse_timestamp_string(s) {
410        Ok((date, _, _)) => Date::try_from(date).map_err(|_| ParseError::out_of_range("date", s)),
411        Err(e) => Err(ParseError::invalid_input_syntax("date", s).with_details(e)),
412    }
413}
414
415/// Writes a [`Date`] to `buf`.
416pub fn format_date<F>(buf: &mut F, d: Date) -> Nestable
417where
418    F: FormatBuffer,
419{
420    let d: NaiveDate = d.into();
421    let (year_ad, year) = d.year_ce();
422    write!(buf, "{:04}-{}", year, d.format("%m-%d"));
423    if !year_ad {
424        write!(buf, " BC");
425    }
426    Nestable::Yes
427}
428
429/// Parses a `NaiveTime` from `s`, using the following grammar.
430///
431/// ```text
432/// <time value> ::=
433///     <hours value> <colon> <minutes value> <colon> <seconds integer value>
434///     [ <period> [ <seconds fraction> ] ]
435/// ```
436pub fn parse_time(s: &str) -> Result<NaiveTime, ParseError> {
437    ParsedDateTime::build_parsed_datetime_time(s)
438        .and_then(|pdt| pdt.compute_time())
439        .map_err(|e| ParseError::invalid_input_syntax("time", s).with_details(e))
440}
441
442/// Writes a [`NaiveDateTime`] timestamp to `buf`.
443pub fn format_time<F>(buf: &mut F, t: NaiveTime) -> Nestable
444where
445    F: FormatBuffer,
446{
447    write!(buf, "{}", t.format("%H:%M:%S"));
448    format_nanos_to_micros(buf, t.nanosecond());
449    Nestable::Yes
450}
451
452/// Parses a `NaiveDateTime` from `s`.
453pub fn parse_timestamp(s: &str) -> Result<CheckedTimestamp<NaiveDateTime>, ParseError> {
454    match parse_timestamp_string(s) {
455        Ok((date, time, _)) => CheckedTimestamp::from_timestamplike(date.and_time(time))
456            .map_err(|_| ParseError::out_of_range("timestamp", s)),
457        Err(e) => Err(ParseError::invalid_input_syntax("timestamp", s).with_details(e)),
458    }
459}
460
461/// Writes a [`NaiveDateTime`] timestamp to `buf`.
462pub fn format_timestamp<F>(buf: &mut F, ts: &NaiveDateTime) -> Nestable
463where
464    F: FormatBuffer,
465{
466    let (year_ad, year) = ts.year_ce();
467    write!(buf, "{:04}-{}", year, ts.format("%m-%d %H:%M:%S"));
468    format_nanos_to_micros(buf, ts.and_utc().timestamp_subsec_nanos());
469    if !year_ad {
470        write!(buf, " BC");
471    }
472    // This always needs escaping because of the whitespace
473    Nestable::MayNeedEscaping
474}
475
476/// Parses a `DateTime<Utc>` from `s`. See `mz_expr::scalar::func::timezone_timestamp` for timezone anomaly considerations.
477pub fn parse_timestamptz(s: &str) -> Result<CheckedTimestamp<DateTime<Utc>>, ParseError> {
478    parse_timestamp_string(s)
479        .and_then(|(date, time, timezone)| {
480            use Timezone::*;
481            let mut dt = date.and_time(time);
482            let offset = match timezone {
483                FixedOffset(offset) => offset,
484                Tz(tz) => match tz.offset_from_local_datetime(&dt).latest() {
485                    Some(offset) => offset.fix(),
486                    None => {
487                        dt += Duration::try_hours(1).unwrap();
488                        tz.offset_from_local_datetime(&dt)
489                            .latest()
490                            .ok_or_else(|| "invalid timezone conversion".to_owned())?
491                            .fix()
492                    }
493                },
494            };
495            Ok(DateTime::from_naive_utc_and_offset(dt - offset, Utc))
496        })
497        .map_err(|e| {
498            ParseError::invalid_input_syntax("timestamp with time zone", s).with_details(e)
499        })
500        .and_then(|ts| {
501            CheckedTimestamp::from_timestamplike(ts)
502                .map_err(|_| ParseError::out_of_range("timestamp with time zone", s))
503        })
504}
505
506/// Writes a [`DateTime<Utc>`] timestamp to `buf`.
507pub fn format_timestamptz<F>(buf: &mut F, ts: &DateTime<Utc>) -> Nestable
508where
509    F: FormatBuffer,
510{
511    let (year_ad, year) = ts.year_ce();
512    write!(buf, "{:04}-{}", year, ts.format("%m-%d %H:%M:%S"));
513    format_nanos_to_micros(buf, ts.timestamp_subsec_nanos());
514    write!(buf, "+00");
515    if !year_ad {
516        write!(buf, " BC");
517    }
518    // This always needs escaping because of the whitespace
519    Nestable::MayNeedEscaping
520}
521
522/// parse
523///
524/// ```text
525/// <unquoted interval string> ::=
526///   [ <sign> ] { <year-month literal> | <day-time literal> }
527/// <year-month literal> ::=
528///     <years value> [ <minus sign> <months value> ]
529///   | <months value>
530/// <day-time literal> ::=
531///     <day-time interval>
532///   | <time interval>
533/// <day-time interval> ::=
534///   <days value> [ <space> <hours value> [ <colon> <minutes value>
535///       [ <colon> <seconds value> ] ] ]
536/// <time interval> ::=
537///     <hours value> [ <colon> <minutes value> [ <colon> <seconds value> ] ]
538///   | <minutes value> [ <colon> <seconds value> ]
539///   | <seconds value>
540/// ```
541pub fn parse_interval(s: &str) -> Result<Interval, ParseError> {
542    parse_interval_w_disambiguator(s, None, DateTimeField::Second)
543}
544
545/// Parse an interval string, using an optional leading precision for time (H:M:S)
546/// and a specific mz_sql_parser::ast::DateTimeField to identify ambiguous elements.
547/// For more information about this operation, see the documentation on
548/// ParsedDateTime::build_parsed_datetime_interval.
549pub fn parse_interval_w_disambiguator(
550    s: &str,
551    leading_time_precision: Option<DateTimeField>,
552    d: DateTimeField,
553) -> Result<Interval, ParseError> {
554    ParsedDateTime::build_parsed_datetime_interval(s, leading_time_precision, d)
555        .and_then(|pdt| pdt.compute_interval())
556        .map_err(|e| ParseError::invalid_input_syntax("interval", s).with_details(e))
557}
558
559pub fn format_interval<F>(buf: &mut F, iv: Interval) -> Nestable
560where
561    F: FormatBuffer,
562{
563    write!(buf, "{}", iv);
564    Nestable::MayNeedEscaping
565}
566
567pub fn parse_numeric(s: &str) -> Result<OrderedDecimal<Numeric>, ParseError> {
568    let mut cx = numeric::cx_datum();
569    let mut n = match cx.parse(s.trim()) {
570        Ok(n) => n,
571        Err(..) => {
572            return Err(ParseError::invalid_input_syntax("numeric", s));
573        }
574    };
575
576    let cx_status = cx.status();
577
578    // Check for values that can only be generated by invalid syntax.
579    if (n.is_infinite() && !cx_status.overflow())
580        || (n.is_nan() && n.is_negative())
581        || n.is_signaling_nan()
582    {
583        return Err(ParseError::invalid_input_syntax("numeric", s));
584    }
585
586    // Process value; only errors if value is out of range of numeric's max precision.
587    let out_of_range = numeric::munge_numeric(&mut n).is_err();
588
589    if cx_status.overflow() || cx_status.subnormal() || out_of_range {
590        Err(ParseError::out_of_range("numeric", s).with_details(format!(
591            "exceeds maximum precision {}",
592            NUMERIC_DATUM_MAX_PRECISION
593        )))
594    } else {
595        Ok(OrderedDecimal(n))
596    }
597}
598
599pub fn format_numeric<F>(buf: &mut F, n: &OrderedDecimal<Numeric>) -> Nestable
600where
601    F: FormatBuffer,
602{
603    write!(buf, "{}", n.0.to_standard_notation_string());
604    Nestable::Yes
605}
606
607pub fn format_string<F>(buf: &mut F, s: &str) -> Nestable
608where
609    F: FormatBuffer,
610{
611    buf.write_str(s);
612    Nestable::MayNeedEscaping
613}
614
615pub fn parse_pg_legacy_name(s: &str) -> String {
616    // To match PostgreSQL, we truncate the string to 64 bytes, while being
617    // careful not to truncate in the middle of any multibyte characters.
618    let mut out = String::new();
619    let mut len = 0;
620    for c in s.chars() {
621        len += c.len_utf8();
622        if len > NAME_MAX_BYTES {
623            break;
624        }
625        out.push(c);
626    }
627    out
628}
629
630pub fn parse_bytes(s: &str) -> Result<Vec<u8>, ParseError> {
631    // If the input starts with "\x", then the remaining bytes are hex encoded
632    // [0]. Otherwise the bytes use the traditional "escape" format. [1]
633    //
634    // [0]: https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.9
635    // [1]: https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10
636    if let Some(remainder) = s.strip_prefix(r"\x") {
637        parse_bytes_hex(remainder).map_err(|e| {
638            ParseError::invalid_input_syntax("bytea", s).with_details(e.to_string_with_causes())
639        })
640    } else {
641        parse_bytes_traditional(s)
642    }
643}
644
645pub fn parse_bytes_hex(s: &str) -> Result<Vec<u8>, ParseHexError> {
646    // Can't use `hex::decode` here, as it doesn't tolerate whitespace
647    // between encoded bytes.
648
649    let decode_nibble = |b| match b {
650        b'a'..=b'f' => Ok(b - b'a' + 10),
651        b'A'..=b'F' => Ok(b - b'A' + 10),
652        b'0'..=b'9' => Ok(b - b'0'),
653        _ => Err(ParseHexError::InvalidHexDigit(char::from(b))),
654    };
655
656    let mut buf = vec![];
657    let mut nibbles = s.as_bytes().iter().copied();
658    while let Some(n) = nibbles.next() {
659        if let b' ' | b'\n' | b'\t' | b'\r' = n {
660            continue;
661        }
662        let n = decode_nibble(n)?;
663        let n2 = match nibbles.next() {
664            None => return Err(ParseHexError::OddLength),
665            Some(n2) => decode_nibble(n2)?,
666        };
667        buf.push((n << 4) | n2);
668    }
669    Ok(buf)
670}
671
672pub fn parse_bytes_traditional(s: &str) -> Result<Vec<u8>, ParseError> {
673    // Bytes are interpreted literally, save for the special escape sequences
674    // "\\", which represents a single backslash, and "\NNN", where each N
675    // is an octal digit, which represents the byte whose octal value is NNN.
676    let mut out = Vec::new();
677    let mut bytes = s.as_bytes().iter().fuse();
678    while let Some(&b) = bytes.next() {
679        if b != b'\\' {
680            out.push(b);
681            continue;
682        }
683        match bytes.next() {
684            None => {
685                return Err(ParseError::invalid_input_syntax("bytea", s)
686                    .with_details("ends with escape character"));
687            }
688            Some(b'\\') => out.push(b'\\'),
689            b => match (b, bytes.next(), bytes.next()) {
690                (Some(d2 @ b'0'..=b'3'), Some(d1 @ b'0'..=b'7'), Some(d0 @ b'0'..=b'7')) => {
691                    out.push(((d2 - b'0') << 6) + ((d1 - b'0') << 3) + (d0 - b'0'));
692                }
693                _ => {
694                    return Err(ParseError::invalid_input_syntax("bytea", s)
695                        .with_details("invalid escape sequence"));
696                }
697            },
698        }
699    }
700    Ok(out)
701}
702
703pub fn format_bytes<F>(buf: &mut F, bytes: &[u8]) -> Nestable
704where
705    F: FormatBuffer,
706{
707    write!(buf, "\\x{}", hex::encode(bytes));
708    Nestable::MayNeedEscaping
709}
710
711pub fn parse_jsonb(s: &str) -> Result<Jsonb, ParseError> {
712    s.trim()
713        .parse()
714        .map_err(|e| ParseError::invalid_input_syntax("jsonb", s).with_details(e))
715}
716
717pub fn format_jsonb<F>(buf: &mut F, jsonb: JsonbRef) -> Nestable
718where
719    F: FormatBuffer,
720{
721    write!(buf, "{}", jsonb);
722    Nestable::MayNeedEscaping
723}
724
725pub fn format_jsonb_pretty<F>(buf: &mut F, jsonb: JsonbRef)
726where
727    F: FormatBuffer,
728{
729    write!(buf, "{:#}", jsonb)
730}
731
732pub fn parse_uuid(s: &str) -> Result<Uuid, ParseError> {
733    s.trim()
734        .parse()
735        .map_err(|e| ParseError::invalid_input_syntax("uuid", s).with_details(e))
736}
737
738pub fn format_uuid<F>(buf: &mut F, uuid: Uuid) -> Nestable
739where
740    F: FormatBuffer,
741{
742    write!(buf, "{}", uuid);
743    Nestable::Yes
744}
745
746fn format_nanos_to_micros<F>(buf: &mut F, nanos: u32)
747where
748    F: FormatBuffer,
749{
750    if nanos >= 500 {
751        let mut micros = nanos / 1000;
752        let rem = nanos % 1000;
753        if rem >= 500 {
754            micros += 1;
755        }
756        // strip trailing zeros
757        let mut width = 6;
758        while micros % 10 == 0 {
759            width -= 1;
760            micros /= 10;
761        }
762        write!(buf, ".{:0width$}", micros, width = width);
763    }
764}
765
766#[derive(Debug, thiserror::Error)]
767enum ArrayParsingError {
768    #[error("Array value must start with \"{{\"")]
769    OpeningBraceMissing,
770    #[error("Specifying array lower bounds is not supported")]
771    DimsUnsupported,
772    #[error("{0}")]
773    Generic(String),
774    #[error("Unexpected \"{0}\" character.")]
775    UnexpectedChar(char),
776    #[error("Multidimensional arrays must have sub-arrays with matching dimensions.")]
777    NonRectilinearDims,
778    #[error("Unexpected array element.")]
779    UnexpectedElement,
780    #[error("Junk after closing right brace.")]
781    Junk,
782    #[error("Unexpected end of input.")]
783    EarlyTerm,
784}
785
786impl From<String> for ArrayParsingError {
787    fn from(value: String) -> Self {
788        ArrayParsingError::Generic(value)
789    }
790}
791
792pub fn parse_array<'a, T, E>(
793    s: &'a str,
794    make_null: impl FnMut() -> T,
795    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
796) -> Result<(Vec<T>, Vec<ArrayDimension>), ParseError>
797where
798    E: ToString,
799{
800    parse_array_inner(s, make_null, gen_elem)
801        .map_err(|details| ParseError::invalid_input_syntax("array", s).with_details(details))
802}
803
804fn parse_array_inner<'a, T, E>(
805    s: &'a str,
806    mut make_null: impl FnMut() -> T,
807    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
808) -> Result<(Vec<T>, Vec<ArrayDimension>), ArrayParsingError>
809where
810    E: ToString,
811{
812    use ArrayParsingError::*;
813
814    #[derive(Clone, Debug, Default)]
815    struct Dimension {
816        // If None, still discovering this dimension's permitted width;
817        // otherwise only permits `length` elements per dimension.
818        length: Option<usize>,
819        // Whether this dimension has a staged element that can be committed.
820        // This prevents us from accepting "empty" elements, e.g. `{1,}` or
821        // `{1,,2}`.
822        staged_element: bool,
823        // The total number of elements committed in this dimension since it was
824        // last entered. Zeroed out when exited.
825        committed_element_count: usize,
826    }
827
828    #[derive(Clone, Debug, Default)]
829    struct ArrayBuilder<'a> {
830        // The current character we're operating from.
831        current_command_char: char,
832        // The dimension information, which will get turned into
833        // `ArrayDimensions`.
834        dimensions: Vec<Dimension>,
835        // THe current dimension we're operating on.
836        current_dim: usize,
837        // Whether or not this array may be modified any further.
838        sealed: bool,
839        // The elements extracted from the input str. This is on the array
840        // builder to necessitate using `insert_element` so we understand when
841        // elements are staged.
842        elements: Vec<Option<Cow<'a, str>>>,
843    }
844
845    impl<'a> ArrayBuilder<'a> {
846        fn build(
847            s: &'a str,
848        ) -> Result<(Vec<Option<Cow<'a, str>>>, Vec<ArrayDimension>), ArrayParsingError> {
849            let buf = &mut LexBuf::new(s);
850
851            // TODO: support parsing array dimensions
852            if buf.consume('[') {
853                Err(DimsUnsupported)?;
854            }
855
856            buf.take_while(|ch| ch.is_ascii_whitespace());
857
858            if !buf.consume('{') {
859                Err(OpeningBraceMissing)?;
860            }
861
862            let mut dimensions = 1;
863
864            loop {
865                buf.take_while(|ch| ch.is_ascii_whitespace());
866                if buf.consume('{') {
867                    dimensions += 1;
868                } else {
869                    break;
870                }
871            }
872
873            let mut builder = ArrayBuilder {
874                current_command_char: '{',
875                dimensions: vec![Dimension::default(); dimensions],
876                // We enter the builder at the element-bearing dimension, which is the last
877                // dimension.
878                current_dim: dimensions - 1,
879                sealed: false,
880                elements: vec![],
881            };
882
883            let is_special_char = |c| matches!(c, '{' | '}' | ',' | '\\' | '"');
884            let is_end_of_literal = |c| matches!(c, ',' | '}');
885
886            loop {
887                buf.take_while(|ch| ch.is_ascii_whitespace());
888
889                // Filter command state from terminal states.
890                match buf.next() {
891                    None if builder.sealed => {
892                        break;
893                    }
894                    None => Err(EarlyTerm)?,
895                    Some(_) if builder.sealed => Err(Junk)?,
896                    Some(c) => builder.current_command_char = c,
897                }
898
899                // Run command char
900                match builder.current_command_char {
901                    '{' => builder.enter_dim()?,
902                    '}' => builder.exit_dim()?,
903                    ',' => builder.commit_element(true)?,
904                    c => {
905                        buf.prev();
906                        let s = match c {
907                            '"' => Some(lex_quoted_element(buf)?),
908                            _ => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
909                        };
910                        builder.insert_element(s)?;
911                    }
912                }
913            }
914
915            if builder.elements.is_empty() {
916                // Per PG, empty arrays are represented by empty dimensions
917                // rather than one dimension with 0 length.
918                return Ok((vec![], vec![]));
919            }
920
921            let dims = builder
922                .dimensions
923                .into_iter()
924                .map(|dim| ArrayDimension {
925                    length: dim
926                        .length
927                        .expect("every dimension must have its length discovered"),
928                    lower_bound: 1,
929                })
930                .collect();
931
932            Ok((builder.elements, dims))
933        }
934
935        /// Descend into another dimension of the array.
936        fn enter_dim(&mut self) -> Result<(), ArrayParsingError> {
937            let d = &mut self.dimensions[self.current_dim];
938            // Cannot enter a new dimension with an uncommitted element.
939            if d.staged_element {
940                return Err(UnexpectedChar(self.current_command_char));
941            }
942
943            self.current_dim += 1;
944
945            // You have exceeded the maximum dimensions.
946            if self.current_dim >= self.dimensions.len() {
947                return Err(NonRectilinearDims);
948            }
949
950            Ok(())
951        }
952
953        /// Insert a new element into the array, ensuring it is in the proper dimension.
954        fn insert_element(&mut self, s: Option<Cow<'a, str>>) -> Result<(), ArrayParsingError> {
955            // Can only insert elements into data-bearing dimension, which is
956            // the last one.
957            if self.current_dim != self.dimensions.len() - 1 {
958                return Err(UnexpectedElement);
959            }
960
961            self.stage_element()?;
962
963            self.elements.push(s);
964
965            Ok(())
966        }
967
968        /// Stage an element to be committed. Only one element can be staged at
969        /// a time and staged elements must be committed before moving onto the
970        /// next element or leaving the dimension.
971        fn stage_element(&mut self) -> Result<(), ArrayParsingError> {
972            let d = &mut self.dimensions[self.current_dim];
973            // Cannot stage two elements at once, i.e. previous element wasn't
974            // followed by committing token (`,` or `}`).
975            if d.staged_element {
976                return Err(UnexpectedElement);
977            }
978            d.staged_element = true;
979            Ok(())
980        }
981
982        /// Commit the currently staged element, which can be made optional.
983        /// This ensures that each element has an appropriate terminal character
984        /// after it.
985        fn commit_element(&mut self, require_staged: bool) -> Result<(), ArrayParsingError> {
986            let d = &mut self.dimensions[self.current_dim];
987            if !d.staged_element {
988                // - , requires a preceding staged element
989                // - } does not require a preceding staged element only when
990                //   it's the close of an empty dimension.
991                return if require_staged || d.committed_element_count > 0 {
992                    Err(UnexpectedChar(self.current_command_char))
993                } else {
994                    // This indicates that we have an empty value in this
995                    // dimension and want to exit before incrementing the
996                    // committed element count.
997                    Ok(())
998                };
999            }
1000            d.staged_element = false;
1001            d.committed_element_count += 1;
1002
1003            Ok(())
1004        }
1005
1006        /// Exit the current dimension, committing any currently staged element
1007        /// in this dimension, and marking the interior array that this is part
1008        /// of as staged itself. If this is the 0th dimension, i.e. the closed
1009        /// brace matching the first open brace, seal the builder from further
1010        /// modification.
1011        fn exit_dim(&mut self) -> Result<(), ArrayParsingError> {
1012            // Commit an element of this dimension
1013            self.commit_element(false)?;
1014
1015            let d = &mut self.dimensions[self.current_dim];
1016
1017            // Ensure that the elements in this dimension conform to the expected shape.
1018            match d.length {
1019                None => d.length = Some(d.committed_element_count),
1020                Some(l) => {
1021                    if l != d.committed_element_count {
1022                        return Err(NonRectilinearDims);
1023                    }
1024                }
1025            }
1026
1027            // Reset this dimension's counter in case it's re-entered.
1028            d.committed_element_count = 0;
1029
1030            // If we closed the last dimension, this array may not be modified
1031            // any longer.
1032            if self.current_dim == 0 {
1033                self.sealed = true;
1034            } else {
1035                self.current_dim -= 1;
1036                // This object is an element of a higher dimension.
1037                self.stage_element()?;
1038            }
1039
1040            Ok(())
1041        }
1042    }
1043
1044    let (raw_elems, dims) = ArrayBuilder::build(s)?;
1045
1046    let mut elems = Vec::with_capacity(raw_elems.len());
1047
1048    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1049
1050    for elem in raw_elems.into_iter() {
1051        elems.push(match elem {
1052            Some(elem) => generated(elem)?,
1053            None => make_null(),
1054        });
1055    }
1056
1057    Ok((elems, dims))
1058}
1059
1060pub fn parse_list<'a, T, E>(
1061    s: &'a str,
1062    is_element_type_list: bool,
1063    make_null: impl FnMut() -> T,
1064    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1065) -> Result<Vec<T>, ParseError>
1066where
1067    E: ToString,
1068{
1069    parse_list_inner(s, is_element_type_list, make_null, gen_elem)
1070        .map_err(|details| ParseError::invalid_input_syntax("list", s).with_details(details))
1071}
1072
1073// `parse_list_inner`'s separation from `parse_list` simplifies error handling
1074// by allowing subprocedures to return `String` errors.
1075fn parse_list_inner<'a, T, E>(
1076    s: &'a str,
1077    is_element_type_list: bool,
1078    mut make_null: impl FnMut() -> T,
1079    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1080) -> Result<Vec<T>, String>
1081where
1082    E: ToString,
1083{
1084    let mut elems = vec![];
1085    let buf = &mut LexBuf::new(s);
1086
1087    // Consume opening paren.
1088    if !buf.consume('{') {
1089        bail!(
1090            "expected '{{', found {}",
1091            match buf.next() {
1092                Some(c) => format!("{}", c),
1093                None => "empty string".to_string(),
1094            }
1095        )
1096    }
1097
1098    // Simplifies calls to `gen_elem` by handling errors
1099    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1100    let is_special_char = |c| matches!(c, '{' | '}' | ',' | '\\' | '"');
1101    let is_end_of_literal = |c| matches!(c, ',' | '}');
1102
1103    // Consume elements.
1104    loop {
1105        buf.take_while(|ch| ch.is_ascii_whitespace());
1106        // Check for terminals.
1107        match buf.next() {
1108            Some('}') => {
1109                break;
1110            }
1111            _ if elems.len() == 0 => {
1112                buf.prev();
1113            }
1114            Some(',') => {}
1115            Some(c) => bail!("expected ',' or '}}', got '{}'", c),
1116            None => bail!("unexpected end of input"),
1117        }
1118
1119        buf.take_while(|ch| ch.is_ascii_whitespace());
1120        // Get elements.
1121        let elem = match buf.peek() {
1122            Some('"') => generated(lex_quoted_element(buf)?)?,
1123            Some('{') => {
1124                if !is_element_type_list {
1125                    bail!(
1126                        "unescaped '{{' at beginning of element; perhaps you \
1127                        want a nested list, e.g. '{{a}}'::text list list"
1128                    )
1129                }
1130                generated(lex_embedded_element(buf)?)?
1131            }
1132            Some(_) => match lex_unquoted_element(buf, is_special_char, is_end_of_literal)? {
1133                Some(elem) => generated(elem)?,
1134                None => make_null(),
1135            },
1136            None => bail!("unexpected end of input"),
1137        };
1138        elems.push(elem);
1139    }
1140
1141    buf.take_while(|ch| ch.is_ascii_whitespace());
1142    if let Some(c) = buf.next() {
1143        bail!(
1144            "malformed array literal; contains '{}' after terminal '}}'",
1145            c
1146        )
1147    }
1148
1149    Ok(elems)
1150}
1151
1152pub fn parse_legacy_vector<'a, T, E>(
1153    s: &'a str,
1154    gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1155) -> Result<Vec<T>, ParseError>
1156where
1157    E: ToString,
1158{
1159    parse_legacy_vector_inner(s, gen_elem)
1160        .map_err(|details| ParseError::invalid_input_syntax("int2vector", s).with_details(details))
1161}
1162
1163pub fn parse_legacy_vector_inner<'a, T, E>(
1164    s: &'a str,
1165    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<T, E>,
1166) -> Result<Vec<T>, String>
1167where
1168    E: ToString,
1169{
1170    let mut elems = vec![];
1171    let buf = &mut LexBuf::new(s);
1172
1173    let mut generated = |elem| gen_elem(elem).map_err(|e| e.to_string());
1174
1175    loop {
1176        buf.take_while(|ch| ch.is_ascii_whitespace());
1177        match buf.peek() {
1178            Some(_) => {
1179                let elem = buf.take_while(|ch| !ch.is_ascii_whitespace());
1180                elems.push(generated(elem.into())?);
1181            }
1182            None => break,
1183        }
1184    }
1185
1186    Ok(elems)
1187}
1188
1189fn lex_quoted_element<'a>(buf: &mut LexBuf<'a>) -> Result<Cow<'a, str>, String> {
1190    assert!(buf.consume('"'));
1191    let s = buf.take_while(|ch| !matches!(ch, '"' | '\\'));
1192
1193    // `Cow::Borrowed` optimization for quoted strings without escapes
1194    if let Some('"') = buf.peek() {
1195        buf.next();
1196        return Ok(s.into());
1197    }
1198
1199    let mut s = s.to_string();
1200    loop {
1201        match buf.next() {
1202            Some('\\') => match buf.next() {
1203                Some(c) => s.push(c),
1204                None => bail!("unterminated quoted string"),
1205            },
1206            Some('"') => break,
1207            Some(c) => s.push(c),
1208            None => bail!("unterminated quoted string"),
1209        }
1210    }
1211    Ok(s.into())
1212}
1213
1214fn lex_embedded_element<'a>(buf: &mut LexBuf<'a>) -> Result<Cow<'a, str>, String> {
1215    let pos = buf.pos();
1216    assert!(matches!(buf.next(), Some('{')));
1217    let mut depth = 1;
1218    let mut in_escape = false;
1219    while depth > 0 {
1220        match buf.next() {
1221            Some('\\') => {
1222                buf.next(); // Next character is escaped, so ignore it
1223            }
1224            Some('"') => in_escape = !in_escape, // Begin or end escape
1225            Some('{') if !in_escape => depth += 1,
1226            Some('}') if !in_escape => depth -= 1,
1227            Some(_) => (),
1228            None => bail!("unterminated embedded element"),
1229        }
1230    }
1231    let s = &buf.inner()[pos..buf.pos()];
1232    Ok(Cow::Borrowed(s))
1233}
1234
1235// Result of `None` indicates element is NULL.
1236fn lex_unquoted_element<'a>(
1237    buf: &mut LexBuf<'a>,
1238    is_special_char: impl Fn(char) -> bool,
1239    is_end_of_literal: impl Fn(char) -> bool,
1240) -> Result<Option<Cow<'a, str>>, String> {
1241    // first char is guaranteed to be non-whitespace
1242    assert!(!buf.peek().unwrap().is_ascii_whitespace());
1243
1244    let s = buf.take_while(|ch| !is_special_char(ch) && !ch.is_ascii_whitespace());
1245
1246    // `Cow::Borrowed` optimization for elements without special characters.
1247    match buf.peek() {
1248        Some(',') | Some('}') if !s.is_empty() => {
1249            return Ok(if s.to_uppercase() == "NULL" {
1250                None
1251            } else {
1252                Some(s.into())
1253            });
1254        }
1255        _ => {}
1256    }
1257
1258    // Track whether there are any escaped characters to determine if the string
1259    // "NULL" should be treated as a NULL, or if it had any escaped characters
1260    // and should be treated as the string "NULL".
1261    let mut escaped_char = false;
1262
1263    let mut s = s.to_string();
1264    // As we go, we keep track of where to truncate to in order to remove any
1265    // trailing whitespace.
1266    let mut trimmed_len = s.len();
1267    loop {
1268        match buf.next() {
1269            Some('\\') => match buf.next() {
1270                Some(c) => {
1271                    escaped_char = true;
1272                    s.push(c);
1273                    trimmed_len = s.len();
1274                }
1275                None => return Err("unterminated element".into()),
1276            },
1277            Some(c) if is_end_of_literal(c) => {
1278                // End of literal characters as the first character indicates
1279                // a missing element definition.
1280                if s.is_empty() {
1281                    bail!("malformed literal; missing element")
1282                }
1283                buf.prev();
1284                break;
1285            }
1286            Some(c) if is_special_char(c) => {
1287                bail!("malformed literal; must escape special character '{}'", c)
1288            }
1289            Some(c) => {
1290                s.push(c);
1291                if !c.is_ascii_whitespace() {
1292                    trimmed_len = s.len();
1293                }
1294            }
1295            None => bail!("unterminated element"),
1296        }
1297    }
1298    s.truncate(trimmed_len);
1299    Ok(if s.to_uppercase() == "NULL" && !escaped_char {
1300        None
1301    } else {
1302        Some(Cow::Owned(s))
1303    })
1304}
1305
1306pub fn parse_map<'a, V, E>(
1307    s: &'a str,
1308    is_value_type_map: bool,
1309    gen_elem: impl FnMut(Option<Cow<'a, str>>) -> Result<V, E>,
1310) -> Result<BTreeMap<String, V>, ParseError>
1311where
1312    E: ToString,
1313{
1314    parse_map_inner(s, is_value_type_map, gen_elem)
1315        .map_err(|details| ParseError::invalid_input_syntax("map", s).with_details(details))
1316}
1317
1318fn parse_map_inner<'a, V, E>(
1319    s: &'a str,
1320    is_value_type_map: bool,
1321    mut gen_elem: impl FnMut(Option<Cow<'a, str>>) -> Result<V, E>,
1322) -> Result<BTreeMap<String, V>, String>
1323where
1324    E: ToString,
1325{
1326    let mut map = BTreeMap::new();
1327    let buf = &mut LexBuf::new(s);
1328
1329    // Consume opening paren.
1330    if !buf.consume('{') {
1331        bail!(
1332            "expected '{{', found {}",
1333            match buf.next() {
1334                Some(c) => format!("{}", c),
1335                None => "empty string".to_string(),
1336            }
1337        )
1338    }
1339
1340    // Simplifies calls to generators by handling errors
1341    let gen_key = |key: Option<Cow<'a, str>>| -> Result<String, String> {
1342        match key {
1343            Some(Cow::Owned(s)) => Ok(s),
1344            Some(Cow::Borrowed(s)) => Ok(s.to_owned()),
1345            None => Err("expected key".to_owned()),
1346        }
1347    };
1348    let mut gen_value = |elem| gen_elem(elem).map_err(|e| e.to_string());
1349    let is_special_char = |c| matches!(c, '{' | '}' | ',' | '"' | '=' | '>' | '\\');
1350    let is_end_of_literal = |c| matches!(c, ',' | '}' | '=');
1351
1352    loop {
1353        // Check for terminals.
1354        buf.take_while(|ch| ch.is_ascii_whitespace());
1355        match buf.next() {
1356            Some('}') => break,
1357            _ if map.len() == 0 => {
1358                buf.prev();
1359            }
1360            Some(',') => {}
1361            Some(c) => bail!("expected ',' or end of input, got '{}'", c),
1362            None => bail!("unexpected end of input"),
1363        }
1364
1365        // Get key.
1366        buf.take_while(|ch| ch.is_ascii_whitespace());
1367        let key = match buf.peek() {
1368            Some('"') => Some(lex_quoted_element(buf)?),
1369            Some(_) => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
1370            None => bail!("unexpected end of input"),
1371        };
1372        let key = gen_key(key)?;
1373
1374        // Assert mapping arrow (=>) is present.
1375        buf.take_while(|ch| ch.is_ascii_whitespace());
1376        if !buf.consume('=') || !buf.consume('>') {
1377            bail!("expected =>")
1378        }
1379
1380        // Get value.
1381        buf.take_while(|ch| ch.is_ascii_whitespace());
1382        let value = match buf.peek() {
1383            Some('"') => Some(lex_quoted_element(buf)?),
1384            Some('{') => {
1385                if !is_value_type_map {
1386                    bail!(
1387                        "unescaped '{{' at beginning of value; perhaps you \
1388                           want a nested map, e.g. '{{a=>{{a=>1}}}}'::map[text=>map[text=>int]]"
1389                    )
1390                }
1391                Some(lex_embedded_element(buf)?)
1392            }
1393            Some(_) => lex_unquoted_element(buf, is_special_char, is_end_of_literal)?,
1394            None => bail!("unexpected end of input"),
1395        };
1396        let value = gen_value(value)?;
1397
1398        // Insert elements.
1399        map.insert(key, value);
1400    }
1401    Ok(map)
1402}
1403
1404pub fn format_map<F, T, E>(
1405    buf: &mut F,
1406    elems: impl IntoIterator<Item = (impl AsRef<str>, T)>,
1407    mut format_elem: impl FnMut(MapValueWriter<F>, T) -> Result<Nestable, E>,
1408) -> Result<Nestable, E>
1409where
1410    F: FormatBuffer,
1411{
1412    buf.write_char('{');
1413    let mut elems = elems.into_iter().peekable();
1414    while let Some((key, value)) = elems.next() {
1415        // Map key values are always Strings, which always evaluate to
1416        // Nestable::MayNeedEscaping.
1417        let key_start = buf.len();
1418        buf.write_str(key.as_ref());
1419        escape_elem::<_, MapElementEscaper>(buf, key_start);
1420
1421        buf.write_str("=>");
1422
1423        let value_start = buf.len();
1424        if let Nestable::MayNeedEscaping = format_elem(MapValueWriter(buf), value)? {
1425            escape_elem::<_, MapElementEscaper>(buf, value_start);
1426        }
1427
1428        if elems.peek().is_some() {
1429            buf.write_char(',');
1430        }
1431    }
1432    buf.write_char('}');
1433    Ok(Nestable::Yes)
1434}
1435
1436pub fn parse_range<'a, V, E>(
1437    s: &'a str,
1438    gen_elem: impl FnMut(Cow<'a, str>) -> Result<V, E>,
1439) -> Result<Range<V>, ParseError>
1440where
1441    E: ToString,
1442{
1443    Ok(Range {
1444        inner: parse_range_inner(s, gen_elem).map_err(|details| {
1445            ParseError::invalid_input_syntax("range", s).with_details(details)
1446        })?,
1447    })
1448}
1449
1450fn parse_range_inner<'a, V, E>(
1451    s: &'a str,
1452    mut gen_elem: impl FnMut(Cow<'a, str>) -> Result<V, E>,
1453) -> Result<Option<RangeInner<V>>, String>
1454where
1455    E: ToString,
1456{
1457    let buf = &mut LexBuf::new(s);
1458
1459    buf.take_while(|ch| ch.is_ascii_whitespace());
1460
1461    if buf.consume_str("empty") {
1462        buf.take_while(|ch| ch.is_ascii_whitespace());
1463        if buf.next().is_none() {
1464            return Ok(None);
1465        } else {
1466            bail!("Junk after \"empty\" key word.")
1467        }
1468    }
1469
1470    let lower_inclusive = match buf.next() {
1471        Some('[') => true,
1472        Some('(') => false,
1473        _ => bail!("Missing left parenthesis or bracket."),
1474    };
1475
1476    let lower_bound = match buf.peek() {
1477        Some(',') => None,
1478        Some(_) => {
1479            let v = buf.take_while(|c| !matches!(c, ','));
1480            let v = gen_elem(Cow::from(v)).map_err(|e| e.to_string())?;
1481            Some(v)
1482        }
1483        None => bail!("Unexpected end of input."),
1484    };
1485
1486    buf.take_while(|ch| ch.is_ascii_whitespace());
1487
1488    if buf.next() != Some(',') {
1489        bail!("Missing comma after lower bound.")
1490    }
1491
1492    let upper_bound = match buf.peek() {
1493        Some(']' | ')') => None,
1494        Some(_) => {
1495            let v = buf.take_while(|c| !matches!(c, ')' | ']'));
1496            let v = gen_elem(Cow::from(v)).map_err(|e| e.to_string())?;
1497            Some(v)
1498        }
1499        None => bail!("Unexpected end of input."),
1500    };
1501
1502    let upper_inclusive = match buf.next() {
1503        Some(']') => true,
1504        Some(')') => false,
1505        _ => bail!("Missing left parenthesis or bracket."),
1506    };
1507
1508    buf.take_while(|ch| ch.is_ascii_whitespace());
1509
1510    if buf.next().is_some() {
1511        bail!("Junk after right parenthesis or bracket.")
1512    }
1513
1514    let range = Some(RangeInner {
1515        lower: RangeBound {
1516            inclusive: lower_inclusive,
1517            bound: lower_bound,
1518        },
1519        upper: RangeBound {
1520            inclusive: upper_inclusive,
1521            bound: upper_bound,
1522        },
1523    });
1524
1525    Ok(range)
1526}
1527
1528/// Writes a [`Range`] to `buf`.
1529pub fn format_range<F, V, E>(
1530    buf: &mut F,
1531    r: &Range<V>,
1532    mut format_elem: impl FnMut(RangeElementWriter<F>, Option<&V>) -> Result<Nestable, E>,
1533) -> Result<Nestable, E>
1534where
1535    F: FormatBuffer,
1536{
1537    let range = match &r.inner {
1538        None => {
1539            buf.write_str("empty");
1540            return Ok(Nestable::MayNeedEscaping);
1541        }
1542        Some(i) => i,
1543    };
1544
1545    if range.lower.inclusive {
1546        buf.write_char('[');
1547    } else {
1548        buf.write_char('(');
1549    }
1550
1551    let start = buf.len();
1552    if let Nestable::MayNeedEscaping =
1553        format_elem(RangeElementWriter(buf), range.lower.bound.as_ref())?
1554    {
1555        escape_elem::<_, ListElementEscaper>(buf, start);
1556    }
1557
1558    buf.write_char(',');
1559
1560    let start = buf.len();
1561    if let Nestable::MayNeedEscaping =
1562        format_elem(RangeElementWriter(buf), range.upper.bound.as_ref())?
1563    {
1564        escape_elem::<_, ListElementEscaper>(buf, start);
1565    }
1566
1567    if range.upper.inclusive {
1568        buf.write_char(']');
1569    } else {
1570        buf.write_char(')');
1571    }
1572
1573    Ok(Nestable::MayNeedEscaping)
1574}
1575
/// A helper for `format_range` that formats a single range bound.
#[derive(Debug)]
pub struct RangeElementWriter<'a, F>(&'a mut F);

impl<'a, F> RangeElementWriter<'a, F>
where
    F: FormatBuffer,
{
    /// Marks this range bound as null, i.e. absent.
    ///
    /// Nothing is written to the underlying buffer.
    pub fn write_null(self) -> Nestable {
        // In ranges these "null" values represent infinite bounds, which are
        // not represented as values, but rather the absence of a value.
        Nestable::Yes
    }

    /// Returns a [`FormatBuffer`] into which a non-null bound can be
    /// written.
    pub fn nonnull_buffer(self) -> &'a mut F {
        self.0
    }
}
1597
1598pub fn format_array<F, T, E>(
1599    buf: &mut F,
1600    dims: &[ArrayDimension],
1601    elems: impl IntoIterator<Item = T>,
1602    mut format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1603) -> Result<Nestable, E>
1604where
1605    F: FormatBuffer,
1606{
1607    if dims.iter().any(|dim| dim.lower_bound != 1) {
1608        for d in dims.iter() {
1609            let (lower, upper) = d.dimension_bounds();
1610            write!(buf, "[{}:{}]", lower, upper);
1611        }
1612        buf.write_char('=');
1613    }
1614
1615    format_array_inner(buf, dims, &mut elems.into_iter(), &mut format_elem)?;
1616    Ok(Nestable::Yes)
1617}
1618
1619pub fn format_array_inner<F, T, E>(
1620    buf: &mut F,
1621    dims: &[ArrayDimension],
1622    elems: &mut impl Iterator<Item = T>,
1623    format_elem: &mut impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1624) -> Result<(), E>
1625where
1626    F: FormatBuffer,
1627{
1628    if dims.is_empty() {
1629        buf.write_str("{}");
1630        return Ok(());
1631    }
1632
1633    buf.write_char('{');
1634    for j in 0..dims[0].length {
1635        if j > 0 {
1636            buf.write_char(',');
1637        }
1638        if dims.len() == 1 {
1639            let start = buf.len();
1640            let elem = elems.next().unwrap();
1641            if let Nestable::MayNeedEscaping = format_elem(ListElementWriter(buf), elem)? {
1642                escape_elem::<_, ListElementEscaper>(buf, start);
1643            }
1644        } else {
1645            format_array_inner(buf, &dims[1..], elems, format_elem)?;
1646        }
1647    }
1648    buf.write_char('}');
1649
1650    Ok(())
1651}
1652
1653pub fn format_legacy_vector<F, T, E>(
1654    buf: &mut F,
1655    elems: impl IntoIterator<Item = T>,
1656    format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1657) -> Result<Nestable, E>
1658where
1659    F: FormatBuffer,
1660{
1661    format_elems(buf, elems, format_elem, ' ')?;
1662    Ok(Nestable::MayNeedEscaping)
1663}
1664
1665pub fn format_list<F, T, E>(
1666    buf: &mut F,
1667    elems: impl IntoIterator<Item = T>,
1668    format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1669) -> Result<Nestable, E>
1670where
1671    F: FormatBuffer,
1672{
1673    buf.write_char('{');
1674    format_elems(buf, elems, format_elem, ',')?;
1675    buf.write_char('}');
1676    Ok(Nestable::Yes)
1677}
1678
1679/// Writes each `elem` into `buf`, separating the elems with `sep`.
1680pub fn format_elems<F, T, E>(
1681    buf: &mut F,
1682    elems: impl IntoIterator<Item = T>,
1683    mut format_elem: impl FnMut(ListElementWriter<F>, T) -> Result<Nestable, E>,
1684    sep: char,
1685) -> Result<(), E>
1686where
1687    F: FormatBuffer,
1688{
1689    let mut elems = elems.into_iter().peekable();
1690    while let Some(elem) = elems.next() {
1691        let start = buf.len();
1692        if let Nestable::MayNeedEscaping = format_elem(ListElementWriter(buf), elem)? {
1693            escape_elem::<_, ListElementEscaper>(buf, start);
1694        }
1695        if elems.peek().is_some() {
1696            buf.write_char(sep)
1697        }
1698    }
1699    Ok(())
1700}
1701
1702/// Writes an `mz_acl_item` to `buf`.
1703pub fn format_mz_acl_item<F>(buf: &mut F, mz_acl_item: MzAclItem) -> Nestable
1704where
1705    F: FormatBuffer,
1706{
1707    write!(buf, "{mz_acl_item}");
1708    Nestable::Yes
1709}
1710
1711/// Parses an MzAclItem from `s`.
1712pub fn parse_mz_acl_item(s: &str) -> Result<MzAclItem, ParseError> {
1713    s.trim()
1714        .parse()
1715        .map_err(|e| ParseError::invalid_input_syntax("mz_aclitem", s).with_details(e))
1716}
1717
1718/// Writes an `acl_item` to `buf`.
1719pub fn format_acl_item<F>(buf: &mut F, acl_item: AclItem) -> Nestable
1720where
1721    F: FormatBuffer,
1722{
1723    write!(buf, "{acl_item}");
1724    Nestable::Yes
1725}
1726
1727/// Parses an AclItem from `s`.
1728pub fn parse_acl_item(s: &str) -> Result<AclItem, ParseError> {
1729    s.trim()
1730        .parse()
1731        .map_err(|e| ParseError::invalid_input_syntax("aclitem", s).with_details(e))
1732}
1733
/// Describes how elements of a particular container syntax are escaped by
/// `escape_elem`.
pub trait ElementEscaper {
    /// Returns whether the element consisting of the bytes `elem` has to be
    /// quoted and escaped.
    fn needs_escaping(elem: &[u8]) -> bool;
    /// Returns the byte to insert in front of the byte `c`, where `c` is a
    /// double quote or a backslash inside an element that is being escaped.
    fn escape_char(c: u8) -> u8;
}
1738
1739struct ListElementEscaper;
1740
1741impl ElementEscaper for ListElementEscaper {
1742    fn needs_escaping(elem: &[u8]) -> bool {
1743        elem.is_empty()
1744            || elem == b"NULL"
1745            || elem
1746                .iter()
1747                .any(|c| matches!(c, b'{' | b'}' | b',' | b'"' | b'\\') || c.is_ascii_whitespace())
1748    }
1749
1750    fn escape_char(_: u8) -> u8 {
1751        b'\\'
1752    }
1753}
1754
1755struct MapElementEscaper;
1756
1757impl ElementEscaper for MapElementEscaper {
1758    fn needs_escaping(elem: &[u8]) -> bool {
1759        elem.is_empty()
1760            || elem == b"NULL"
1761            || elem.iter().any(|c| {
1762                matches!(c, b'{' | b'}' | b',' | b'"' | b'=' | b'>' | b'\\')
1763                    || c.is_ascii_whitespace()
1764            })
1765    }
1766
1767    fn escape_char(_: u8) -> u8 {
1768        b'\\'
1769    }
1770}
1771
1772struct RecordElementEscaper;
1773
1774impl ElementEscaper for RecordElementEscaper {
1775    fn needs_escaping(elem: &[u8]) -> bool {
1776        elem.is_empty()
1777            || elem
1778                .iter()
1779                .any(|c| matches!(c, b'(' | b')' | b',' | b'"' | b'\\') || c.is_ascii_whitespace())
1780    }
1781
1782    fn escape_char(c: u8) -> u8 {
1783        if c == b'"' { b'"' } else { b'\\' }
1784    }
1785}
1786
1787/// Escapes a list, record, or map element in place.
1788///
1789/// The element must start at `start` and extend to the end of the buffer. The
1790/// buffer will be resized if escaping is necessary to account for the
1791/// additional escape characters.
1792///
1793/// The `needs_escaping` function is used to determine whether an element needs
1794/// to be escaped. It is provided with the bytes of each element and should
1795/// return whether the element needs to be escaped.
1796fn escape_elem<F, E>(buf: &mut F, start: usize)
1797where
1798    F: FormatBuffer,
1799    E: ElementEscaper,
1800{
1801    let elem = &buf.as_ref()[start..];
1802    if !E::needs_escaping(elem) {
1803        return;
1804    }
1805
1806    // We'll need two extra bytes for the quotes at the start and end of the
1807    // element, plus an extra byte for each quote and backslash.
1808    let extras = 2 + elem.iter().filter(|b| matches!(b, b'"' | b'\\')).count();
1809    let orig_end = buf.len();
1810    let new_end = buf.len() + extras;
1811
1812    // Pad the buffer to the new length. These characters will all be
1813    // overwritten.
1814    //
1815    // NOTE(benesch): we never read these characters, so we could instead use
1816    // uninitialized memory, but that's a level of unsafety I'm currently
1817    // uncomfortable with. The performance gain is negligible anyway.
1818    for _ in 0..extras {
1819        buf.write_char('\0');
1820    }
1821
1822    // SAFETY: inserting ASCII characters before other ASCII characters
1823    // preserves UTF-8 encoding.
1824    let elem = unsafe { buf.as_bytes_mut() };
1825
1826    // Walk the string backwards, writing characters at the new end index while
1827    // reading from the old end index, adding quotes at the beginning and end,
1828    // and adding a backslash before every backslash or quote.
1829    let mut wi = new_end - 1;
1830    elem[wi] = b'"';
1831    wi -= 1;
1832    for ri in (start..orig_end).rev() {
1833        elem[wi] = elem[ri];
1834        wi -= 1;
1835        if let b'\\' | b'"' = elem[ri] {
1836            elem[wi] = E::escape_char(elem[ri]);
1837            wi -= 1;
1838        }
1839    }
1840    elem[wi] = b'"';
1841
1842    assert!(wi == start);
1843}
1844
1845/// A helper for `format_list` that formats a single list element.
1846#[derive(Debug)]
1847pub struct ListElementWriter<'a, F>(&'a mut F);
1848
1849impl<'a, F> ListElementWriter<'a, F>
1850where
1851    F: FormatBuffer,
1852{
1853    /// Marks this list element as null.
1854    pub fn write_null(self) -> Nestable {
1855        self.0.write_str("NULL");
1856        Nestable::Yes
1857    }
1858
1859    /// Returns a [`FormatBuffer`] into which a non-null element can be
1860    /// written.
1861    pub fn nonnull_buffer(self) -> &'a mut F {
1862        self.0
1863    }
1864}
1865
1866/// A helper for `format_map` that formats a single map value.
1867#[derive(Debug)]
1868pub struct MapValueWriter<'a, F>(&'a mut F);
1869
1870impl<'a, F> MapValueWriter<'a, F>
1871where
1872    F: FormatBuffer,
1873{
1874    /// Marks this value element as null.
1875    pub fn write_null(self) -> Nestable {
1876        self.0.write_str("NULL");
1877        Nestable::Yes
1878    }
1879
1880    /// Returns a [`FormatBuffer`] into which a non-null element can be
1881    /// written.
1882    pub fn nonnull_buffer(self) -> &'a mut F {
1883        self.0
1884    }
1885}
1886
1887pub fn format_record<F, T, E>(
1888    buf: &mut F,
1889    elems: impl IntoIterator<Item = T>,
1890    mut format_elem: impl FnMut(RecordElementWriter<F>, T) -> Result<Nestable, E>,
1891) -> Result<Nestable, E>
1892where
1893    F: FormatBuffer,
1894{
1895    buf.write_char('(');
1896    let mut elems = elems.into_iter().peekable();
1897    while let Some(elem) = elems.next() {
1898        let start = buf.len();
1899        if let Nestable::MayNeedEscaping = format_elem(RecordElementWriter(buf), elem)? {
1900            escape_elem::<_, RecordElementEscaper>(buf, start);
1901        }
1902        if elems.peek().is_some() {
1903            buf.write_char(',')
1904        }
1905    }
1906    buf.write_char(')');
1907    Ok(Nestable::MayNeedEscaping)
1908}
1909
1910/// A helper for `format_record` that formats a single record element.
1911#[derive(Debug)]
1912pub struct RecordElementWriter<'a, F>(&'a mut F);
1913
1914impl<'a, F> RecordElementWriter<'a, F>
1915where
1916    F: FormatBuffer,
1917{
1918    /// Marks this record element as null.
1919    pub fn write_null(self) -> Nestable {
1920        Nestable::Yes
1921    }
1922
1923    /// Returns a [`FormatBuffer`] into which a non-null element can be
1924    /// written.
1925    pub fn nonnull_buffer(self) -> &'a mut F {
1926        self.0
1927    }
1928}
1929
1930/// An error while parsing an input as a type.
1931#[derive(
1932    Arbitrary, Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, MzReflect,
1933)]
1934pub struct ParseError {
1935    pub kind: ParseErrorKind,
1936    pub type_name: Box<str>,
1937    pub input: Box<str>,
1938    pub details: Option<Box<str>>,
1939}
1940
1941#[derive(
1942    Arbitrary,
1943    Ord,
1944    PartialOrd,
1945    Clone,
1946    Copy,
1947    Debug,
1948    Eq,
1949    PartialEq,
1950    Serialize,
1951    Deserialize,
1952    Hash,
1953    MzReflect,
1954)]
1955pub enum ParseErrorKind {
1956    OutOfRange,
1957    InvalidInputSyntax,
1958}
1959
1960impl ParseError {
1961    // To ensure that reversing the parameters causes a compile-time error, we
1962    // require that `type_name` be a string literal, even though `ParseError`
1963    // itself stores the type name as a `String`.
1964    fn new<S>(kind: ParseErrorKind, type_name: &'static str, input: S) -> ParseError
1965    where
1966        S: Into<Box<str>>,
1967    {
1968        ParseError {
1969            kind,
1970            type_name: type_name.into(),
1971            input: input.into(),
1972            details: None,
1973        }
1974    }
1975
1976    fn out_of_range<S>(type_name: &'static str, input: S) -> ParseError
1977    where
1978        S: Into<Box<str>>,
1979    {
1980        ParseError::new(ParseErrorKind::OutOfRange, type_name, input)
1981    }
1982
1983    fn invalid_input_syntax<S>(type_name: &'static str, input: S) -> ParseError
1984    where
1985        S: Into<Box<str>>,
1986    {
1987        ParseError::new(ParseErrorKind::InvalidInputSyntax, type_name, input)
1988    }
1989
1990    fn with_details<D>(mut self, details: D) -> ParseError
1991    where
1992        D: fmt::Display,
1993    {
1994        self.details = Some(details.to_string().into());
1995        self
1996    }
1997}
1998
1999impl fmt::Display for ParseError {
2000    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2001        match self.kind {
2002            ParseErrorKind::OutOfRange => {
2003                write!(
2004                    f,
2005                    "{} is out of range for type {}",
2006                    self.input.quoted(),
2007                    self.type_name
2008                )?;
2009                if let Some(details) = &self.details {
2010                    write!(f, ": {}", details)?;
2011                }
2012                Ok(())
2013            }
2014            ParseErrorKind::InvalidInputSyntax => {
2015                write!(f, "invalid input syntax for type {}: ", self.type_name)?;
2016                if let Some(details) = &self.details {
2017                    write!(f, "{}: ", details)?;
2018                }
2019                write!(f, "{}", self.input.quoted())
2020            }
2021        }
2022    }
2023}
2024
2025impl Error for ParseError {}
2026
2027impl RustType<ProtoParseError> for ParseError {
2028    fn into_proto(&self) -> ProtoParseError {
2029        use Kind::*;
2030        use proto_parse_error::*;
2031        let kind = match self.kind {
2032            ParseErrorKind::OutOfRange => OutOfRange(()),
2033            ParseErrorKind::InvalidInputSyntax => InvalidInputSyntax(()),
2034        };
2035        ProtoParseError {
2036            kind: Some(kind),
2037            type_name: self.type_name.into_proto(),
2038            input: self.input.into_proto(),
2039            details: self.details.into_proto(),
2040        }
2041    }
2042
2043    fn from_proto(proto: ProtoParseError) -> Result<Self, TryFromProtoError> {
2044        use proto_parse_error::Kind::*;
2045
2046        if let Some(kind) = proto.kind {
2047            Ok(ParseError {
2048                kind: match kind {
2049                    OutOfRange(()) => ParseErrorKind::OutOfRange,
2050                    InvalidInputSyntax(()) => ParseErrorKind::InvalidInputSyntax,
2051                },
2052                type_name: proto.type_name.into(),
2053                input: proto.input.into(),
2054                details: proto.details.into_rust()?,
2055            })
2056        } else {
2057            Err(TryFromProtoError::missing_field("ProtoParseError::kind"))
2058        }
2059    }
2060}
2061
2062#[derive(
2063    Arbitrary,
2064    Ord,
2065    PartialOrd,
2066    Copy,
2067    Clone,
2068    Debug,
2069    Eq,
2070    PartialEq,
2071    Serialize,
2072    Deserialize,
2073    Hash,
2074    MzReflect,
2075)]
2076pub enum ParseHexError {
2077    InvalidHexDigit(char),
2078    OddLength,
2079}
2080impl Error for ParseHexError {}
2081
2082impl fmt::Display for ParseHexError {
2083    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2084        match self {
2085            ParseHexError::InvalidHexDigit(c) => {
2086                write!(f, "invalid hexadecimal digit: \"{}\"", c.escape_default())
2087            }
2088            ParseHexError::OddLength => {
2089                f.write_str("invalid hexadecimal data: odd number of digits")
2090            }
2091        }
2092    }
2093}
2094
2095impl RustType<ProtoParseHexError> for ParseHexError {
2096    fn into_proto(&self) -> ProtoParseHexError {
2097        use Kind::*;
2098        use proto_parse_hex_error::*;
2099        let kind = match self {
2100            ParseHexError::InvalidHexDigit(v) => InvalidHexDigit(v.into_proto()),
2101            ParseHexError::OddLength => OddLength(()),
2102        };
2103        ProtoParseHexError { kind: Some(kind) }
2104    }
2105
2106    fn from_proto(error: ProtoParseHexError) -> Result<Self, TryFromProtoError> {
2107        use proto_parse_hex_error::Kind::*;
2108        match error.kind {
2109            Some(kind) => match kind {
2110                InvalidHexDigit(v) => Ok(ParseHexError::InvalidHexDigit(char::from_proto(v)?)),
2111                OddLength(()) => Ok(ParseHexError::OddLength),
2112            },
2113            None => Err(TryFromProtoError::missing_field(
2114                "`ProtoParseHexError::kind`",
2115            )),
2116        }
2117    }
2118}
2119
#[cfg(test)]
mod tests {
    use mz_ore::assert_ok;
    use mz_proto::protobuf_roundtrip;
    use proptest::prelude::*;

    use super::*;

    proptest! {
        // Any `ParseError` must survive a trip through its protobuf form.
        #[mz_ore::test]
        #[cfg_attr(miri, ignore)] // too slow
        fn parse_error_protobuf_roundtrip(expect in any::<ParseError>()) {
            let roundtripped = protobuf_roundtrip::<_, ProtoParseError>(&expect);
            assert_ok!(roundtripped);
            assert_eq!(roundtripped.unwrap(), expect);
        }

        // Any `ParseHexError` must survive a trip through its protobuf form.
        #[mz_ore::test]
        #[cfg_attr(miri, ignore)] // too slow
        fn parse_hex_error_protobuf_roundtrip(expect in any::<ParseHexError>()) {
            let roundtripped = protobuf_roundtrip::<_, ProtoParseHexError>(&expect);
            assert_ok!(roundtripped);
            assert_eq!(roundtripped.unwrap(), expect);
        }
    }

    #[mz_ore::test]
    fn test_format_nanos_to_micros() {
        // Pairs of (nanoseconds, expected fractional-second text).
        const CASES: &[(u32, &str)] = &[
            (0, ""),
            (1, ""),
            (499, ""),
            (500, ".000001"),
            (500_000, ".0005"),
            (5_000_000, ".005"),
            // Leap second. This is possibly wrong and should maybe be reduced
            // (nanosecond % 1_000_000_000), but we are at least now aware it
            // does this.
            (1_999_999_999, ".2"),
        ];
        for &(nanos, expect) in CASES {
            let mut rendered = String::new();
            format_nanos_to_micros(&mut rendered, nanos);
            assert_eq!(rendered, expect);
        }
    }

    #[mz_ore::test]
    fn test_parse_pg_legacy_name() {
        // Short names come back unchanged.
        let plain = "hello world";
        assert_eq!(plain, parse_pg_legacy_name(plain));

        // 63 bytes still fits.
        let at_limit = "x".repeat(63);
        assert_eq!(at_limit, parse_pg_legacy_name(&at_limit));

        // 64 bytes is cut back to 63.
        let over_limit = "x".repeat(64);
        assert_eq!("x".repeat(63), parse_pg_legacy_name(&over_limit));

        // The Hebrew character Aleph (א) has a length of 2 bytes, so 61 + 2
        // bytes fits exactly.
        let multibyte_fits = "x".repeat(61) + "א";
        assert_eq!(multibyte_fits, parse_pg_legacy_name(&multibyte_fits));

        // With 62 + 2 bytes the whole Aleph is dropped rather than split.
        let multibyte_over = "x".repeat(62) + "א";
        assert_eq!("x".repeat(62), parse_pg_legacy_name(&multibyte_over));
    }
}