// File: mz_sql/pure/postgres.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Postgres utilities for SQL purification.
11
12use std::collections::{BTreeMap, BTreeSet};
13
14use mz_postgres_util::desc::PostgresTableDesc;
15use mz_proto::RustType;
16use mz_repr::{Datum, ReprColumnType, ReprScalarType, Row, SqlScalarType};
17use mz_sql_parser::ast::display::AstDisplay;
18use mz_sql_parser::ast::{
19    ColumnDef, CreateSubsourceOption, CreateSubsourceOptionName, CreateSubsourceStatement,
20    ExternalReferences, Ident, PgConfigOptionName, TableConstraint, UnresolvedItemName, Value,
21    WithOptionValue,
22};
23use mz_storage_types::sources::SourceExportStatementDetails;
24use mz_storage_types::sources::casts::{CastFunc, StorageScalarExpr};
25use mz_storage_types::sources::postgres::CastType;
26use prost::Message;
27use tokio_postgres::Client;
28use tokio_postgres::types::Oid;
29
30use crate::names::{Aug, ResolvedItemName};
31use crate::normalize;
32use crate::plan::{PlanError, StatementContext};
33
34use super::error::PgSourcePurificationError;
35use super::references::RetrievedSourceReferences;
36use super::{PartialItemName, PurifiedExportDetails, PurifiedSourceExport, SourceReferencePolicy};
37
/// Ensure that we have select permissions on all tables; we have to do this before we
/// start snapshotting because if we discover we cannot `COPY` from a table while
/// snapshotting, we break the entire source.
///
/// Runs three validations against the upstream database in order, surfacing
/// the first failure: table-level privileges, row-level-security checks, and
/// `REPLICA IDENTITY FULL` configuration.
pub(super) async fn validate_requested_references_privileges(
    client: &Client,
    table_oids: &[Oid],
) -> Result<(), PlanError> {
    // Each check issues its own queries over `client`; any failure aborts
    // purification before we attempt a snapshot.
    privileges::check_table_privileges(client, table_oids).await?;
    privileges::check_rls_privileges(client, table_oids).await?;
    replica_identity::check_replica_identity_full(client, table_oids).await?;

    Ok(())
}
51
52/// Map a list of column references to a map of table oids to column names.
53///
54/// Additionally, modify `columns` so that they contain database-qualified
55/// references to the columns.
56pub(super) fn map_column_refs(
57    retrieved_references: &RetrievedSourceReferences,
58    columns: &mut [UnresolvedItemName],
59    option_type: PgConfigOptionName,
60) -> Result<BTreeMap<u32, BTreeSet<String>>, PlanError> {
61    let mut cols_map: BTreeMap<u32, BTreeSet<String>> = BTreeMap::new();
62
63    for name in columns {
64        let (qual, col) = match name.0.split_last().expect("must have at least one element") {
65            (col, []) => {
66                return Err(PlanError::InvalidOptionValue {
67                    option_name: option_type.to_ast_string_simple(),
68                    err: Box::new(PlanError::UnderqualifiedColumnName(
69                        col.as_str().to_string(),
70                    )),
71                });
72            }
73            (col, qual) => (qual.to_vec(), col.as_str().to_string()),
74        };
75
76        let resolved_reference = retrieved_references.resolve_name(&qual)?;
77        let mut fully_qualified_name =
78            resolved_reference
79                .external_reference()
80                .map_err(|e| PlanError::InvalidOptionValue {
81                    option_name: option_type.to_ast_string_simple(),
82                    err: Box::new(e.into()),
83                })?;
84
85        let desc = resolved_reference
86            .postgres_desc()
87            .expect("known to be postgres");
88
89        if !desc.columns.iter().any(|column| column.name == col) {
90            let column = mz_repr::ColumnName::from(col);
91            let similar = desc
92                .columns
93                .iter()
94                .filter_map(|c| {
95                    let c_name = mz_repr::ColumnName::from(c.name.clone());
96                    c_name.is_similar(&column).then_some(c_name)
97                })
98                .collect();
99            return Err(PlanError::InvalidOptionValue {
100                option_name: option_type.to_ast_string_simple(),
101                err: Box::new(PlanError::UnknownColumn {
102                    table: Some(
103                        normalize::unresolved_item_name(fully_qualified_name)
104                            .expect("known to be of valid len"),
105                    ),
106                    column,
107                    similar,
108                }),
109            });
110        }
111
112        // Rewrite fully qualified name.
113        let col_ident = Ident::new(col.as_str().to_string())?;
114        fully_qualified_name.0.push(col_ident);
115        *name = fully_qualified_name;
116
117        let new = cols_map
118            .entry(desc.oid)
119            .or_default()
120            .insert(col.as_str().to_string());
121
122        if !new {
123            return Err(PlanError::InvalidOptionValue {
124                option_name: option_type.to_ast_string_simple(),
125                err: Box::new(PlanError::UnexpectedDuplicateReference { name: name.clone() }),
126            });
127        }
128    }
129
130    Ok(cols_map)
131}
132
133pub fn generate_create_subsource_statements(
134    scx: &StatementContext,
135    source_name: ResolvedItemName,
136    requested_subsources: BTreeMap<UnresolvedItemName, PurifiedSourceExport>,
137) -> Result<Vec<CreateSubsourceStatement<Aug>>, PlanError> {
138    // Aggregate all unrecognized types.
139    let mut unsupported_cols = vec![];
140
141    // Now that we have an explicit list of validated requested subsources we can create them
142    let mut subsources = Vec::with_capacity(requested_subsources.len());
143
144    for (subsource_name, purified_export) in requested_subsources {
145        let PostgresExportStatementValues {
146            columns,
147            constraints,
148            text_columns,
149            exclude_columns,
150            details,
151            external_reference,
152        } = generate_source_export_statement_values(scx, purified_export, &mut unsupported_cols)?;
153
154        let mut with_options = vec![
155            CreateSubsourceOption {
156                name: CreateSubsourceOptionName::ExternalReference,
157                value: Some(WithOptionValue::UnresolvedItemName(external_reference)),
158            },
159            CreateSubsourceOption {
160                name: CreateSubsourceOptionName::Details,
161                value: Some(WithOptionValue::Value(Value::String(hex::encode(
162                    details.into_proto().encode_to_vec(),
163                )))),
164            },
165        ];
166
167        if let Some(text_columns) = text_columns {
168            with_options.push(CreateSubsourceOption {
169                name: CreateSubsourceOptionName::TextColumns,
170                value: Some(WithOptionValue::Sequence(text_columns)),
171            });
172        }
173
174        if let Some(exclude_columns) = exclude_columns {
175            with_options.push(CreateSubsourceOption {
176                name: CreateSubsourceOptionName::ExcludeColumns,
177                value: Some(WithOptionValue::Sequence(exclude_columns)),
178            });
179        }
180
181        // Create the subsource statement
182        let subsource = CreateSubsourceStatement {
183            name: subsource_name,
184            columns,
185            // We might not know the primary source's `GlobalId` yet; if not,
186            // we'll fill it in once we generate it.
187            of_source: Some(source_name.clone()),
188            // TODO(petrosagg): nothing stops us from getting the constraints of the
189            // upstream tables and mirroring them here which will lead to more optimization
190            // opportunities if for example there is a primary key or an index.
191            //
192            // If we ever do that we must triple check that we will get notified *in the
193            // replication stream*, if our assumptions change. Failure to do that could
194            // mean that an upstream table that started with an index was then altered to
195            // one without and now we're producing garbage data.
196            constraints,
197            if_not_exists: false,
198            with_options,
199        };
200        subsources.push(subsource);
201    }
202
203    if !unsupported_cols.is_empty() {
204        unsupported_cols.sort();
205        Err(PgSourcePurificationError::UnrecognizedTypes {
206            cols: unsupported_cols,
207        })?;
208    }
209
210    Ok(subsources)
211}
212
/// The pieces needed to assemble a `CREATE SUBSOURCE` statement for a single
/// Postgres source export.
pub(super) struct PostgresExportStatementValues {
    /// Column definitions of the subsource's relation.
    pub(super) columns: Vec<ColumnDef<Aug>>,
    /// Unique/primary-key constraints mirrored from the upstream table.
    pub(super) constraints: Vec<TableConstraint<Aug>>,
    /// Columns to decode as text (`TEXT COLUMNS` option), if any.
    pub(super) text_columns: Option<Vec<WithOptionValue<Aug>>>,
    /// Columns to omit from ingestion (`EXCLUDE COLUMNS` option), if any.
    pub(super) exclude_columns: Option<Vec<WithOptionValue<Aug>>>,
    /// Per-export details serialized into the statement's `DETAILS` option.
    pub(super) details: SourceExportStatementDetails,
    /// Fully-qualified upstream reference (`EXTERNAL REFERENCE` option).
    pub(super) external_reference: UnresolvedItemName,
}
221
/// Compute the statement values (columns, constraints, normalized options,
/// details) for a single purified Postgres source export.
///
/// Columns named in `EXCLUDE COLUMNS` are skipped entirely; columns named in
/// `TEXT COLUMNS` are typed as `text`. Columns whose Postgres type is not
/// recognized are skipped and appended to `unsupported_cols` so the caller
/// can report them all at once.
pub(super) fn generate_source_export_statement_values(
    scx: &StatementContext,
    purified_export: PurifiedSourceExport,
    unsupported_cols: &mut Vec<(String, mz_repr::adt::system::Oid)>,
) -> Result<PostgresExportStatementValues, PlanError> {
    let PurifiedExportDetails::Postgres {
        table,
        text_columns,
        exclude_columns,
    } = purified_export.details
    else {
        bail_internal!("purified export details must be postgres");
    };

    // Fast membership tests for the two column-name option lists.
    let text_column_set = BTreeSet::from_iter(text_columns.iter().flatten().map(Ident::as_str));
    let exclude_column_set =
        BTreeSet::from_iter(exclude_columns.iter().flatten().map(Ident::as_str));

    // Figure out the schema of the subsource
    let mut columns = vec![];
    for c in table.columns.iter() {
        let name = Ident::new(c.name.clone())?;

        // Excluded columns do not appear in the subsource's relation at all.
        if exclude_column_set.contains(c.name.as_str()) {
            continue;
        }

        let ty = if text_column_set.contains(c.name.as_str()) {
            mz_pgrepr::Type::Text
        } else {
            match mz_pgrepr::Type::from_oid_and_typmod(c.type_oid, c.type_mod) {
                Ok(t) => t,
                Err(_) => {
                    // Unknown type: record the fully qualified column name and
                    // its OID for aggregate error reporting, then skip it.
                    let mut full_name = purified_export.external_reference.0.clone();
                    full_name.push(name);
                    unsupported_cols.push((
                        UnresolvedItemName(full_name).to_ast_string_simple(),
                        mz_repr::adt::system::Oid(c.type_oid),
                    ));
                    continue;
                }
            }
        };

        let data_type = scx.resolve_type(ty)?;
        let mut options = vec![];

        // Mirror the upstream NOT NULL constraint on the subsource column.
        if !c.nullable {
            options.push(mz_sql_parser::ast::ColumnOptionDef {
                name: None,
                option: mz_sql_parser::ast::ColumnOption::NotNull,
            });
        }

        columns.push(ColumnDef {
            name,
            data_type,
            collation: None,
            options,
        });
    }

    let mut constraints = vec![];
    for key in table.keys.clone() {
        let mut key_columns = vec![];

        for col_num in key.cols {
            // NOTE(review): if a key column were removed via `EXCLUDE COLUMNS`
            // upstream of this call, this lookup could still succeed because it
            // scans `table.columns` (the full desc) — but the caller in
            // `purify_source_exports` retains only non-excluded columns in the
            // desc, in which case this would panic. TODO confirm earlier
            // validation rejects excluding key columns.
            let ident = Ident::new(
                table
                    .columns
                    .iter()
                    .find(|col| col.col_num == col_num)
                    .expect("key exists as column")
                    .name
                    .clone(),
            )?;
            key_columns.push(ident);
        }

        let constraint = mz_sql_parser::ast::TableConstraint::Unique {
            name: Some(Ident::new(key.name)?),
            columns: key_columns,
            is_primary: key.is_primary,
            nulls_not_distinct: key.nulls_not_distinct,
        };

        // We take the first constraint available to be the primary key.
        if key.is_primary {
            constraints.insert(0, constraint);
        } else {
            constraints.push(constraint);
        }
    }
    let details = SourceExportStatementDetails::Postgres { table };

    // Normalize option lists into sorted AST values for round-tripping the
    // statement.
    let text_columns = text_columns.map(|mut columns| {
        columns.sort();
        columns
            .into_iter()
            .map(WithOptionValue::Ident::<Aug>)
            .collect()
    });

    let exclude_columns = exclude_columns.map(|mut columns| {
        columns.sort();
        columns
            .into_iter()
            .map(WithOptionValue::Ident::<Aug>)
            .collect()
    });

    Ok(PostgresExportStatementValues {
        columns,
        constraints,
        text_columns,
        exclude_columns,
        details,
        external_reference: purified_export.external_reference,
    })
}
342
/// The result of purifying the requested external references of a Postgres
/// source: the per-table exports plus normalized statement options.
pub(super) struct PurifiedSourceExports {
    /// Purified exports keyed by the name each subsource will be created as.
    pub(super) source_exports: BTreeMap<UnresolvedItemName, PurifiedSourceExport>,
    // NOTE(roshan): The text columns are already part of their
    // appropriate `source_exports` above, but these are returned to allow
    // round-tripping a `CREATE SOURCE` statement while we still allow creating
    // implicit subsources from `CREATE SOURCE`. Remove once
    // fully deprecating that feature and forcing users to use explicit
    // `CREATE TABLE .. FROM SOURCE` statements.
    pub(super) normalized_text_columns: Vec<WithOptionValue<Aug>>,
}
353
// Purify the requested external references, returning a set of purified
// source exports corresponding to external tables, and additional
// fields necessary to generate relevant statements and update statement options.
pub(super) async fn purify_source_exports(
    client: &Client,
    retrieved_references: &RetrievedSourceReferences,
    requested_references: &Option<ExternalReferences>,
    mut text_columns: Vec<UnresolvedItemName>,
    mut exclude_columns: Vec<UnresolvedItemName>,
    unresolved_source_name: &UnresolvedItemName,
    reference_policy: &SourceReferencePolicy,
) -> Result<PurifiedSourceExports, PlanError> {
    let requested_exports = match requested_references.as_ref() {
        // Explicit references are disallowed under this policy; the user must
        // use tables instead.
        Some(requested) if matches!(reference_policy, SourceReferencePolicy::NotAllowed) => {
            Err(PlanError::UseTablesForSources(requested.to_string()))?
        }
        Some(requested) => retrieved_references
            .requested_source_exports(Some(requested), unresolved_source_name)?,
        None => {
            if matches!(reference_policy, SourceReferencePolicy::Required) {
                Err(PgSourcePurificationError::RequiresExternalReferences)?
            }

            // If no external reference is specified, it does not make sense to include
            // text columns.
            if !text_columns.is_empty() {
                Err(
                    PgSourcePurificationError::UnnecessaryOptionsWithoutReferences(
                        "TEXT COLUMNS".to_string(),
                    ),
                )?
            }

            // If no external reference is specified, it does not make sense to include
            // exclude columns.
            if !exclude_columns.is_empty() {
                Err(
                    PgSourcePurificationError::UnnecessaryOptionsWithoutReferences(
                        "EXCLUDE COLUMNS".to_string(),
                    ),
                )?
            }

            // Nothing was requested, so there are no exports to purify.
            return Ok(PurifiedSourceExports {
                source_exports: BTreeMap::new(),
                normalized_text_columns: vec![],
            });
        }
    };

    if requested_exports.is_empty() {
        sql_bail!(
            "[internal error]: Postgres reference {} did not match any tables",
            requested_references
                .as_ref()
                .unwrap()
                .to_ast_string_simple()
        );
    }

    super::validate_source_export_names(&requested_exports)?;

    let table_oids: Vec<_> = requested_exports
        .iter()
        .map(|r| r.meta.postgres_desc().expect("is postgres").oid)
        .collect();

    // Fail before snapshotting if we lack the required upstream privileges.
    validate_requested_references_privileges(client, &table_oids).await?;

    // Resolve TEXT COLUMNS / EXCLUDE COLUMNS to per-table-OID column sets,
    // qualifying the column references in place.
    let mut text_column_map = map_column_refs(
        retrieved_references,
        &mut text_columns,
        PgConfigOptionName::TextColumns,
    )?;
    let mut exclude_column_map = map_column_refs(
        retrieved_references,
        &mut exclude_columns,
        PgConfigOptionName::ExcludeColumns,
    )?;

    // Normalize options to contain full qualified values.
    text_columns.sort();
    text_columns.dedup();
    let normalized_text_columns: Vec<_> = text_columns
        .into_iter()
        .map(WithOptionValue::UnresolvedItemName)
        .collect();

    let source_exports = requested_exports
        .into_iter()
        .map(|r| {
            let mut desc = r.meta.postgres_desc().expect("known postgres").clone();
            // Claim this table's options; anything left over in the maps
            // afterwards referenced a table that is not being ingested.
            let text_columns = text_column_map.remove(&desc.oid);
            let exclude_columns = exclude_column_map.remove(&desc.oid);

            // Drop excluded columns from the table description itself.
            if let Some(exclude_cols) = &exclude_columns {
                desc.columns.retain(|c| !exclude_cols.contains(&c.name));
            }

            // A column cannot be both text-decoded and excluded.
            if let (Some(text_cols), Some(exclude_cols)) = (&text_columns, &exclude_columns) {
                let intersection: Vec<_> = text_cols.intersection(exclude_cols).collect();
                if !intersection.is_empty() {
                    return Err(PgSourcePurificationError::DuplicatedColumnNames(
                        intersection.iter().map(|s| (*s).to_string()).collect(),
                    ));
                }
            }
            Ok((
                r.name,
                PurifiedSourceExport {
                    external_reference: r.external_reference,
                    details: PurifiedExportDetails::Postgres {
                        text_columns: text_columns.map(|v| {
                            v.into_iter()
                                .map(|s| Ident::new(s).expect("validated above"))
                                .collect()
                        }),
                        exclude_columns: exclude_columns.map(|v| {
                            v.into_iter()
                                .map(|s| Ident::new(s).expect("validated above"))
                                .collect()
                        }),
                        table: desc,
                    },
                },
            ))
        })
        .collect::<Result<BTreeMap<_, _>, _>>()?;

    if !text_column_map.is_empty() {
        // If any item was not removed from the text_column_map, it wasn't being
        // added.
        let mut dangling_text_column_refs = vec![];
        let all_references = retrieved_references.all_references();

        for id in text_column_map.keys() {
            let desc = all_references
                .iter()
                .find_map(|reference| {
                    let desc = reference.postgres_desc().expect("is postgres");
                    if desc.oid == *id { Some(desc) } else { None }
                })
                .expect("validated when generating text columns");

            dangling_text_column_refs.push(PartialItemName {
                database: None,
                schema: Some(desc.namespace.clone()),
                item: desc.name.clone(),
            });
        }

        dangling_text_column_refs.sort();
        return Err(PlanError::from(
            PgSourcePurificationError::DanglingTextColumns {
                items: dangling_text_column_refs,
            },
        ));
    }

    if !exclude_column_map.is_empty() {
        // If any item was not removed from the exclude_column_map, it wasn't being
        // added.
        let mut dangling_exclude_column_refs = vec![];
        let all_references = retrieved_references.all_references();

        for id in exclude_column_map.keys() {
            let desc = all_references
                .iter()
                .find_map(|reference| {
                    let desc = reference.postgres_desc().expect("is postgres");
                    if desc.oid == *id { Some(desc) } else { None }
                })
                .expect("validated when generating exclude columns");

            dangling_exclude_column_refs.push(PartialItemName {
                database: None,
                schema: Some(desc.namespace.clone()),
                item: desc.name.clone(),
            });
        }

        dangling_exclude_column_refs.sort();
        return Err(PlanError::from(
            PgSourcePurificationError::DanglingExcludeColumns {
                items: dangling_exclude_column_refs,
            },
        ));
    }

    Ok(PurifiedSourceExports {
        source_exports,
        normalized_text_columns,
    })
}
548
/// Generate the cast expressions required to convert the text encoded columns
/// of `table` into the appropriate target types, producing one
/// `(CastType, StorageScalarExpr)` per upstream column (by position). The
/// postgres source reader will then eval each of those on the incoming rows.
///
/// Columns named in `text_columns` are ingested as text; other columns get
/// their natural cast. NOT NULL columns are wrapped in a null check so we
/// error if the upstream constraint is violated (or silently dropped).
pub(crate) fn generate_column_casts(
    scx: &StatementContext,
    table: &PostgresTableDesc,
    text_columns: &Vec<Ident>,
) -> Result<Vec<(CastType, StorageScalarExpr)>, PlanError> {
    let text_columns = BTreeSet::from_iter(text_columns.iter().map(Ident::as_str));

    let mut table_cast = vec![];
    for (i, column) in table.columns.iter().enumerate() {
        let (cast_type, ty) = if text_columns.contains(column.name.as_str()) {
            // Treat the column as text if it was referenced in
            // `TEXT COLUMNS`. This is the only place we need to
            // perform this logic; even if the type is unsupported,
            // we'll be able to ingest its values as text in
            // storage.
            (CastType::Text, mz_pgrepr::Type::Text)
        } else {
            match mz_pgrepr::Type::from_oid_and_typmod(column.type_oid, column.type_mod) {
                Ok(t) => (CastType::Natural, t),
                // If this reference survived purification, we
                // do not expect it to be from a table that the
                // user will consume, i.e. we expect this table to
                // be filtered out of table casts. As a backstop, emit an
                // expression that errors at ingestion time.
                Err(_) => {
                    table_cast.push((
                        CastType::Natural,
                        StorageScalarExpr::ErrorIfNull(
                            Box::new(StorageScalarExpr::Literal(
                                Row::pack_slice(&[Datum::Null]),
                                ReprColumnType {
                                    nullable: true,
                                    scalar_type: ReprScalarType::String,
                                },
                            )),
                            format!("Unsupported type with OID {}", column.type_oid),
                        ),
                    ));
                    continue;
                }
            }
        };

        let cast_expr = match pg_type_to_cast_func(scx, &ty) {
            Ok(None) => {
                // No cast needed (e.g. Text → String identity).
                StorageScalarExpr::Column(i)
            }
            Ok(Some(cast_func)) => {
                StorageScalarExpr::CallUnary(cast_func, Box::new(StorageScalarExpr::Column(i)))
            }
            Err(PlanError::TableContainsUningestableTypes { type_, .. }) => {
                // We expect only reg* types and similar to encounter
                // this. Users can ingest the data as text if they need
                // to. This is acceptable because we don't expect the
                // OIDs from an external PG source to be unilaterally
                // usable in resolving item names in MZ.
                //
                // Re-raise with the real table/column names filled in.
                return Err(PlanError::TableContainsUningestableTypes {
                    name: table.name.to_string(),
                    type_,
                    column: column.name.to_string(),
                });
            }
            Err(e) => return Err(e),
        };

        let cast = if column.nullable {
            cast_expr
        } else {
            // We must enforce nullability constraint on cast
            // because PG replication stream does not propagate
            // constraint changes and we want to error subsource if
            // e.g. the constraint is dropped and we don't notice
            // it.
            let message = format!(
                "PG column {}.{}.{} contained NULL data, despite having NOT NULL constraint",
                table.namespace, table.name, column.name
            );
            StorageScalarExpr::ErrorIfNull(Box::new(cast_expr), message)
        };

        table_cast.push((cast_type, cast));
    }
    Ok(table_cast)
}
637
638/// Resolve a PG type to its corresponding `SqlScalarType` via the catalog.
639fn resolve_pg_type_to_scalar_type(
640    scx: &StatementContext,
641    ty: &mz_pgrepr::Type,
642) -> Result<SqlScalarType, PlanError> {
643    let data_type = scx.resolve_type(ty.clone())?;
644    crate::plan::query::scalar_type_from_sql(scx, &data_type)
645}
646
/// Map a PG type to the corresponding `CastFunc` variant. Returns:
/// - `Ok(Some(func))` for types that need a cast
/// - `Ok(None)` for types that need no cast (Text → String identity)
/// - `Err(PlanError::TableContainsUningestableTypes { .. })` for types
///   that cannot be ingested. The error uses placeholder (empty) strings for
///   table/column name; callers with context (e.g. `generate_column_casts`)
///   are expected to rebuild the error with the real names.
fn pg_type_to_cast_func(
    scx: &StatementContext,
    ty: &mz_pgrepr::Type,
) -> Result<Option<CastFunc>, PlanError> {
    use mz_pgrepr::Type;

    let cast_func = match ty {
        Type::Bool => CastFunc::CastStringToBool,
        Type::Bytea => CastFunc::CastStringToBytes,
        Type::Char => CastFunc::CastStringToPgLegacyChar,
        Type::Date => CastFunc::CastStringToDate,
        Type::Float4 => CastFunc::CastStringToFloat32,
        Type::Float8 => CastFunc::CastStringToFloat64,
        Type::Int2 => CastFunc::CastStringToInt16,
        Type::Int4 => CastFunc::CastStringToInt32,
        Type::Int8 => CastFunc::CastStringToInt64,
        Type::UInt2 => CastFunc::CastStringToUint16,
        Type::UInt4 => CastFunc::CastStringToUint32,
        Type::UInt8 => CastFunc::CastStringToUint64,
        Type::Interval { .. } => CastFunc::CastStringToInterval,
        Type::Jsonb => CastFunc::CastStringToJsonb,
        Type::Name => CastFunc::CastStringToPgLegacyName,
        Type::Numeric { .. } => {
            // Resolve through the catalog to get the repr NumericMaxScale type.
            let scalar_type = resolve_pg_type_to_scalar_type(scx, ty)?;
            match scalar_type {
                SqlScalarType::Numeric { max_scale } => CastFunc::CastStringToNumeric(max_scale),
                _ => unreachable!("Numeric must resolve to Numeric"),
            }
        }
        Type::Oid => CastFunc::CastStringToOid,
        // Text needs no cast at all: the value is already a string.
        Type::Text => return Ok(None),
        Type::BpChar { .. } => {
            // Resolve through the catalog to get the repr CharLength type.
            let scalar_type = resolve_pg_type_to_scalar_type(scx, ty)?;
            match scalar_type {
                SqlScalarType::Char { length } => CastFunc::CastStringToChar {
                    length,
                    fail_on_len: true,
                },
                _ => unreachable!("BpChar must resolve to Char"),
            }
        }
        Type::VarChar { .. } => {
            // Resolve through the catalog to get the repr VarCharMaxLength type.
            let scalar_type = resolve_pg_type_to_scalar_type(scx, ty)?;
            match scalar_type {
                SqlScalarType::VarChar { max_length } => CastFunc::CastStringToVarChar {
                    length: max_length,
                    fail_on_len: true,
                },
                _ => unreachable!("VarChar must resolve to VarChar"),
            }
        }
        Type::Time { .. } => {
            // Time precision is not yet fully supported; resolve_type strips precision.
            CastFunc::CastStringToTime
        }
        Type::Timestamp { .. } => {
            // Resolve through the catalog to get the repr TimestampPrecision type.
            let scalar_type = resolve_pg_type_to_scalar_type(scx, ty)?;
            match scalar_type {
                SqlScalarType::Timestamp { precision } => {
                    CastFunc::CastStringToTimestamp(precision)
                }
                _ => unreachable!("Timestamp must resolve to Timestamp"),
            }
        }
        Type::TimestampTz { .. } => {
            // Resolve through the catalog to get the repr TimestampPrecision type.
            let scalar_type = resolve_pg_type_to_scalar_type(scx, ty)?;
            match scalar_type {
                SqlScalarType::TimestampTz { precision } => {
                    CastFunc::CastStringToTimestampTz(precision)
                }
                _ => unreachable!("TimestampTz must resolve to TimestampTz"),
            }
        }
        Type::Uuid => CastFunc::CastStringToUuid,
        Type::Int2Vector => CastFunc::CastStringToInt2Vector,
        Type::MzTimestamp => CastFunc::CastStringToMzTimestamp,
        // JSON is ingested as JSONB (same as the old plan_cast path).
        Type::Json => CastFunc::CastStringToJsonb,
        // Container types carry a nested cast for their element/value type.
        Type::Array(elem) => {
            let return_ty = resolve_pg_type_to_scalar_type(scx, ty)?;
            let elem_cast = build_element_cast_expr(scx, elem)?;
            CastFunc::CastStringToArray {
                return_ty,
                cast_expr: Box::new(elem_cast),
            }
        }
        Type::List(elem) => {
            let return_ty = resolve_pg_type_to_scalar_type(scx, ty)?;
            let elem_cast = build_element_cast_expr(scx, elem)?;
            CastFunc::CastStringToList {
                return_ty,
                cast_expr: Box::new(elem_cast),
            }
        }
        Type::Map { value_type } => {
            let return_ty = resolve_pg_type_to_scalar_type(scx, ty)?;
            let value_cast = build_element_cast_expr(scx, value_type)?;
            CastFunc::CastStringToMap {
                return_ty,
                cast_expr: Box::new(value_cast),
            }
        }
        Type::Range { element_type } => {
            let return_ty = resolve_pg_type_to_scalar_type(scx, ty)?;
            let elem_cast = build_element_cast_expr(scx, element_type)?;
            CastFunc::CastStringToRange {
                return_ty,
                cast_expr: Box::new(elem_cast),
            }
        }
        // reg* types require subquery-based casts that storage cannot
        // evaluate. Users can ingest them as text via TEXT COLUMNS.
        Type::RegType | Type::RegClass | Type::RegProc => {
            return Err(PlanError::TableContainsUningestableTypes {
                name: String::new(),
                type_: ty.name().to_string(),
                column: String::new(),
            });
        }
        // Any type not listed above is likewise uningestable.
        other => {
            return Err(PlanError::TableContainsUningestableTypes {
                name: String::new(),
                type_: other.name().to_string(),
                column: String::new(),
            });
        }
    };
    Ok(Some(cast_func))
}
788
789/// Build the element cast expression for container types (Array, List, Map,
790/// Range). The element expression operates on a single-column input row
791/// containing the text-encoded element at column 0.
792fn build_element_cast_expr(
793    scx: &StatementContext,
794    elem_ty: &mz_pgrepr::Type,
795) -> Result<StorageScalarExpr, PlanError> {
796    match pg_type_to_cast_func(scx, elem_ty)? {
797        None => Ok(StorageScalarExpr::Column(0)),
798        Some(cast_func) => Ok(StorageScalarExpr::CallUnary(
799            cast_func,
800            Box::new(StorageScalarExpr::Column(0)),
801        )),
802    }
803}
804
mod privileges {
    use mz_postgres_util::PostgresError;

    use super::*;
    use crate::plan::PlanError;
    use crate::pure::PgSourcePurificationError;

    /// Ensures that the user the `client` is connected as has `USAGE`
    /// privileges on every schema containing one of the tables identified by
    /// `table_oids`.
    ///
    /// # Errors
    ///
    /// Returns [`PgSourcePurificationError::UserLacksUsageOnSchemas`] with the
    /// sorted list of offending schema names when any `USAGE` privilege is
    /// missing.
    async fn check_schema_privileges(client: &Client, table_oids: &[Oid]) -> Result<(), PlanError> {
        // Resolve each OID to its containing schema (deduplicated) and keep
        // only the schemas the current user lacks `USAGE` on.
        let invalid_schema_privileges_rows = client
            .query(
                "
                WITH distinct_namespace AS (
                    SELECT
                        DISTINCT n.oid, n.nspname AS schema_name
                    FROM unnest($1::OID[]) AS oids (oid)
                    JOIN pg_class AS c ON c.oid = oids.oid
                    JOIN pg_namespace AS n ON c.relnamespace = n.oid
                )
                SELECT d.schema_name
                FROM distinct_namespace AS d
                WHERE
                    NOT has_schema_privilege(CURRENT_USER::TEXT, d.oid, 'usage')",
                &[&table_oids],
            )
            .await
            .map_err(PostgresError::from)?;

        let mut invalid_schema_privileges = invalid_schema_privileges_rows
            .into_iter()
            .map(|row| row.get("schema_name"))
            .collect::<Vec<String>>();

        if invalid_schema_privileges.is_empty() {
            Ok(())
        } else {
            // Sort for a deterministic, user-friendly error message.
            invalid_schema_privileges.sort();
            Err(PgSourcePurificationError::UserLacksUsageOnSchemas {
                schemas: invalid_schema_privileges,
            })?
        }
    }

    /// Ensures that the user the `client` is connected as has:
    ///
    /// - `USAGE` privileges on the schemas containing the tables identified by
    ///   `table_oids`, and
    /// - `SELECT` privileges on those tables themselves.
    ///
    /// # Errors
    ///
    /// Returns [`PgSourcePurificationError::UserLacksUsageOnSchemas`] or
    /// [`PgSourcePurificationError::UserLacksSelectOnTables`], each carrying a
    /// sorted list of the offending schemas/tables, when any privilege is
    /// missing.
    pub async fn check_table_privileges(
        client: &Client,
        table_oids: &[Oid],
    ) -> Result<(), PlanError> {
        // Schema `USAGE` is a prerequisite for table access, so check it first.
        check_schema_privileges(client, table_oids).await?;

        // Keep only the (schema-qualified) tables the current user lacks
        // `SELECT` on.
        let invalid_table_privileges_rows = client
            .query(
                "
            SELECT
                format('%I.%I', n.nspname, c.relname) AS schema_qualified_table_name
             FROM unnest($1::oid[]) AS oids (oid)
             JOIN
                 pg_class c ON c.oid = oids.oid
             JOIN
                 pg_namespace n ON c.relnamespace = n.oid
             WHERE NOT has_table_privilege(CURRENT_USER::text, c.oid, 'select')",
                &[&table_oids],
            )
            .await
            .map_err(PostgresError::from)?;

        let mut invalid_table_privileges = invalid_table_privileges_rows
            .into_iter()
            .map(|row| row.get("schema_qualified_table_name"))
            .collect::<Vec<String>>();

        if invalid_table_privileges.is_empty() {
            Ok(())
        } else {
            // Sort for a deterministic, user-friendly error message.
            invalid_table_privileges.sort();
            Err(PgSourcePurificationError::UserLacksSelectOnTables {
                tables: invalid_table_privileges,
            })?
        }
    }

    /// Ensure that the user the `client` is connected as can read data from
    /// tables if row level security (RLS) is enabled. If the user/role does
    /// not have the BYPASSRLS attribute set, there is the possibility that MZ
    /// may not be able to read all data during the snapshot, which would
    /// result in missing data.
    pub async fn check_rls_privileges(
        client: &Client,
        table_oids: &[Oid],
    ) -> Result<(), PlanError> {
        match mz_postgres_util::validate_no_rls_policies(client, table_oids).await {
            Ok(_) => Ok(()),
            Err(err) => match err {
                // This is a little gross to do, but PlanError::PostgresConnectionErr implements
                // From<PostgresError>, and the error in that case would be
                // "failed to connect to PostgreSQL database", which doesn't make any sense.
                PostgresError::BypassRLSRequired(tables) => {
                    Err(PgSourcePurificationError::BypassRLSRequired { tables })?
                }
                _ => Err(err)?,
            },
        }
    }
}
916
917mod replica_identity {
918    use mz_postgres_util::PostgresError;
919
920    use super::*;
921    use crate::plan::PlanError;
922    use crate::pure::PgSourcePurificationError;
923
924    /// Ensures that all provided OIDs are tables with `REPLICA IDENTITY FULL`.
925    pub async fn check_replica_identity_full(
926        client: &Client,
927        table_oids: &[Oid],
928    ) -> Result<(), PlanError> {
929        let invalid_replica_identity_rows = client
930            .query(
931                "
932            SELECT
933                format('%I.%I', n.nspname, c.relname) AS schema_qualified_table_name
934             FROM unnest($1::oid[]) AS oids (oid)
935             JOIN
936                 pg_class c ON c.oid = oids.oid
937             JOIN
938                 pg_namespace n ON c.relnamespace = n.oid
939             WHERE relreplident != 'f' OR relreplident IS NULL;",
940                &[&table_oids],
941            )
942            .await
943            .map_err(PostgresError::from)?;
944
945        let mut invalid_replica_identity = invalid_replica_identity_rows
946            .into_iter()
947            .map(|row| row.get("schema_qualified_table_name"))
948            .collect::<Vec<String>>();
949
950        if invalid_replica_identity.is_empty() {
951            Ok(())
952        } else {
953            invalid_replica_identity.sort();
954            Err(PgSourcePurificationError::NotTablesWReplicaIdentityFull {
955                items: invalid_replica_identity,
956            })?
957        }
958    }
959}