Skip to main content

mz_sql/plan/
side_effecting_func.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Support for side-effecting functions.
11//!
12//! In PostgreSQL, these functions can appear anywhere in a query:
13//!
14//! ```sql
15//! SELECT 1 WHERE pg_cancel_backend(1234)
16//! ```
17//!
18//! In Materialize, our compute layer cannot execute functions with side
19//! effects. So we sniff out the common form of calls to side-effecting
20//! functions, i.e. at the top level of a `SELECT`
21//!
22//! ```sql
23//! SELECT side_effecting_function(...)
24//! ```
25//!
26//! where all arguments are literals or bound parameters, and plan them
27//! specially as a `Plan::SideEffectingFunc`. This gets us compatibility with
28//! PostgreSQL for most real-world use cases, without causing stress for the
29//! compute layer (optimizer, dataflow execution, etc.), as we can apply all the
30//! side effects entirely in the adapter layer.
31
32use std::collections::BTreeMap;
33use std::sync::LazyLock;
34
35use enum_kinds::EnumKind;
36use itertools::Itertools;
37use mz_expr::Eval;
38use mz_ore::cast::ReinterpretCast;
39use mz_ore::collections::CollectionExt;
40use mz_ore::result::ResultExt;
41use mz_repr::SqlRelationType;
42use mz_repr::{Datum, RelationDesc, RowArena, SqlColumnType, SqlScalarType};
43use mz_sql_parser::ast::{CteBlock, Expr, Function, FunctionArgs, Select, SelectItem, SetExpr};
44
45use crate::ast::{Query, SelectStatement};
46use crate::func::Func;
47use crate::names::Aug;
48use crate::plan::query::{self, ExprContext, QueryLifetime};
49use crate::plan::scope::Scope;
50use crate::plan::statement::StatementContext;
51use crate::plan::typeconv::CastContext;
52use crate::plan::{HirScalarExpr, Params};
53use crate::plan::{PlanError, QueryContext};
54
55/// A side-effecting function is a function whose evaluation triggers side
56/// effects.
57///
58/// See the module docs for details.
59#[derive(Debug, EnumKind, Clone)]
60#[enum_kind(SefKind)]
61pub enum SideEffectingFunc {
62    /// The `pg_cancel_backend` function.
63    PgCancelBackend {
64        // The ID of the connection to cancel, or `None` if the argument was
65        // `NULL`, in which case the function returns `NULL`.
66        connection_id: Option<u32>,
67    },
68}
69
70/// Describes a `SELECT` if it contains calls to side-effecting functions.
71///
72/// See the module docs for details.
73pub fn describe_select_if_side_effecting(
74    scx: &StatementContext,
75    select: &SelectStatement<Aug>,
76) -> Result<Option<RelationDesc>, PlanError> {
77    let Some(sef_call) = extract_sef_call(scx, select)? else {
78        return Ok(None);
79    };
80
81    // We currently support only a single call to a side-effecting function
82    // without an alias, so there is always a single output column is named
83    // after the function.
84    let desc = RelationDesc::builder()
85        .with_column(sef_call.imp.name, sef_call.imp.return_type.clone())
86        .finish();
87
88    Ok(Some(desc))
89}
90
91/// Plans the `SELECT` if it contains calls to side-effecting functions.
92///
93/// See the module docs for details.
94pub fn plan_select_if_side_effecting(
95    scx: &StatementContext,
96    select: &SelectStatement<Aug>,
97    params: &Params,
98) -> Result<Option<SideEffectingFunc>, PlanError> {
99    let Some(sef_call) = extract_sef_call(scx, select)? else {
100        return Ok(None);
101    };
102
103    // Bind parameters and then eagerly evaluate each argument. Expressions that
104    // cannot be eagerly evaluated should have been rejected by `extract_sef_call`.
105    let temp_storage = RowArena::new();
106    let mut args = vec![];
107    for mut arg in sef_call.args {
108        arg.bind_parameters_and_simplify_offset(scx, QueryLifetime::OneShot, params)?;
109        let arg = arg.lower_uncorrelated(scx.catalog.system_vars())?;
110        args.push(arg);
111    }
112    let mut datums = vec![];
113    for arg in &args {
114        let datum = arg.eval(&[], &temp_storage)?;
115        datums.push(datum);
116    }
117
118    let func = (sef_call.imp.plan_fn)(&datums);
119
120    Ok(Some(func))
121}
122
123/// Helper function used in both describing and planning a side-effecting
124/// `SELECT`.
125fn extract_sef_call(
126    scx: &StatementContext,
127    select: &SelectStatement<Aug>,
128) -> Result<Option<SefCall>, PlanError> {
129    // First check if the `SELECT` contains exactly one function call.
130    let SelectStatement {
131        query:
132            Query {
133                ctes: CteBlock::Simple(ctes),
134                body: SetExpr::Select(body),
135                order_by,
136                limit: None,
137                offset: None,
138            },
139        as_of: None,
140    } = select
141    else {
142        return Ok(None);
143    };
144    if !ctes.is_empty() || !order_by.is_empty() {
145        return Ok(None);
146    }
147    let Select {
148        distinct: None,
149        projection,
150        from,
151        selection: None,
152        group_by,
153        having: None,
154        qualify: None,
155        options,
156    } = &**body
157    else {
158        return Ok(None);
159    };
160    if !from.is_empty() || !group_by.is_empty() || !options.is_empty() || projection.len() != 1 {
161        return Ok(None);
162    }
163    let [
164        SelectItem::Expr {
165            expr:
166                Expr::Function(Function {
167                    name,
168                    args: FunctionArgs::Args { args, order_by },
169                    filter: None,
170                    over: None,
171                    distinct: false,
172                }),
173            alias: None,
174        },
175    ] = &projection[..]
176    else {
177        return Ok(None);
178    };
179    if !order_by.is_empty() {
180        return Ok(None);
181    }
182
183    // Check if the called function is a scalar function with exactly one
184    // implementation. All side-effecting functions have only a single
185    // implementation.
186    let Ok(func) = scx
187        .get_item_by_resolved_name(name)
188        .and_then(|item| item.func().err_into())
189    else {
190        return Ok(None);
191    };
192    let func_impl = match func {
193        Func::Scalar(impls) if impls.len() == 1 => impls.into_element(),
194        _ => return Ok(None),
195    };
196
197    // Check whether the implementation is a known side-effecting function.
198    let Some(sef_impl) = PG_CATALOG_SEF_BUILTINS.get(&func_impl.oid) else {
199        return Ok(None);
200    };
201
202    // Check that the number of provided arguments matches the function
203    // signature.
204    if args.len() != sef_impl.param_types.len() {
205        // We return `Ok(None)` instead of an error for the same reason to let
206        // the function selection code produce the standard "no function matches
207        // the given name and argument types" error.
208        return Ok(None);
209    }
210
211    // Plan and coerce all argument expressions.
212    let mut args_out = vec![];
213    let qcx = QueryContext::root(scx, QueryLifetime::OneShot);
214    let ecx = ExprContext {
215        qcx: &qcx,
216        name: sef_impl.name,
217        scope: &Scope::empty(),
218        relation_type: &SqlRelationType::empty(),
219        allow_aggregates: false,
220        allow_subqueries: false,
221        allow_parameters: true,
222        allow_windows: false,
223    };
224    for (arg, ty) in args.iter().zip_eq(sef_impl.param_types) {
225        // If we encounter an error when planning the argument expression, that
226        // error is unrelated to planning the function call and can be returned
227        // directly to the user.
228        let arg = query::plan_expr(&ecx, arg)?;
229
230        // Implicitly cast the argument to the correct type. This matches what
231        // the standard function selection code will do.
232        //
233        // If the cast fails, we give up on planning the side-effecting function but
234        // intentionally do not produce an error. This way, we fall into the
235        // standard function selection code, which will produce the correct "no
236        // function matches the given name and argument types" error rather than a
237        // "cast failed" error.
238        let Ok(arg) = arg.cast_to(&ecx, CastContext::Implicit, ty) else {
239            return Ok(None);
240        };
241
242        args_out.push(arg);
243    }
244
245    Ok(Some(SefCall {
246        imp: sef_impl,
247        args: args_out,
248    }))
249}
250
251struct SefCall {
252    imp: &'static SideEffectingFuncImpl,
253    args: Vec<HirScalarExpr>,
254}
255
256/// Defines the implementation of a side-effecting function.
257///
258/// This is a very restricted subset of the [`Func`] struct (no overloads, no
259/// variadic arguments, etc) to make side-effecting functions easier to plan.
260pub struct SideEffectingFuncImpl {
261    /// The name of the function.
262    pub name: &'static str,
263    /// The OID of the function.
264    pub oid: u32,
265    /// The parameter types for the function.
266    pub param_types: &'static [SqlScalarType],
267    /// The return type of the function.
268    pub return_type: SqlColumnType,
269    /// A function that will produce a `SideEffectingFunc` given arguments
270    /// that have been evaluated to `Datum`s.
271    pub plan_fn: fn(&[Datum]) -> SideEffectingFunc,
272}
273
274/// A map of the side-effecting functions in the `pg_catalog` schema, keyed by
275/// OID.
276pub static PG_CATALOG_SEF_BUILTINS: LazyLock<BTreeMap<u32, SideEffectingFuncImpl>> =
277    LazyLock::new(|| {
278        [PG_CANCEL_BACKEND]
279            .into_iter()
280            .map(|f| (f.oid, f))
281            .collect()
282    });
283
284// Implementations of each side-effecting function follow.
285//
286// If you add a new side-effecting function, be sure to add it to the map above.
287
288const PG_CANCEL_BACKEND: SideEffectingFuncImpl = SideEffectingFuncImpl {
289    name: "pg_cancel_backend",
290    oid: 2171,
291    param_types: &[SqlScalarType::Int32],
292    // Like in PostgreSQL, the function returns `NULL` when its argument is
293    // `NULL`, so the output column is nullable.
294    return_type: SqlScalarType::Bool.nullable(true),
295    plan_fn: |datums| -> SideEffectingFunc {
296        let connection_id = match datums[0] {
297            Datum::Null => None,
298            datum => Some(u32::reinterpret_cast(datum.unwrap_int32())),
299        };
300        SideEffectingFunc::PgCancelBackend { connection_id }
301    },
302};