mz_adapter/
client.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::borrow::Cow;
11use std::collections::BTreeMap;
12use std::fmt::{Debug, Display, Formatter};
13use std::future::Future;
14use std::pin::{self, Pin};
15use std::sync::Arc;
16use std::time::{Duration, Instant};
17
18use anyhow::bail;
19use chrono::{DateTime, Utc};
20use derivative::Derivative;
21use futures::{Stream, StreamExt};
22use itertools::Itertools;
23use mz_adapter_types::connection::{ConnectionId, ConnectionIdType};
24use mz_auth::password::Password;
25use mz_build_info::BuildInfo;
26use mz_compute_types::ComputeInstanceId;
27use mz_ore::channel::OneshotReceiverExt;
28use mz_ore::collections::CollectionExt;
29use mz_ore::id_gen::{IdAllocator, IdAllocatorInnerBitSet, MAX_ORG_ID, org_id_conn_bits};
30use mz_ore::instrument;
31use mz_ore::now::{EpochMillis, NowFn, to_datetime};
32use mz_ore::result::ResultExt;
33use mz_ore::task::AbortOnDropHandle;
34use mz_ore::thread::JoinOnDropHandle;
35use mz_ore::tracing::OpenTelemetryContext;
36use mz_repr::{CatalogItemId, ColumnIndex, Row, SqlScalarType};
37use mz_sql::ast::{Raw, Statement};
38use mz_sql::catalog::{EnvironmentId, SessionCatalog};
39use mz_sql::session::hint::ApplicationNameHint;
40use mz_sql::session::metadata::SessionMetadata;
41use mz_sql::session::user::SUPPORT_USER;
42use mz_sql::session::vars::{
43    CLUSTER, ENABLE_FRONTEND_PEEK_SEQUENCING, OwnedVarInput, SystemVars, Var,
44};
45use mz_sql_parser::parser::{ParserStatementError, StatementParseResult};
46use prometheus::Histogram;
47use serde_json::json;
48use tokio::sync::{mpsc, oneshot};
49use tracing::{debug, error};
50use uuid::Uuid;
51
52use crate::catalog::Catalog;
53use crate::command::{
54    AuthResponse, CatalogDump, CatalogSnapshot, Command, ExecuteResponse, Response,
55    SASLChallengeResponse, SASLVerifyProofResponse,
56};
57use crate::coord::{Coordinator, ExecuteContextExtra};
58use crate::error::AdapterError;
59use crate::metrics::Metrics;
60use crate::optimize::dataflows::{EvalTime, ExprPrepStyle};
61use crate::optimize::{self, Optimize};
62use crate::session::{
63    EndTransactionAction, PreparedStatement, Session, SessionConfig, StateRevision, TransactionId,
64};
65use crate::statement_logging::{StatementEndedExecutionReason, StatementExecutionStrategy};
66use crate::telemetry::{self, EventDetails, SegmentClientExt, StatementFailureType};
67use crate::webhook::AppendWebhookResponse;
68use crate::{AdapterNotice, AppendWebhookError, PeekClient, PeekResponseUnary, StartupResponse};
69
/// A handle to a running coordinator.
///
/// The coordinator runs on its own thread. Dropping the handle will wait for
/// the coordinator's thread to exit, which will only occur after all
/// outstanding [`Client`]s for the coordinator have dropped.
pub struct Handle {
    /// Identifier reported by [`Handle::session_id`].
    pub(crate) session_id: Uuid,
    /// Instant reported by [`Handle::start_instant`].
    pub(crate) start_instant: Instant,
    /// Join-on-drop handle for the coordinator thread. It is never read; it is held so that
    /// dropping the `Handle` waits for that thread, as described above.
    pub(crate) _thread: JoinOnDropHandle<()>,
}
80
81impl Handle {
82    /// Returns the session ID associated with this coordinator.
83    ///
84    /// The session ID is generated on coordinator boot. It lasts for the
85    /// lifetime of the coordinator. Restarting the coordinator will result
86    /// in a new session ID.
87    pub fn session_id(&self) -> Uuid {
88        self.session_id
89    }
90
91    /// Returns the instant at which the coordinator booted.
92    pub fn start_instant(&self) -> Instant {
93        self.start_instant
94    }
95}
96
/// A coordinator client.
///
/// A coordinator client is a simple handle to a communication channel with the
/// coordinator. It can be cheaply cloned.
///
/// Clients keep the coordinator alive. The coordinator will not exit until all
/// outstanding clients have dropped.
#[derive(Debug, Clone)]
pub struct Client {
    /// Build information handed to every [`Session`] created by [`Client::new_session`].
    build_info: &'static BuildInfo,
    /// Sending half of the command channel to the coordinator. Each command travels together
    /// with the OpenTelemetry context captured when it was sent (see [`Client::send`]).
    inner_cmd_tx: mpsc::UnboundedSender<(OpenTelemetryContext, Command)>,
    /// Allocator backing [`Client::new_conn_id`].
    id_alloc: IdAllocator<IdAllocatorInnerBitSet>,
    /// Clock function backing [`Client::now`].
    now: NowFn,
    /// Adapter metrics, exposed through [`Client::metrics`].
    metrics: Metrics,
    /// Environment this client belongs to; copied into each [`SessionClient`] at startup.
    environment_id: EnvironmentId,
    /// Optional Segment client; copied into each [`SessionClient`] at startup.
    segment_client: Option<mz_segment::Client>,
}
114
115impl Client {
116    pub(crate) fn new(
117        build_info: &'static BuildInfo,
118        cmd_tx: mpsc::UnboundedSender<(OpenTelemetryContext, Command)>,
119        metrics: Metrics,
120        now: NowFn,
121        environment_id: EnvironmentId,
122        segment_client: Option<mz_segment::Client>,
123    ) -> Client {
124        // Connection ids are 32 bits and have 3 parts.
125        // 1. MSB bit is always 0 because these are interpreted as an i32, and it is possible some
126        //    driver will not handle a negative id since postgres has never produced one because it
127        //    uses process ids.
128        // 2. Next 12 bits are the lower 12 bits of the org id. This allows balancerd to route
129        //    incoming cancel messages to a subset of the environments.
130        // 3. Last 19 bits are random.
131        let env_lower = org_id_conn_bits(&environment_id.organization_id());
132        Client {
133            build_info,
134            inner_cmd_tx: cmd_tx,
135            id_alloc: IdAllocator::new(1, MAX_ORG_ID, env_lower),
136            now,
137            metrics,
138            environment_id,
139            segment_client,
140        }
141    }
142
143    /// Allocates a client for an incoming connection.
144    pub fn new_conn_id(&self) -> Result<ConnectionId, AdapterError> {
145        self.id_alloc.alloc().ok_or(AdapterError::IdExhaustionError)
146    }
147
148    /// Creates a new session associated with this client for the given user.
149    ///
150    /// It is the caller's responsibility to have authenticated the user.
151    pub fn new_session(&self, config: SessionConfig) -> Session {
152        // We use the system clock to determine when a session connected to Materialize. This is not
153        // intended to be 100% accurate and correct, so we don't burden the timestamp oracle with
154        // generating a more correct timestamp.
155        Session::new(self.build_info, config, self.metrics().session_metrics())
156    }
157
158    /// Preforms an authentication check for the given user.
159    pub async fn authenticate(
160        &self,
161        user: &String,
162        password: &Password,
163    ) -> Result<AuthResponse, AdapterError> {
164        let (tx, rx) = oneshot::channel();
165        self.send(Command::AuthenticatePassword {
166            role_name: user.to_string(),
167            password: Some(password.clone()),
168            tx,
169        });
170        let response = rx.await.expect("sender dropped")?;
171        Ok(response)
172    }
173
174    pub async fn generate_sasl_challenge(
175        &self,
176        user: &String,
177        client_nonce: &String,
178    ) -> Result<SASLChallengeResponse, AdapterError> {
179        let (tx, rx) = oneshot::channel();
180        self.send(Command::AuthenticateGetSASLChallenge {
181            role_name: user.to_string(),
182            nonce: client_nonce.to_string(),
183            tx,
184        });
185        let response = rx.await.expect("sender dropped")?;
186        Ok(response)
187    }
188
189    pub async fn verify_sasl_proof(
190        &self,
191        user: &String,
192        proof: &String,
193        nonce: &String,
194        mock_hash: &String,
195    ) -> Result<SASLVerifyProofResponse, AdapterError> {
196        let (tx, rx) = oneshot::channel();
197        self.send(Command::AuthenticateVerifySASLProof {
198            role_name: user.to_string(),
199            proof: proof.to_string(),
200            auth_message: nonce.to_string(),
201            mock_hash: mock_hash.to_string(),
202            tx,
203        });
204        let response = rx.await.expect("sender dropped")?;
205        Ok(response)
206    }
207
    /// Upgrades this client to a session client.
    ///
    /// A session is a connection that has successfully negotiated parameters,
    /// like the user. Most coordinator operations are available only after
    /// upgrading a connection to a session.
    ///
    /// Returns a new client that is bound to the session. Before returning, the
    /// session has been populated from the coordinator's startup response (role
    /// metadata, session defaults, the pending builtin-table-write notification)
    /// and any welcome / missing-database / missing-cluster notices have been
    /// queued on it.
    ///
    /// # Errors
    ///
    /// Returns the error the coordinator replied with to `Command::Startup`.
    ///
    /// # Panics
    ///
    /// Panics if the coordinator is gone when a command is sent, if it drops the
    /// reply channel without answering, or if the session has no `cluster`
    /// variable.
    #[mz_ore::instrument(level = "debug")]
    pub async fn startup(&self, session: Session) -> Result<SessionClient, AdapterError> {
        // Copy everything the startup command needs out of the session up front; the session
        // itself is only moved into the `SessionClient` once startup has succeeded.
        let user = session.user().clone();
        let conn_id = session.conn_id().clone();
        let secret_key = session.secret_key();
        let uuid = session.uuid();
        let client_ip = session.client_ip();
        let application_name = session.application_name().into();
        let notice_tx = session.retain_notice_transmitter();

        let (tx, rx) = oneshot::channel();

        // ~~SPOOKY ZONE~~
        //
        // This guard prevents a race where the startup command finishes, but the Future returned
        // by this function is concurrently dropped, so we never create a `SessionClient` and thus
        // never cleanup the initialized Session.
        let rx = rx.with_guard(|_| {
            self.send(Command::Terminate {
                conn_id: conn_id.clone(),
                tx: None,
            });
        });

        self.send(Command::Startup {
            tx,
            user,
            conn_id: conn_id.clone(),
            secret_key,
            uuid,
            client_ip: client_ip.copied(),
            application_name,
            notice_tx,
        });

        // When startup fails, no need to call terminate (handle_startup does this). Delay creating
        // the client until after startup to sidestep the panic in its `Drop` implementation.
        let response = rx.await.expect("sender dropped")?;

        // Create the client as soon as startup succeeds (before any await points) so its `Drop` can
        // handle termination.
        // Build the PeekClient with controller handles returned from startup.
        let StartupResponse {
            role_id,
            write_notify,
            session_defaults,
            catalog,
            storage_collections,
            transient_id_gen,
            optimizer_metrics,
            persist_client,
        } = response;

        let peek_client = PeekClient::new(
            self.clone(),
            storage_collections,
            transient_id_gen,
            optimizer_metrics,
            persist_client,
        );

        let mut client = SessionClient {
            inner: Some(self.clone()),
            session: Some(session),
            timeouts: Timeout::new(),
            environment_id: self.environment_id.clone(),
            segment_client: self.segment_client.clone(),
            peek_client,
            enable_frontend_peek_sequencing: false, // initialized below, once we have a ConnCatalog
        };

        // From here on the session is accessed through the client that now owns it.
        let session = client.session();
        session.initialize_role_metadata(role_id);
        let vars_mut = session.vars_mut();
        for (name, val) in session_defaults {
            if let Err(err) = vars_mut.set_default(&name, val.borrow()) {
                // Note: erroring here is unexpected, but we don't want to panic if somehow our
                // assumptions are wrong.
                tracing::error!("failed to set peristed default, {err:?}");
            }
        }
        session
            .vars_mut()
            .end_transaction(EndTransactionAction::Commit);

        // Stash the future that notifies us of builtin table writes completing, we'll block on
        // this future before allowing queries from this session against relevant relations.
        //
        // Note: We stash the future as opposed to waiting on it here to prevent blocking session
        // creation on builtin table updates. This improves the latency for session creation and
        // reduces scheduling load on any dataflows that read from these builtin relations, since
        // it allows updates to be batched.
        session.set_builtin_table_updates(write_notify);

        let catalog = catalog.for_session(session);

        // Owned copy of the session's active cluster name, reused by the welcome message and by
        // the missing-cluster notice below.
        let cluster_active = session.vars().cluster().to_string();
        if session.vars().welcome_message() {
            let cluster_info = if catalog.resolve_cluster(Some(&cluster_active)).is_err() {
                format!("{cluster_active} (does not exist)")
            } else {
                cluster_active.to_string()
            };

            // Emit a welcome message, optimized for readability by humans using
            // interactive tools. If you change the message, make sure that it
            // formats nicely in both `psql` and the console's SQL shell.
            session.add_notice(AdapterNotice::Welcome(format!(
                "connected to Materialize v{}
  Org ID: {}
  Region: {}
  User: {}
  Cluster: {}
  Database: {}
  {}
  Session UUID: {}

Issue a SQL query to get started. Need help?
  View documentation: https://materialize.com/s/docs
  Join our Slack community: https://materialize.com/s/chat
    ",
                session.vars().build_info().semver_version(),
                self.environment_id.organization_id(),
                self.environment_id.region(),
                session.vars().user().name,
                cluster_info,
                session.vars().database(),
                // A single-entry search path is shown as "Schema: ..."; anything else is
                // shown as the full comma-separated search path.
                match session.vars().search_path() {
                    [schema] => format!("Schema: {}", schema),
                    schemas => format!(
                        "Search path: {}",
                        schemas.iter().map(|id| id.to_string()).join(", ")
                    ),
                },
                session.uuid(),
            )));
        }

        // Warn when the session's database does not resolve in the catalog.
        if session.vars().current_object_missing_warnings() {
            if catalog.active_database().is_none() {
                let db = session.vars().database().into();
                session.add_notice(AdapterNotice::UnknownSessionDatabase(db));
            }
        }

        // Users stub their toe on their default cluster not existing, so we provide a notice to
        // help guide them on what to do.
        let cluster_var = session
            .vars()
            .inspect(CLUSTER.name())
            .expect("cluster should exist");
        if session.vars().current_object_missing_warnings()
            && catalog.resolve_cluster(Some(&cluster_active)).is_err()
        {
            // The notice's `kind` names where the missing cluster name was found to come from:
            // an explicit session value, the role's default, or (when the role has no default)
            // "system".
            let cluster_notice = 'notice: {
                if cluster_var.inspect_session_value().is_some() {
                    break 'notice Some(AdapterNotice::DefaultClusterDoesNotExist {
                        name: cluster_active,
                        kind: "session",
                        suggested_action: "Pick an extant cluster with SET CLUSTER = name. Run SHOW CLUSTERS to see available clusters.".into(),
                    });
                }

                let role_default = catalog.get_role(catalog.active_role_id());
                let role_cluster = match role_default.vars().get(CLUSTER.name()) {
                    Some(OwnedVarInput::Flat(name)) => Some(name),
                    None => None,
                    // This is unexpected!
                    Some(v @ OwnedVarInput::SqlSet(_)) => {
                        tracing::warn!(?v, "SqlSet found for cluster Role Default");
                        break 'notice None;
                    }
                };

                let alter_role = "with `ALTER ROLE <role> SET cluster TO <cluster>;`";
                match role_cluster {
                    // If there is no default, suggest a Role default.
                    None => Some(AdapterNotice::DefaultClusterDoesNotExist {
                        name: cluster_active,
                        kind: "system",
                        suggested_action: format!(
                            "Set a default cluster for the current role {alter_role}."
                        ),
                    }),
                    // If the default does not exist, suggest to change it.
                    Some(_) => Some(AdapterNotice::DefaultClusterDoesNotExist {
                        name: cluster_active,
                        kind: "role",
                        suggested_action: format!(
                            "Change the default cluster for the current role {alter_role}."
                        ),
                    }),
                }
            };

            if let Some(notice) = cluster_notice {
                session.add_notice(notice);
            }
        }

        // The flag counts as enabled exactly when `require` succeeds against the system vars.
        client.enable_frontend_peek_sequencing = ENABLE_FRONTEND_PEEK_SEQUENCING
            .require(catalog.system_vars())
            .is_ok();

        Ok(client)
    }
422
423    /// Cancels the query currently running on the specified connection.
424    pub fn cancel_request(&self, conn_id: ConnectionIdType, secret_key: u32) {
425        self.send(Command::CancelRequest {
426            conn_id,
427            secret_key,
428        });
429    }
430
431    /// Executes a single SQL statement that returns rows as the
432    /// `mz_support` user.
433    pub async fn support_execute_one(
434        &self,
435        sql: &str,
436    ) -> Result<Pin<Box<dyn Stream<Item = PeekResponseUnary> + Send>>, anyhow::Error> {
437        // Connect to the coordinator.
438        let conn_id = self.new_conn_id()?;
439        let session = self.new_session(SessionConfig {
440            conn_id,
441            uuid: Uuid::new_v4(),
442            user: SUPPORT_USER.name.clone(),
443            client_ip: None,
444            external_metadata_rx: None,
445            internal_user_metadata: None,
446            helm_chart_version: None,
447        });
448        let mut session_client = self.startup(session).await?;
449
450        // Parse the SQL statement.
451        let stmts = mz_sql::parse::parse(sql)?;
452        if stmts.len() != 1 {
453            bail!("must supply exactly one query");
454        }
455        let StatementParseResult { ast: stmt, sql } = stmts.into_element();
456
457        const EMPTY_PORTAL: &str = "";
458        session_client.start_transaction(Some(1))?;
459        session_client
460            .declare(EMPTY_PORTAL.into(), stmt, sql.to_string())
461            .await?;
462
463        match session_client
464            .execute(EMPTY_PORTAL.into(), futures::future::pending(), None)
465            .await?
466        {
467            (ExecuteResponse::SendingRowsStreaming { mut rows, .. }, _) => {
468                // We have to only drop the session client _after_ we read the
469                // result. Otherwise the peek will get cancelled right when we
470                // drop the session client. So we wrap it up in an extra stream
471                // like this, which owns the client and can return it.
472                let owning_response_stream = async_stream::stream! {
473                    while let Some(rows) = rows.next().await {
474                        yield rows;
475                    }
476                    drop(session_client);
477                };
478                Ok(Box::pin(owning_response_stream))
479            }
480            r => bail!("unsupported response type: {r:?}"),
481        }
482    }
483
484    /// Returns the metrics associated with the adapter layer.
485    pub fn metrics(&self) -> &Metrics {
486        &self.metrics
487    }
488
489    /// The current time according to the [`Client`].
490    pub fn now(&self) -> DateTime<Utc> {
491        to_datetime((self.now)())
492    }
493
494    /// Get a metadata and a channel that can be used to append to a webhook source.
495    pub async fn get_webhook_appender(
496        &self,
497        database: String,
498        schema: String,
499        name: String,
500    ) -> Result<AppendWebhookResponse, AppendWebhookError> {
501        let (tx, rx) = oneshot::channel();
502
503        // Send our request.
504        self.send(Command::GetWebhook {
505            database,
506            schema,
507            name,
508            tx,
509        });
510
511        // Using our one shot channel to get the result, returning an error if the sender dropped.
512        let response = rx
513            .await
514            .map_err(|_| anyhow::anyhow!("failed to receive webhook response"))?;
515
516        response
517    }
518
519    /// Gets the current value of all system variables.
520    pub async fn get_system_vars(&self) -> SystemVars {
521        let (tx, rx) = oneshot::channel();
522        self.send(Command::GetSystemVars { tx });
523        rx.await.expect("coordinator unexpectedly gone")
524    }
525
526    #[instrument(level = "debug")]
527    pub(crate) fn send(&self, cmd: Command) {
528        self.inner_cmd_tx
529            .send((OpenTelemetryContext::obtain(), cmd))
530            .expect("coordinator unexpectedly gone");
531    }
532}
533
/// A coordinator client that is bound to a connection.
///
/// Instances are created by [`Client::startup`], which moves the session into
/// the client and copies the environment ID and Segment client over from the
/// originating [`Client`].
///
/// See also [`Client`].
pub struct SessionClient {
    // Invariant: inner may only be `None` after the session has been terminated.
    // Once the session is terminated, no communication to the Coordinator
    // should be attempted.
    inner: Option<Client>,
    // Invariant: session may only be `None` during a method call. Every public
    // method must ensure that `Session` is `Some` before it returns.
    session: Option<Session>,
    // NOTE(review): nothing in the visible part of this file reads `timeouts`; presumably it
    // tracks session timeouts -- confirm against the rest of the impl.
    timeouts: Timeout,
    // Optional Segment client; used to report statement parse failures as telemetry events.
    segment_client: Option<mz_segment::Client>,
    // Environment the telemetry events are attributed to.
    environment_id: EnvironmentId,
    /// Client for frontend peek sequencing; populated at connection startup.
    peek_client: PeekClient,
    /// Whether frontend peek sequencing is enabled; initialized at connection startup.
    // TODO(peek-seq): Currently, this is initialized only at session startup. We'll be able to
    // check the actual feature flag value at every peek (without a Coordinator call) once we'll
    // always have a catalog snapshot at hand.
    pub enable_frontend_peek_sequencing: bool,
}
556
557impl SessionClient {
558    /// Parses a SQL expression, reporting failures as a telemetry event if
559    /// possible.
560    pub fn parse<'a>(
561        &self,
562        sql: &'a str,
563    ) -> Result<Result<Vec<StatementParseResult<'a>>, ParserStatementError>, String> {
564        match mz_sql::parse::parse_with_limit(sql) {
565            Ok(Err(e)) => {
566                self.track_statement_parse_failure(&e);
567                Ok(Err(e))
568            }
569            r => r,
570        }
571    }
572
573    fn track_statement_parse_failure(&self, parse_error: &ParserStatementError) {
574        let session = self.session.as_ref().expect("session invariant violated");
575        let Some(user_id) = session.user().external_metadata.as_ref().map(|m| m.user_id) else {
576            return;
577        };
578        let Some(segment_client) = &self.segment_client else {
579            return;
580        };
581        let Some(statement_kind) = parse_error.statement else {
582            return;
583        };
584        let Some((action, object_type)) = telemetry::analyze_audited_statement(statement_kind)
585        else {
586            return;
587        };
588        let event_type = StatementFailureType::ParseFailure;
589        let event_name = format!(
590            "{} {} {}",
591            object_type.as_title_case(),
592            action.as_title_case(),
593            event_type.as_title_case(),
594        );
595        segment_client.environment_track(
596            &self.environment_id,
597            event_name,
598            json!({
599                "statement_kind": statement_kind,
600                "error": &parse_error.error,
601            }),
602            EventDetails {
603                user_id: Some(user_id),
604                application_name: Some(session.application_name()),
605                ..Default::default()
606            },
607        );
608    }
609
610    // Verify and return the named prepared statement. We need to verify each use
611    // to make sure the prepared statement is still safe to use.
612    pub async fn get_prepared_statement(
613        &mut self,
614        name: &str,
615    ) -> Result<&PreparedStatement, AdapterError> {
616        let catalog = self.catalog_snapshot("get_prepared_statement").await;
617        Coordinator::verify_prepared_statement(&catalog, self.session(), name)?;
618        Ok(self
619            .session()
620            .get_prepared_statement_unverified(name)
621            .expect("must exist"))
622    }
623
624    /// Saves the parsed statement as a prepared statement.
625    ///
626    /// The prepared statement is saved in the connection's [`crate::session::Session`]
627    /// under the specified name.
628    pub async fn prepare(
629        &mut self,
630        name: String,
631        stmt: Option<Statement<Raw>>,
632        sql: String,
633        param_types: Vec<Option<SqlScalarType>>,
634    ) -> Result<(), AdapterError> {
635        let catalog = self.catalog_snapshot("prepare").await;
636
637        // Note: This failpoint is used to simulate a request outliving the external connection
638        // that made it.
639        let mut async_pause = false;
640        (|| {
641            fail::fail_point!("async_prepare", |val| {
642                async_pause = val.map_or(false, |val| val.parse().unwrap_or(false))
643            });
644        })();
645        if async_pause {
646            tokio::time::sleep(Duration::from_secs(1)).await;
647        };
648
649        let desc = Coordinator::describe(&catalog, self.session(), stmt.clone(), param_types)?;
650        let now = self.now();
651        let state_revision = StateRevision {
652            catalog_revision: catalog.transient_revision(),
653            session_state_revision: self.session().state_revision(),
654        };
655        self.session()
656            .set_prepared_statement(name, stmt, sql, desc, state_revision, now);
657        Ok(())
658    }
659
660    /// Binds a statement to a portal.
661    #[mz_ore::instrument(level = "debug")]
662    pub async fn declare(
663        &mut self,
664        name: String,
665        stmt: Statement<Raw>,
666        sql: String,
667    ) -> Result<(), AdapterError> {
668        let catalog = self.catalog_snapshot("declare").await;
669        let param_types = vec![];
670        let desc =
671            Coordinator::describe(&catalog, self.session(), Some(stmt.clone()), param_types)?;
672        let params = vec![];
673        let result_formats = vec![mz_pgwire_common::Format::Text; desc.arity()];
674        let now = self.now();
675        let logging = self.session().mint_logging(sql, Some(&stmt), now);
676        let state_revision = StateRevision {
677            catalog_revision: catalog.transient_revision(),
678            session_state_revision: self.session().state_revision(),
679        };
680        self.session().set_portal(
681            name,
682            desc,
683            Some(stmt),
684            logging,
685            params,
686            result_formats,
687            state_revision,
688        )?;
689        Ok(())
690    }
691
692    /// Executes a previously-bound portal.
693    ///
694    /// Note: the provided `cancel_future` must be cancel-safe as it's polled in a `select!` loop.
695    #[mz_ore::instrument(level = "debug")]
696    pub async fn execute(
697        &mut self,
698        portal_name: String,
699        cancel_future: impl Future<Output = std::io::Error> + Send,
700        outer_ctx_extra: Option<ExecuteContextExtra>,
701    ) -> Result<(ExecuteResponse, Instant), AdapterError> {
702        let execute_started = Instant::now();
703
704        // Attempt peek sequencing in the session task.
705        // If unsupported, fall back to the Coordinator path.
706        // TODO(peek-seq): wire up cancel_future
707        if let Some(resp) = self.try_frontend_peek(&portal_name).await? {
708            debug!("frontend peek succeeded");
709            return Ok((resp, execute_started));
710        } else {
711            debug!("frontend peek did not happen");
712        }
713
714        let response = self
715            .send_with_cancel(
716                |tx, session| Command::Execute {
717                    portal_name,
718                    session,
719                    tx,
720                    outer_ctx_extra,
721                },
722                cancel_future,
723            )
724            .await?;
725        Ok((response, execute_started))
726    }
727
728    fn now(&self) -> EpochMillis {
729        (self.inner().now)()
730    }
731
732    fn now_datetime(&self) -> DateTime<Utc> {
733        to_datetime(self.now())
734    }
735
736    /// Starts a transaction based on implicit:
737    /// - `None`: InTransaction
738    /// - `Some(1)`: Started
739    /// - `Some(n > 1)`: InTransactionImplicit
740    /// - `Some(0)`: no change
741    pub fn start_transaction(&mut self, implicit: Option<usize>) -> Result<(), AdapterError> {
742        let now = self.now_datetime();
743        let session = self.session.as_mut().expect("session invariant violated");
744        let result = match implicit {
745            None => session.start_transaction(now, None, None),
746            Some(stmts) => {
747                session.start_transaction_implicit(now, stmts);
748                Ok(())
749            }
750        };
751        result
752    }
753
754    /// Ends a transaction. Even if an error is returned, guarantees that the transaction in the
755    /// session and Coordinator has cleared its state.
756    #[instrument(level = "debug")]
757    pub async fn end_transaction(
758        &mut self,
759        action: EndTransactionAction,
760    ) -> Result<ExecuteResponse, AdapterError> {
761        let res = self
762            .send(|tx, session| Command::Commit {
763                action,
764                session,
765                tx,
766            })
767            .await;
768        // Commit isn't guaranteed to set the session's state to anything specific, so clear it
769        // here. It's safe to ignore the returned `TransactionStatus` because that doesn't contain
770        // any data that the Coordinator must act on for correctness.
771        let _ = self.session().clear_transaction();
772        res
773    }
774
775    /// Fails a transaction.
776    pub fn fail_transaction(&mut self) {
777        let session = self.session.take().expect("session invariant violated");
778        let session = session.fail_transaction();
779        self.session = Some(session);
780    }
781
782    /// Fetches the catalog.
783    #[instrument(level = "debug")]
784    pub async fn catalog_snapshot(&self, context: &str) -> Arc<Catalog> {
785        let start = std::time::Instant::now();
786        let CatalogSnapshot { catalog } = self
787            .send_without_session(|tx| Command::CatalogSnapshot { tx })
788            .await;
789        self.inner()
790            .metrics()
791            .catalog_snapshot_seconds
792            .with_label_values(&[context])
793            .observe(start.elapsed().as_secs_f64());
794        catalog
795    }
796
797    /// Dumps the catalog to a JSON string.
798    ///
799    /// No authorization is performed, so access to this function must be limited to internal
800    /// servers or superusers.
801    pub async fn dump_catalog(&self) -> Result<CatalogDump, AdapterError> {
802        let catalog = self.catalog_snapshot("dump_catalog").await;
803        catalog.dump().map_err(AdapterError::from)
804    }
805
806    /// Checks the catalog for internal consistency, returning a JSON object describing the
807    /// inconsistencies, if there are any.
808    ///
809    /// No authorization is performed, so access to this function must be limited to internal
810    /// servers or superusers.
811    pub async fn check_catalog(&self) -> Result<(), serde_json::Value> {
812        let catalog = self.catalog_snapshot("check_catalog").await;
813        catalog.check_consistency()
814    }
815
816    /// Checks the coordinator for internal consistency, returning a JSON object describing the
817    /// inconsistencies, if there are any. This is a superset of checks that check_catalog performs,
818    ///
819    /// No authorization is performed, so access to this function must be limited to internal
820    /// servers or superusers.
821    pub async fn check_coordinator(&self) -> Result<(), serde_json::Value> {
822        self.send_without_session(|tx| Command::CheckConsistency { tx })
823            .await
824            .map_err(|inconsistencies| {
825                serde_json::to_value(inconsistencies).unwrap_or_else(|_| {
826                    serde_json::Value::String("failed to serialize inconsistencies".to_string())
827                })
828            })
829    }
830
831    pub async fn dump_coordinator_state(&self) -> Result<serde_json::Value, anyhow::Error> {
832        self.send_without_session(|tx| Command::Dump { tx }).await
833    }
834
835    /// Tells the coordinator a statement has finished execution, in the cases
836    /// where we have no other reason to communicate with the coordinator.
837    pub fn retire_execute(&self, data: ExecuteContextExtra, reason: StatementEndedExecutionReason) {
838        if !data.is_trivial() {
839            let cmd = Command::RetireExecute { data, reason };
840            self.inner().send(cmd);
841        }
842    }
843
844    /// Inserts a set of rows into the given table.
845    ///
846    /// The rows only contain the columns positions in `columns`, so they
847    /// must be re-encoded for adding the default values for the remaining
848    /// ones.
849    pub async fn insert_rows(
850        &mut self,
851        target_id: CatalogItemId,
852        target_name: String,
853        columns: Vec<ColumnIndex>,
854        rows: Vec<Row>,
855        ctx_extra: ExecuteContextExtra,
856    ) -> Result<ExecuteResponse, AdapterError> {
857        // TODO: Remove this clone once we always have the session. It's currently needed because
858        // self.session returns a mut ref, so we can't call it twice.
859        let pcx = self.session().pcx().clone();
860
861        let session_meta = self.session().meta();
862
863        let catalog = self.catalog_snapshot("insert_rows").await;
864        let conn_catalog = catalog.for_session(self.session());
865        let catalog_state = conn_catalog.state();
866
867        // Collect optimizer parameters.
868        let optimizer_config = optimize::OptimizerConfig::from(conn_catalog.system_vars());
869        let prep = ExprPrepStyle::OneShot {
870            logical_time: EvalTime::NotAvailable,
871            session: &session_meta,
872            catalog_state,
873        };
874        let mut optimizer =
875            optimize::view::Optimizer::new_with_prep_no_limit(optimizer_config.clone(), None, prep);
876
877        let result: Result<_, AdapterError> = mz_sql::plan::plan_copy_from(
878            &pcx,
879            &conn_catalog,
880            target_id,
881            target_name,
882            columns,
883            rows,
884        )
885        .err_into()
886        .and_then(|values| optimizer.optimize(values).err_into())
887        .and_then(|values| {
888            // Copied rows must always be constants.
889            Coordinator::insert_constant(&catalog, self.session(), target_id, values.into_inner())
890        });
891        self.retire_execute(ctx_extra, (&result).into());
892        result
893    }
894
895    /// Gets the current value of all system variables.
896    pub async fn get_system_vars(&self) -> SystemVars {
897        self.inner().get_system_vars().await
898    }
899
900    /// Updates the specified system variables to the specified values.
901    pub async fn set_system_vars(
902        &mut self,
903        vars: BTreeMap<String, String>,
904    ) -> Result<(), AdapterError> {
905        let conn_id = self.session().conn_id().clone();
906        self.send_without_session(|tx| Command::SetSystemVars { vars, conn_id, tx })
907            .await
908    }
909
910    /// Terminates the client session.
911    pub async fn terminate(&mut self) {
912        let conn_id = self.session().conn_id().clone();
913        let res = self
914            .send_without_session(|tx| Command::Terminate {
915                conn_id,
916                tx: Some(tx),
917            })
918            .await;
919        if let Err(e) = res {
920            // Nothing we can do to handle a failed terminate so we just log and ignore it.
921            error!("Unable to terminate session: {e:?}");
922        }
923        // Prevent any communication with Coordinator after session is terminated.
924        self.inner = None;
925    }
926
927    /// Returns a mutable reference to the session bound to this client.
928    pub fn session(&mut self) -> &mut Session {
929        self.session.as_mut().expect("session invariant violated")
930    }
931
932    /// Returns a reference to the inner client.
933    pub fn inner(&self) -> &Client {
934        self.inner.as_ref().expect("inner invariant violated")
935    }
936
937    async fn send_without_session<T, F>(&self, f: F) -> T
938    where
939        F: FnOnce(oneshot::Sender<T>) -> Command,
940    {
941        let (tx, rx) = oneshot::channel();
942        self.inner().send(f(tx));
943        rx.await.expect("sender dropped")
944    }
945
946    #[instrument(level = "debug")]
947    async fn send<T, F>(&mut self, f: F) -> Result<T, AdapterError>
948    where
949        F: FnOnce(oneshot::Sender<Response<T>>, Session) -> Command,
950    {
951        self.send_with_cancel(f, futures::future::pending()).await
952    }
953
954    /// Send a [`Command`] to the Coordinator, with the ability to cancel the command.
955    ///
956    /// Note: the provided `cancel_future` must be cancel-safe as it's polled in a `select!` loop.
957    #[instrument(level = "debug")]
958    async fn send_with_cancel<T, F>(
959        &mut self,
960        f: F,
961        cancel_future: impl Future<Output = std::io::Error> + Send,
962    ) -> Result<T, AdapterError>
963    where
964        F: FnOnce(oneshot::Sender<Response<T>>, Session) -> Command,
965    {
966        let session = self.session.take().expect("session invariant violated");
967        let mut typ = None;
968        let application_name = session.application_name();
969        let name_hint = ApplicationNameHint::from_str(application_name);
970        let conn_id = session.conn_id().clone();
971        let (tx, rx) = oneshot::channel();
972
973        // Destructure self so we can hold a mutable reference to the inner client and session at
974        // the same time.
975        let Self {
976            inner: inner_client,
977            session: client_session,
978            ..
979        } = self;
980
981        // TODO(parkmycar): Leaking this invariant here doesn't feel great, but calling
982        // `self.client()` doesn't work because then Rust takes a borrow on the entirity of self.
983        let inner_client = inner_client.as_ref().expect("inner invariant violated");
984
985        // ~~SPOOKY ZONE~~
986        //
987        // This guard prevents a race where a `Session` is returned on `rx` but never placed
988        // back in `self` because the Future returned by this function is concurrently dropped
989        // with the Coordinator sending a response.
990        let mut guarded_rx = rx.with_guard(|response: Response<_>| {
991            *client_session = Some(response.session);
992        });
993
994        inner_client.send({
995            let cmd = f(tx, session);
996            // Measure the success and error rate of certain commands:
997            // - declare reports success of SQL statement planning
998            // - execute reports success of dataflow execution
999            match cmd {
1000                Command::Execute { .. } => typ = Some("execute"),
1001                Command::GetWebhook { .. } => typ = Some("webhook"),
1002                Command::Startup { .. }
1003                | Command::AuthenticatePassword { .. }
1004                | Command::AuthenticateGetSASLChallenge { .. }
1005                | Command::AuthenticateVerifySASLProof { .. }
1006                | Command::CatalogSnapshot { .. }
1007                | Command::Commit { .. }
1008                | Command::CancelRequest { .. }
1009                | Command::PrivilegedCancelRequest { .. }
1010                | Command::GetSystemVars { .. }
1011                | Command::SetSystemVars { .. }
1012                | Command::Terminate { .. }
1013                | Command::RetireExecute { .. }
1014                | Command::CheckConsistency { .. }
1015                | Command::Dump { .. }
1016                | Command::GetComputeInstanceClient { .. }
1017                | Command::GetOracle { .. }
1018                | Command::DetermineRealTimeRecentTimestamp { .. }
1019                | Command::GetTransactionReadHoldsBundle { .. }
1020                | Command::StoreTransactionReadHolds { .. }
1021                | Command::ExecuteSlowPathPeek { .. }
1022                | Command::ExecuteCopyTo { .. } => {}
1023            };
1024            cmd
1025        });
1026
1027        let mut cancel_future = pin::pin!(cancel_future);
1028        let mut cancelled = false;
1029        loop {
1030            tokio::select! {
1031                res = &mut guarded_rx => {
1032                    // We received a result, so drop our guard to drop our borrows.
1033                    drop(guarded_rx);
1034
1035                    let res = res.expect("sender dropped");
1036                    let status = res.result.is_ok().then_some("success").unwrap_or("error");
1037                    if let Err(err) = res.result.as_ref() {
1038                        if name_hint.should_trace_errors() {
1039                            tracing::warn!(?err, ?name_hint, "adapter response error");
1040                        }
1041                    }
1042
1043                    if let Some(typ) = typ {
1044                        inner_client
1045                            .metrics
1046                            .commands
1047                            .with_label_values(&[typ, status, name_hint.as_str()])
1048                            .inc();
1049                    }
1050                    *client_session = Some(res.session);
1051                    return res.result;
1052                },
1053                _err = &mut cancel_future, if !cancelled => {
1054                    cancelled = true;
1055                    inner_client.send(Command::PrivilegedCancelRequest {
1056                        conn_id: conn_id.clone(),
1057                    });
1058                }
1059            };
1060        }
1061    }
1062
1063    pub fn add_idle_in_transaction_session_timeout(&mut self) {
1064        let session = self.session();
1065        let timeout_dur = session.vars().idle_in_transaction_session_timeout();
1066        if !timeout_dur.is_zero() {
1067            let timeout_dur = timeout_dur.clone();
1068            if let Some(txn) = session.transaction().inner() {
1069                let txn_id = txn.id.clone();
1070                let timeout = TimeoutType::IdleInTransactionSession(txn_id);
1071                self.timeouts.add_timeout(timeout, timeout_dur);
1072            }
1073        }
1074    }
1075
1076    pub fn remove_idle_in_transaction_session_timeout(&mut self) {
1077        let session = self.session();
1078        if let Some(txn) = session.transaction().inner() {
1079            let txn_id = txn.id.clone();
1080            self.timeouts
1081                .remove_timeout(&TimeoutType::IdleInTransactionSession(txn_id));
1082        }
1083    }
1084
1085    /// # Cancel safety
1086    ///
1087    /// This method is cancel safe. If `recv` is used as the event in a
1088    /// `tokio::select!` statement and some other branch
1089    /// completes first, it is guaranteed that no messages were received on this
1090    /// channel.
1091    pub async fn recv_timeout(&mut self) -> Option<TimeoutType> {
1092        self.timeouts.recv().await
1093    }
1094
1095    /// Returns a reference to the PeekClient used for frontend peek sequencing.
1096    pub fn peek_client(&self) -> &PeekClient {
1097        &self.peek_client
1098    }
1099
1100    /// Returns a reference to the PeekClient used for frontend peek sequencing.
1101    pub fn peek_client_mut(&mut self) -> &mut PeekClient {
1102        &mut self.peek_client
1103    }
1104
1105    /// Attempt to sequence a peek from the session task.
1106    ///
1107    /// Returns Some(response) if we handled the peek, or None to fall back to the Coordinator's
1108    /// peek sequencing.
1109    pub(crate) async fn try_frontend_peek(
1110        &mut self,
1111        portal_name: &str,
1112    ) -> Result<Option<ExecuteResponse>, AdapterError> {
1113        if self.enable_frontend_peek_sequencing {
1114            let session = self.session.as_mut().expect("SessionClient invariant");
1115            self.peek_client
1116                .try_frontend_peek_inner(portal_name, session)
1117                .await
1118        } else {
1119            Ok(None)
1120        }
1121    }
1122}
1123
1124impl Drop for SessionClient {
1125    fn drop(&mut self) {
1126        // We may not have a session if this client was dropped while awaiting
1127        // a response. In this case, it is the coordinator's responsibility to
1128        // terminate the session.
1129        if let Some(session) = self.session.take() {
1130            // We may not have a connection to the Coordinator if the session was
1131            // prematurely terminated, for example due to a timeout.
1132            if let Some(inner) = &self.inner {
1133                inner.send(Command::Terminate {
1134                    conn_id: session.conn_id().clone(),
1135                    tx: None,
1136                })
1137            }
1138        }
1139    }
1140}
1141
/// The kinds of timeouts that can be armed for a session.
///
/// Values are used both as the message delivered when a timeout fires and as the key under
/// which the timeout's task is tracked.
#[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
pub enum TimeoutType {
    /// The idle-in-transaction timeout for the transaction with the given id.
    IdleInTransactionSession(TransactionId),
}
1146
1147impl Display for TimeoutType {
1148    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1149        match self {
1150            TimeoutType::IdleInTransactionSession(txn_id) => {
1151                writeln!(f, "Idle in transaction session for transaction '{txn_id}'")
1152            }
1153        }
1154    }
1155}
1156
1157impl From<TimeoutType> for AdapterError {
1158    fn from(timeout: TimeoutType) -> Self {
1159        match timeout {
1160            TimeoutType::IdleInTransactionSession(_) => {
1161                AdapterError::IdleInTransactionSessionTimeout
1162            }
1163        }
1164    }
1165}
1166
/// Tracks armed timeouts and delivers the ones that fire over an internal channel.
struct Timeout {
    /// Sending half of the channel. It is cloned into every timeout task, and is also used to
    /// re-queue messages that were drained while removing a timeout.
    tx: mpsc::UnboundedSender<TimeoutType>,
    /// Receiving half of the channel, on which fired timeouts arrive.
    rx: mpsc::UnboundedReceiver<TimeoutType>,
    /// The task backing each armed timeout, keyed by the timeout it will deliver. As the
    /// `AbortOnDropHandle` type name indicates, dropping a handle aborts its task.
    active_timeouts: BTreeMap<TimeoutType, AbortOnDropHandle<()>>,
}
1172
1173impl Timeout {
1174    fn new() -> Self {
1175        let (tx, rx) = mpsc::unbounded_channel();
1176        Timeout {
1177            tx,
1178            rx,
1179            active_timeouts: BTreeMap::new(),
1180        }
1181    }
1182
1183    /// # Cancel safety
1184    ///
1185    /// This method is cancel safe. If `recv` is used as the event in a
1186    /// `tokio::select!` statement and some other branch
1187    /// completes first, it is guaranteed that no messages were received on this
1188    /// channel.
1189    ///
1190    /// <https://docs.rs/tokio/latest/tokio/sync/mpsc/struct.UnboundedReceiver.html#cancel-safety>
1191    async fn recv(&mut self) -> Option<TimeoutType> {
1192        self.rx.recv().await
1193    }
1194
1195    fn add_timeout(&mut self, timeout: TimeoutType, duration: Duration) {
1196        let tx = self.tx.clone();
1197        let timeout_key = timeout.clone();
1198        let handle = mz_ore::task::spawn(|| format!("{timeout_key}"), async move {
1199            tokio::time::sleep(duration).await;
1200            let _ = tx.send(timeout);
1201        })
1202        .abort_on_drop();
1203        self.active_timeouts.insert(timeout_key, handle);
1204    }
1205
1206    fn remove_timeout(&mut self, timeout: &TimeoutType) {
1207        self.active_timeouts.remove(timeout);
1208
1209        // Remove the timeout from the rx queue if it exists.
1210        let mut timeouts = Vec::new();
1211        while let Ok(pending_timeout) = self.rx.try_recv() {
1212            if timeout != &pending_timeout {
1213                timeouts.push(pending_timeout);
1214            }
1215        }
1216        for pending_timeout in timeouts {
1217            self.tx.send(pending_timeout).expect("rx is in this struct");
1218        }
1219    }
1220}
1221
/// A wrapper around a Stream of PeekResponseUnary that records when it sees the
/// first row data in the given histogram. It also keeps track of whether we have already observed
/// the end of the underlying stream.
///
/// The bookkeeping fields are only updated by [`RecordFirstRowStream::recv`]; reading from
/// `rows` directly bypasses them.
#[derive(Derivative)]
#[derivative(Debug)]
pub struct RecordFirstRowStream {
    /// The underlying stream of rows. Left out of the `Debug` output.
    #[derivative(Debug = "ignore")]
    pub rows: Box<dyn Stream<Item = PeekResponseUnary> + Unpin + Send + Sync>,
    /// The Instant when execution started.
    pub execute_started: Instant,
    /// The histogram where the time since `execute_started` will be recorded when we see the first
    /// row.
    pub time_to_first_row_seconds: Histogram,
    /// Whether a `PeekResponseUnary::Rows` message has been seen yet.
    pub saw_rows: bool,
    /// The Instant when we saw the first row, or `None` if no rows have been seen yet.
    pub recorded_first_row_instant: Option<Instant>,
    /// Whether we have already observed the end of the underlying stream.
    pub no_more_rows: bool,
}
1243
1244impl RecordFirstRowStream {
1245    /// Create a new [`RecordFirstRowStream`]
1246    pub fn new(
1247        rows: Box<dyn Stream<Item = PeekResponseUnary> + Unpin + Send + Sync>,
1248        execute_started: Instant,
1249        client: &SessionClient,
1250        instance_id: Option<ComputeInstanceId>,
1251        strategy: Option<StatementExecutionStrategy>,
1252    ) -> Self {
1253        let histogram = Self::histogram(client, instance_id, strategy);
1254        Self {
1255            rows,
1256            execute_started,
1257            time_to_first_row_seconds: histogram,
1258            saw_rows: false,
1259            recorded_first_row_instant: None,
1260            no_more_rows: false,
1261        }
1262    }
1263
1264    fn histogram(
1265        client: &SessionClient,
1266        instance_id: Option<ComputeInstanceId>,
1267        strategy: Option<StatementExecutionStrategy>,
1268    ) -> Histogram {
1269        let isolation_level = *client
1270            .session
1271            .as_ref()
1272            .expect("session invariant")
1273            .vars()
1274            .transaction_isolation();
1275        let instance = match instance_id {
1276            Some(i) => Cow::Owned(i.to_string()),
1277            None => Cow::Borrowed("none"),
1278        };
1279        let strategy = match strategy {
1280            Some(s) => s.name(),
1281            None => "none",
1282        };
1283
1284        client
1285            .inner()
1286            .metrics()
1287            .time_to_first_row_seconds
1288            .with_label_values(&[instance.as_ref(), isolation_level.as_str(), strategy])
1289    }
1290
1291    /// If you want to match [`RecordFirstRowStream`]'s logic but don't need
1292    /// a UnboundedReceiver, you can tell it when to record an observation.
1293    pub fn record(
1294        execute_started: Instant,
1295        client: &SessionClient,
1296        instance_id: Option<ComputeInstanceId>,
1297        strategy: Option<StatementExecutionStrategy>,
1298    ) {
1299        Self::histogram(client, instance_id, strategy)
1300            .observe(execute_started.elapsed().as_secs_f64());
1301    }
1302
1303    pub async fn recv(&mut self) -> Option<PeekResponseUnary> {
1304        let msg = self.rows.next().await;
1305        if !self.saw_rows && matches!(msg, Some(PeekResponseUnary::Rows(_))) {
1306            self.saw_rows = true;
1307            self.time_to_first_row_seconds
1308                .observe(self.execute_started.elapsed().as_secs_f64());
1309            self.recorded_first_row_instant = Some(Instant::now());
1310        }
1311        if msg.is_none() {
1312            self.no_more_rows = true;
1313        }
1314        msg
1315    }
1316}