Skip to main content

mz_testdrive/action/sql_server/
execute.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10use std::time::Duration;
11
12use anyhow::{Context, anyhow};
13use mz_ore::str::StrExt;
14
15use crate::action::{ControlFlow, State};
16use crate::parser::BuiltinCommand;
17
18/// Check if an error is a transient SQL Server error that should be retried.
19///
20/// Covers:
21/// - Deadlock victim (error 1205)
22/// - SQL Server Agent still starting (error 14258 inside 22836/22832) — the
23///   Agent is needed for CDC job creation and may not be ready even though the
24///   healthcheck (`SELECT 1`) already passes.
25/// - Database not yet available during startup (error 904)
26fn is_retryable_error(err: &anyhow::Error) -> bool {
27    // Use alternate Display format `{:#}` to get the full anyhow error chain,
28    // not just the outermost context message.
29    let msg = format!("{:#}", err);
30    (msg.contains("1205") && msg.contains("deadlock"))
31        || msg.contains("SQLServerAgent is starting")
32        || msg.contains("cannot be autostarted during server shutdown or startup")
33}
34
35/// Maximum number of retries for transient errors.
36const MAX_RETRIES: usize = 20;
37
38/// Fixed backoff duration between retries.
39const RETRY_BACKOFF: Duration = Duration::from_millis(100);
40
41async fn execute_with_retry(
42    client: &mut mz_sql_server_util::Client,
43    query: &str,
44) -> Result<(), anyhow::Error> {
45    for attempt in 0..=MAX_RETRIES {
46        match client
47            .simple_query(query.to_string())
48            .await
49            .context("executing SQL Server query")
50        {
51            Ok(_) => return Ok(()),
52            Err(err) if is_retryable_error(&err) && attempt < MAX_RETRIES => {
53                println!(
54                    ">> transient error (attempt {}/{}), retrying after {:?}: {:#}",
55                    attempt + 1,
56                    MAX_RETRIES,
57                    RETRY_BACKOFF,
58                    err,
59                );
60                tokio::time::sleep(RETRY_BACKOFF).await;
61            }
62            Err(err) => return Err(err),
63        }
64    }
65    unreachable!()
66}
67
68pub async fn run_execute(
69    mut cmd: BuiltinCommand,
70    state: &mut State,
71) -> Result<ControlFlow, anyhow::Error> {
72    let name = cmd.args.string("name")?;
73    let split_lines = cmd.args.opt_bool("split-lines")?.unwrap_or(true);
74    // When set, wraps the SQL in a Transaction and then drops it without
75    // calling commit or rollback. Used to test that Transaction::drop sends
76    // ROLLBACK correctly.
77    let abandon_txn = cmd.args.opt_bool("abandon-txn")?.unwrap_or(false);
78    cmd.args.done()?;
79
80    let client = state
81        .sql_server_clients
82        .get_mut(&name)
83        .ok_or_else(|| anyhow!("connection {} not found", name.quoted()))?;
84
85    if abandon_txn {
86        let mut txn = client
87            .transaction()
88            .await
89            .context("begin transaction for abandon-txn")?;
90        if split_lines {
91            for query in &cmd.input {
92                println!(">> (abandon-txn) {}", query);
93                txn.simple_query(query.to_string())
94                    .await
95                    .context("executing SQL Server query in transaction")?;
96            }
97        } else {
98            let query = cmd.input.join("\n");
99            println!(">> (abandon-txn) {}", query);
100            txn.simple_query(query)
101                .await
102                .context("executing SQL Server query in transaction")?;
103        }
104        // Transaction dropped here without commit or rollback.
105        // If Drop is correct, a ROLLBACK is sent via the channel.
106    } else {
107        if split_lines {
108            for query in &cmd.input {
109                println!(">> {}", query);
110                execute_with_retry(client, query).await?;
111            }
112        } else {
113            let query = cmd.input.join("\n");
114            println!(">> {}", query);
115            // execute uses prepared statements, which will fail for CREATE FUNCTION/PROCEDURE etc, see
116            // https://github.com/prisma/tiberius/issues/236, so using simple_query instead
117            execute_with_retry(client, &query).await?;
118        }
119    }
120
121    Ok(ControlFlow::Continue)
122}