1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Methods common to servers listening for TCP connections.

use std::future::Future;
use std::io;
use std::net::SocketAddr;
use std::pin::Pin;
use std::time::Duration;

use async_trait::async_trait;
use futures::stream::{Stream, StreamExt};
use mz_ore::error::ErrorExt;
use mz_ore::task;
use socket2::{SockRef, TcpKeepalive};
use tokio::net::{TcpListener, TcpStream};
use tokio::sync::oneshot;
use tokio_stream::wrappers::TcpListenerStream;
use tracing::{debug, error};

/// TCP keepalive parameters that [`serve`] installs on accepted connections.
///
/// The idle time and the probe interval match CockroachDB [0]; the retry
/// count matches the Linux default.
///
/// [0]: https://github.com/cockroachdb/cockroach/pull/14063
const KEEPALIVE: TcpKeepalive = {
    // Idle time before keepalive probing starts.
    const IDLE_TIME: Duration = Duration::from_secs(60);
    // Interval between consecutive keepalive probes.
    const PROBE_INTERVAL: Duration = Duration::from_secs(60);
    // Number of keepalive retries.
    const PROBE_RETRIES: u32 = 9;

    TcpKeepalive::new()
        .with_time(IDLE_TIME)
        .with_interval(PROBE_INTERVAL)
        .with_retries(PROBE_RETRIES)
};

/// A future that handles a connection.
///
/// The future is boxed and pinned so that handlers of different concrete
/// types share one type, and it is `Send`. It resolves to `Ok(())` or to an
/// `anyhow::Error`.
pub type ConnectionHandler = Pin<Box<dyn Future<Output = Result<(), anyhow::Error>> + Send>>;

/// A server handles incoming network connections.
///
/// Implementors are driven by [`serve`], which calls
/// [`Server::handle_connection`] once for every connection it accepts.
pub trait Server {
    /// The name of the connection handler for use in e.g. log messages.
    ///
    /// [`serve`] also embeds it in the name of the task it spawns for each
    /// connection (`handle_{NAME}_connection`).
    const NAME: &'static str;

    /// Handles a single connection.
    ///
    /// Takes ownership of `conn` and returns the future that processes it.
    /// [`serve`] awaits that future in a separate task and logs an `Err`
    /// result rather than propagating it.
    fn handle_connection(&self, conn: TcpStream) -> ConnectionHandler;
}

/// A stream of incoming connections.
///
/// This trait has no methods of its own; it only names the bound
/// `Stream<Item = io::Result<TcpStream>> + Unpin + Send`. The blanket
/// implementation below provides it for every type satisfying that bound.
pub trait ConnectionStream: Stream<Item = io::Result<TcpStream>> + Unpin + Send {}

impl<S: Stream<Item = io::Result<TcpStream>> + Unpin + Send> ConnectionStream for S {}

/// A handle to a listener created by [`listen`].
///
/// Dropping the handle closes the listener and terminates the stream of
/// incoming connections that [`listen`] returned alongside it.
#[derive(Debug)]
pub struct ListenerHandle {
    /// The address the listener is bound to, captured when it was created.
    local_addr: SocketAddr,
    /// Held only for its drop: [`listen`] wires the matching receiver into the
    /// connection stream via `take_until`, so dropping this sender ends the
    /// stream.
    _trigger: oneshot::Sender<()>,
}

impl ListenerHandle {
    /// Returns the local address to which the listener is bound.
    pub fn local_addr(&self) -> SocketAddr {
        self.local_addr
    }
}

/// Binds a TCP listener to `addr` and starts listening for connections.
///
/// On success, returns a [`ListenerHandle`] together with the stream of
/// connections accepted by the listener. Dropping the handle closes the
/// listener and terminates the stream.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] if binding to `addr` fails or if the
/// bound address cannot be retrieved from the listener.
pub async fn listen(
    addr: SocketAddr,
) -> Result<(ListenerHandle, Pin<Box<dyn ConnectionStream>>), io::Error> {
    let socket = TcpListener::bind(addr).await?;

    // The sender half is stored in the handle and the receiver half guards the
    // stream, so dropping the handle is what ends the stream.
    let (shutdown_tx, shutdown_rx) = oneshot::channel();
    let handle = ListenerHandle {
        local_addr: socket.local_addr()?,
        _trigger: shutdown_tx,
    };

    // TODO(benesch): replace `TcpListenerStream`s with `listener.incoming()` if
    // that is restored when the `Stream` trait stabilizes.
    let incoming: Pin<Box<dyn ConnectionStream>> =
        Box::pin(TcpListenerStream::new(socket).take_until(shutdown_rx));

    Ok((handle, incoming))
}

/// Serves incoming TCP connections from `conns` using `server`.
pub async fn serve<C, S>(mut conns: C, server: S)
where
    C: ConnectionStream,
    S: Server,
{
    let task_name = format!("handle_{}_connection", S::NAME);
    while let Some(conn) = conns.next().await {
        let conn = match conn {
            Ok(conn) => conn,
            Err(err) => {
                error!("error accepting connection: {}", err);
                continue;
            }
        };
        // Set TCP_NODELAY to disable tinygram prevention (Nagle's
        // algorithm), which forces a 40ms delay between each query
        // on linux. According to John Nagle [0], the true problem
        // is delayed acks, but disabling those is a receive-side
        // operation (TCP_QUICKACK), and we can't always control the
        // client. PostgreSQL sets TCP_NODELAY on both sides of its
        // sockets, so it seems sane to just do the same.
        //
        // If set_nodelay fails, it's a programming error, so panic.
        //
        // [0]: https://news.ycombinator.com/item?id=10608356
        conn.set_nodelay(true).expect("set_nodelay failed");
        // Enable TCP keepalives to avoid any idle connection timeouts that may
        // be enforced by networking devices between us and the client. Idle SQL
        // connections are expected--e.g., a `SUBSCRIBE` to a view containing
        // critical alerts will ideally be producing no data most of the time.
        if let Err(e) = SockRef::from(&conn).set_tcp_keepalive(&KEEPALIVE) {
            error!("failed enabling keepalive: {e}");
            continue;
        }
        let fut = server.handle_connection(conn);
        task::spawn(|| &task_name, async {
            if let Err(e) = fut.await {
                debug!(
                    "error handling connection in {}: {}",
                    S::NAME,
                    e.display_with_causes()
                );
            }
        });
    }
}

// NOTE(review): the `Server` trait as declared in this file has no async
// methods, so `#[async_trait]` looks unnecessary here -- confirm before
// removing it.
#[async_trait]
impl Server for mz_pgwire::Server {
    const NAME: &'static str = "pgwire";

    /// Delegates to the inherent `mz_pgwire::Server::handle_connection` and
    /// boxes the future it returns.
    fn handle_connection(&self, conn: TcpStream) -> ConnectionHandler {
        // Spell out the path to the inherent method: if its name or type ever
        // changes, this must not silently resolve to the trait method we are
        // defining, which would recurse forever.
        let handler = mz_pgwire::Server::handle_connection(self, conn);
        Box::pin(handler)
    }
}