1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

use std::collections::{BTreeSet, VecDeque};
use std::str;
use std::time::Instant;

use anyhow::{bail, Context};
use aws_sdk_kinesis::Client as KinesisClient;
use itertools::Itertools;

use crate::action::{ControlFlow, State};
use crate::parser::BuiltinCommand;

/// Verifies that the records currently in a Kinesis stream match the
/// command's input lines.
///
/// The stream name is `testdrive-<stream>-<seed>`, built from the `stream`
/// argument and `state.seed`. Each shard is read until Kinesis reports that
/// the reader has caught up (`millis_behind_latest == 0`). The records read,
/// decoded as UTF-8 strings, are then compared to the expected records as
/// sets, so neither ordering nor duplicates are checked.
///
/// # Errors
///
/// Returns an error if:
/// * the `stream` argument is missing or unexpected arguments remain,
/// * listing shards, obtaining a shard iterator, or fetching records fails,
/// * a response lacks its record list or a record lacks its data,
/// * a record's bytes are not valid UTF-8,
/// * reading has taken longer than `state.default_timeout`, or
/// * the set of records read differs from the expected set.
pub async fn run_verify(
    mut cmd: BuiltinCommand,
    state: &mut State,
) -> Result<ControlFlow, anyhow::Error> {
    let stream_prefix = cmd.args.string("stream")?;
    cmd.args.done()?;
    let expected_records: BTreeSet<String> = cmd.input.into_iter().collect();

    let stream_name = format!("testdrive-{}-{}", stream_prefix, state.seed);

    let mut shard_iterators = get_shard_iterators(&state.kinesis_client, &stream_name).await?;
    let timer = Instant::now();
    let mut records: BTreeSet<String> = BTreeSet::new();
    while let Some(iterator) = shard_iterators.pop_front() {
        // A `None` entry has no iterator to read from, so there is nothing
        // further to fetch for it; drop it from the queue.
        let iterator = match iterator {
            Some(iterator) => iterator,
            None => continue,
        };

        let output = state
            .kinesis_client
            .get_records()
            .shard_iterator(iterator)
            .send()
            .await
            .context("getting Kinesis records")?;

        // Report malformed responses as errors rather than panicking, in
        // keeping with how every other failure in this function is surfaced.
        let batch = output
            .records
            .context("Kinesis response did not include a record list")?;
        for record in batch {
            let data = record
                .data
                .context("Kinesis record did not include any data")?;
            records.insert(
                String::from_utf8(data.into_inner())
                    .context("converting Kinesis record bytes to string")?,
            );
        }

        match output.millis_behind_latest {
            // Test hack!
            // Assume all records have already been written to the stream. Once you've
            // caught up, you're done with that shard.
            // NOTE: this is not true for real Kinesis streams as data could still be
            // arriving.
            Some(0) => (),
            // Not caught up yet: queue the follow-up iterator so this shard is
            // revisited after the other pending shards.
            _ => shard_iterators.push_back(output.next_shard_iterator),
        };

        if timer.elapsed() > state.default_timeout {
            // Unable to read all Kinesis records in the default
            // time allotted -- fail.
            bail!("timeout reading from Kinesis stream: {}", stream_name);
        }
    }

    // For now, we don't guarantee any type of ordering!
    if records != expected_records {
        let missing_records = &expected_records - &records;
        let extra_records = &records - &expected_records;
        bail!(
            "kinesis records did not match:\nmissing:\n{}\nextra:\n{}",
            missing_records.iter().join("\n"),
            extra_records.iter().join("\n")
        );
    }

    Ok(ControlFlow::Continue)
}

/// Collects a shard iterator for every shard of `stream_name`.
///
/// Shard IDs come from `mz_kinesis_util::get_shard_ids`; for each one,
/// `mz_kinesis_util::get_shard_iterator` is awaited in turn and its result is
/// appended to the returned queue, preserving the order in which the shard
/// IDs were yielded.
///
/// # Errors
///
/// Fails with added context if listing the shards fails or if any individual
/// shard iterator request fails; the first failure aborts the whole call.
async fn get_shard_iterators(
    kinesis_client: &KinesisClient,
    stream_name: &str,
) -> Result<VecDeque<Option<String>>, anyhow::Error> {
    let shard_ids = mz_kinesis_util::get_shard_ids(kinesis_client, stream_name)
        .await
        .context("listing Kinesis shards")?;

    let mut pending = VecDeque::new();
    for id in shard_ids {
        let shard_iterator =
            mz_kinesis_util::get_shard_iterator(kinesis_client, stream_name, &id)
                .await
                .context("getting Kinesis shard iterator")?;
        pending.push_back(shard_iterator);
    }

    Ok(pending)
}