1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Defines the [`PropagatedNulls`] attribute.
//!
//! The attribute value is a vector of sets of column references, with
//! one set for each column of the associated `QueryBox`.
//!
//! If any of the references in a set is `NULL`, the corresponding
//! column will also be `NULL`.

use crate::query_model::attribute::core::{Attribute, AttributeKey};
use crate::query_model::model::{BoxId, BoxScalarExpr, ColumnReference, Model};
use std::collections::HashSet;

/// Key type for the propagated-nulls attribute.
///
/// This is a field-less marker; the type of the value stored under it
/// is declared by its `AttributeKey` implementation.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub(crate) struct PropagatedNulls;

impl AttributeKey for PropagatedNulls {
    /// One set of column references per column of the box: the
    /// references collected from that column's expression by
    /// `propagated_nulls`.
    type Value = Vec<HashSet<ColumnReference>>;
}

impl Attribute for PropagatedNulls {
    /// Returns the identifier string of this attribute.
    fn attr_id(&self) -> &'static str {
        "PropagatedNulls"
    }

    /// Returns the attributes this one depends on; the list is empty.
    fn requires(&self) -> Vec<Box<dyn Attribute>> {
        Vec::new()
    }

    /// Computes the attribute for the box identified by `box_id` and
    /// stores it in that box's attributes.
    ///
    /// The stored value has one entry per column of the box, in column
    /// order; each entry is the result of calling `propagated_nulls` on
    /// the column's expression.
    fn derive(&self, model: &mut Model, box_id: BoxId) {
        let mut target = model.get_mut_box(box_id);

        let per_column: Vec<_> = target
            .columns
            .iter()
            .map(|column| propagated_nulls(&column.expr))
            .collect();

        target.attributes.set::<PropagatedNulls>(per_column);
    }
}

/// Returns all columns that *must* be non-Null for the `expr` to be non-Null.
///
/// The expression is walked with `try_visit_pre_post`; every column
/// reference reached by the walk is added to the returned set. The walk
/// is not continued below function calls whose `propagates_nulls()` is
/// false, nor below `If` and `Aggregate` nodes.
///
/// NOTE(review): this relies on the pre-visit callback returning `None`
/// to mean "visit all children" and `Some(vec![])` to mean "visit no
/// children" -- confirm against `try_visit_pre_post`.
pub(crate) fn propagated_nulls(expr: &BoxScalarExpr) -> HashSet<ColumnReference> {
    use BoxScalarExpr::*;
    let mut columns = HashSet::new();

    expr.try_visit_pre_post(
        &mut |node| {
            match node {
                // A column reference is recorded in the result.
                ColumnReference(col) => {
                    columns.insert(col.clone());
                    None
                }
                BaseColumn(..) | Literal(..) | CallUnmaterializable(_) => None,
                // Null-propagating calls: keep descending into the arguments.
                CallUnary { func, .. } if func.propagates_nulls() => None,
                CallBinary { func, .. } if func.propagates_nulls() => None,
                CallVariadic { func, .. } if func.propagates_nulls() => None,
                // Any other call: nothing below it is collected.
                CallUnary { .. } | CallBinary { .. } | CallVariadic { .. } => Some(vec![]),
                // The branches of an if are computed lazily, but the condition is not.
                // However, nulls propagated to the condition are cast to false.
                // Consequently, we currently don't do anything here.
                // TODO: I think we might be able to use the intersection of the
                // results in the two branches.
                If { .. } => Some(vec![]),
                // TODO: the non-null requirements of an aggregate expression can
                // be pushed down to, for example, convert an outer join into an
                // inner join.
                Aggregate { .. } => Some(vec![]),
            }
        },
        &mut |_| (),
    );

    columns
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::query_model::model::*;
    use crate::query_model::test::util::*;

    /// Derives `PropagatedNulls` for a select box with three columns
    /// built over a four-column get box and checks the stored value.
    #[test]
    fn test_derivation() {
        let mut model = Model::default();

        // Input box with four int32 base columns.
        let get_id = model.make_box(qgm::get(0).into());
        {
            let mut get_box = model.get_mut_box(get_id);
            get_box.add_column(exp::base(0, typ::int32(true)));
            get_box.add_column(exp::base(1, typ::int32(true)));
            get_box.add_column(exp::base(2, typ::int32(true)));
            get_box.add_column(exp::base(3, typ::int32(true)));
        }

        // Select box ranging over the input box through one quantifier.
        let select_id = model.make_box(Select::default().into());
        let quantifier_id = model.make_quantifier(QuantifierType::FOREACH, get_id, select_id);
        {
            let mut select_box = model.get_mut_box(select_id);

            // C0: (#0 - #1) + (#2 - #3)
            let sum_of_differences = exp::add(
                exp::sub(exp::cref(quantifier_id, 0), exp::cref(quantifier_id, 1)),
                exp::sub(exp::cref(quantifier_id, 2), exp::cref(quantifier_id, 3)),
            );
            select_box.add_column(sum_of_differences);

            // C1: (#0 > #1) || (#2 > #3)
            let either_greater = exp::or(
                exp::gt(exp::cref(quantifier_id, 0), exp::cref(quantifier_id, 1)),
                exp::gt(exp::cref(quantifier_id, 2), exp::cref(quantifier_id, 3)),
            );
            select_box.add_column(either_greater);

            // C2: (#0 > #1) && !isnull(#1)
            let greater_and_not_null = exp::and(
                exp::gt(exp::cref(quantifier_id, 0), exp::cref(quantifier_id, 1)),
                exp::not(exp::isnull(exp::cref(quantifier_id, 1))),
            );
            select_box.add_column(greater_and_not_null);
        }

        PropagatedNulls.derive(&mut model, select_id);

        let select_box = model.get_box(select_id);
        let actual = select_box.attributes.get::<PropagatedNulls>();

        // C0 is expected to report all four inputs; C1 and C2 are
        // expected to report none.
        let expected = &vec![
            HashSet::from([
                cref(quantifier_id, 0),
                cref(quantifier_id, 1),
                cref(quantifier_id, 2),
                cref(quantifier_id, 3),
            ]),
            HashSet::new(),
            HashSet::new(),
        ];

        assert_eq!(actual, expected);
    }
}