1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Canonicalizes MFPs and performs common sub-expression elimination.
//!
//! This transform takes a sequence of Maps, Filters, and Projects and
//! canonicalizes it to a sequence like this:
//! | Filter
//! | Map
//! | Filter
//! | Project
//! The filters before the map are those that can be
//! pushed down through a map according the rules of
//! [crate::projection_pushdown::ProjectionPushdown.push_filters_through_map()].
//! TODO: It would be nice to canonicalize it to just an MFP, but currently
//! putting a filter after instead of before a Map can result in the loss of
//! nullability information.
//!
//! After canonicalizing, this transform looks at the Map-Filter-Project
//! subsequence and identifies common `ScalarExpr` expressions across and within
//! expressions that are arguments to the Map-Filter-Project. It reforms the
//! `Map-Filter-Project` subsequence to build each distinct expression at most
//! once and to re-use expressions instead of re-evaluating them.
//!
//! The re-use policy at the moment is severe and re-uses everything.
//! It may be worth considering relations of this if it results in more
//! busywork and less efficiency, but the wins can be substantial when
//! expressions re-use complex subexpressions.

use crate::TransformArgs;
use expr::canonicalize::canonicalize_predicates;
use expr::MirRelationExpr;

/// Canonicalizes MFPs and performs common sub-expression elimination.
#[derive(Debug)]
pub struct CanonicalizeMfp;

impl crate::Transform for CanonicalizeMfp {
    fn transform(
        &self,
        relation: &mut MirRelationExpr,
        _: TransformArgs,
    ) -> Result<(), crate::TransformError> {
        self.action(relation)
    }
}

impl CanonicalizeMfp {
    fn action(&self, relation: &mut MirRelationExpr) -> Result<(), crate::TransformError> {
        let mut mfp = expr::MapFilterProject::extract_non_errors_from_expr_mut(relation);
        relation.try_visit_mut_children(|e| self.action(e))?;
        mfp.optimize();
        if !mfp.is_identity() {
            let (map, mut filter, project) = mfp.as_map_filter_project();
            if !filter.is_empty() {
                // Push down the predicates that can be pushed down, removing
                // them from the mfp object to be optimized.
                let mut relation_type = relation.typ();
                for expr in map.iter() {
                    relation_type.column_types.push(expr.typ(&relation_type));
                }
                canonicalize_predicates(&mut filter, &relation_type);
                let all_errors = filter.iter().all(|p| p.is_literal_err());
                let (retained, pushdown) = crate::predicate_pushdown::PredicatePushdown::default()
                    .push_filters_through_map(&map, &mut filter, mfp.input_arity, all_errors);
                if !pushdown.is_empty() {
                    *relation = relation.take_dangerous().filter(pushdown);
                }
                mfp = expr::MapFilterProject::new(mfp.input_arity)
                    .map(map)
                    .filter(retained)
                    .project(project);
            }
            mfp.optimize();
            if !mfp.is_identity() {
                let (map, filter, project) = mfp.as_map_filter_project();
                let total_arity = mfp.input_arity + map.len();
                if !map.is_empty() {
                    *relation = relation.take_dangerous().map(map);
                }
                if !filter.is_empty() {
                    *relation = relation.take_dangerous().filter(filter);
                }
                if project.len() != total_arity || !project.iter().enumerate().all(|(i, o)| i == *o)
                {
                    *relation = relation.take_dangerous().project(project);
                }
            }
        }
        Ok(())
    }
}