Skip to main content

mz_transform/
normalize_lets.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Normalize the structure of `Let` and `LetRec` operators in expressions.
11//!
12//! Normalization happens in the context of "scopes", corresponding to
13//! 1. the expression's root and 2. each instance of a `LetRec` AST node.
14//!
15//! Within each scope,
16//! 1. Each expression is normalized to have all `Let` nodes at the root
17//! of the expression, in order of identifier.
18//! 2. Each expression assigns a contiguous block of identifiers.
19//!
20//! The transform may remove some `Let` and `Get` operators, and does not
21//! introduce any new operators.
22//!
23//! The module also publishes the function `renumber_bindings` which can
24//! be used to renumber bindings in an expression starting from a provided
25//! `IdGen`, which is used to prepare distinct expressions for inlining.
26
27use mz_expr::{MirRelationExpr, visit::Visit};
28use mz_ore::assert_none;
29use mz_ore::{id_gen::IdGen, stack::RecursionLimitError};
30use mz_repr::optimize::OptimizerFeatures;
31
32use crate::{TransformCtx, catch_unwind_optimize};
33
34pub use renumbering::renumber_bindings;
35
36/// Normalize `Let` and `LetRec` structure.
37pub fn normalize_lets(
38    expr: &mut MirRelationExpr,
39    features: &OptimizerFeatures,
40) -> Result<(), crate::TransformError> {
41    catch_unwind_optimize(|| NormalizeLets::new(false).action(expr, features))
42}
43
44/// Install replace certain `Get` operators with their `Let` value.
45#[derive(Debug)]
46pub struct NormalizeLets {
47    /// If `true`, inline MFPs around a Get.
48    ///
49    /// We want this value to be true for the NormalizeLets call that comes right
50    /// before [crate::join_implementation::JoinImplementation] runs because
51    /// - JoinImplementation cannot lift MFPs through a Let.
52    /// - JoinImplementation can't extract FilterCharacteristics through a Let.
53    ///
54    /// Generally, though, we prefer to be more conservative in our inlining in
55    /// order to be able to better detect CSEs.
56    pub inline_mfp: bool,
57}
58
59impl NormalizeLets {
60    /// Construct a new [`NormalizeLets`] instance with the given `inline_mfp`.
61    pub fn new(inline_mfp: bool) -> NormalizeLets {
62        NormalizeLets { inline_mfp }
63    }
64}
65
66impl crate::Transform for NormalizeLets {
67    fn name(&self) -> &'static str {
68        "NormalizeLets"
69    }
70
71    #[mz_ore::instrument(
72        target = "optimizer",
73        level = "debug",
74        fields(path.segment = "normalize_lets")
75    )]
76    fn actually_perform_transform(
77        &self,
78        relation: &mut MirRelationExpr,
79        ctx: &mut TransformCtx,
80    ) -> Result<(), crate::TransformError> {
81        let result = self.action(relation, ctx.features);
82        mz_repr::explain::trace_plan(&*relation);
83        result
84    }
85}
86
87impl NormalizeLets {
88    /// Normalize `Let` and `LetRec` bindings in `relation`.
89    ///
90    /// Mechanically, `action` first renumbers all bindings, erroring if any shadowing is encountered.
91    /// It then promotes all `Let` and `LetRec` expressions to the roots of their expressions, fusing
92    /// `Let` bindings into containing `LetRec` bindings, but leaving stacked `LetRec` bindings unfused to each
93    /// other (for reasons of correctness). It then considers potential inlining in each `LetRec` scope.
94    /// Lastly, it refreshes the types of each `Get` operator, erroring if any scalar types have changed
95    /// but updating nullability and keys.
96    ///
97    /// We then perform a final renumbering.
98    pub fn action(
99        &self,
100        relation: &mut MirRelationExpr,
101        features: &OptimizerFeatures,
102    ) -> Result<(), crate::TransformError> {
103        // Record whether the relation was initially recursive, to confirm that we do not introduce
104        // recursion to a non-recursive expression.
105        let was_recursive = relation.is_recursive();
106
107        // Renumber all bindings to ensure that identifier order matches binding order.
108        // In particular, as we use `BTreeMap` for binding order, we want to ensure that
109        // 1. Bindings within a `LetRec` are assigned increasing identifiers, and
110        // 2. Bindings across `LetRec`s are assigned identifiers in "visibility order", corresponding to an
111        // in-order traversal.
112        // TODO: More can and perhaps should be said about "visibility order" and how let promotion is correct.
113        renumbering::renumber_bindings(relation, &mut IdGen::default())?;
114
115        // Promote all `Let` and `LetRec` AST nodes to the roots.
116        // After this, all non-`LetRec` nodes contain no further `Let` or `LetRec` nodes,
117        // placing all `LetRec` nodes around the root, if not always in a single AST node.
118        let_motion::promote_let_rec(relation);
119        let_motion::assert_no_lets(relation);
120        let_motion::assert_letrec_major(relation);
121
122        // Inlining may violate letrec-major form.
123        inlining::inline_lets(relation, self.inline_mfp)?;
124
125        // Return to letrec-major form to refresh types.
126        let_motion::promote_let_rec(relation);
127        support::refresh_types(relation, features)?;
128
129        // Renumber bindings for good measure.
130        // Ideally we could skip when `action` is a no-op, but hard to thread that through at the moment.
131        renumbering::renumber_bindings(relation, &mut IdGen::default())?;
132
133        // A final bottom-up traversal to normalize the shape of nested LetRec blocks
134        relation.try_visit_mut_post(&mut |relation| -> Result<(), RecursionLimitError> {
135            // Move a non-recursive suffix of bindings from the end of the LetRec
136            // to the LetRec body.
137            // This is unsafe when applied to expressions which contain `ArrangeBy`,
138            // as if the extracted suffixes reference arrangements they will not be
139            // able to access those arrangements from outside the `LetRec` scope.
140            // It happens to work at the moment, so we don't touch it but should fix.
141            let bindings = let_motion::harvest_nonrec_suffix(relation)?;
142            if let MirRelationExpr::LetRec {
143                ids: _,
144                values: _,
145                limits: _,
146                body,
147            } = relation
148            {
149                for (id, value) in bindings.into_iter().rev() {
150                    **body = MirRelationExpr::Let {
151                        id,
152                        value: Box::new(value),
153                        body: Box::new(body.take_dangerous()),
154                    };
155                }
156            } else {
157                for (id, value) in bindings.into_iter().rev() {
158                    *relation = MirRelationExpr::Let {
159                        id,
160                        value: Box::new(value),
161                        body: Box::new(relation.take_dangerous()),
162                    };
163                }
164            }
165
166            // Extract `Let` prefixes from `LetRec`, to reveal their non-recursive nature.
167            // This assists with hoisting e.g. arrangements out of `LetRec` blocks, a thing
168            // we don't promise to do, but it can be helpful to do. This also exposes more
169            // AST nodes to non-`LetRec` analyses, which don't always have parity with `LetRec`.
170            let bindings = let_motion::harvest_non_recursive(relation);
171            for (id, (value, max_iter)) in bindings.into_iter().rev() {
172                assert_none!(max_iter);
173                *relation = MirRelationExpr::Let {
174                    id,
175                    value: Box::new(value),
176                    body: Box::new(relation.take_dangerous()),
177                };
178            }
179
180            Ok(())
181        })?;
182
183        if !was_recursive && relation.is_recursive() {
184            Err(crate::TransformError::Internal(
185                "NormalizeLets introduced LetRec to a LetRec-free expression".to_string(),
186            ))?;
187        }
188
189        Ok(())
190    }
191}
192
193// Support methods that are unlikely to be useful to other modules.
194mod support {
195
196    use std::collections::BTreeMap;
197
198    use itertools::Itertools;
199
200    use mz_expr::{Id, LetRecLimit, LocalId, MirRelationExpr};
201    use mz_repr::ReprRelationType;
202    use mz_repr::optimize::OptimizerFeatures;
203
204    pub(super) fn replace_bindings_from_map(
205        map: BTreeMap<LocalId, (MirRelationExpr, Option<LetRecLimit>)>,
206        ids: &mut Vec<LocalId>,
207        values: &mut Vec<MirRelationExpr>,
208        limits: &mut Vec<Option<LetRecLimit>>,
209    ) {
210        let (new_ids, new_values, new_limits) = map_to_3vecs(map);
211        *ids = new_ids;
212        *values = new_values;
213        *limits = new_limits;
214    }
215
216    pub(super) fn map_to_3vecs(
217        map: BTreeMap<LocalId, (MirRelationExpr, Option<LetRecLimit>)>,
218    ) -> (Vec<LocalId>, Vec<MirRelationExpr>, Vec<Option<LetRecLimit>>) {
219        let (new_ids, new_values_and_limits): (Vec<_>, Vec<_>) = map.into_iter().unzip();
220        let (new_values, new_limits) = new_values_and_limits.into_iter().unzip();
221        (new_ids, new_values, new_limits)
222    }
223
224    /// Logic mapped across each use of a `LocalId`.
225    pub(super) fn for_local_id<F>(expr: &MirRelationExpr, mut logic: F)
226    where
227        F: FnMut(LocalId),
228    {
229        expr.visit_pre(|expr| {
230            if let MirRelationExpr::Get {
231                id: Id::Local(i), ..
232            } = expr
233            {
234                logic(*i);
235            }
236        });
237    }
238
239    /// Populates `counts` with the number of uses of each local identifier in `expr`.
240    pub(super) fn count_local_id_uses(
241        expr: &MirRelationExpr,
242        counts: &mut std::collections::BTreeMap<LocalId, usize>,
243    ) {
244        for_local_id(expr, |i| *counts.entry(i).or_insert(0) += 1)
245    }
246
247    /// Visit `LetRec` stages and determine and update type information for `Get` nodes.
248    ///
249    /// This method errors if the scalar type information has changed (number of columns, or types).
250    /// It only refreshes the nullability and unique key information. As this information can regress,
251    /// we do not error if the type weakens, even though that may be something we want to look into.
252    ///
253    /// The method relies on the `analysis::{UniqueKeys, ReprRelationType}` analyses to improve its
254    /// type information for `LetRec` stages.
255    pub(super) fn refresh_types(
256        expr: &mut MirRelationExpr,
257        features: &OptimizerFeatures,
258    ) -> Result<(), crate::TransformError> {
259        // Assemble type information once for the whole expression.
260        use crate::analysis::{
261            DerivedBuilder, ReprRelationType as ReprRelationTypeAnalysis, UniqueKeys,
262        };
263        let mut builder = DerivedBuilder::new(features);
264        builder.require(ReprRelationTypeAnalysis);
265        builder.require(UniqueKeys);
266        let derived = builder.visit(expr);
267        let derived_view = derived.as_view();
268
269        // Collect id -> type mappings.
270        let mut types = BTreeMap::new();
271        let mut todo = vec![(&*expr, derived_view)];
272        while let Some((expr, view)) = todo.pop() {
273            let ids = match expr {
274                MirRelationExpr::Let { id, .. } => std::slice::from_ref(id),
275                MirRelationExpr::LetRec { ids, .. } => ids,
276                _ => &[],
277            };
278            if !ids.is_empty() {
279                // The `skip(1)` skips the `body` child, and is followed by binding children.
280                for (id, view) in ids.iter().rev().zip_eq(view.children_rev().skip(1)) {
281                    let repr_cols = view
282                        .value::<ReprRelationTypeAnalysis>()
283                        .expect("ReprRelationType required")
284                        .clone()
285                        .expect("Expression not well typed");
286                    let keys = view
287                        .value::<UniqueKeys>()
288                        .expect("UniqueKeys required")
289                        .clone();
290                    types.insert(*id, ReprRelationType::new(repr_cols).with_keys(keys));
291                }
292            }
293            todo.extend(expr.children().rev().zip_eq(view.children_rev()));
294        }
295
296        // Install the new types in each `Get`.
297        let mut todo = vec![&mut *expr];
298        while let Some(expr) = todo.pop() {
299            if let MirRelationExpr::Get {
300                id: Id::Local(i),
301                typ,
302                ..
303            } = expr
304            {
305                if let Some(new_type) = types.get(i) {
306                    // Assert that the column length has not changed.
307                    if new_type.column_types.len() != typ.column_types.len() {
308                        Err(crate::TransformError::Internal(format!(
309                            "column lengths do not match: {:?} v {:?}",
310                            new_type.column_types, typ.column_types
311                        )))?;
312                    }
313                    // Assert that the column types have not changed.
314                    // NB the ReprScalarType::eq ignores nullability (correctly!)
315                    // since record field nullability can legitimately differ between the stored
316                    // type and the analysis-recomputed type.
317                    // Note: We also want to ignore nullability changes at the top level, but
318                    // ReprColumnType has the derived Eq, so that wouldn't ignore nullability, hence
319                    // the `.zip_eq(...).all(...)` dance.
320                    if !new_type
321                        .column_types
322                        .iter()
323                        .zip_eq(typ.column_types.iter())
324                        .all(|(t1, t2)| t1.scalar_type == t2.scalar_type)
325                    {
326                        Err(crate::TransformError::Internal(format!(
327                            "scalar types do not match: {:?} v {:?}",
328                            new_type.column_types, typ.column_types
329                        )))?;
330                    }
331
332                    typ.clone_from(new_type);
333                } else {
334                    panic!("Type not found for: {:?}", i);
335                }
336            }
337            todo.extend(expr.children_mut());
338        }
339        Ok(())
340    }
341}
342
343mod let_motion {
344
345    use std::collections::{BTreeMap, BTreeSet};
346
347    use itertools::Itertools;
348    use mz_expr::{LetRecLimit, LocalId, MirRelationExpr};
349    use mz_ore::stack::RecursionLimitError;
350
351    use crate::normalize_lets::support::replace_bindings_from_map;
352
353    /// Promotes all `Let` and `LetRec` nodes to the roots of their expressions.
354    ///
355    /// We cannot (without further reasoning) fuse stacked `LetRec` stages, and instead we just promote
356    /// `LetRec` to the roots of their expressions (e.g. as children of another `LetRec` stage).
357    pub(crate) fn promote_let_rec(expr: &mut MirRelationExpr) {
358        // First, promote all `LetRec` nodes above all other nodes.
359        let mut worklist = vec![&mut *expr];
360        while let Some(mut expr) = worklist.pop() {
361            hoist_bindings(expr);
362            while let MirRelationExpr::LetRec { values, body, .. } = expr {
363                worklist.extend(values.iter_mut().rev());
364                expr = body;
365            }
366        }
367
368        // Harvest any potential `Let` nodes, via a post-order traversal.
369        post_order_harvest_lets(expr);
370    }
371
372    /// A stand in for the types of bindings we might encounter.
373    ///
374    /// As we dissolve various `Let` and `LetRec` expressions, a `Binding` will carry
375    /// the relevant information as we hoist it to the root of the expression.
376    enum Binding {
377        // Binding resulting from a `Let` expression.
378        Let(LocalId, MirRelationExpr),
379        // Bindings resulting from a `LetRec` expression.
380        LetRec(Vec<(LocalId, MirRelationExpr, Option<LetRecLimit>)>),
381    }
382
383    /// Hoist all exposed bindings to the root of the expression.
384    ///
385    /// A binding is "exposed" if the path from the root does not cross a LetRec binding.
386    /// After the call, the expression should be a linear sequence of bindings, where each
387    /// `Let` binding is of a let-free expression. There may be `LetRec` expressions in the
388    /// sequence, and their bindings will have hoisted bindings to their root, but not out
389    /// of the binding.
390    fn hoist_bindings(expr: &mut MirRelationExpr) {
391        // Bindings we have extracted but not fully processed.
392        let mut worklist = Vec::new();
393        // Bindings we have extracted and then fully processed.
394        let mut finished = Vec::new();
395
396        extract_bindings(expr, &mut worklist);
397        while let Some(mut bind) = worklist.pop() {
398            match &mut bind {
399                Binding::Let(_id, value) => {
400                    extract_bindings(value, &mut worklist);
401                }
402                Binding::LetRec(_binds) => {
403                    // nothing to do here; we cannot hoist letrec bindings and refine
404                    // them in an outer loop.
405                }
406            }
407            finished.push(bind);
408        }
409
410        // The worklist is empty and finished should contain only LetRec bindings and Let
411        // bindings with let-free expressions bound. We need to re-assemble them now in
412        // the correct order. The identifiers are "sequential", so we should be able to
413        // sort by them, with some care.
414
415        // We only extract non-empty letrec bindings, so it is safe to peek at the first.
416        finished.sort_by_key(|b| match b {
417            Binding::Let(id, _) => *id,
418            Binding::LetRec(binds) => binds[0].0,
419        });
420
421        // To match historical behavior we fuse let bindings into adjacent letrec bindings.
422        // We could alternately make each a singleton letrec binding (just, non-recursive).
423        // We don't yet have a strong opinion on which is most helpful and least harmful.
424        // In the absence of any letrec bindings, we form one to house the let bindings.
425        let mut ids = Vec::new();
426        let mut values = Vec::new();
427        let mut limits = Vec::new();
428        let mut compact = Vec::new();
429        for bind in finished {
430            match bind {
431                Binding::Let(id, value) => {
432                    ids.push(id);
433                    values.push(value);
434                    limits.push(None);
435                }
436                Binding::LetRec(binds) => {
437                    for (id, value, limit) in binds {
438                        ids.push(id);
439                        values.push(value);
440                        limits.push(limit);
441                    }
442                    compact.push((ids, values, limits));
443                    ids = Vec::new();
444                    values = Vec::new();
445                    limits = Vec::new();
446                }
447            }
448        }
449
450        // Remaining bindings can either be fused to the prior letrec, or put in their own.
451        if let Some((last_ids, last_vals, last_lims)) = compact.last_mut() {
452            last_ids.extend(ids);
453            last_vals.extend(values);
454            last_lims.extend(limits);
455        } else if !ids.is_empty() {
456            compact.push((ids, values, limits));
457        }
458
459        while let Some((ids, values, limits)) = compact.pop() {
460            *expr = MirRelationExpr::LetRec {
461                ids,
462                values,
463                limits,
464                body: Box::new(expr.take_dangerous()),
465            };
466        }
467    }
468
469    /// Extracts exposed bindings into `bindings`.
470    ///
471    /// After this call `expr` will contain no let or letrec bindings, though the bindings
472    /// it introduces to `bindings` may themselves contain such bindings (and they should
473    /// be further processed if the goal is to maximally extract let bindings).
474    fn extract_bindings(expr: &mut MirRelationExpr, bindings: &mut Vec<Binding>) {
475        let mut todo = vec![expr];
476        while let Some(expr) = todo.pop() {
477            match expr {
478                MirRelationExpr::Let { id, value, body } => {
479                    bindings.push(Binding::Let(*id, value.take_dangerous()));
480                    *expr = body.take_dangerous();
481                    todo.push(expr);
482                }
483                MirRelationExpr::LetRec {
484                    ids,
485                    values,
486                    limits,
487                    body,
488                } => {
489                    use itertools::Itertools;
490                    let binds: Vec<_> = ids
491                        .drain(..)
492                        .zip_eq(values.drain(..))
493                        .zip_eq(limits.drain(..))
494                        .map(|((i, v), l)| (i, v, l))
495                        .collect();
496                    if !binds.is_empty() {
497                        bindings.push(Binding::LetRec(binds));
498                    }
499                    *expr = body.take_dangerous();
500                    todo.push(expr);
501                }
502                _ => {
503                    todo.extend(expr.children_mut());
504                }
505            }
506        }
507    }
508
509    /// Performs a post-order traversal of the `LetRec` nodes at the root of an expression.
510    ///
511    /// The traversal is only of the `LetRec` nodes, for which fear of stack exhaustion is nominal.
512    fn post_order_harvest_lets(expr: &mut MirRelationExpr) {
513        if let MirRelationExpr::LetRec {
514            ids,
515            values,
516            limits,
517            body,
518        } = expr
519        {
520            // Only recursively descend through `LetRec` stages.
521            for value in values.iter_mut() {
522                post_order_harvest_lets(value);
523            }
524
525            let mut bindings = BTreeMap::new();
526            for ((id, mut value), max_iter) in ids
527                .drain(..)
528                .zip_eq(values.drain(..))
529                .zip_eq(limits.drain(..))
530            {
531                bindings.extend(harvest_non_recursive(&mut value));
532                bindings.insert(id, (value, max_iter));
533            }
534            bindings.extend(harvest_non_recursive(body));
535            replace_bindings_from_map(bindings, ids, values, limits);
536        }
537    }
538
539    /// Harvest any safe-to-lift non-recursive bindings from a `LetRec`
540    /// expression.
541    ///
542    /// At the moment, we reason that a binding can be lifted without changing
543    /// the output if both:
544    /// 1. It references no other non-lifted binding bound in `expr`,
545    /// 2. It is referenced by no prior non-lifted binding in `expr`.
546    ///
547    /// The rationale is that (1) ensures that the binding's value does not
548    /// change across iterations, and that (2) ensures that all observations of
549    /// the binding are after it assumes its first value, rather than when it
550    /// could be empty.
551    pub(crate) fn harvest_non_recursive(
552        expr: &mut MirRelationExpr,
553    ) -> BTreeMap<LocalId, (MirRelationExpr, Option<LetRecLimit>)> {
554        if let MirRelationExpr::LetRec {
555            ids,
556            values,
557            limits,
558            body,
559        } = expr
560        {
561            // Bindings to lift.
562            let mut lifted = BTreeMap::<LocalId, (MirRelationExpr, Option<LetRecLimit>)>::new();
563            // Bindings to retain.
564            let mut retained = BTreeMap::<LocalId, (MirRelationExpr, Option<LetRecLimit>)>::new();
565
566            // All remaining LocalIds bound by the enclosing LetRec.
567            let mut id_set = ids.iter().cloned().collect::<BTreeSet<LocalId>>();
568            // All LocalIds referenced up to (including) the current binding.
569            let mut cannot = BTreeSet::<LocalId>::new();
570            // The reference count of the current bindings.
571            let mut refcnt = BTreeMap::<LocalId, usize>::new();
572
573            for ((id, value), max_iter) in ids
574                .drain(..)
575                .zip_eq(values.drain(..))
576                .zip_eq(limits.drain(..))
577            {
578                refcnt.clear();
579                super::support::count_local_id_uses(&value, &mut refcnt);
580
581                // LocalIds that have already been referenced cannot be lifted.
582                cannot.extend(refcnt.keys().cloned());
583
584                // - The first conjunct excludes bindings that have already been
585                //   referenced.
586                // - The second conjunct excludes bindings that reference a
587                //   LocalId that either defined later or is a known retained.
588                if !cannot.contains(&id) && !refcnt.keys().any(|i| id_set.contains(i)) {
589                    lifted.insert(id, (value, None)); // Non-recursive bindings don't need a limit
590                    id_set.remove(&id);
591                } else {
592                    retained.insert(id, (value, max_iter));
593                }
594            }
595
596            replace_bindings_from_map(retained, ids, values, limits);
597            if values.is_empty() {
598                *expr = body.take_dangerous();
599            }
600
601            lifted
602        } else {
603            BTreeMap::new()
604        }
605    }
606
607    /// Harvest any safe-to-lower non-recursive suffix of binding from a
608    /// `LetRec` expression.
609    pub(crate) fn harvest_nonrec_suffix(
610        expr: &mut MirRelationExpr,
611    ) -> Result<BTreeMap<LocalId, MirRelationExpr>, RecursionLimitError> {
612        if let MirRelationExpr::LetRec {
613            ids,
614            values,
615            limits,
616            body,
617        } = expr
618        {
619            // Bindings to lower.
620            let mut lowered = BTreeMap::<LocalId, MirRelationExpr>::new();
621
622            let rec_ids = MirRelationExpr::recursive_ids(ids, values);
623
624            while ids.last().map(|id| !rec_ids.contains(id)).unwrap_or(false) {
625                let id = ids.pop().expect("non-empty ids");
626                let value = values.pop().expect("non-empty values");
627                let _limit = limits.pop().expect("non-empty limits");
628
629                lowered.insert(id, value); // Non-recursive bindings don't need a limit
630            }
631
632            if values.is_empty() {
633                *expr = body.take_dangerous();
634            }
635
636            Ok(lowered)
637        } else {
638            Ok(BTreeMap::new())
639        }
640    }
641
642    pub(crate) fn assert_no_lets(expr: &MirRelationExpr) {
643        expr.visit_pre(|expr| {
644            assert!(!matches!(expr, MirRelationExpr::Let { .. }));
645        });
646    }
647
648    /// Asserts that `expr` in "LetRec-major" form.
649    ///
650    /// This means `expr` is either `LetRec`-free, or a `LetRec` whose values and body are `LetRec`-major.
651    pub(crate) fn assert_letrec_major(expr: &MirRelationExpr) {
652        let mut todo = vec![expr];
653        while let Some(expr) = todo.pop() {
654            match expr {
655                MirRelationExpr::LetRec {
656                    ids: _,
657                    values,
658                    limits: _,
659                    body,
660                } => {
661                    todo.extend(values.iter());
662                    todo.push(body);
663                }
664                _ => {
665                    expr.visit_pre(|expr| {
666                        assert!(!matches!(expr, MirRelationExpr::LetRec { .. }));
667                    });
668                }
669            }
670        }
671    }
672}
673
674mod inlining {
675
676    use std::collections::BTreeMap;
677
678    use itertools::Itertools;
679    use mz_expr::{Id, LetRecLimit, LocalId, MirRelationExpr};
680
681    use crate::normalize_lets::support::replace_bindings_from_map;
682
683    pub(super) fn inline_lets(
684        expr: &mut MirRelationExpr,
685        inline_mfp: bool,
686    ) -> Result<(), crate::TransformError> {
687        let mut worklist = vec![&mut *expr];
688        while let Some(expr) = worklist.pop() {
689            inline_lets_core(expr, inline_mfp)?;
690            // We descend only into `LetRec` nodes, because `promote_let_rec` ensured that all
691            // `LetRec` nodes are clustered near the root. This means that we can get to all the
692            // `LetRec` nodes by just descending into `LetRec` nodes, as there can't be any other
693            // nodes between them.
694            if let MirRelationExpr::LetRec {
695                ids: _,
696                values,
697                limits: _,
698                body,
699            } = expr
700            {
701                worklist.extend(values);
702                worklist.push(body);
703            }
704        }
705        Ok(())
706    }
707
708    /// Considers inlining actions to perform for a sequence of bindings and a
709    /// following body.
710    ///
711    /// A let binding may be inlined only in subsequent bindings or in the body;
712    /// other bindings should not "immediately" observe the binding, as that
713    /// would be a change to the semantics of `LetRec`. For example, it would
714    /// not be correct to replace `C` with `A` in the definition of `B` here:
715    /// ```ignore
716    /// let A = ...;
717    /// let B = A - C;
718    /// let C = A;
719    /// ```
720    /// The explanation is that `B` should always be the difference between the
721    /// current and previous `A`, and that the substitution of `C` would instead
722    /// make it always zero, changing its definition.
723    ///
724    /// Here a let binding is proposed for inlining if any of the following is true:
725    ///  1. It has a single reference across all bindings and the body.
726    ///  2. It is a "sufficiently simple" `Get`, determined in part by the
727    ///     `inline_mfp` argument.
728    ///
729    /// We don't need extra checks for `limits`, because
730    ///  - `limits` is only relevant when a binding is directly used through a back edge (because
731    ///    that is where the rendering puts the `limits` check);
732    ///  - when a binding is directly used through a back edge, it can't be inlined anyway.
733    ///  - Also note that if a `LetRec` completely disappears at the end of `inline_lets_core`, then
734    ///    there was no recursion in it.
735    ///
736    /// The case of `Constant` binding is handled here (as opposed to
737    /// `FoldConstants`) in a somewhat limited manner (see database-issues#5346). Although a
738    /// bit weird, constants should also not be inlined into prior bindings as
739    /// this does change the behavior from one where the collection is initially
740    /// empty to one where it is always the constant.
741    ///
742    /// Having inlined bindings, many of them may now be dead (with no
743    /// transitive references from `body`). These can now be removed. They may
744    /// not be exactly those bindings that were inlineable, as we may not always
745    /// be able to apply inlining due to ordering (we cannot inline a binding
746    /// into one that is not strictly later).
747    pub(super) fn inline_lets_core(
748        expr: &mut MirRelationExpr,
749        inline_mfp: bool,
750    ) -> Result<(), crate::TransformError> {
751        if let MirRelationExpr::LetRec {
752            ids,
753            values,
754            limits,
755            body,
756        } = expr
757        {
758            // Count the number of uses of each local id across all expressions.
759            let mut counts = BTreeMap::new();
760            for value in values.iter() {
761                super::support::count_local_id_uses(value, &mut counts);
762            }
763            super::support::count_local_id_uses(body, &mut counts);
764
765            // Each binding can reach one of three positions on its inlineability:
766            //  1. The binding is used once and is available to be directly taken.
767            //  2. The binding is simple enough that it can just be cloned.
768            //  3. The binding is not available for inlining.
769            let mut inline_offers = BTreeMap::new();
770
771            // Each binding may require the expiration of prior inlining offers.
772            // This occurs when an inlined body references the prior iterate of a binding,
773            // and inlining it would change the meaning to be the current iterate.
774            // Roughly, all inlining offers expire just after the binding of the least
775            // identifier they contain that is greater than the bound identifier itself.
776            let mut expire_offers = BTreeMap::new();
777            let mut expired_offers = Vec::new();
778
779            // For each binding, inline `Get`s and then determine if *it* should be inlined.
780            // It is important that we do the substitution in-order and before reasoning
781            // about the inlineability of each binding, to ensure that our conclusion about
782            // the inlineability of a binding stays put. Specifically,
783            //   1. by going in order no substitution will increase the `Get`-count of an
784            //      identifier beyond one, as all in values with strictly greater identifiers.
785            //   2. by performing the substitution before reasoning, the structure of the value
786            //      as it would be substituted is fixed.
787            for ((id, mut expr), max_iter) in ids
788                .drain(..)
789                .zip_eq(values.drain(..))
790                .zip_eq(limits.drain(..))
791            {
792                // Substitute any appropriate prior let bindings.
793                inline_lets_helper(&mut expr, &mut inline_offers)?;
794
795                // Determine the first `id'` at which any inlining offer must expire.
796                // An inlining offer expires because it references an `id'` that is not yet bound,
797                // indicating a reference to the *prior* iterate of that identifier. Inlining the
798                // expression once `id'` becomes bound would advance the reference to be the
799                // *current* iterate of the identifier.
800                MirRelationExpr::collect_expirations(id, &expr, &mut expire_offers);
801
802                // Gets for `id` only occur in later expressions, so this should still be correct.
803                let num_gets = counts.get(&id).map(|x| *x).unwrap_or(0);
804                // Counts of zero or one lead to substitution; otherwise certain simple structures
805                // are cloned in to `Get` operators, and all others emitted as `Let` bindings.
806                if num_gets == 0 {
807                } else if num_gets == 1 {
808                    inline_offers.insert(id, InlineOffer::Take(Some(expr), max_iter));
809                } else {
810                    let clone_binding = {
811                        let stripped_value = if inline_mfp {
812                            mz_expr::MapFilterProject::extract_non_errors_from_expr(&expr).1
813                        } else {
814                            &expr
815                        };
816                        match stripped_value {
817                            MirRelationExpr::Get { .. } | MirRelationExpr::Constant { .. } => true,
818                            _ => false,
819                        }
820                    };
821
822                    if clone_binding {
823                        inline_offers.insert(id, InlineOffer::Clone(expr, max_iter));
824                    } else {
825                        inline_offers.insert(id, InlineOffer::Unavailable(expr, max_iter));
826                    }
827                }
828
829                // We must now discard any offers that reference `id`, as it is no longer correct
830                // to inline such an offer as it would have access to this iteration's binding of
831                // `id` rather than the prior iteration's binding of `id`.
832                expired_offers.extend(MirRelationExpr::do_expirations(
833                    id,
834                    &mut expire_offers,
835                    &mut inline_offers,
836                ));
837            }
838            // Complete the inlining in `body`.
839            inline_lets_helper(body, &mut inline_offers)?;
840
841            // Re-introduce expired offers for the subsequent logic that expects to see them all.
842            for (id, offer) in expired_offers.into_iter() {
843                inline_offers.insert(id, offer);
844            }
845
846            // We may now be able to discard some of `inline_offer` based on the remaining pattern of `Get` expressions.
847            // Starting from `body` and working backwards, we can activate bindings that are still required because we
848            // observe `Get` expressions referencing them. Any bindings not so identified can be dropped (including any
849            // that may be part of a cycle not reachable from `body`).
850            let mut let_bindings = BTreeMap::new();
851            let mut todo = Vec::new();
852            super::support::for_local_id(body, |id| todo.push(id));
853            while let Some(id) = todo.pop() {
854                if let Some(offer) = inline_offers.remove(&id) {
855                    let (value, max_iter) = match offer {
856                        InlineOffer::Take(value, max_iter) => (
857                            value.ok_or_else(|| {
858                                crate::TransformError::Internal(
859                                    "Needed value already taken".to_string(),
860                                )
861                            })?,
862                            max_iter,
863                        ),
864                        InlineOffer::Clone(value, max_iter) => (value, max_iter),
865                        InlineOffer::Unavailable(value, max_iter) => (value, max_iter),
866                    };
867                    super::support::for_local_id(&value, |id| todo.push(id));
868                    let_bindings.insert(id, (value, max_iter));
869                }
870            }
871
872            // If bindings remain we update the `LetRec`, otherwise we remove it.
873            if !let_bindings.is_empty() {
874                replace_bindings_from_map(let_bindings, ids, values, limits);
875            } else {
876                *expr = body.take_dangerous();
877            }
878        }
879        Ok(())
880    }
881
882    /// Possible states of let binding inlineability.
883    enum InlineOffer {
884        /// There is a unique reference to this value and given the option it should take this expression.
885        Take(Option<MirRelationExpr>, Option<LetRecLimit>),
886        /// Any reference to this value should clone this expression.
887        Clone(MirRelationExpr, Option<LetRecLimit>),
888        /// Any reference to this value should do no inlining of it.
889        Unavailable(MirRelationExpr, Option<LetRecLimit>),
890    }
891
892    /// Substitute `Get{id}` expressions for any proposed expressions.
893    ///
894    /// The proposed expressions can be proposed either to be taken or cloned.
895    fn inline_lets_helper(
896        expr: &mut MirRelationExpr,
897        inline_offer: &mut BTreeMap<LocalId, InlineOffer>,
898    ) -> Result<(), crate::TransformError> {
899        let mut worklist = vec![expr];
900        while let Some(expr) = worklist.pop() {
901            if let MirRelationExpr::Get {
902                id: Id::Local(id), ..
903            } = expr
904            {
905                if let Some(offer) = inline_offer.get_mut(id) {
906                    // It is important that we *not* continue to iterate
907                    // on the contents of `offer`, which has already been
908                    // maximally inlined. If we did, we could mis-inline
909                    // bindings into bodies that precede them, which would
910                    // change the semantics of the expression.
911                    match offer {
912                        InlineOffer::Take(value, _max_iter) => {
913                            *expr = value.take().ok_or_else(|| {
914                                crate::TransformError::Internal(format!(
915                                    "Value already taken for {:?}",
916                                    id
917                                ))
918                            })?;
919                        }
920                        InlineOffer::Clone(value, _max_iter) => {
921                            *expr = value.clone();
922                        }
923                        InlineOffer::Unavailable(_, _) => {
924                            // Do nothing.
925                        }
926                    }
927                } else {
928                    // Presumably a reference to an outer scope.
929                }
930            } else {
931                worklist.extend(expr.children_mut().rev());
932            }
933        }
934        Ok(())
935    }
936}
937
938mod renumbering {
939
940    use std::collections::BTreeMap;
941
942    use itertools::Itertools;
943    use mz_expr::{Id, LocalId, MirRelationExpr};
944    use mz_ore::id_gen::IdGen;
945
946    /// Re-assign an identifier to each `Let`.
947    ///
948    /// Under the assumption that `id_gen` produces identifiers in order, this process
949    /// maintains in-orderness of `LetRec` identifiers.
950    pub fn renumber_bindings(
951        relation: &mut MirRelationExpr,
952        id_gen: &mut IdGen,
953    ) -> Result<(), crate::TransformError> {
954        let mut renaming = BTreeMap::new();
955        determine(&*relation, &mut renaming, id_gen)?;
956        implement(relation, &renaming)?;
957        Ok(())
958    }
959
960    /// Performs an in-order traversal of the AST, assigning identifiers as it goes.
961    fn determine(
962        relation: &MirRelationExpr,
963        remap: &mut BTreeMap<LocalId, LocalId>,
964        id_gen: &mut IdGen,
965    ) -> Result<(), crate::TransformError> {
966        // The stack contains pending work as `Result<LocalId, &MirRelationExpr>`, where
967        // 1. 'Ok(id)` means the identifier `id` is ready for renumbering,
968        // 2. `Err(expr)` means that the expression `expr` needs to be further processed.
969        let mut stack: Vec<Result<LocalId, _>> = vec![Err(relation)];
970        while let Some(action) = stack.pop() {
971            match action {
972                Ok(id) => {
973                    if remap.contains_key(&id) {
974                        Err(crate::TransformError::Internal(format!(
975                            "Shadowing of let binding for {:?}",
976                            id
977                        )))?;
978                    } else {
979                        remap.insert(id, LocalId::new(id_gen.allocate_id()));
980                    }
981                }
982                Err(expr) => match expr {
983                    MirRelationExpr::Let { id, value, body } => {
984                        stack.push(Err(body));
985                        stack.push(Ok(*id));
986                        stack.push(Err(value));
987                    }
988                    MirRelationExpr::LetRec {
989                        ids,
990                        values,
991                        limits: _,
992                        body,
993                    } => {
994                        stack.push(Err(body));
995                        for (id, value) in ids.iter().rev().zip_eq(values.iter().rev()) {
996                            stack.push(Ok(*id));
997                            stack.push(Err(value));
998                        }
999                    }
1000                    _ => {
1001                        stack.extend(expr.children().rev().map(Err));
1002                    }
1003                },
1004            }
1005        }
1006        Ok(())
1007    }
1008
1009    fn implement(
1010        relation: &mut MirRelationExpr,
1011        remap: &BTreeMap<LocalId, LocalId>,
1012    ) -> Result<(), crate::TransformError> {
1013        let mut worklist = vec![relation];
1014        while let Some(expr) = worklist.pop() {
1015            match expr {
1016                MirRelationExpr::Let { id, .. } => {
1017                    *id = *remap
1018                        .get(id)
1019                        .ok_or(crate::TransformError::IdentifierMissing(*id))?;
1020                }
1021                MirRelationExpr::LetRec { ids, .. } => {
1022                    for id in ids.iter_mut() {
1023                        *id = *remap
1024                            .get(id)
1025                            .ok_or(crate::TransformError::IdentifierMissing(*id))?;
1026                    }
1027                }
1028                MirRelationExpr::Get {
1029                    id: Id::Local(id), ..
1030                } => {
1031                    *id = *remap
1032                        .get(id)
1033                        .ok_or(crate::TransformError::IdentifierMissing(*id))?;
1034                }
1035                _ => {
1036                    // Remapped identifiers not used in these patterns.
1037                }
1038            }
1039            // The order is not critical, but behave as a stack for clarity.
1040            worklist.extend(expr.children_mut().rev());
1041        }
1042        Ok(())
1043    }
1044}