mz_transform/normalize_lets.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Normalize the structure of `Let` and `LetRec` operators in expressions.
11//!
12//! Normalization happens in the context of "scopes", corresponding to
13//! 1. the expression's root and 2. each instance of a `LetRec` AST node.
14//!
15//! Within each scope,
16//! 1. Each expression is normalized to have all `Let` nodes at the root
17//! of the expression, in order of identifier.
18//! 2. Each expression assigns a contiguous block of identifiers.
19//!
20//! The transform may remove some `Let` and `Get` operators, and does not
21//! introduce any new operators.
22//!
23//! The module also publishes the function `renumber_bindings` which can
24//! be used to renumber bindings in an expression starting from a provided
25//! `IdGen`, which is used to prepare distinct expressions for inlining.
26
27use mz_expr::{MirRelationExpr, visit::Visit};
28use mz_ore::assert_none;
29use mz_ore::{id_gen::IdGen, stack::RecursionLimitError};
30use mz_repr::optimize::OptimizerFeatures;
31
32use crate::{TransformCtx, catch_unwind_optimize};
33
34pub use renumbering::renumber_bindings;
35
36/// Normalize `Let` and `LetRec` structure.
37pub fn normalize_lets(
38 expr: &mut MirRelationExpr,
39 features: &OptimizerFeatures,
40) -> Result<(), crate::TransformError> {
41 catch_unwind_optimize(|| NormalizeLets::new(false).action(expr, features))
42}
43
44/// Install replace certain `Get` operators with their `Let` value.
45#[derive(Debug)]
46pub struct NormalizeLets {
47 /// If `true`, inline MFPs around a Get.
48 ///
49 /// We want this value to be true for the NormalizeLets call that comes right
50 /// before [crate::join_implementation::JoinImplementation] runs because
51 /// - JoinImplementation cannot lift MFPs through a Let.
52 /// - JoinImplementation can't extract FilterCharacteristics through a Let.
53 ///
54 /// Generally, though, we prefer to be more conservative in our inlining in
55 /// order to be able to better detect CSEs.
56 pub inline_mfp: bool,
57}
58
59impl NormalizeLets {
60 /// Construct a new [`NormalizeLets`] instance with the given `inline_mfp`.
61 pub fn new(inline_mfp: bool) -> NormalizeLets {
62 NormalizeLets { inline_mfp }
63 }
64}
65
66impl crate::Transform for NormalizeLets {
67 fn name(&self) -> &'static str {
68 "NormalizeLets"
69 }
70
71 #[mz_ore::instrument(
72 target = "optimizer",
73 level = "debug",
74 fields(path.segment = "normalize_lets")
75 )]
76 fn actually_perform_transform(
77 &self,
78 relation: &mut MirRelationExpr,
79 ctx: &mut TransformCtx,
80 ) -> Result<(), crate::TransformError> {
81 let result = self.action(relation, ctx.features);
82 mz_repr::explain::trace_plan(&*relation);
83 result
84 }
85}
86
87impl NormalizeLets {
88 /// Normalize `Let` and `LetRec` bindings in `relation`.
89 ///
90 /// Mechanically, `action` first renumbers all bindings, erroring if any shadowing is encountered.
91 /// It then promotes all `Let` and `LetRec` expressions to the roots of their expressions, fusing
92 /// `Let` bindings into containing `LetRec` bindings, but leaving stacked `LetRec` bindings unfused to each
93 /// other (for reasons of correctness). It then considers potential inlining in each `LetRec` scope.
94 /// Lastly, it refreshes the types of each `Get` operator, erroring if any scalar types have changed
95 /// but updating nullability and keys.
96 ///
97 /// We then perform a final renumbering.
98 pub fn action(
99 &self,
100 relation: &mut MirRelationExpr,
101 features: &OptimizerFeatures,
102 ) -> Result<(), crate::TransformError> {
103 // Record whether the relation was initially recursive, to confirm that we do not introduce
104 // recursion to a non-recursive expression.
105 let was_recursive = relation.is_recursive();
106
107 // Renumber all bindings to ensure that identifier order matches binding order.
108 // In particular, as we use `BTreeMap` for binding order, we want to ensure that
109 // 1. Bindings within a `LetRec` are assigned increasing identifiers, and
110 // 2. Bindings across `LetRec`s are assigned identifiers in "visibility order", corresponding to an
111 // in-order traversal.
112 // TODO: More can and perhaps should be said about "visibility order" and how let promotion is correct.
113 renumbering::renumber_bindings(relation, &mut IdGen::default())?;
114
115 // Promote all `Let` and `LetRec` AST nodes to the roots.
116 // After this, all non-`LetRec` nodes contain no further `Let` or `LetRec` nodes,
117 // placing all `LetRec` nodes around the root, if not always in a single AST node.
118 let_motion::promote_let_rec(relation);
119 let_motion::assert_no_lets(relation);
120 let_motion::assert_letrec_major(relation);
121
122 // Inlining may violate letrec-major form.
123 inlining::inline_lets(relation, self.inline_mfp)?;
124
125 // Return to letrec-major form to refresh types.
126 let_motion::promote_let_rec(relation);
127 support::refresh_types(relation, features)?;
128
129 // Renumber bindings for good measure.
130 // Ideally we could skip when `action` is a no-op, but hard to thread that through at the moment.
131 renumbering::renumber_bindings(relation, &mut IdGen::default())?;
132
133 // A final bottom-up traversal to normalize the shape of nested LetRec blocks
134 relation.try_visit_mut_post(&mut |relation| -> Result<(), RecursionLimitError> {
135 // Move a non-recursive suffix of bindings from the end of the LetRec
136 // to the LetRec body.
137 // This is unsafe when applied to expressions which contain `ArrangeBy`,
138 // as if the extracted suffixes reference arrangements they will not be
139 // able to access those arrangements from outside the `LetRec` scope.
140 // It happens to work at the moment, so we don't touch it but should fix.
141 let bindings = let_motion::harvest_nonrec_suffix(relation)?;
142 if let MirRelationExpr::LetRec {
143 ids: _,
144 values: _,
145 limits: _,
146 body,
147 } = relation
148 {
149 for (id, value) in bindings.into_iter().rev() {
150 **body = MirRelationExpr::Let {
151 id,
152 value: Box::new(value),
153 body: Box::new(body.take_dangerous()),
154 };
155 }
156 } else {
157 for (id, value) in bindings.into_iter().rev() {
158 *relation = MirRelationExpr::Let {
159 id,
160 value: Box::new(value),
161 body: Box::new(relation.take_dangerous()),
162 };
163 }
164 }
165
166 // Extract `Let` prefixes from `LetRec`, to reveal their non-recursive nature.
167 // This assists with hoisting e.g. arrangements out of `LetRec` blocks, a thing
168 // we don't promise to do, but it can be helpful to do. This also exposes more
169 // AST nodes to non-`LetRec` analyses, which don't always have parity with `LetRec`.
170 let bindings = let_motion::harvest_non_recursive(relation);
171 for (id, (value, max_iter)) in bindings.into_iter().rev() {
172 assert_none!(max_iter);
173 *relation = MirRelationExpr::Let {
174 id,
175 value: Box::new(value),
176 body: Box::new(relation.take_dangerous()),
177 };
178 }
179
180 Ok(())
181 })?;
182
183 if !was_recursive && relation.is_recursive() {
184 Err(crate::TransformError::Internal(
185 "NormalizeLets introduced LetRec to a LetRec-free expression".to_string(),
186 ))?;
187 }
188
189 Ok(())
190 }
191}
192
193// Support methods that are unlikely to be useful to other modules.
194mod support {
195
196 use std::collections::BTreeMap;
197
198 use itertools::Itertools;
199
200 use mz_expr::{Id, LetRecLimit, LocalId, MirRelationExpr};
201 use mz_repr::ReprRelationType;
202 use mz_repr::optimize::OptimizerFeatures;
203
204 pub(super) fn replace_bindings_from_map(
205 map: BTreeMap<LocalId, (MirRelationExpr, Option<LetRecLimit>)>,
206 ids: &mut Vec<LocalId>,
207 values: &mut Vec<MirRelationExpr>,
208 limits: &mut Vec<Option<LetRecLimit>>,
209 ) {
210 let (new_ids, new_values, new_limits) = map_to_3vecs(map);
211 *ids = new_ids;
212 *values = new_values;
213 *limits = new_limits;
214 }
215
216 pub(super) fn map_to_3vecs(
217 map: BTreeMap<LocalId, (MirRelationExpr, Option<LetRecLimit>)>,
218 ) -> (Vec<LocalId>, Vec<MirRelationExpr>, Vec<Option<LetRecLimit>>) {
219 let (new_ids, new_values_and_limits): (Vec<_>, Vec<_>) = map.into_iter().unzip();
220 let (new_values, new_limits) = new_values_and_limits.into_iter().unzip();
221 (new_ids, new_values, new_limits)
222 }
223
224 /// Logic mapped across each use of a `LocalId`.
225 pub(super) fn for_local_id<F>(expr: &MirRelationExpr, mut logic: F)
226 where
227 F: FnMut(LocalId),
228 {
229 expr.visit_pre(|expr| {
230 if let MirRelationExpr::Get {
231 id: Id::Local(i), ..
232 } = expr
233 {
234 logic(*i);
235 }
236 });
237 }
238
239 /// Populates `counts` with the number of uses of each local identifier in `expr`.
240 pub(super) fn count_local_id_uses(
241 expr: &MirRelationExpr,
242 counts: &mut std::collections::BTreeMap<LocalId, usize>,
243 ) {
244 for_local_id(expr, |i| *counts.entry(i).or_insert(0) += 1)
245 }
246
247 /// Visit `LetRec` stages and determine and update type information for `Get` nodes.
248 ///
249 /// This method errors if the scalar type information has changed (number of columns, or types).
250 /// It only refreshes the nullability and unique key information. As this information can regress,
251 /// we do not error if the type weakens, even though that may be something we want to look into.
252 ///
253 /// The method relies on the `analysis::{UniqueKeys, ReprRelationType}` analyses to improve its
254 /// type information for `LetRec` stages.
255 pub(super) fn refresh_types(
256 expr: &mut MirRelationExpr,
257 features: &OptimizerFeatures,
258 ) -> Result<(), crate::TransformError> {
259 // Assemble type information once for the whole expression.
260 use crate::analysis::{
261 DerivedBuilder, ReprRelationType as ReprRelationTypeAnalysis, UniqueKeys,
262 };
263 let mut builder = DerivedBuilder::new(features);
264 builder.require(ReprRelationTypeAnalysis);
265 builder.require(UniqueKeys);
266 let derived = builder.visit(expr);
267 let derived_view = derived.as_view();
268
269 // Collect id -> type mappings.
270 let mut types = BTreeMap::new();
271 let mut todo = vec![(&*expr, derived_view)];
272 while let Some((expr, view)) = todo.pop() {
273 let ids = match expr {
274 MirRelationExpr::Let { id, .. } => std::slice::from_ref(id),
275 MirRelationExpr::LetRec { ids, .. } => ids,
276 _ => &[],
277 };
278 if !ids.is_empty() {
279 // The `skip(1)` skips the `body` child, and is followed by binding children.
280 for (id, view) in ids.iter().rev().zip_eq(view.children_rev().skip(1)) {
281 let repr_cols = view
282 .value::<ReprRelationTypeAnalysis>()
283 .expect("ReprRelationType required")
284 .clone()
285 .expect("Expression not well typed");
286 let keys = view
287 .value::<UniqueKeys>()
288 .expect("UniqueKeys required")
289 .clone();
290 types.insert(*id, ReprRelationType::new(repr_cols).with_keys(keys));
291 }
292 }
293 todo.extend(expr.children().rev().zip_eq(view.children_rev()));
294 }
295
296 // Install the new types in each `Get`.
297 let mut todo = vec![&mut *expr];
298 while let Some(expr) = todo.pop() {
299 if let MirRelationExpr::Get {
300 id: Id::Local(i),
301 typ,
302 ..
303 } = expr
304 {
305 if let Some(new_type) = types.get(i) {
306 // Assert that the column length has not changed.
307 if new_type.column_types.len() != typ.column_types.len() {
308 Err(crate::TransformError::Internal(format!(
309 "column lengths do not match: {:?} v {:?}",
310 new_type.column_types, typ.column_types
311 )))?;
312 }
313 // Assert that the column types have not changed.
314 // NB the ReprScalarType::eq ignores nullability (correctly!)
315 // since record field nullability can legitimately differ between the stored
316 // type and the analysis-recomputed type.
317 // Note: We also want to ignore nullability changes at the top level, but
318 // ReprColumnType has the derived Eq, so that wouldn't ignore nullability, hence
319 // the `.zip_eq(...).all(...)` dance.
320 if !new_type
321 .column_types
322 .iter()
323 .zip_eq(typ.column_types.iter())
324 .all(|(t1, t2)| t1.scalar_type == t2.scalar_type)
325 {
326 Err(crate::TransformError::Internal(format!(
327 "scalar types do not match: {:?} v {:?}",
328 new_type.column_types, typ.column_types
329 )))?;
330 }
331
332 typ.clone_from(new_type);
333 } else {
334 panic!("Type not found for: {:?}", i);
335 }
336 }
337 todo.extend(expr.children_mut());
338 }
339 Ok(())
340 }
341}
342
343mod let_motion {
344
345 use std::collections::{BTreeMap, BTreeSet};
346
347 use itertools::Itertools;
348 use mz_expr::{LetRecLimit, LocalId, MirRelationExpr};
349 use mz_ore::stack::RecursionLimitError;
350
351 use crate::normalize_lets::support::replace_bindings_from_map;
352
353 /// Promotes all `Let` and `LetRec` nodes to the roots of their expressions.
354 ///
355 /// We cannot (without further reasoning) fuse stacked `LetRec` stages, and instead we just promote
356 /// `LetRec` to the roots of their expressions (e.g. as children of another `LetRec` stage).
357 pub(crate) fn promote_let_rec(expr: &mut MirRelationExpr) {
358 // First, promote all `LetRec` nodes above all other nodes.
359 let mut worklist = vec![&mut *expr];
360 while let Some(mut expr) = worklist.pop() {
361 hoist_bindings(expr);
362 while let MirRelationExpr::LetRec { values, body, .. } = expr {
363 worklist.extend(values.iter_mut().rev());
364 expr = body;
365 }
366 }
367
368 // Harvest any potential `Let` nodes, via a post-order traversal.
369 post_order_harvest_lets(expr);
370 }
371
372 /// A stand in for the types of bindings we might encounter.
373 ///
374 /// As we dissolve various `Let` and `LetRec` expressions, a `Binding` will carry
375 /// the relevant information as we hoist it to the root of the expression.
376 enum Binding {
377 // Binding resulting from a `Let` expression.
378 Let(LocalId, MirRelationExpr),
379 // Bindings resulting from a `LetRec` expression.
380 LetRec(Vec<(LocalId, MirRelationExpr, Option<LetRecLimit>)>),
381 }
382
383 /// Hoist all exposed bindings to the root of the expression.
384 ///
385 /// A binding is "exposed" if the path from the root does not cross a LetRec binding.
386 /// After the call, the expression should be a linear sequence of bindings, where each
387 /// `Let` binding is of a let-free expression. There may be `LetRec` expressions in the
388 /// sequence, and their bindings will have hoisted bindings to their root, but not out
389 /// of the binding.
390 fn hoist_bindings(expr: &mut MirRelationExpr) {
391 // Bindings we have extracted but not fully processed.
392 let mut worklist = Vec::new();
393 // Bindings we have extracted and then fully processed.
394 let mut finished = Vec::new();
395
396 extract_bindings(expr, &mut worklist);
397 while let Some(mut bind) = worklist.pop() {
398 match &mut bind {
399 Binding::Let(_id, value) => {
400 extract_bindings(value, &mut worklist);
401 }
402 Binding::LetRec(_binds) => {
403 // nothing to do here; we cannot hoist letrec bindings and refine
404 // them in an outer loop.
405 }
406 }
407 finished.push(bind);
408 }
409
410 // The worklist is empty and finished should contain only LetRec bindings and Let
411 // bindings with let-free expressions bound. We need to re-assemble them now in
412 // the correct order. The identifiers are "sequential", so we should be able to
413 // sort by them, with some care.
414
415 // We only extract non-empty letrec bindings, so it is safe to peek at the first.
416 finished.sort_by_key(|b| match b {
417 Binding::Let(id, _) => *id,
418 Binding::LetRec(binds) => binds[0].0,
419 });
420
421 // To match historical behavior we fuse let bindings into adjacent letrec bindings.
422 // We could alternately make each a singleton letrec binding (just, non-recursive).
423 // We don't yet have a strong opinion on which is most helpful and least harmful.
424 // In the absence of any letrec bindings, we form one to house the let bindings.
425 let mut ids = Vec::new();
426 let mut values = Vec::new();
427 let mut limits = Vec::new();
428 let mut compact = Vec::new();
429 for bind in finished {
430 match bind {
431 Binding::Let(id, value) => {
432 ids.push(id);
433 values.push(value);
434 limits.push(None);
435 }
436 Binding::LetRec(binds) => {
437 for (id, value, limit) in binds {
438 ids.push(id);
439 values.push(value);
440 limits.push(limit);
441 }
442 compact.push((ids, values, limits));
443 ids = Vec::new();
444 values = Vec::new();
445 limits = Vec::new();
446 }
447 }
448 }
449
450 // Remaining bindings can either be fused to the prior letrec, or put in their own.
451 if let Some((last_ids, last_vals, last_lims)) = compact.last_mut() {
452 last_ids.extend(ids);
453 last_vals.extend(values);
454 last_lims.extend(limits);
455 } else if !ids.is_empty() {
456 compact.push((ids, values, limits));
457 }
458
459 while let Some((ids, values, limits)) = compact.pop() {
460 *expr = MirRelationExpr::LetRec {
461 ids,
462 values,
463 limits,
464 body: Box::new(expr.take_dangerous()),
465 };
466 }
467 }
468
469 /// Extracts exposed bindings into `bindings`.
470 ///
471 /// After this call `expr` will contain no let or letrec bindings, though the bindings
472 /// it introduces to `bindings` may themselves contain such bindings (and they should
473 /// be further processed if the goal is to maximally extract let bindings).
474 fn extract_bindings(expr: &mut MirRelationExpr, bindings: &mut Vec<Binding>) {
475 let mut todo = vec![expr];
476 while let Some(expr) = todo.pop() {
477 match expr {
478 MirRelationExpr::Let { id, value, body } => {
479 bindings.push(Binding::Let(*id, value.take_dangerous()));
480 *expr = body.take_dangerous();
481 todo.push(expr);
482 }
483 MirRelationExpr::LetRec {
484 ids,
485 values,
486 limits,
487 body,
488 } => {
489 use itertools::Itertools;
490 let binds: Vec<_> = ids
491 .drain(..)
492 .zip_eq(values.drain(..))
493 .zip_eq(limits.drain(..))
494 .map(|((i, v), l)| (i, v, l))
495 .collect();
496 if !binds.is_empty() {
497 bindings.push(Binding::LetRec(binds));
498 }
499 *expr = body.take_dangerous();
500 todo.push(expr);
501 }
502 _ => {
503 todo.extend(expr.children_mut());
504 }
505 }
506 }
507 }
508
509 /// Performs a post-order traversal of the `LetRec` nodes at the root of an expression.
510 ///
511 /// The traversal is only of the `LetRec` nodes, for which fear of stack exhaustion is nominal.
512 fn post_order_harvest_lets(expr: &mut MirRelationExpr) {
513 if let MirRelationExpr::LetRec {
514 ids,
515 values,
516 limits,
517 body,
518 } = expr
519 {
520 // Only recursively descend through `LetRec` stages.
521 for value in values.iter_mut() {
522 post_order_harvest_lets(value);
523 }
524
525 let mut bindings = BTreeMap::new();
526 for ((id, mut value), max_iter) in ids
527 .drain(..)
528 .zip_eq(values.drain(..))
529 .zip_eq(limits.drain(..))
530 {
531 bindings.extend(harvest_non_recursive(&mut value));
532 bindings.insert(id, (value, max_iter));
533 }
534 bindings.extend(harvest_non_recursive(body));
535 replace_bindings_from_map(bindings, ids, values, limits);
536 }
537 }
538
539 /// Harvest any safe-to-lift non-recursive bindings from a `LetRec`
540 /// expression.
541 ///
542 /// At the moment, we reason that a binding can be lifted without changing
543 /// the output if both:
544 /// 1. It references no other non-lifted binding bound in `expr`,
545 /// 2. It is referenced by no prior non-lifted binding in `expr`.
546 ///
547 /// The rationale is that (1) ensures that the binding's value does not
548 /// change across iterations, and that (2) ensures that all observations of
549 /// the binding are after it assumes its first value, rather than when it
550 /// could be empty.
551 pub(crate) fn harvest_non_recursive(
552 expr: &mut MirRelationExpr,
553 ) -> BTreeMap<LocalId, (MirRelationExpr, Option<LetRecLimit>)> {
554 if let MirRelationExpr::LetRec {
555 ids,
556 values,
557 limits,
558 body,
559 } = expr
560 {
561 // Bindings to lift.
562 let mut lifted = BTreeMap::<LocalId, (MirRelationExpr, Option<LetRecLimit>)>::new();
563 // Bindings to retain.
564 let mut retained = BTreeMap::<LocalId, (MirRelationExpr, Option<LetRecLimit>)>::new();
565
566 // All remaining LocalIds bound by the enclosing LetRec.
567 let mut id_set = ids.iter().cloned().collect::<BTreeSet<LocalId>>();
568 // All LocalIds referenced up to (including) the current binding.
569 let mut cannot = BTreeSet::<LocalId>::new();
570 // The reference count of the current bindings.
571 let mut refcnt = BTreeMap::<LocalId, usize>::new();
572
573 for ((id, value), max_iter) in ids
574 .drain(..)
575 .zip_eq(values.drain(..))
576 .zip_eq(limits.drain(..))
577 {
578 refcnt.clear();
579 super::support::count_local_id_uses(&value, &mut refcnt);
580
581 // LocalIds that have already been referenced cannot be lifted.
582 cannot.extend(refcnt.keys().cloned());
583
584 // - The first conjunct excludes bindings that have already been
585 // referenced.
586 // - The second conjunct excludes bindings that reference a
587 // LocalId that either defined later or is a known retained.
588 if !cannot.contains(&id) && !refcnt.keys().any(|i| id_set.contains(i)) {
589 lifted.insert(id, (value, None)); // Non-recursive bindings don't need a limit
590 id_set.remove(&id);
591 } else {
592 retained.insert(id, (value, max_iter));
593 }
594 }
595
596 replace_bindings_from_map(retained, ids, values, limits);
597 if values.is_empty() {
598 *expr = body.take_dangerous();
599 }
600
601 lifted
602 } else {
603 BTreeMap::new()
604 }
605 }
606
607 /// Harvest any safe-to-lower non-recursive suffix of binding from a
608 /// `LetRec` expression.
609 pub(crate) fn harvest_nonrec_suffix(
610 expr: &mut MirRelationExpr,
611 ) -> Result<BTreeMap<LocalId, MirRelationExpr>, RecursionLimitError> {
612 if let MirRelationExpr::LetRec {
613 ids,
614 values,
615 limits,
616 body,
617 } = expr
618 {
619 // Bindings to lower.
620 let mut lowered = BTreeMap::<LocalId, MirRelationExpr>::new();
621
622 let rec_ids = MirRelationExpr::recursive_ids(ids, values);
623
624 while ids.last().map(|id| !rec_ids.contains(id)).unwrap_or(false) {
625 let id = ids.pop().expect("non-empty ids");
626 let value = values.pop().expect("non-empty values");
627 let _limit = limits.pop().expect("non-empty limits");
628
629 lowered.insert(id, value); // Non-recursive bindings don't need a limit
630 }
631
632 if values.is_empty() {
633 *expr = body.take_dangerous();
634 }
635
636 Ok(lowered)
637 } else {
638 Ok(BTreeMap::new())
639 }
640 }
641
642 pub(crate) fn assert_no_lets(expr: &MirRelationExpr) {
643 expr.visit_pre(|expr| {
644 assert!(!matches!(expr, MirRelationExpr::Let { .. }));
645 });
646 }
647
648 /// Asserts that `expr` in "LetRec-major" form.
649 ///
650 /// This means `expr` is either `LetRec`-free, or a `LetRec` whose values and body are `LetRec`-major.
651 pub(crate) fn assert_letrec_major(expr: &MirRelationExpr) {
652 let mut todo = vec![expr];
653 while let Some(expr) = todo.pop() {
654 match expr {
655 MirRelationExpr::LetRec {
656 ids: _,
657 values,
658 limits: _,
659 body,
660 } => {
661 todo.extend(values.iter());
662 todo.push(body);
663 }
664 _ => {
665 expr.visit_pre(|expr| {
666 assert!(!matches!(expr, MirRelationExpr::LetRec { .. }));
667 });
668 }
669 }
670 }
671 }
672}
673
674mod inlining {
675
676 use std::collections::BTreeMap;
677
678 use itertools::Itertools;
679 use mz_expr::{Id, LetRecLimit, LocalId, MirRelationExpr};
680
681 use crate::normalize_lets::support::replace_bindings_from_map;
682
683 pub(super) fn inline_lets(
684 expr: &mut MirRelationExpr,
685 inline_mfp: bool,
686 ) -> Result<(), crate::TransformError> {
687 let mut worklist = vec![&mut *expr];
688 while let Some(expr) = worklist.pop() {
689 inline_lets_core(expr, inline_mfp)?;
690 // We descend only into `LetRec` nodes, because `promote_let_rec` ensured that all
691 // `LetRec` nodes are clustered near the root. This means that we can get to all the
692 // `LetRec` nodes by just descending into `LetRec` nodes, as there can't be any other
693 // nodes between them.
694 if let MirRelationExpr::LetRec {
695 ids: _,
696 values,
697 limits: _,
698 body,
699 } = expr
700 {
701 worklist.extend(values);
702 worklist.push(body);
703 }
704 }
705 Ok(())
706 }
707
708 /// Considers inlining actions to perform for a sequence of bindings and a
709 /// following body.
710 ///
711 /// A let binding may be inlined only in subsequent bindings or in the body;
712 /// other bindings should not "immediately" observe the binding, as that
713 /// would be a change to the semantics of `LetRec`. For example, it would
714 /// not be correct to replace `C` with `A` in the definition of `B` here:
715 /// ```ignore
716 /// let A = ...;
717 /// let B = A - C;
718 /// let C = A;
719 /// ```
720 /// The explanation is that `B` should always be the difference between the
721 /// current and previous `A`, and that the substitution of `C` would instead
722 /// make it always zero, changing its definition.
723 ///
724 /// Here a let binding is proposed for inlining if any of the following is true:
725 /// 1. It has a single reference across all bindings and the body.
726 /// 2. It is a "sufficiently simple" `Get`, determined in part by the
727 /// `inline_mfp` argument.
728 ///
729 /// We don't need extra checks for `limits`, because
730 /// - `limits` is only relevant when a binding is directly used through a back edge (because
731 /// that is where the rendering puts the `limits` check);
732 /// - when a binding is directly used through a back edge, it can't be inlined anyway.
733 /// - Also note that if a `LetRec` completely disappears at the end of `inline_lets_core`, then
734 /// there was no recursion in it.
735 ///
736 /// The case of `Constant` binding is handled here (as opposed to
737 /// `FoldConstants`) in a somewhat limited manner (see database-issues#5346). Although a
738 /// bit weird, constants should also not be inlined into prior bindings as
739 /// this does change the behavior from one where the collection is initially
740 /// empty to one where it is always the constant.
741 ///
742 /// Having inlined bindings, many of them may now be dead (with no
743 /// transitive references from `body`). These can now be removed. They may
744 /// not be exactly those bindings that were inlineable, as we may not always
745 /// be able to apply inlining due to ordering (we cannot inline a binding
746 /// into one that is not strictly later).
747 pub(super) fn inline_lets_core(
748 expr: &mut MirRelationExpr,
749 inline_mfp: bool,
750 ) -> Result<(), crate::TransformError> {
751 if let MirRelationExpr::LetRec {
752 ids,
753 values,
754 limits,
755 body,
756 } = expr
757 {
758 // Count the number of uses of each local id across all expressions.
759 let mut counts = BTreeMap::new();
760 for value in values.iter() {
761 super::support::count_local_id_uses(value, &mut counts);
762 }
763 super::support::count_local_id_uses(body, &mut counts);
764
765 // Each binding can reach one of three positions on its inlineability:
766 // 1. The binding is used once and is available to be directly taken.
767 // 2. The binding is simple enough that it can just be cloned.
768 // 3. The binding is not available for inlining.
769 let mut inline_offers = BTreeMap::new();
770
771 // Each binding may require the expiration of prior inlining offers.
772 // This occurs when an inlined body references the prior iterate of a binding,
773 // and inlining it would change the meaning to be the current iterate.
774 // Roughly, all inlining offers expire just after the binding of the least
775 // identifier they contain that is greater than the bound identifier itself.
776 let mut expire_offers = BTreeMap::new();
777 let mut expired_offers = Vec::new();
778
779 // For each binding, inline `Get`s and then determine if *it* should be inlined.
780 // It is important that we do the substitution in-order and before reasoning
781 // about the inlineability of each binding, to ensure that our conclusion about
782 // the inlineability of a binding stays put. Specifically,
783 // 1. by going in order no substitution will increase the `Get`-count of an
784 // identifier beyond one, as all in values with strictly greater identifiers.
785 // 2. by performing the substitution before reasoning, the structure of the value
786 // as it would be substituted is fixed.
787 for ((id, mut expr), max_iter) in ids
788 .drain(..)
789 .zip_eq(values.drain(..))
790 .zip_eq(limits.drain(..))
791 {
792 // Substitute any appropriate prior let bindings.
793 inline_lets_helper(&mut expr, &mut inline_offers)?;
794
795 // Determine the first `id'` at which any inlining offer must expire.
796 // An inlining offer expires because it references an `id'` that is not yet bound,
797 // indicating a reference to the *prior* iterate of that identifier. Inlining the
798 // expression once `id'` becomes bound would advance the reference to be the
799 // *current* iterate of the identifier.
800 MirRelationExpr::collect_expirations(id, &expr, &mut expire_offers);
801
802 // Gets for `id` only occur in later expressions, so this should still be correct.
803 let num_gets = counts.get(&id).map(|x| *x).unwrap_or(0);
804 // Counts of zero or one lead to substitution; otherwise certain simple structures
805 // are cloned in to `Get` operators, and all others emitted as `Let` bindings.
806 if num_gets == 0 {
807 } else if num_gets == 1 {
808 inline_offers.insert(id, InlineOffer::Take(Some(expr), max_iter));
809 } else {
810 let clone_binding = {
811 let stripped_value = if inline_mfp {
812 mz_expr::MapFilterProject::extract_non_errors_from_expr(&expr).1
813 } else {
814 &expr
815 };
816 match stripped_value {
817 MirRelationExpr::Get { .. } | MirRelationExpr::Constant { .. } => true,
818 _ => false,
819 }
820 };
821
822 if clone_binding {
823 inline_offers.insert(id, InlineOffer::Clone(expr, max_iter));
824 } else {
825 inline_offers.insert(id, InlineOffer::Unavailable(expr, max_iter));
826 }
827 }
828
829 // We must now discard any offers that reference `id`, as it is no longer correct
830 // to inline such an offer as it would have access to this iteration's binding of
831 // `id` rather than the prior iteration's binding of `id`.
832 expired_offers.extend(MirRelationExpr::do_expirations(
833 id,
834 &mut expire_offers,
835 &mut inline_offers,
836 ));
837 }
838 // Complete the inlining in `body`.
839 inline_lets_helper(body, &mut inline_offers)?;
840
841 // Re-introduce expired offers for the subsequent logic that expects to see them all.
842 for (id, offer) in expired_offers.into_iter() {
843 inline_offers.insert(id, offer);
844 }
845
846 // We may now be able to discard some of `inline_offer` based on the remaining pattern of `Get` expressions.
847 // Starting from `body` and working backwards, we can activate bindings that are still required because we
848 // observe `Get` expressions referencing them. Any bindings not so identified can be dropped (including any
849 // that may be part of a cycle not reachable from `body`).
850 let mut let_bindings = BTreeMap::new();
851 let mut todo = Vec::new();
852 super::support::for_local_id(body, |id| todo.push(id));
853 while let Some(id) = todo.pop() {
854 if let Some(offer) = inline_offers.remove(&id) {
855 let (value, max_iter) = match offer {
856 InlineOffer::Take(value, max_iter) => (
857 value.ok_or_else(|| {
858 crate::TransformError::Internal(
859 "Needed value already taken".to_string(),
860 )
861 })?,
862 max_iter,
863 ),
864 InlineOffer::Clone(value, max_iter) => (value, max_iter),
865 InlineOffer::Unavailable(value, max_iter) => (value, max_iter),
866 };
867 super::support::for_local_id(&value, |id| todo.push(id));
868 let_bindings.insert(id, (value, max_iter));
869 }
870 }
871
872 // If bindings remain we update the `LetRec`, otherwise we remove it.
873 if !let_bindings.is_empty() {
874 replace_bindings_from_map(let_bindings, ids, values, limits);
875 } else {
876 *expr = body.take_dangerous();
877 }
878 }
879 Ok(())
880 }
881
882 /// Possible states of let binding inlineability.
883 enum InlineOffer {
884 /// There is a unique reference to this value and given the option it should take this expression.
885 Take(Option<MirRelationExpr>, Option<LetRecLimit>),
886 /// Any reference to this value should clone this expression.
887 Clone(MirRelationExpr, Option<LetRecLimit>),
888 /// Any reference to this value should do no inlining of it.
889 Unavailable(MirRelationExpr, Option<LetRecLimit>),
890 }
891
892 /// Substitute `Get{id}` expressions for any proposed expressions.
893 ///
894 /// The proposed expressions can be proposed either to be taken or cloned.
895 fn inline_lets_helper(
896 expr: &mut MirRelationExpr,
897 inline_offer: &mut BTreeMap<LocalId, InlineOffer>,
898 ) -> Result<(), crate::TransformError> {
899 let mut worklist = vec![expr];
900 while let Some(expr) = worklist.pop() {
901 if let MirRelationExpr::Get {
902 id: Id::Local(id), ..
903 } = expr
904 {
905 if let Some(offer) = inline_offer.get_mut(id) {
906 // It is important that we *not* continue to iterate
907 // on the contents of `offer`, which has already been
908 // maximally inlined. If we did, we could mis-inline
909 // bindings into bodies that precede them, which would
910 // change the semantics of the expression.
911 match offer {
912 InlineOffer::Take(value, _max_iter) => {
913 *expr = value.take().ok_or_else(|| {
914 crate::TransformError::Internal(format!(
915 "Value already taken for {:?}",
916 id
917 ))
918 })?;
919 }
920 InlineOffer::Clone(value, _max_iter) => {
921 *expr = value.clone();
922 }
923 InlineOffer::Unavailable(_, _) => {
924 // Do nothing.
925 }
926 }
927 } else {
928 // Presumably a reference to an outer scope.
929 }
930 } else {
931 worklist.extend(expr.children_mut().rev());
932 }
933 }
934 Ok(())
935 }
936}
937
938mod renumbering {
939
940 use std::collections::BTreeMap;
941
942 use itertools::Itertools;
943 use mz_expr::{Id, LocalId, MirRelationExpr};
944 use mz_ore::id_gen::IdGen;
945
946 /// Re-assign an identifier to each `Let`.
947 ///
948 /// Under the assumption that `id_gen` produces identifiers in order, this process
949 /// maintains in-orderness of `LetRec` identifiers.
950 pub fn renumber_bindings(
951 relation: &mut MirRelationExpr,
952 id_gen: &mut IdGen,
953 ) -> Result<(), crate::TransformError> {
954 let mut renaming = BTreeMap::new();
955 determine(&*relation, &mut renaming, id_gen)?;
956 implement(relation, &renaming)?;
957 Ok(())
958 }
959
960 /// Performs an in-order traversal of the AST, assigning identifiers as it goes.
961 fn determine(
962 relation: &MirRelationExpr,
963 remap: &mut BTreeMap<LocalId, LocalId>,
964 id_gen: &mut IdGen,
965 ) -> Result<(), crate::TransformError> {
966 // The stack contains pending work as `Result<LocalId, &MirRelationExpr>`, where
967 // 1. 'Ok(id)` means the identifier `id` is ready for renumbering,
968 // 2. `Err(expr)` means that the expression `expr` needs to be further processed.
969 let mut stack: Vec<Result<LocalId, _>> = vec![Err(relation)];
970 while let Some(action) = stack.pop() {
971 match action {
972 Ok(id) => {
973 if remap.contains_key(&id) {
974 Err(crate::TransformError::Internal(format!(
975 "Shadowing of let binding for {:?}",
976 id
977 )))?;
978 } else {
979 remap.insert(id, LocalId::new(id_gen.allocate_id()));
980 }
981 }
982 Err(expr) => match expr {
983 MirRelationExpr::Let { id, value, body } => {
984 stack.push(Err(body));
985 stack.push(Ok(*id));
986 stack.push(Err(value));
987 }
988 MirRelationExpr::LetRec {
989 ids,
990 values,
991 limits: _,
992 body,
993 } => {
994 stack.push(Err(body));
995 for (id, value) in ids.iter().rev().zip_eq(values.iter().rev()) {
996 stack.push(Ok(*id));
997 stack.push(Err(value));
998 }
999 }
1000 _ => {
1001 stack.extend(expr.children().rev().map(Err));
1002 }
1003 },
1004 }
1005 }
1006 Ok(())
1007 }
1008
1009 fn implement(
1010 relation: &mut MirRelationExpr,
1011 remap: &BTreeMap<LocalId, LocalId>,
1012 ) -> Result<(), crate::TransformError> {
1013 let mut worklist = vec![relation];
1014 while let Some(expr) = worklist.pop() {
1015 match expr {
1016 MirRelationExpr::Let { id, .. } => {
1017 *id = *remap
1018 .get(id)
1019 .ok_or(crate::TransformError::IdentifierMissing(*id))?;
1020 }
1021 MirRelationExpr::LetRec { ids, .. } => {
1022 for id in ids.iter_mut() {
1023 *id = *remap
1024 .get(id)
1025 .ok_or(crate::TransformError::IdentifierMissing(*id))?;
1026 }
1027 }
1028 MirRelationExpr::Get {
1029 id: Id::Local(id), ..
1030 } => {
1031 *id = *remap
1032 .get(id)
1033 .ok_or(crate::TransformError::IdentifierMissing(*id))?;
1034 }
1035 _ => {
1036 // Remapped identifiers not used in these patterns.
1037 }
1038 }
1039 // The order is not critical, but behave as a stack for clarity.
1040 worklist.extend(expr.children_mut().rev());
1041 }
1042 Ok(())
1043 }
1044}