misc.python.materialize.mzbuild

The implementation of the mzbuild system for Docker images.

For an overview of what mzbuild is and why it exists, see the user-facing documentation.

   1# Copyright Materialize, Inc. and contributors. All rights reserved.
   2#
   3# Use of this software is governed by the Business Source License
   4# included in the LICENSE file at the root of this repository.
   5#
   6# As of the Change Date specified in that file, in accordance with
   7# the Business Source License, use of this software will be governed
   8# by the Apache License, Version 2.0.
   9
  10"""The implementation of the mzbuild system for Docker images.
  11
  12For an overview of what mzbuild is and why it exists, see the [user-facing
  13documentation][user-docs].
  14
  15[user-docs]: https://github.com/MaterializeInc/materialize/blob/main/doc/developer/mzbuild.md
  16"""
  17
  18import argparse
  19import base64
  20import collections
  21import fcntl
  22import hashlib
  23import io
  24import json
  25import multiprocessing
  26import os
  27import platform
  28import re
  29import selectors
  30import shutil
  31import stat
  32import subprocess
  33import sys
  34import time
  35from collections import OrderedDict
  36from collections.abc import Callable, Iterable, Iterator, Sequence
  37from concurrent.futures import ThreadPoolExecutor, as_completed
  38from enum import Enum, auto
  39from functools import cache
  40from pathlib import Path
  41from tempfile import TemporaryFile
  42from threading import Lock
  43from typing import IO, Any, cast
  44
  45import requests
  46import yaml
  47from requests.auth import HTTPBasicAuth
  48
  49from materialize import MZ_ROOT, buildkite, cargo, git, rustc_flags, spawn, ui, xcompile
  50from materialize.docker import image_registry
  51from materialize.rustc_flags import Sanitizer
  52from materialize.xcompile import Arch, target
  53
# Image-name prefix for the `materializeinc` namespace on ghcr.io (the GitHub
# Container Registry).
GHCR_PREFIX = "ghcr.io/materializeinc/"
  55
  56
  57class RustIncrementalBuildFailure(Exception):
  58    pass
  59
  60
  61def run_and_detect_rust_incremental_build_failure(
  62    cmd: list[str],
  63    cwd: str | Path,
  64    env: dict[str, str] | None = None,
  65) -> subprocess.CompletedProcess:
  66    """This function is complex since it prints out each line immediately to
  67    stdout/stderr, but still records them at the same time so that we can scan
  68    for known incremental build failures."""
  69    stdout_result = io.StringIO()
  70    stderr_result = io.StringIO()
  71    base_env = env if env is not None else os.environ
  72    p = subprocess.Popen(
  73        cmd,
  74        stdout=subprocess.PIPE,
  75        stderr=subprocess.PIPE,
  76        text=True,
  77        bufsize=1,
  78        env={**base_env, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"},
  79    )
  80
  81    sel = selectors.DefaultSelector()
  82    sel.register(p.stdout, selectors.EVENT_READ)  # type: ignore
  83    sel.register(p.stderr, selectors.EVENT_READ)  # type: ignore
  84    assert p.stdout is not None
  85    assert p.stderr is not None
  86    os.set_blocking(p.stdout.fileno(), False)
  87    os.set_blocking(p.stderr.fileno(), False)
  88    running = True
  89    while running:
  90        for key, val in sel.select():
  91            output = io.StringIO()
  92            running = False
  93            while True:
  94                new_output = key.fileobj.read(1024)  # type: ignore
  95                if not new_output:
  96                    break
  97                output.write(new_output)
  98            contents = output.getvalue()
  99            output.close()
 100            if not contents:
 101                continue
 102            # Keep running as long as stdout or stderr have any content
 103            running = True
 104            if key.fileobj is p.stdout:
 105                print(
 106                    contents,
 107                    end="",
 108                    flush=True,
 109                )
 110                stdout_result.write(contents)
 111            else:
 112                print(
 113                    contents,
 114                    end="",
 115                    file=sys.stderr,
 116                    flush=True,
 117                )
 118                stderr_result.write(contents)
 119    p.wait()
 120    retcode = p.poll()
 121    assert retcode is not None
 122    stdout_contents = stdout_result.getvalue()
 123    stdout_result.close()
 124    stderr_contents = stderr_result.getvalue()
 125    stderr_result.close()
 126    if retcode:
 127        incremental_build_failure_msgs = [
 128            "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs",
 129            "Found unstable fingerprints for",
 130            "ld.lld: error: undefined symbol",
 131            "signal: 11, SIGSEGV",
 132        ]
 133        combined = stdout_contents + stderr_contents
 134        if any(msg in combined for msg in incremental_build_failure_msgs):
 135            raise RustIncrementalBuildFailure()
 136
 137        raise subprocess.CalledProcessError(
 138            retcode, p.args, output=stdout_contents, stderr=stderr_contents
 139        )
 140    return subprocess.CompletedProcess(
 141        p.args, retcode, stdout_contents, stderr_contents
 142    )
 143
 144
 145class Fingerprint(bytes):
 146    """A SHA-1 hash of the inputs to an `Image`.
 147
 148    The string representation uses base32 encoding to distinguish mzbuild
 149    fingerprints from Git's hex encoded SHA-1 hashes while still being
 150    URL safe.
 151    """
 152
 153    def __str__(self) -> str:
 154        return base64.b32encode(self).decode()
 155
 156
 157class Profile(Enum):
 158    RELEASE = auto()
 159    OPTIMIZED = auto()
 160    DEV = auto()
 161
 162
 163class RepositoryDetails:
 164    """Immutable details about a `Repository`.
 165
 166    Used internally by mzbuild.
 167
 168    Attributes:
 169        root: The path to the root of the repository.
 170        arch: The CPU architecture to build for.
 171        profile: What profile the repository is being built with.
 172        coverage: Whether the repository has code coverage instrumentation
 173            enabled.
 174        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
 175        cargo_workspace: The `cargo.Workspace` associated with the repository.
 176        image_registry: The Docker image registry to pull images from and push
 177            images to.
 178        image_prefix: A prefix to apply to all Docker image names.
 179    """
 180
 181    def __init__(
 182        self,
 183        root: Path,
 184        arch: Arch,
 185        profile: Profile,
 186        coverage: bool,
 187        sanitizer: Sanitizer,
 188        image_registry: str,
 189        image_prefix: str,
 190    ):
 191        self.root = root
 192        self.arch = arch
 193        self.profile = profile
 194        self.coverage = coverage
 195        self.sanitizer = sanitizer
 196        self.cargo_workspace = cargo.Workspace(root)
 197        self.image_registry = image_registry
 198        self.image_prefix = image_prefix
 199
 200    def build(
 201        self,
 202        subcommand: str,
 203        rustflags: list[str],
 204        channel: str | None = None,
 205        extra_env: dict[str, str] = {},
 206    ) -> list[str]:
 207        """Start a build invocation for the configured architecture."""
 208        return xcompile.cargo(
 209            arch=self.arch,
 210            channel=channel,
 211            subcommand=subcommand,
 212            rustflags=rustflags,
 213            extra_env=extra_env,
 214        )
 215
 216    def tool(self, name: str) -> list[str]:
 217        """Start a binutils tool invocation for the configured architecture."""
 218        if platform.system() != "Linux":
 219            # We can't use the local tools from macOS to build a Linux executable
 220            return ["bin/ci-builder", "run", "stable", name]
 221        # If we're on Linux, trust that the tools are installed instead of
 222        # loading the slow ci-builder. If you don't have compilation tools
 223        # installed you can still run `bin/ci-builder run stable
 224        # bin/mzcompose ...`, and most likely the Cargo build will already
 225        # fail earlier if you don't have compilation tools installed and
 226        # run without the ci-builder.
 227        return [name]
 228
 229    def cargo_target_dir(self) -> Path:
 230        """Determine the path to the target directory for Cargo."""
 231        return self.root / "target-xcompile" / xcompile.target(self.arch)
 232
 233    def rewrite_builder_path_for_host(self, path: Path) -> Path:
 234        """Rewrite a path that is relative to the target directory inside the
 235        builder to a path that is relative to the target directory on the host.
 236
 237        If path does is not relative to the target directory inside the builder,
 238        it is returned unchanged.
 239        """
 240        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
 241        try:
 242            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
 243        except ValueError:
 244            return path
 245
 246
 247@cache
 248def docker_images() -> frozenset[str]:
 249    """List the Docker images available on the local machine."""
 250    return frozenset(
 251        spawn.capture(["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"])
 252        .strip()
 253        .split("\n")
 254    )
 255
 256
# File recording, one name per line, the images that a registry check has
# already confirmed to exist, so later checks can skip the network request.
KNOWN_DOCKER_IMAGES_FILE = Path(MZ_ROOT / "known-docker-images.txt")
# In-memory copy of the file above; `None` until it is first loaded.
_known_docker_images: set[str] | None = None
# Held while loading the set from the file and while adding a name to both.
_known_docker_images_lock = Lock()
 260
 261
 262def is_docker_image_pushed(name: str) -> bool:
 263    """Check whether the named image is pushed to Docker Hub.
 264
 265    Note that this operation requires a rather slow network request.
 266    """
 267    global _known_docker_images
 268
 269    if _known_docker_images is None:
 270        with _known_docker_images_lock:
 271            if not KNOWN_DOCKER_IMAGES_FILE.exists():
 272                _known_docker_images = set()
 273            else:
 274                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
 275                    _known_docker_images = set(line.strip() for line in f)
 276
 277    if name in _known_docker_images:
 278        return True
 279
 280    if ":" not in name:
 281        image, tag = name, "latest"
 282    else:
 283        image, tag = name.rsplit(":", 1)
 284
 285    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
 286    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")
 287
 288    exists: bool = False
 289
 290    try:
 291        if dockerhub_username and dockerhub_token:
 292            response = requests.head(
 293                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
 294                headers={
 295                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
 296                },
 297                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
 298                timeout=10,
 299            )
 300        else:
 301            token = requests.get(
 302                "https://auth.docker.io/token",
 303                params={
 304                    "service": "registry.docker.io",
 305                    "scope": f"repository:{image}:pull",
 306                },
 307                timeout=10,
 308            ).json()["token"]
 309            response = requests.head(
 310                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
 311                headers={
 312                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
 313                    "Authorization": f"Bearer {token}",
 314                },
 315                timeout=10,
 316            )
 317
 318        if response.status_code in (401, 429, 500, 502, 503, 504):
 319            # Fall back to 5x slower method
 320            proc = subprocess.run(
 321                ["docker", "manifest", "inspect", name],
 322                stdout=subprocess.DEVNULL,
 323                stderr=subprocess.DEVNULL,
 324                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
 325            )
 326            exists = proc.returncode == 0
 327        else:
 328            exists = response.status_code == 200
 329
 330    except Exception as e:
 331        print(f"Error checking Docker image: {e}")
 332        return False
 333
 334    if exists:
 335        with _known_docker_images_lock:
 336            _known_docker_images.add(name)
 337            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
 338                print(name, file=f)
 339
 340    return exists
 341
 342
 343def is_ghcr_image_pushed(name: str) -> bool:
 344    global _known_docker_images
 345
 346    if _known_docker_images is None:
 347        with _known_docker_images_lock:
 348            if not KNOWN_DOCKER_IMAGES_FILE.exists():
 349                _known_docker_images = set()
 350            else:
 351                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
 352                    _known_docker_images = set(line.strip() for line in f)
 353
 354    name_without_ghcr = name.removeprefix("ghcr.io/")
 355    if name in _known_docker_images:
 356        return True
 357
 358    if ":" not in name_without_ghcr:
 359        image, tag = name_without_ghcr, "latest"
 360    else:
 361        image, tag = name_without_ghcr.rsplit(":", 1)
 362
 363    exists: bool = False
 364
 365    try:
 366        token = requests.get(
 367            "https://ghcr.io/token",
 368            params={
 369                "scope": f"repository:{image}:pull",
 370            },
 371            timeout=10,
 372        ).json()["token"]
 373        response = requests.head(
 374            f"https://ghcr.io/v2/{image}/manifests/{tag}",
 375            headers={"Authorization": f"Bearer {token}"},
 376            timeout=10,
 377        )
 378
 379        if response.status_code in (401, 429, 500, 502, 503, 504):
 380            # Fall back to 5x slower method
 381            proc = subprocess.run(
 382                ["docker", "manifest", "inspect", name],
 383                stdout=subprocess.DEVNULL,
 384                stderr=subprocess.DEVNULL,
 385                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
 386            )
 387            exists = proc.returncode == 0
 388        else:
 389            exists = response.status_code == 200
 390
 391    except Exception as e:
 392        print(f"Error checking Docker image: {e}")
 393        return False
 394
 395    if exists:
 396        with _known_docker_images_lock:
 397            _known_docker_images.add(name)
 398            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
 399                print(name, file=f)
 400
 401    return exists
 402
 403
 404def chmod_x(path: Path) -> None:
 405    """Set the executable bit on a file or directory."""
 406    # https://stackoverflow.com/a/30463972/1122351
 407    mode = os.stat(path).st_mode
 408    mode |= (mode & 0o444) >> 2  # copy R bits to X
 409    os.chmod(path, mode)
 410
 411
 412class PreImage:
 413    """An action to run before building a Docker image.
 414
 415    Args:
 416        rd: The `RepositoryDetails` for the repository.
 417        path: The path to the `Image` associated with this action.
 418    """
 419
 420    def __init__(self, rd: RepositoryDetails, path: Path):
 421        self.rd = rd
 422        self.path = path
 423
 424    @classmethod
 425    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
 426        """Prepare a batch of actions.
 427
 428        This is useful for `PreImage` actions that are more efficient when
 429        their actions are applied to several images in bulk.
 430
 431        Returns an arbitrary output that is passed to `PreImage.run`.
 432        """
 433        pass
 434
 435    def run(self, prep: Any) -> None:
 436        """Perform the action.
 437
 438        Args:
 439            prep: Any prep work returned by `prepare_batch`.
 440        """
 441        pass
 442
 443    def inputs(self) -> set[str]:
 444        """Return the files which are considered inputs to the action."""
 445        raise NotImplementedError
 446
 447    def extra(self) -> str:
 448        """Returns additional data for incorporation in the fingerprint."""
 449        return ""
 450
 451
 452class Copy(PreImage):
 453    """A `PreImage` action which copies files from a directory.
 454
 455    See doc/developer/mzbuild.md for an explanation of the user-facing
 456    parameters.
 457    """
 458
 459    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
 460        super().__init__(rd, path)
 461
 462        self.source = config.pop("source", None)
 463        if self.source is None:
 464            raise ValueError("mzbuild config is missing 'source' argument")
 465
 466        self.destination = config.pop("destination", None)
 467        if self.destination is None:
 468            raise ValueError("mzbuild config is missing 'destination' argument")
 469
 470        self.matching = config.pop("matching", "*")
 471
 472    def run(self, prep: Any) -> None:
 473        super().run(prep)
 474        for src in self.inputs():
 475            dst = self.path / self.destination / src
 476            dst.parent.mkdir(parents=True, exist_ok=True)
 477            shutil.copy(self.rd.root / self.source / src, dst)
 478
 479    def inputs(self) -> set[str]:
 480        return set(git.expand_globs(self.rd.root / self.source, self.matching))
 481
 482
 483class CargoPreImage(PreImage):
 484    """A `PreImage` action that uses Cargo."""
 485
 486    @staticmethod
 487    @cache
 488    def _cargo_shared_inputs() -> frozenset[str]:
 489        """Resolve shared Cargo inputs once and cache the result.
 490
 491        This expands the 'ci/builder' directory glob and filters out
 492        non-existent files like '.cargo/config', avoiding repeated
 493        git subprocess calls in fingerprint().
 494        """
 495        inputs: set[str] = set()
 496        inputs |= git.expand_globs(Path("."), "ci/builder/**")
 497        inputs.add("Cargo.toml")
 498        inputs.add("Cargo.lock")
 499        if Path(".cargo/config").exists():
 500            inputs.add(".cargo/config")
 501        return frozenset(inputs)
 502
 503    def inputs(self) -> set[str]:
 504        return set(CargoPreImage._cargo_shared_inputs())
 505
 506    def extra(self) -> str:
 507        # Cargo images depend on the release mode and whether
 508        # coverage/sanitizer is enabled.
 509        flags: list[str] = []
 510        if self.rd.profile == Profile.RELEASE:
 511            flags += "release"
 512        if self.rd.profile == Profile.OPTIMIZED:
 513            flags += "optimized"
 514        if self.rd.coverage:
 515            flags += "coverage"
 516        if self.rd.sanitizer != Sanitizer.none:
 517            flags += self.rd.sanitizer.value
 518        flags.sort()
 519        return ",".join(flags)
 520
 521
 522class CargoBuild(CargoPreImage):
 523    """A `PreImage` action that builds a single binary with Cargo.
 524
 525    See doc/developer/mzbuild.md for an explanation of the user-facing
 526    parameters.
 527    """
 528
 529    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
 530        super().__init__(rd, path)
 531        bin = config.pop("bin", [])
 532        self.bins = bin if isinstance(bin, list) else [bin]
 533        example = config.pop("example", [])
 534        self.examples = example if isinstance(example, list) else [example]
 535        self.strip = config.pop("strip", True)
 536        self.extract = config.pop("extract", {})
 537        features = config.pop("features", [])
 538        self.features = features if isinstance(features, list) else [features]
 539
 540        if len(self.bins) == 0 and len(self.examples) == 0:
 541            raise ValueError("mzbuild config is missing pre-build target")
 542
 543    @staticmethod
 544    def generate_cargo_build_command(
 545        rd: RepositoryDetails,
 546        bins: list[str],
 547        examples: list[str],
 548        features: list[str] | None = None,
 549    ) -> list[str]:
 550        rustflags = (
 551            rustc_flags.coverage
 552            if rd.coverage
 553            else (
 554                rustc_flags.sanitizer[rd.sanitizer]
 555                if rd.sanitizer != Sanitizer.none
 556                else ["--cfg=tokio_unstable"]
 557            )
 558        )
 559        cflags = (
 560            [
 561                f"--target={target(rd.arch)}",
 562                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
 563                "-fuse-ld=lld",
 564                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
 565                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
 566            ]
 567            + rustc_flags.sanitizer_cflags[rd.sanitizer]
 568            if rd.sanitizer != Sanitizer.none
 569            else []
 570        )
 571        extra_env = (
 572            {
 573                "CFLAGS": " ".join(cflags),
 574                "CXXFLAGS": " ".join(cflags),
 575                "LDFLAGS": " ".join(cflags),
 576                "CXXSTDLIB": "stdc++",
 577                "CC": "cc",
 578                "CXX": "c++",
 579                "CPP": "clang-cpp-18",
 580                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
 581                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
 582                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
 583                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
 584            }
 585            if rd.sanitizer != Sanitizer.none
 586            else {}
 587        )
 588
 589        cargo_build = rd.build(
 590            "build", channel=None, rustflags=rustflags, extra_env=extra_env
 591        )
 592
 593        packages = set()
 594        for bin in bins:
 595            cargo_build.extend(["--bin", bin])
 596            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
 597        for example in examples:
 598            cargo_build.extend(["--example", example])
 599            packages.add(rd.cargo_workspace.crate_for_example(example).name)
 600        cargo_build.extend(f"--package={p}" for p in packages)
 601
 602        if rd.profile == Profile.RELEASE:
 603            cargo_build.append("--release")
 604        if rd.profile == Profile.OPTIMIZED:
 605            cargo_build.extend(["--profile", "optimized"])
 606        if rd.sanitizer != Sanitizer.none:
 607            # ASan doesn't work with jemalloc
 608            cargo_build.append("--no-default-features")
 609            # Uses more memory, so reduce the number of jobs
 610            cargo_build.extend(
 611                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
 612            )
 613        if features:
 614            cargo_build.append(f"--features={','.join(features)}")
 615
 616        return cargo_build
 617
 618    @classmethod
 619    def prepare_batch(cls, instances: list["PreImage"]) -> dict[str, Any]:
 620        super().prepare_batch(instances)
 621
 622        if not instances:
 623            return {}
 624
 625        # Building all binaries and examples in the same `cargo build` command
 626        # allows Cargo to link in parallel with other work, which can
 627        # meaningfully speed up builds.
 628
 629        rd: RepositoryDetails | None = None
 630        builds = cast(list[CargoBuild], instances)
 631        bins = set()
 632        examples = set()
 633        features = set()
 634        for build in builds:
 635            if not rd:
 636                rd = build.rd
 637            bins.update(build.bins)
 638            examples.update(build.examples)
 639            features.update(build.features)
 640        assert rd
 641
 642        ui.section(f"Common build for: {', '.join(bins | examples)}")
 643
 644        cargo_build = cls.generate_cargo_build_command(
 645            rd, list(bins), list(examples), list(features) if features else None
 646        )
 647
 648        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)
 649
 650        # Re-run with JSON-formatted messages and capture the output so we can
 651        # later analyze the build artifacts in `run`. This should be nearly
 652        # instantaneous since we just compiled above with the same crates and
 653        # features. (We don't want to do the compile above with JSON-formatted
 654        # messages because it wouldn't be human readable.)
 655        json_output = spawn.capture(
 656            cargo_build + ["--message-format=json"],
 657            cwd=rd.root,
 658        )
 659        prep = {"cargo": json_output}
 660
 661        return prep
 662
 663    def build(self, build_output: dict[str, Any]) -> None:
 664        cargo_profile = (
 665            "release"
 666            if self.rd.profile == Profile.RELEASE
 667            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
 668        )
 669
 670        def copy(src: Path, relative_dst: Path) -> None:
 671            exe_path = self.path / relative_dst
 672            exe_path.parent.mkdir(parents=True, exist_ok=True)
 673            shutil.copy(src, exe_path)
 674
 675            if self.strip:
 676                # The debug information is large enough that it slows down CI,
 677                # since we're packaging these binaries up into Docker images and
 678                # shipping them around.
 679                spawn.runv(
 680                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
 681                    cwd=self.rd.root,
 682                )
 683            else:
 684                # Even if we've been asked not to strip the binary, remove the
 685                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
 686                # indexes that speed up launching a debugger against the binary,
 687                # and we're happy to have slower debugger start up in exchange for
 688                # smaller binaries. Plus the sections have been obsoleted by a
 689                # `.debug_names` section in DWARF 5, and so debugger support for
 690                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
 691                # See: https://github.com/rust-lang/rust/issues/46034
 692                spawn.runv(
 693                    [
 694                        *self.rd.tool("objcopy"),
 695                        "-R",
 696                        ".debug_pubnames",
 697                        "-R",
 698                        ".debug_pubtypes",
 699                        exe_path,
 700                    ],
 701                    cwd=self.rd.root,
 702                )
 703
 704        for bin in self.bins:
 705            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
 706            copy(src_path, bin)
 707        for example in self.examples:
 708            src_path = (
 709                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
 710            )
 711            copy(src_path, Path("examples") / example)
 712
 713        if self.extract:
 714            cargo_build_json_output = build_output["cargo"]
 715
 716            target_dir = self.rd.cargo_target_dir()
 717            for line in cargo_build_json_output.split("\n"):
 718                if line.strip() == "" or not line.startswith("{"):
 719                    continue
 720                message = json.loads(line)
 721                if message["reason"] != "build-script-executed":
 722                    continue
 723                out_dir = self.rd.rewrite_builder_path_for_host(
 724                    Path(message["out_dir"])
 725                )
 726                if not out_dir.is_relative_to(target_dir):
 727                    # Some crates are built for both the host and the target.
 728                    # Ignore the built-for-host out dir.
 729                    continue
 730                # parse the package name from a package_id that looks like one of:
 731                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
 732                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
 733                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
 734                # file:///path/to/my-package#0.1.0
 735                package_id = message["package_id"]
 736                if "@" in package_id:
 737                    package = package_id.split("@")[0].split("#")[-1]
 738                else:
 739                    package = message["package_id"].split("#")[0].split("/")[-1]
 740                for src, dst in self.extract.get(package, {}).items():
 741                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])
 742
 743        self.acquired = True
 744
 745    def run(self, prep: dict[str, Any]) -> None:
 746        super().run(prep)
 747        self.build(prep)
 748
 749    @cache
 750    def inputs(self) -> set[str]:
 751        deps = set()
 752
 753        for bin in self.bins:
 754            crate = self.rd.cargo_workspace.crate_for_bin(bin)
 755            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
 756        for example in self.examples:
 757            crate = self.rd.cargo_workspace.crate_for_example(example)
 758            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
 759                crate, dev=True
 760            )
 761
 762        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
 763        return inputs
 764
 765
 766class Image:
 767    """A Docker image whose build and dependencies are managed by mzbuild.
 768
 769    An image corresponds to a directory in a repository that contains a
 770    `mzbuild.yml` file. This directory is called an "mzbuild context."
 771
 772    Attributes:
 773        name: The name of the image.
 774        publish: Whether the image should be pushed to Docker Hub.
 775        depends_on: The names of the images upon which this image depends.
 776        root: The path to the root of the associated `Repository`.
 777        path: The path to the directory containing the `mzbuild.yml`
 778            configuration file.
 779        pre_images: Optional actions to perform before running `docker build`.
 780        build_args: An optional list of --build-arg to pass to the dockerfile
 781    """
 782
 783    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")
 784
 785    _context_files_cache: set[str] | None
 786
 787    def __init__(self, rd: RepositoryDetails, path: Path):
 788        self.rd = rd
 789        self.path = path
 790        self._context_files_cache = None
 791        self.pre_images: list[PreImage] = []
 792        with open(self.path / "mzbuild.yml") as f:
 793            data = yaml.safe_load(f)
 794            self.name: str = data.pop("name")
 795            self.publish: bool = data.pop("publish", True)
 796            self.description: str | None = data.pop("description", None)
 797            self.mainline: bool = data.pop("mainline", True)
 798            for pre_image in data.pop("pre-image", []):
 799                typ = pre_image.pop("type", None)
 800                if typ == "cargo-build":
 801                    self.pre_images.append(CargoBuild(self.rd, self.path, pre_image))
 802                elif typ == "copy":
 803                    self.pre_images.append(Copy(self.rd, self.path, pre_image))
 804                else:
 805                    raise ValueError(
 806                        f"mzbuild config in {self.path} has unknown pre-image type"
 807                    )
 808            self.build_args = data.pop("build-args", {})
 809
 810        if re.search(r"[^A-Za-z0-9\-]", self.name):
 811            raise ValueError(
 812                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
 813            )
 814
 815        self.depends_on: list[str] = []
 816        with open(self.path / "Dockerfile", "rb") as f:
 817            for line in f:
 818                match = self._DOCKERFILE_MZFROM_RE.match(line)
 819                if match:
 820                    self.depends_on.append(match.group(1).decode())
 821
 822    def sync_description(self) -> None:
 823        """Sync the description to Docker Hub if the image is publishable
 824        and a README.md file exists."""
 825
 826        if not self.publish:
 827            ui.say(f"{self.name} is not publishable")
 828            return
 829
 830        readme_path = self.path / "README.md"
 831        has_readme = readme_path.exists()
 832        if not has_readme:
 833            ui.say(f"{self.name} has no README.md or description")
 834            return
 835
 836        docker_config = os.getenv("DOCKER_CONFIG")
 837        spawn.runv(
 838            [
 839                "docker",
 840                "pushrm",
 841                f"--file={readme_path}",
 842                *([f"--config={docker_config}/config.json"] if docker_config else []),
 843                *([f"--short={self.description}"] if self.description else []),
 844                self.docker_name(),
 845            ]
 846        )
 847
 848    def docker_name(self, tag: str | None = None) -> str:
 849        """Return the name of the image on Docker Hub at the given tag."""
 850        name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
 851        if tag:
 852            name += f":{tag}"
 853        return name
 854
 855
 856class ResolvedImage:
 857    """An `Image` whose dependencies have been resolved.
 858
 859    Attributes:
 860        image: The underlying `Image`.
 861        acquired: Whether the image is available locally.
 862        dependencies: A mapping from dependency name to `ResolvedImage` for
 863            each of the images that `image` depends upon.
 864    """
 865
 866    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
 867        self.image = image
 868        self.acquired = False
 869        self.dependencies = {}
 870        for d in dependencies:
 871            self.dependencies[d.name] = d
 872
 873    def __repr__(self) -> str:
 874        return f"ResolvedImage<{self.spec()}>"
 875
 876    @property
 877    def name(self) -> str:
 878        """The name of the underlying image."""
 879        return self.image.name
 880
 881    @property
 882    def publish(self) -> bool:
 883        """Whether the underlying image should be pushed to Docker Hub."""
 884        return self.image.publish
 885
 886    @cache
 887    def spec(self) -> str:
 888        """Return the "spec" for the image.
 889
 890        A spec is the unique identifier for the image given its current
 891        fingerprint. It is a valid Docker Hub name.
 892        """
 893        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")
 894
 895    def write_dockerfile(self) -> IO[bytes]:
 896        """Render the Dockerfile without mzbuild directives.
 897
 898        Returns:
 899            file: A handle to a temporary file containing the adjusted
 900                Dockerfile."""
 901        with open(self.image.path / "Dockerfile", "rb") as f:
 902            lines = f.readlines()
 903        f = TemporaryFile()
 904        for line in lines:
 905            match = Image._DOCKERFILE_MZFROM_RE.match(line)
 906            if match:
 907                image = match.group(1).decode()
 908                spec = self.dependencies[image].spec()
 909                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
 910            f.write(line)
 911        f.seek(0)
 912        return f
 913
 914    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
 915        """Build the image from source.
 916
 917        Requires that the caller has already acquired all dependencies and
 918        prepared all `PreImage` actions via `PreImage.prepare_batch`.
 919        """
 920        # Use a file lock to prevent parallel mzcompose processes from
 921        # racing on git clean / copy / strip for the same image directory.
 922        lock_dir = self.image.rd.root / "target" / "mzbuild-locks"
 923        lock_dir.mkdir(parents=True, exist_ok=True)
 924        profile = self.image.rd.profile.name.lower()
 925        lock_path = lock_dir / f"{self.image.name}-{profile}.lock"
 926        with open(lock_path, "w") as lock_file:
 927            ui.say(f"Acquiring lock for {self.spec()}")
 928            fcntl.flock(lock_file, fcntl.LOCK_EX)
 929            try:
 930                self._build_locked(prep, push)
 931            finally:
 932                fcntl.flock(lock_file, fcntl.LOCK_UN)
 933
 934    def _build_locked(
 935        self, prep: dict[type[PreImage], Any], push: bool = False
 936    ) -> None:
 937        ui.section(f"Building {self.spec()}")
 938        spawn.runv(["git", "clean", "-ffdX", self.image.path])
 939
 940        for pre_image in self.image.pre_images:
 941            pre_image.run(prep[type(pre_image)])
 942        build_args = {
 943            **self.image.build_args,
 944            "BUILD_PROFILE": self.image.rd.profile.name,
 945            "ARCH_GCC": str(self.image.rd.arch),
 946            "ARCH_GO": self.image.rd.arch.go_str(),
 947            "CI_SANITIZER": str(self.image.rd.sanitizer),
 948        }
 949        f = self.write_dockerfile()
 950
 951        try:
 952            spawn.capture(["docker", "buildx", "version"])
 953        except subprocess.CalledProcessError:
 954            if push:
 955                print(
 956                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
 957                )
 958                raise
 959            print(
 960                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
 961            )
 962            print("Falling back to docker build")
 963            cmd: Sequence[str] = [
 964                "docker",
 965                "build",
 966                "-f",
 967                "-",
 968                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
 969                "-t",
 970                self.spec(),
 971                f"--platform=linux/{self.image.rd.arch.go_str()}",
 972                str(self.image.path),
 973            ]
 974        else:
 975            docker_tag = f"docker.io/{self.spec()}"
 976            ghcr_tag = f"ghcr.io/materializeinc/{self.spec()}"
 977            cmd: Sequence[str] = [
 978                "docker",
 979                "buildx",
 980                "build",
 981                "--progress=plain",  # less noisy
 982                "-f",
 983                "-",
 984                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
 985                "-t",
 986                docker_tag,
 987                "-t",
 988                ghcr_tag,
 989                f"--platform=linux/{self.image.rd.arch.go_str()}",
 990                str(self.image.path),
 991                "--load",
 992            ]
 993
 994        if token := os.getenv("GITHUB_GHCR_TOKEN"):
 995            spawn.runv(
 996                [
 997                    "docker",
 998                    "login",
 999                    "ghcr.io",
1000                    "-u",
1001                    "materialize-bot",
1002                    "--password-stdin",
1003                ],
1004                stdin=token.encode(),
1005            )
1006
1007        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)
1008
1009        if push:
1010            # Push to both registries in parallel. With the docker driver,
1011            # the image is already in the local daemon after --load, so
1012            # docker push is the same mechanism buildx --push uses internally.
1013            pending = [docker_tag, ghcr_tag]
1014            for sleep_time in [5, 10, 20, 40, 60, None]:
1015                procs = [
1016                    subprocess.Popen(
1017                        ["docker", "push", tag],
1018                        stdout=sys.stderr,
1019                        stderr=sys.stderr,
1020                    )
1021                    for tag in pending
1022                ]
1023                pending = [tag for tag, proc in zip(pending, procs) if proc.wait() != 0]
1024                if not pending:
1025                    break
1026                if sleep_time is None:
1027                    raise subprocess.CalledProcessError(
1028                        1, ["docker", "push", pending[0]]
1029                    )
1030                print(f"docker push failed for {pending}, retrying in {sleep_time}s")
1031                time.sleep(sleep_time)
1032
1033    def try_pull(self, max_retries: int) -> bool:
1034        """Download the image if it does not exist locally. Returns whether it was found."""
1035        command = ["docker", "pull"]
1036        # --quiet skips printing the progress bar, which does not display well in CI.
1037        if ui.env_is_truthy("CI"):
1038            command.append("--quiet")
1039        command.append(self.spec())
1040        if not self.acquired:
1041            ui.header(f"Acquiring {self.spec()}")
1042            sleep_time = 1
1043            for retry in range(1, max_retries + 1):
1044                try:
1045                    spawn.runv(
1046                        command,
1047                        stdin=subprocess.DEVNULL,
1048                        stdout=sys.stderr.buffer,
1049                    )
1050                    self.acquired = True
1051                    break
1052                except subprocess.CalledProcessError:
1053                    if retry < max_retries:
1054                        # There seems to be no good way to tell what error
1055                        # happened based on error code
1056                        # (https://github.com/docker/cli/issues/538) and we
1057                        # want to print output directly to terminal.
1058                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
1059                            for retry in range(max_retries):
1060                                try:
1061                                    build_status = buildkite.get_build_status(build)
1062                                except subprocess.CalledProcessError:
1063                                    time.sleep(sleep_time)
1064                                    sleep_time = min(sleep_time * 2, 10)
1065                                    break
1066                                print(f"Build {build} status: {build_status}")
1067                                if build_status == "failed":
1068                                    print(
1069                                        f"Build {build} has been marked as failed, exiting hard"
1070                                    )
1071                                    sys.exit(1)
1072                                elif build_status == "success":
1073                                    break
1074                                assert (
1075                                    build_status == "pending"
1076                                ), f"Unknown build status {build_status}"
1077                                time.sleep(1)
1078                        else:
1079                            print(f"Retrying in {sleep_time}s ...")
1080                            time.sleep(sleep_time)
1081                            sleep_time = min(sleep_time * 2, 10)
1082                        continue
1083                    else:
1084                        break
1085        return self.acquired
1086
1087    def is_published_if_necessary(self) -> bool:
1088        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
1089        if not self.publish:
1090            return False
1091        spec = self.spec()
1092        if spec.startswith(GHCR_PREFIX):
1093            spec = spec.removeprefix(GHCR_PREFIX)
1094        ghcr_spec = f"{GHCR_PREFIX}{spec}"
1095        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
1096            ui.say(f"{spec} already exists")
1097            return True
1098        return False
1099
1100    def run(
1101        self,
1102        args: list[str] = [],
1103        docker_args: list[str] = [],
1104        env: dict[str, str] = {},
1105    ) -> None:
1106        """Run a command in the image.
1107
1108        Creates a container from the image and runs the command described by
1109        `args` in the image.
1110        """
1111        envs = []
1112        for key, val in env.items():
1113            envs.extend(["--env", f"{key}={val}"])
1114        spawn.runv(
1115            [
1116                "docker",
1117                "run",
1118                "--tty",
1119                "--rm",
1120                *envs,
1121                "--init",
1122                *docker_args,
1123                self.spec(),
1124                *args,
1125            ],
1126        )
1127
1128    def list_dependencies(self, transitive: bool = False) -> set[str]:
1129        out = set()
1130        for dep in self.dependencies.values():
1131            out.add(dep.name)
1132            if transitive:
1133                out |= dep.list_dependencies(transitive)
1134        return out
1135
1136    @cache
1137    def inputs(self, transitive: bool = False) -> set[str]:
1138        """List the files tracked as inputs to the image.
1139
1140        These files are used to compute the fingerprint for the image. See
1141        `ResolvedImage.fingerprint` for details.
1142
1143        Returns:
1144            inputs: A list of input files, relative to the root of the
1145                repository.
1146        """
1147        if self.image._context_files_cache is not None:
1148            paths = set(self.image._context_files_cache)
1149        else:
1150            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
1151        if not paths:
1152            # While we could find an `mzbuild.yml` file for this service, expland_globs didn't
1153            # return any files that matched this service. At the very least, the `mzbuild.yml`
1154            # file itself should have been returned. We have a bug if paths is empty.
1155            raise AssertionError(
1156                f"{self.image.name} mzbuild exists but its files are unknown to git"
1157            )
1158        for pre_image in self.image.pre_images:
1159            paths |= pre_image.inputs()
1160        if transitive:
1161            for dep in self.dependencies.values():
1162                paths |= dep.inputs(transitive)
1163        return paths
1164
1165    @cache
1166    def fingerprint(self) -> Fingerprint:
1167        """Fingerprint the inputs to the image.
1168
1169        Compute the fingerprint of the image. Changing the contents of any of
1170        the files or adding or removing files to the image will change the
1171        fingerprint, as will modifying the inputs to any of its dependencies.
1172
1173        The image considers all non-gitignored files in its mzbuild context to
1174        be inputs. If it has a pre-image action, that action may add additional
1175        inputs via `PreImage.inputs`.
1176        """
1177        self_hash = hashlib.sha1()
1178        # When inputs come from precomputed sources (crate and image context
1179        # batching + resolved CargoPreImage paths), they are already individual
1180        # file paths from git. Skip the expensive expand_globs subprocess calls.
1181        inputs = self.inputs()
1182        if self.image._context_files_cache is not None:
1183            resolved_inputs = sorted(inputs)
1184        else:
1185            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
1186        for rel_path in resolved_inputs:
1187            abs_path = self.image.rd.root / rel_path
1188            file_hash = hashlib.sha1()
1189            raw_file_mode = os.lstat(abs_path).st_mode
1190            # Compute a simplified file mode using the same rules as Git.
1191            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
1192            if stat.S_ISLNK(raw_file_mode):
1193                file_mode = 0o120000
1194            elif raw_file_mode & stat.S_IXUSR:
1195                file_mode = 0o100755
1196            else:
1197                file_mode = 0o100644
1198            with open(abs_path, "rb") as f:
1199                file_hash.update(f.read())
1200            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
1201            self_hash.update(rel_path.encode())
1202            self_hash.update(file_hash.digest())
1203            self_hash.update(b"\0")
1204
1205        for pre_image in self.image.pre_images:
1206            self_hash.update(pre_image.extra().encode())
1207            self_hash.update(b"\0")
1208
1209        self_hash.update(f"profile={self.image.rd.profile}".encode())
1210        self_hash.update(f"arch={self.image.rd.arch}".encode())
1211        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
1212        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
1213        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
1214        self_hash.update(b"mirror=ghcr")
1215
1216        full_hash = hashlib.sha1()
1217        full_hash.update(self_hash.digest())
1218        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
1219            full_hash.update(dep.name.encode())
1220            full_hash.update(dep.fingerprint())
1221            full_hash.update(b"\0")
1222
1223        return Fingerprint(full_hash.digest())
1224
1225
class DependencySet:
    """A set of `ResolvedImage`s.

    Iterating over a dependency set yields the contained images in an arbitrary
    order. Indexing a dependency set yields the image with the specified name.
    """

    def __init__(self, dependencies: Iterable[Image]):
        """Construct a new `DependencySet`.

        The provided `dependencies` must be topologically sorted.
        """
        self._dependencies: dict[str, ResolvedImage] = {}
        dependencies = list(dependencies)
        known_images = docker_images() if dependencies else set()
        for d in dependencies:
            # Relies on the topological order: every dependency of `d` has
            # already been resolved and stored by the time `d` is visited.
            image = ResolvedImage(
                image=d,
                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
            )
            image.acquired = image.spec() in known_images
            self._dependencies[d.name] = image

    def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]:
        """Run `prepare_batch` once per `PreImage` type used by `images`.

        Returns:
            A mapping from `PreImage` type to the result of that type's
            `prepare_batch` over all of its instances in `images`.
        """
        pre_images = collections.defaultdict(list)
        for image in images:
            for pre_image in image.image.pre_images:
                pre_images[type(pre_image)].append(pre_image)
        pre_image_prep = {}
        for cls, instances in pre_images.items():
            pre_image = cast(PreImage, cls)
            pre_image_prep[cls] = pre_image.prepare_batch(instances)
        return pre_image_prep

    def acquire(self, max_retries: int | None = None) -> None:
        """Download or build all of the images in the dependency set that do not
        already exist locally.

        Publishable images are pulled in parallel; images that are not
        publishable, or that could not be pulled, are built locally. In CI
        (unless `CI_ALLOW_LOCAL_BUILD` is set) the process exits with status
        128 instead of building a publishable image that could not be pulled.

        Args:
            max_retries: Maximum number of pull attempts per image. When not
                given, defaults to 90 if `CI_WAITING_FOR_BUILD` is set, 5 in
                CI without `CI_ALLOW_LOCAL_BUILD`, and 1 otherwise.
        """

        # Only retry in CI runs since we struggle with flaky docker pulls there
        if not max_retries:
            max_retries = (
                90
                if os.getenv("CI_WAITING_FOR_BUILD")
                else (
                    5
                    if ui.env_is_truthy("CI")
                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
                    else 1
                )
            )
        assert max_retries > 0

        deps_to_check = [dep for dep in self if dep.publish]
        deps_to_build = [dep for dep in self if not dep.publish]
        if deps_to_check:
            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
                futures = [
                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
                ]
                for dep, future in zip(deps_to_check, futures):
                    try:
                        if not future.result():
                            deps_to_build.append(dep)
                    except Exception:
                        # Best effort: a pull that errored out is treated the
                        # same as an image that was not found.
                        deps_to_build.append(dep)

        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            expected_deps = [dep for dep in deps_to_build if dep.publish]
            if expected_deps:
                print(
                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
                )
                sys.exit(128)

        prep = self._prepare_batch(deps_to_build)
        for dep in deps_to_build:
            dep.build(prep)

    def ensure(
        self, pre_build: Callable[[list[ResolvedImage]], None] | None = None
    ) -> None:
        """Ensure all publishable images in this dependency set exist on Docker
        Hub.

        Images are pushed using their spec as their tag. Images for which
        `is_published_if_necessary` reports `False` are built in parallel,
        each one waiting for the images it depends on.

        Args:
            pre_build: A callback to invoke with all dependencies that are
                       going to be built locally, invoked after their cargo
                       build is done, but before the Docker images are built
                       and uploaded to DockerHub.

        Raises:
            TimeoutError: If an image waits more than 600 seconds for its
                dependencies to be built.
            RuntimeError: If an image cannot be built because one of its
                dependencies failed to build; the dependency's error is
                attached as the cause.
        """
        num_deps = len(self._dependencies)
        if not num_deps:
            deps_to_build = []
        else:
            with ThreadPoolExecutor(max_workers=num_deps) as executor:
                checked = list(
                    executor.map(
                        lambda dep: (dep, not dep.is_published_if_necessary()), self
                    )
                )

            deps_to_build = [dep for dep, should_build in checked if should_build]

        prep = self._prepare_batch(deps_to_build)
        if pre_build:
            pre_build(deps_to_build)
        # Guards `built_deps` and `failed_deps`, which the build threads share.
        lock = Lock()
        built_deps: set[str] = {dep.name for dep in self} - {
            dep.name for dep in deps_to_build
        }
        failed_deps: dict[str, BaseException] = {}

        def wait_for_dependencies(dep: ResolvedImage) -> None:
            end_time = time.time() + 600
            while True:
                if time.time() > end_time:
                    raise TimeoutError(
                        f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}"
                    )
                with lock:
                    # Fail fast rather than polling until the timeout for a
                    # dependency that will never become available.
                    for dep2 in dep.dependencies:
                        if dep2 in failed_deps:
                            raise RuntimeError(
                                f"Not building {dep.name}: dependency {dep2} failed to build"
                            ) from failed_deps[dep2]
                    if all(dep2 in built_deps for dep2 in dep.dependencies):
                        return
                time.sleep(0.01)

        def build_dep(dep: ResolvedImage) -> None:
            try:
                wait_for_dependencies(dep)
                # Publishable images get up to three attempts.
                for attempts_remaining in reversed(range(3)):
                    try:
                        dep.build(prep, push=dep.publish)
                        break
                    except Exception as e:
                        if not dep.publish or attempts_remaining == 0:
                            raise
                        print(
                            f"Building {dep.name} failed, {attempts_remaining} attempt(s) remaining: {e}"
                        )
            except BaseException as e:
                # Record the failure so that dependents stop waiting.
                with lock:
                    failed_deps[dep.name] = e
                raise
            with lock:
                built_deps.add(dep.name)

        if deps_to_build:
            # One worker per image, so a build waiting on its dependencies
            # never occupies a slot that a dependency needs.
            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
                for future in as_completed(futures):
                    future.result()

    def check(self) -> bool:
        """Check all publishable images in this dependency set exist on Docker
        Hub. Don't try to download or build them.

        Returns:
            `True` if the set is empty or `is_published_if_necessary` reports
            `True` for every image. Because that method reports `False` for
            images that are not publishable, any such image makes the check
            fail.
        """
        num_deps = len(self._dependencies)
        if num_deps == 0:
            return True
        with ThreadPoolExecutor(max_workers=num_deps) as executor:
            results = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), list(self))
            )
        return all(results)

    def __iter__(self) -> Iterator[ResolvedImage]:
        return iter(self._dependencies.values())

    def __getitem__(self, key: str) -> ResolvedImage:
        return self._dependencies[key]
1386
1387
1388class Repository:
1389    """A collection of mzbuild `Image`s.
1390
1391    Creating a repository will walk the filesystem beneath `root` to
1392    automatically discover all contained `Image`s.
1393
1394    Iterating over a repository yields the contained images in an arbitrary
1395    order.
1396
1397    Args:
1398        root: The path to the root of the repository.
1399        arch: The CPU architecture to build for.
1400        profile: What profile to build the repository in.
1401        coverage: Whether to enable code coverage instrumentation.
        sanitizer: Which sanitizer to enable (address, thread, leak, memory, none).
1403        image_registry: The Docker image registry to pull images from and push
1404            images to.
1405        image_prefix: A prefix to apply to all Docker image names.
1406
1407    Attributes:
1408        images: A mapping from image name to `Image` for all contained images.
1409        compose_dirs: The set of directories containing a `mzcompose.py` file.
1410    """
1411
1412    def __init__(
1413        self,
1414        root: Path,
1415        arch: Arch = Arch.host(),
1416        profile: Profile = (
1417            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
1418        ),
1419        coverage: bool = False,
1420        sanitizer: Sanitizer = Sanitizer.none,
1421        image_registry: str = image_registry(),
1422        image_prefix: str = "",
1423    ):
1424        self.rd = RepositoryDetails(
1425            root,
1426            arch,
1427            profile,
1428            coverage,
1429            sanitizer,
1430            image_registry,
1431            image_prefix,
1432        )
1433        self.images: dict[str, Image] = {}
1434        self.compositions: dict[str, Path] = {}
1435        for rel_path_s in sorted(
1436            git.expand_globs(self.root, "**/mzbuild.yml", "**/mzcompose.py")
1437        ):
1438            rel_path = Path(rel_path_s)
1439            if rel_path.parts[:2] == ("misc", "python"):
1440                continue
1441
1442            parent = self.root / rel_path.parent
1443            if rel_path.name == "mzbuild.yml":
1444                image = Image(self.rd, parent)
1445                if not image.name:
1446                    raise ValueError(f"config at {parent} missing name")
1447                if image.name in self.images:
1448                    raise ValueError(f"image {image.name} exists twice")
1449                self.images[image.name] = image
1450            elif rel_path.name == "mzcompose.py":
1451                name = parent.name
1452                if name in self.compositions:
1453                    raise ValueError(f"composition {name} exists twice")
1454                self.compositions[name] = parent
1455
1456        # Validate dependencies.
1457        for image in self.images.values():
1458            for d in image.depends_on:
1459                if d not in self.images:
1460                    raise ValueError(
1461                        f"image {image.name} depends on non-existent image {d}"
1462                    )
1463
1464    @staticmethod
1465    def install_arguments(parser: argparse.ArgumentParser) -> None:
1466        """Install options to configure a repository into an argparse parser.
1467
1468        This function installs the following options:
1469
1470          * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the
1471            `profile` repository attribute.
1472          * The `--coverage` boolean option to control the `coverage` repository
1473            attribute.
1474
1475        Use `Repository.from_arguments` to construct a repository from the
1476        parsed command-line arguments.
1477        """
1478        build_mode = parser.add_mutually_exclusive_group()
1479        build_mode.add_argument(
1480            "--dev",
1481            action="store_true",
1482            help="build Rust binaries with the dev profile",
1483        )
1484        build_mode.add_argument(
1485            "--release",
1486            action="store_true",
1487            help="build Rust binaries with the release profile (default)",
1488        )
1489        build_mode.add_argument(
1490            "--optimized",
1491            action="store_true",
1492            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
1493        )
1494        parser.add_argument(
1495            "--coverage",
1496            help="whether to enable code coverage compilation flags",
1497            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
1498            action="store_true",
1499        )
1500        parser.add_argument(
1501            "--sanitizer",
1502            help="whether to enable a sanitizer",
1503            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
1504            type=Sanitizer,
1505            choices=Sanitizer,
1506        )
1507
1508        def _parse_arch(s: str) -> Arch:
1509            try:
1510                return Arch(s)
1511            except ValueError:
1512                valid = ", ".join(m.value for m in Arch)
1513                raise argparse.ArgumentTypeError(
1514                    f"invalid arch: {s!r} (choose from {valid})"
1515                )
1516
1517        parser.add_argument(
1518            "--arch",
1519            default=Arch.host(),
1520            help="the CPU architecture to build for",
1521            type=_parse_arch,
1522            metavar="{" + ",".join(m.value for m in Arch) + "}",
1523        )
1524        parser.add_argument(
1525            "--image-registry",
1526            default=image_registry(),
1527            help="the Docker image registry to pull images from and push images to",
1528        )
1529        parser.add_argument(
1530            "--image-prefix",
1531            default="",
1532            help="a prefix to apply to all Docker image names",
1533        )
1534
1535    @classmethod
1536    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
1537        """Construct a repository from command-line arguments.
1538
1539        The provided namespace must contain the options installed by
1540        `Repository.install_arguments`.
1541        """
1542        if args.release:
1543            profile = Profile.RELEASE
1544        elif args.optimized:
1545            profile = Profile.OPTIMIZED
1546        elif args.dev:
1547            profile = Profile.DEV
1548        else:
1549            profile = (
1550                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
1551            )
1552
1553        return cls(
1554            root,
1555            profile=profile,
1556            coverage=args.coverage,
1557            sanitizer=args.sanitizer,
1558            image_registry=args.image_registry,
1559            image_prefix=args.image_prefix,
1560            arch=args.arch,
1561        )
1562
1563    @property
1564    def root(self) -> Path:
1565        """The path to the root directory for the repository."""
1566        return self.rd.root
1567
1568    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
1569        """Compute the dependency set necessary to build target images.
1570
1571        The dependencies of `targets` will be crawled recursively until the
1572        complete set of transitive dependencies is determined or a circular
1573        dependency is discovered. The returned dependency set will be sorted
1574        in topological order.
1575
1576        Raises:
1577           ValueError: A circular dependency was discovered in the images
1578               in the repository.
1579        """
1580        # Pre-fetch all crate input files in a single batched git call,
1581        # replacing ~118 individual subprocess pairs with one pair.
1582        self.rd.cargo_workspace.precompute_crate_inputs()
1583        # Pre-fetch all image context files in a single batched git call,
1584        # replacing ~41 individual subprocess pairs with one pair.
1585        self._precompute_image_context_files()
1586
1587        resolved = OrderedDict()
1588        visiting = set()
1589
1590        def visit(image: Image, path: list[str] = []) -> None:
1591            if image.name in resolved:
1592                return
1593            if image.name in visiting:
1594                diagram = " -> ".join(path + [image.name])
1595                raise ValueError(f"circular dependency in mzbuild: {diagram}")
1596
1597            visiting.add(image.name)
1598            for d in sorted(image.depends_on):
1599                visit(self.images[d], path + [image.name])
1600            resolved[image.name] = image
1601
1602        for target_image in sorted(targets, key=lambda image: image.name):
1603            visit(target_image)
1604
1605        return DependencySet(resolved.values())
1606
1607    def _precompute_image_context_files(self) -> None:
1608        """Pre-fetch all image context files in a single batched git call.
1609
1610        This replaces ~41 individual pairs of git subprocess calls (one per
1611        image) with a single pair, then partitions the results by image path.
1612        """
1613        root = self.rd.root
1614        # Use paths relative to root for git specs and partitioning, since
1615        # git --relative outputs paths relative to cwd (root). Image paths
1616        # may be absolute when MZ_ROOT is an absolute path.
1617        image_rel_paths = sorted(
1618            set(str(img.path.relative_to(root)) for img in self.images.values())
1619        )
1620        specs = [f"{p}/**" for p in image_rel_paths]
1621
1622        empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
1623        diff_files = spawn.capture(
1624            ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"]
1625            + specs,
1626            cwd=root,
1627        )
1628        ls_files = spawn.capture(
1629            ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs,
1630            cwd=root,
1631        )
1632        all_files = set(
1633            f for f in (diff_files + ls_files).split("\0") if f.strip() != ""
1634        )
1635
1636        # Partition files by image path (longest match first for nested paths)
1637        image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths}
1638        sorted_paths = sorted(image_rel_paths, key=len, reverse=True)
1639        for f in all_files:
1640            for ip in sorted_paths:
1641                if f.startswith(ip + "/"):
1642                    image_file_map[ip].add(f)
1643                    break
1644
1645        for img in self.images.values():
1646            rel = str(img.path.relative_to(root))
1647            img._context_files_cache = image_file_map.get(rel, set())
1648
1649    def __iter__(self) -> Iterator[Image]:
1650        return iter(self.images.values())
1651
1652
def publish_multiarch_images(
    tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Publish multi-arch manifests for a set of Docker images under a tag.

    The dependency sets are walked in lockstep: the images found at the same
    position of every set must share one name and are combined into a single
    manifest named after `tag`. Because `zip` is used, iteration stops at the
    shortest dependency set.

    A manifest is created and pushed when `tag` is one of the always-pushed
    tags (`latest`, `unstable`) or when the manifest is not already pushed.
    If the `GITHUB_GHCR_TOKEN` environment variable is set, the function also
    logs in to ghcr.io and publishes the same manifest under `GHCR_PREFIX`.
    Finally a Buildkite annotation is written for the tag.

    Args:
        tag: The Docker tag to publish the manifests under.
        dependency_sets: One collection of resolved images per architecture,
            all listing the same images in the same order.
    """
    always_push_tags = ("latest", "unstable")
    if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"):
        spawn.runv(
            [
                "docker",
                "login",
                "ghcr.io",
                "-u",
                "materialize-bot",
                "--password-stdin",
            ],
            stdin=ghcr_token.encode(),
        )
    for images in zip(*dependency_sets):
        names = set(image.image.name for image in images)
        assert len(names) == 1, "dependency sets did not contain identical images"
        name = images[0].image.docker_name(tag)
        if tag in always_push_tags or not is_docker_image_pushed(name):
            # Create and push are retried together as a single unit.
            spawn.run_with_retries(
                lambda: (
                    spawn.runv(
                        [
                            "docker",
                            "manifest",
                            "create",
                            "--amend",
                            name,
                            *(image.spec() for image in images),
                        ]
                    ),
                    spawn.runv(["docker", "manifest", "push", name]),
                )
            )

        ghcr_name = f"{GHCR_PREFIX}{name}"
        if ghcr_token and (
            tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name)
        ):
            spawn.run_with_retries(
                lambda: (
                    spawn.runv(
                        [
                            "docker",
                            "manifest",
                            "create",
                            "--amend",
                            ghcr_name,
                            *(f"{GHCR_PREFIX}{image.spec()}" for image in images),
                        ]
                    ),
                    spawn.runv(["docker", "manifest", "push", ghcr_name]),
                )
            )
    print(f"--- Notifying for tag {tag}")
    markdown = f"""Pushed images with Docker tag `{tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{tag}",
        ],
        stdin=markdown.encode(),
    )
GHCR_PREFIX = 'ghcr.io/materializeinc/'
class RustIncrementalBuildFailure(builtins.Exception):
class RustIncrementalBuildFailure(Exception):
    """Raised when a failed build's output matches a known incremental build failure."""

Common base class for all non-exit exceptions.

def run_and_detect_rust_incremental_build_failure( cmd: list[str], cwd: str | pathlib.Path, env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
 62def run_and_detect_rust_incremental_build_failure(
 63    cmd: list[str],
 64    cwd: str | Path,
 65    env: dict[str, str] | None = None,
 66) -> subprocess.CompletedProcess:
 67    """This function is complex since it prints out each line immediately to
 68    stdout/stderr, but still records them at the same time so that we can scan
 69    for known incremental build failures."""
 70    stdout_result = io.StringIO()
 71    stderr_result = io.StringIO()
 72    base_env = env if env is not None else os.environ
 73    p = subprocess.Popen(
 74        cmd,
 75        stdout=subprocess.PIPE,
 76        stderr=subprocess.PIPE,
 77        text=True,
 78        bufsize=1,
 79        env={**base_env, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"},
 80    )
 81
 82    sel = selectors.DefaultSelector()
 83    sel.register(p.stdout, selectors.EVENT_READ)  # type: ignore
 84    sel.register(p.stderr, selectors.EVENT_READ)  # type: ignore
 85    assert p.stdout is not None
 86    assert p.stderr is not None
 87    os.set_blocking(p.stdout.fileno(), False)
 88    os.set_blocking(p.stderr.fileno(), False)
 89    running = True
 90    while running:
 91        for key, val in sel.select():
 92            output = io.StringIO()
 93            running = False
 94            while True:
 95                new_output = key.fileobj.read(1024)  # type: ignore
 96                if not new_output:
 97                    break
 98                output.write(new_output)
 99            contents = output.getvalue()
100            output.close()
101            if not contents:
102                continue
103            # Keep running as long as stdout or stderr have any content
104            running = True
105            if key.fileobj is p.stdout:
106                print(
107                    contents,
108                    end="",
109                    flush=True,
110                )
111                stdout_result.write(contents)
112            else:
113                print(
114                    contents,
115                    end="",
116                    file=sys.stderr,
117                    flush=True,
118                )
119                stderr_result.write(contents)
120    p.wait()
121    retcode = p.poll()
122    assert retcode is not None
123    stdout_contents = stdout_result.getvalue()
124    stdout_result.close()
125    stderr_contents = stderr_result.getvalue()
126    stderr_result.close()
127    if retcode:
128        incremental_build_failure_msgs = [
129            "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs",
130            "Found unstable fingerprints for",
131            "ld.lld: error: undefined symbol",
132            "signal: 11, SIGSEGV",
133        ]
134        combined = stdout_contents + stderr_contents
135        if any(msg in combined for msg in incremental_build_failure_msgs):
136            raise RustIncrementalBuildFailure()
137
138        raise subprocess.CalledProcessError(
139            retcode, p.args, output=stdout_contents, stderr=stderr_contents
140        )
141    return subprocess.CompletedProcess(
142        p.args, retcode, stdout_contents, stderr_contents
143    )

This function is complex since it prints out each line immediately to stdout/stderr, but still records them at the same time so that we can scan for known incremental build failures.

class Fingerprint(builtins.bytes):
class Fingerprint(bytes):
    """A SHA-1 hash of the inputs to an `Image`.

    Converting a fingerprint to `str` yields its base32 encoding. Base32 is
    used so that mzbuild fingerprints are easy to tell apart from Git's
    hex-encoded SHA-1 hashes, while remaining URL safe.
    """

    def __str__(self) -> str:
        encoded = base64.b32encode(self)
        return encoded.decode()

A SHA-1 hash of the inputs to an Image.

The string representation uses base32 encoding to distinguish mzbuild fingerprints from Git's hex encoded SHA-1 hashes while still being URL safe.

class Profile(enum.Enum):
class Profile(Enum):
    """The build profile a repository is built with."""

    # Members are numbered 1, 2, 3 in declaration order by `auto()`.
    RELEASE = auto()
    OPTIMIZED = auto()
    DEV = auto()
RELEASE = <Profile.RELEASE: 1>
OPTIMIZED = <Profile.OPTIMIZED: 2>
DEV = <Profile.DEV: 3>
class RepositoryDetails:
class RepositoryDetails:
    """Immutable details about a `Repository`.

    Used internally by mzbuild.

    Attributes:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile the repository is being built with.
        coverage: Whether the repository has code coverage instrumentation
            enabled.
        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
        cargo_workspace: The `cargo.Workspace` associated with the repository.
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch,
        profile: Profile,
        coverage: bool,
        sanitizer: Sanitizer,
        image_registry: str,
        image_prefix: str,
    ):
        # Where the repository lives and the workspace rooted there.
        self.root = root
        self.cargo_workspace = cargo.Workspace(root)
        # How binaries are compiled.
        self.arch = arch
        self.profile = profile
        self.coverage = coverage
        self.sanitizer = sanitizer
        # How Docker images are named and where they are stored.
        self.image_registry = image_registry
        self.image_prefix = image_prefix

    def build(
        self,
        subcommand: str,
        rustflags: list[str],
        channel: str | None = None,
        extra_env: dict[str, str] = {},
    ) -> list[str]:
        """Return the Cargo command line for the configured architecture.

        All arguments are forwarded unchanged to `xcompile.cargo`, together
        with this repository's architecture.
        """
        return xcompile.cargo(
            arch=self.arch,
            subcommand=subcommand,
            rustflags=rustflags,
            channel=channel,
            extra_env=extra_env,
        )

    def tool(self, name: str) -> list[str]:
        """Return the command line that invokes the binutils tool `name`."""
        if platform.system() == "Linux":
            # If we're on Linux, trust that the tools are installed instead of
            # loading the slow ci-builder. If you don't have compilation tools
            # installed you can still run `bin/ci-builder run stable
            # bin/mzcompose ...`, and most likely the Cargo build will already
            # fail earlier if you don't have compilation tools installed and
            # run without the ci-builder.
            return [name]
        # We can't use the local tools from macOS to build a Linux executable
        return ["bin/ci-builder", "run", "stable", name]

    def cargo_target_dir(self) -> Path:
        """Return Cargo's target directory for the configured architecture."""
        triple = xcompile.target(self.arch)
        return self.root / "target-xcompile" / triple

    def rewrite_builder_path_for_host(self, path: Path) -> Path:
        """Rewrite a path that is relative to the target directory inside the
        builder to a path that is relative to the target directory on the host.

        If path is not relative to the target directory inside the builder,
        it is returned unchanged.
        """
        in_builder = Path("/mnt/build") / xcompile.target(self.arch)
        try:
            return self.cargo_target_dir() / path.relative_to(in_builder)
        except ValueError:
            # `relative_to` raises ValueError when `path` is not below
            # `in_builder`.
            return path

Immutable details about a Repository.

Used internally by mzbuild.

Attributes: root: The path to the root of the repository. arch: The CPU architecture to build for. profile: What profile the repository is being built with. coverage: Whether the repository has code coverage instrumentation enabled. sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none) cargo_workspace: The cargo.Workspace associated with the repository. image_registry: The Docker image registry to pull images from and push images to. image_prefix: A prefix to apply to all Docker image names.

RepositoryDetails( root: pathlib.Path, arch: materialize.xcompile.Arch, profile: Profile, coverage: bool, sanitizer: materialize.rustc_flags.Sanitizer, image_registry: str, image_prefix: str)
182    def __init__(
183        self,
184        root: Path,
185        arch: Arch,
186        profile: Profile,
187        coverage: bool,
188        sanitizer: Sanitizer,
189        image_registry: str,
190        image_prefix: str,
191    ):
192        self.root = root
193        self.arch = arch
194        self.profile = profile
195        self.coverage = coverage
196        self.sanitizer = sanitizer
197        self.cargo_workspace = cargo.Workspace(root)
198        self.image_registry = image_registry
199        self.image_prefix = image_prefix
root
arch
profile
coverage
sanitizer
cargo_workspace
image_registry
image_prefix
def build( self, subcommand: str, rustflags: list[str], channel: str | None = None, extra_env: dict[str, str] = {}) -> list[str]:
201    def build(
202        self,
203        subcommand: str,
204        rustflags: list[str],
205        channel: str | None = None,
206        extra_env: dict[str, str] = {},
207    ) -> list[str]:
208        """Start a build invocation for the configured architecture."""
209        return xcompile.cargo(
210            arch=self.arch,
211            channel=channel,
212            subcommand=subcommand,
213            rustflags=rustflags,
214            extra_env=extra_env,
215        )

Start a build invocation for the configured architecture.

def tool(self, name: str) -> list[str]:
217    def tool(self, name: str) -> list[str]:
218        """Start a binutils tool invocation for the configured architecture."""
219        if platform.system() != "Linux":
220            # We can't use the local tools from macOS to build a Linux executable
221            return ["bin/ci-builder", "run", "stable", name]
222        # If we're on Linux, trust that the tools are installed instead of
223        # loading the slow ci-builder. If you don't have compilation tools
224        # installed you can still run `bin/ci-builder run stable
225        # bin/mzcompose ...`, and most likely the Cargo build will already
226        # fail earlier if you don't have compilation tools installed and
227        # run without the ci-builder.
228        return [name]

Start a binutils tool invocation for the configured architecture.

def cargo_target_dir(self) -> pathlib.Path:
230    def cargo_target_dir(self) -> Path:
231        """Determine the path to the target directory for Cargo."""
232        return self.root / "target-xcompile" / xcompile.target(self.arch)

Determine the path to the target directory for Cargo.

def rewrite_builder_path_for_host(self, path: pathlib.Path) -> pathlib.Path:
234    def rewrite_builder_path_for_host(self, path: Path) -> Path:
235        """Rewrite a path that is relative to the target directory inside the
236        builder to a path that is relative to the target directory on the host.
237
238        If path does is not relative to the target directory inside the builder,
239        it is returned unchanged.
240        """
241        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
242        try:
243            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
244        except ValueError:
245            return path

Rewrite a path that is relative to the target directory inside the builder to a path that is relative to the target directory on the host.

If path is not relative to the target directory inside the builder, it is returned unchanged.

@cache
def docker_images() -> frozenset[str]:
@cache
def docker_images() -> frozenset[str]:
    """Return the `repository:tag` names of the local machine's Docker images.

    The result comes from a single `docker images` invocation and is cached
    for the lifetime of the process.
    """
    listing = spawn.capture(
        ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"]
    )
    return frozenset(listing.strip().split("\n"))

List the Docker images available on the local machine.

KNOWN_DOCKER_IMAGES_FILE = PosixPath('/var/lib/buildkite-agent/builds/buildkite-15f2293-i-0d8afd624b584b65d-1/materialize/deploy/known-docker-images.txt')
def is_docker_image_pushed(name: str) -> bool:
def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Note that this operation requires a rather slow network request. Images
    found to exist are remembered in memory and appended to
    `KNOWN_DOCKER_IMAGES_FILE`, so later checks for them return at once.

    Args:
        name: The image name, optionally followed by `:tag`. The tag
            defaults to `latest`.

    Returns:
        True if the image is known or found to exist. False if it does not
        exist or if the check failed with an exception.
    """
    global _known_docker_images

    if _known_docker_images is None:
        with _known_docker_images_lock:
            # Re-check under the lock: another thread may have loaded the
            # cache while this one was waiting, and it must not be re-read
            # and rebound a second time.
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = {line.strip() for line in f}

    if name in _known_docker_images:
        return True

    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    try:
        if dockerhub_username and dockerhub_token:
            # Credentials are configured: authenticate with HTTP basic auth.
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
                timeout=10,
            )
        else:
            # No credentials: fetch an anonymous pull token first.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
                timeout=10,
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
                timeout=10,
            )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: a failed check is reported and treated as "not pushed".
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists

Check whether the named image is pushed to Docker Hub.

Note that this operation requires a rather slow network request.

def is_ghcr_image_pushed(name: str) -> bool:
def is_ghcr_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to the GitHub container registry.

    Images found to exist are remembered in memory and appended to
    `KNOWN_DOCKER_IMAGES_FILE`, the same cache `is_docker_image_pushed` uses,
    so later checks for them return without a network request.

    Args:
        name: The image name, optionally prefixed with `ghcr.io/` and
            optionally followed by `:tag`. The tag defaults to `latest`.

    Returns:
        True if the image is known or found to exist. False if it does not
        exist or if the check failed with an exception.
    """
    global _known_docker_images

    if _known_docker_images is None:
        with _known_docker_images_lock:
            # Re-check under the lock: another thread may have loaded the
            # cache while this one was waiting, and it must not be re-read
            # and rebound a second time.
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = {line.strip() for line in f}

    # The cache is keyed by the full name; the registry API wants the name
    # without the registry host.
    name_without_ghcr = name.removeprefix("ghcr.io/")
    if name in _known_docker_images:
        return True

    if ":" not in name_without_ghcr:
        image, tag = name_without_ghcr, "latest"
    else:
        image, tag = name_without_ghcr.rsplit(":", 1)

    exists: bool = False

    try:
        token = requests.get(
            "https://ghcr.io/token",
            params={
                "scope": f"repository:{image}:pull",
            },
            timeout=10,
        ).json()["token"]
        response = requests.head(
            f"https://ghcr.io/v2/{image}/manifests/{tag}",
            headers={"Authorization": f"Bearer {token}"},
            timeout=10,
        )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: a failed check is reported and treated as "not pushed".
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
def chmod_x(path: pathlib.Path) -> None:
def chmod_x(path: Path) -> None:
    """Make a file or directory executable by everyone who can read it.

    Each read permission bit (user, group, other) is copied onto the matching
    execute bit; all other mode bits are left as they are.
    """
    # https://stackoverflow.com/a/30463972/1122351
    current = os.stat(path).st_mode
    read_bits = current & 0o444
    # An execute bit sits two positions below its read bit.
    os.chmod(path, current | (read_bits >> 2))

Set the executable bit on a file or directory.

class PreImage:
class PreImage:
    """An action to run before building a Docker image.

    This base class performs no work itself: `prepare_batch` and `run` do
    nothing, `extra` contributes an empty string, and `inputs` must be
    provided by subclasses.

    Args:
        rd: The `RepositoryDetails` for the repository.
        path: The path to the `Image` associated with this action.
    """

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd, self.path = rd, path

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
        """Prepare a batch of actions.

        This is useful for `PreImage` actions that are more efficient when
        their actions are applied to several images in bulk.

        Returns an arbitrary output that is passed to `PreImage.run`. The
        base implementation returns `None`.
        """
        return None

    def run(self, prep: Any) -> None:
        """Perform the action.

        Args:
            prep: Any prep work returned by `prepare_batch`.
        """
        return None

    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action."""
        raise NotImplementedError

    def extra(self) -> str:
        """Returns additional data for incorporation in the fingerprint."""
        return ""

An action to run before building a Docker image.

Args: rd: The RepositoryDetails for the repository. path: The path to the Image associated with this action.

PreImage( rd: RepositoryDetails, path: pathlib.Path)
421    def __init__(self, rd: RepositoryDetails, path: Path):
422        self.rd = rd
423        self.path = path
rd
path
@classmethod
def prepare_batch(cls, instances: list[PreImage]) -> Any:
425    @classmethod
426    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
427        """Prepare a batch of actions.
428
429        This is useful for `PreImage` actions that are more efficient when
430        their actions are applied to several images in bulk.
431
432        Returns an arbitrary output that is passed to `PreImage.run`.
433        """
434        pass

Prepare a batch of actions.

This is useful for PreImage actions that are more efficient when their actions are applied to several images in bulk.

Returns an arbitrary output that is passed to PreImage.run.

def run(self, prep: Any) -> None:
436    def run(self, prep: Any) -> None:
437        """Perform the action.
438
439        Args:
440            prep: Any prep work returned by `prepare_batch`.
441        """
442        pass

Perform the action.

Args: prep: Any prep work returned by prepare_batch.

def inputs(self) -> set[str]:
444    def inputs(self) -> set[str]:
445        """Return the files which are considered inputs to the action."""
446        raise NotImplementedError

Return the files which are considered inputs to the action.

def extra(self) -> str:
448    def extra(self) -> str:
449        """Returns additional data for incorporation in the fingerprint."""
450        return ""

Returns additional data for incorporation in the fingerprint.

class Copy(PreImage):
class Copy(PreImage):
    """A `PreImage` action which copies files from a directory.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        """Consume the `source`, `destination` and `matching` config keys.

        Raises:
            ValueError: `source` or `destination` is missing from `config`.
        """
        super().__init__(rd, path)

        source = config.pop("source", None)
        self.source = source
        if source is None:
            raise ValueError("mzbuild config is missing 'source' argument")

        destination = config.pop("destination", None)
        self.destination = destination
        if destination is None:
            raise ValueError("mzbuild config is missing 'destination' argument")

        # Without an explicit pattern, everything in the source matches.
        self.matching = config.pop("matching", "*")

    def run(self, prep: Any) -> None:
        """Copy every input file into the image's destination directory."""
        super().run(prep)
        for rel in self.inputs():
            target = self.path / self.destination / rel
            # Input paths may be nested, so create parent directories first.
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(self.rd.root / self.source / rel, target)

    def inputs(self) -> set[str]:
        """Return the files under the source directory matching `matching`."""
        source_dir = self.rd.root / self.source
        return set(git.expand_globs(source_dir, self.matching))

A PreImage action which copies files from a directory.

See doc/developer/mzbuild.md for an explanation of the user-facing parameters.

Copy( rd: RepositoryDetails, path: pathlib.Path, config: dict[str, typing.Any])
460    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
461        super().__init__(rd, path)
462
463        self.source = config.pop("source", None)
464        if self.source is None:
465            raise ValueError("mzbuild config is missing 'source' argument")
466
467        self.destination = config.pop("destination", None)
468        if self.destination is None:
469            raise ValueError("mzbuild config is missing 'destination' argument")
470
471        self.matching = config.pop("matching", "*")
source
destination
matching
def run(self, prep: Any) -> None:
473    def run(self, prep: Any) -> None:
474        super().run(prep)
475        for src in self.inputs():
476            dst = self.path / self.destination / src
477            dst.parent.mkdir(parents=True, exist_ok=True)
478            shutil.copy(self.rd.root / self.source / src, dst)

Perform the action.

Args: prep: Any prep work returned by prepare_batch.

def inputs(self) -> set[str]:
480    def inputs(self) -> set[str]:
481        return set(git.expand_globs(self.rd.root / self.source, self.matching))

Return the files which are considered inputs to the action.

Inherited Members
PreImage
rd
path
prepare_batch
extra
class CargoPreImage(PreImage):
class CargoPreImage(PreImage):
    """A `PreImage` action that uses Cargo."""

    @staticmethod
    @cache
    def _cargo_shared_inputs() -> frozenset[str]:
        """Resolve shared Cargo inputs once and cache the result.

        This expands the 'ci/builder' directory glob and filters out
        non-existent files like '.cargo/config', avoiding repeated
        git subprocess calls in fingerprint().
        """
        # NOTE(review): both the glob and the existence check use paths
        # relative to the current working directory - presumably the
        # repository root; confirm against callers.
        inputs: set[str] = set()
        inputs |= git.expand_globs(Path("."), "ci/builder/**")
        inputs.add("Cargo.toml")
        inputs.add("Cargo.lock")
        if Path(".cargo/config").exists():
            inputs.add(".cargo/config")
        return frozenset(inputs)

    def inputs(self) -> set[str]:
        """Return a fresh mutable copy of the shared Cargo inputs."""
        return set(CargoPreImage._cargo_shared_inputs())

    def extra(self) -> str:
        """Return the build flags that influence Cargo images.

        The result is a comma-separated, sorted list of the flags that apply:
        `release` or `optimized` for the profile, `coverage`, and the
        sanitizer's value when a sanitizer is enabled.
        """
        # Cargo images depend on the release mode and whether
        # coverage/sanitizer is enabled.
        #
        # Flags are appended as whole strings. `flags += "release"` would
        # extend the list with the individual characters instead. Note that
        # this changes the returned string, and hence any fingerprint
        # derived from it, compared with the character-based form.
        flags: list[str] = []
        if self.rd.profile == Profile.RELEASE:
            flags.append("release")
        if self.rd.profile == Profile.OPTIMIZED:
            flags.append("optimized")
        if self.rd.coverage:
            flags.append("coverage")
        if self.rd.sanitizer != Sanitizer.none:
            flags.append(self.rd.sanitizer.value)
        flags.sort()
        return ",".join(flags)

A PreImage action that uses Cargo.

def inputs(self) -> set[str]:
    """Return the files which are considered inputs to the action.

    The shared Cargo inputs are cached as a frozenset; a new mutable set is
    built from them on every call.
    """
    shared = CargoPreImage._cargo_shared_inputs()
    return set(shared)

Return the files which are considered inputs to the action.

def extra(self) -> str:
    """Return additional data for incorporation in the fingerprint.

    Cargo images depend on the release mode and whether coverage/sanitizer
    is enabled, so each active setting contributes one flag name.

    Returns:
        The active flag names, sorted and joined with commas; the empty
        string when no flag applies.
    """
    flags: list[str] = []
    # NOTE: these must be `append`, not `+=`. `list += str` extends the list
    # with the individual characters of the string, which turned e.g.
    # "release" into "a,e,e,e,l,r,s" after sorting and joining. Correcting
    # this changes the string returned for every non-default configuration.
    if self.rd.profile == Profile.RELEASE:
        flags.append("release")
    if self.rd.profile == Profile.OPTIMIZED:
        flags.append("optimized")
    if self.rd.coverage:
        flags.append("coverage")
    if self.rd.sanitizer != Sanitizer.none:
        # Assumes `Sanitizer` values are plain strings -- TODO confirm.
        flags.append(self.rd.sanitizer.value)
    flags.sort()
    return ",".join(flags)

Returns additional data for incorporation in the fingerprint.

class CargoBuild(CargoPreImage):
    """A `PreImage` action that builds binaries and examples with Cargo.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        """Read the action's settings out of `config`.

        The keys `bin`, `example`, `strip`, `extract` and `features` are
        popped from `config` (so the caller's dict is modified). `bin`,
        `example` and `features` may each be a single value or a list.

        Raises:
            ValueError: If neither a `bin` nor an `example` is configured.
        """
        super().__init__(rd, path)

        def as_list(value: Any) -> list[Any]:
            # A scalar in the config is shorthand for a one-element list.
            return value if isinstance(value, list) else [value]

        self.bins = as_list(config.pop("bin", []))
        self.examples = as_list(config.pop("example", []))
        self.strip = config.pop("strip", True)
        self.extract = config.pop("extract", {})
        self.features = as_list(config.pop("features", []))

        if not self.bins and not self.examples:
            raise ValueError("mzbuild config is missing pre-build target")

    @staticmethod
    def generate_cargo_build_command(
        rd: RepositoryDetails,
        bins: list[str],
        examples: list[str],
        features: list[str] | None = None,
    ) -> list[str]:
        """Assemble the `cargo build` command line for the given targets.

        Args:
            rd: Repository details supplying the profile, coverage and
                sanitizer settings, the architecture and the Cargo workspace.
            bins: Binary targets, each passed as `--bin`.
            examples: Example targets, each passed as `--example`.
            features: Optional feature names, passed as a single
                comma-separated `--features=` argument.

        Returns:
            The command returned by `rd.build(...)` with the target, package,
            profile, sanitizer and feature arguments appended.
        """
        sanitizing = rd.sanitizer != Sanitizer.none

        # Coverage flags take precedence over sanitizer flags.
        if rd.coverage:
            rustflags = rustc_flags.coverage
        elif sanitizing:
            rustflags = rustc_flags.sanitizer[rd.sanitizer]
        else:
            rustflags = ["--cfg=tokio_unstable"]

        extra_env: dict[str, str] = {}
        if sanitizing:
            triple = target(rd.arch)
            toolchain = f"/opt/x-tools/{triple}"
            cflags = " ".join(
                [
                    f"--target={triple}",
                    f"--gcc-toolchain={toolchain}/",
                    "-fuse-ld=lld",
                    f"--sysroot={toolchain}/{triple}/sysroot",
                    f"-L{toolchain}/{triple}/lib64",
                ]
                + rustc_flags.sanitizer_cflags[rd.sanitizer]
            )
            extra_env = {
                "CFLAGS": cflags,
                "CXXFLAGS": cflags,
                "LDFLAGS": cflags,
                "CXXSTDLIB": "stdc++",
                "CC": "cc",
                "CXX": "c++",
                "CPP": "clang-cpp-18",
                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "PATH": f"/sanshim:{toolchain}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
            }

        cargo_build = rd.build(
            "build", channel=None, rustflags=rustflags, extra_env=extra_env
        )

        packages: set[str] = set()
        for bin_name in bins:
            cargo_build.extend(["--bin", bin_name])
            packages.add(rd.cargo_workspace.crate_for_bin(bin_name).name)
        for example in examples:
            cargo_build.extend(["--example", example])
            packages.add(rd.cargo_workspace.crate_for_example(example).name)
        # Sorted so the command line is identical from run to run; iterating
        # a set of strings directly depends on per-process hash
        # randomization.
        cargo_build.extend(f"--package={p}" for p in sorted(packages))

        if rd.profile == Profile.RELEASE:
            cargo_build.append("--release")
        if rd.profile == Profile.OPTIMIZED:
            cargo_build.extend(["--profile", "optimized"])
        if sanitizing:
            # ASan doesn't work with jemalloc
            cargo_build.append("--no-default-features")
            # Uses more memory, so reduce the number of jobs
            cargo_build.extend(
                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
            )
        if features:
            cargo_build.append(f"--features={','.join(features)}")

        return cargo_build

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> dict[str, Any]:
        """Build every target of every given action in one Cargo invocation.

        Args:
            instances: The actions to prepare; treated as `CargoBuild`
                instances.

        Returns:
            An empty dict when `instances` is empty; otherwise a dict whose
            `"cargo"` entry holds the captured output of re-running the build
            with `--message-format=json`.
        """
        super().prepare_batch(instances)

        if not instances:
            return {}

        # Building all binaries and examples in the same `cargo build` command
        # allows Cargo to link in parallel with other work, which can
        # meaningfully speed up builds.

        rd: RepositoryDetails | None = None
        builds = cast(list[CargoBuild], instances)
        bins: set[str] = set()
        examples: set[str] = set()
        features: set[str] = set()
        for build in builds:
            if not rd:
                rd = build.rd
            bins.update(build.bins)
            examples.update(build.examples)
            features.update(build.features)
        assert rd

        # Everything derived from the sets is sorted so that the log line and
        # the cargo command are reproducible between runs.
        ui.section(f"Common build for: {', '.join(sorted(bins | examples))}")

        cargo_build = cls.generate_cargo_build_command(
            rd,
            sorted(bins),
            sorted(examples),
            sorted(features) if features else None,
        )

        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

        # Re-run with JSON-formatted messages and capture the output so we can
        # later analyze the build artifacts in `run`. This should be nearly
        # instantaneous since we just compiled above with the same crates and
        # features. (We don't want to do the compile above with JSON-formatted
        # messages because it wouldn't be human readable.)
        json_output = spawn.capture(
            cargo_build + ["--message-format=json"],
            cwd=rd.root,
        )
        return {"cargo": json_output}

    def build(self, build_output: dict[str, Any]) -> None:
        """Copy the built artifacts into the mzbuild context.

        Each bin and example is copied from the Cargo target directory to
        `self.path` and post-processed with `strip` or `objcopy`. If
        `extract` is configured, files from matching build-script output
        directories are copied in as well.

        Args:
            build_output: The dict produced by `prepare_batch`; its `"cargo"`
                entry is read only when `extract` is configured.
        """
        if self.rd.profile == Profile.RELEASE:
            cargo_profile = "release"
        elif self.rd.profile == Profile.OPTIMIZED:
            cargo_profile = "optimized"
        else:
            cargo_profile = "debug"

        def copy(src: Path, relative_dst: Path) -> None:
            exe_path = self.path / relative_dst
            exe_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, exe_path)

            if self.strip:
                # The debug information is large enough that it slows down CI,
                # since we're packaging these binaries up into Docker images and
                # shipping them around.
                command = [*self.rd.tool("strip"), "--strip-debug", exe_path]
            else:
                # Even if we've been asked not to strip the binary, remove the
                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
                # indexes that speed up launching a debugger against the binary,
                # and we're happy to have slower debugger start up in exchange for
                # smaller binaries. Plus the sections have been obsoleted by a
                # `.debug_names` section in DWARF 5, and so debugger support for
                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
                # See: https://github.com/rust-lang/rust/issues/46034
                command = [
                    *self.rd.tool("objcopy"),
                    "-R",
                    ".debug_pubnames",
                    "-R",
                    ".debug_pubtypes",
                    exe_path,
                ]
            spawn.runv(command, cwd=self.rd.root)

        for bin_name in self.bins:
            copy(self.rd.cargo_target_dir() / cargo_profile / bin_name, bin_name)
        for example in self.examples:
            relative = Path("examples") / example
            copy(self.rd.cargo_target_dir() / cargo_profile / relative, relative)

        if self.extract:
            cargo_build_json_output = build_output["cargo"]

            target_dir = self.rd.cargo_target_dir()
            for line in cargo_build_json_output.split("\n"):
                # Only lines that look like JSON objects are cargo messages.
                if not (line.strip() and line.startswith("{")):
                    continue
                message = json.loads(line)
                if message["reason"] != "build-script-executed":
                    continue
                out_dir = self.rd.rewrite_builder_path_for_host(
                    Path(message["out_dir"])
                )
                if not out_dir.is_relative_to(target_dir):
                    # Some crates are built for both the host and the target.
                    # Ignore the built-for-host out dir.
                    continue
                # parse the package name from a package_id that looks like one of:
                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
                # file:///path/to/my-package#0.1.0
                package_id = message["package_id"]
                if "@" in package_id:
                    package = package_id.partition("@")[0].rpartition("#")[2]
                else:
                    package = package_id.partition("#")[0].rpartition("/")[2]
                for src, dst in self.extract.get(package, {}).items():
                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

        self.acquired = True

    def run(self, prep: dict[str, Any]) -> None:
        """Perform the action.

        Args:
            prep: Any prep work returned by `prepare_batch`.
        """
        super().run(prep)
        self.build(prep)

    @cache
    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action.

        That is the inherited inputs plus the inputs of every crate reachable
        through path dependencies from the configured bins and examples (dev
        dependencies included for examples).
        """
        deps = set()

        for bin_name in self.bins:
            crate = self.rd.cargo_workspace.crate_for_bin(bin_name)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
        for example in self.examples:
            crate = self.rd.cargo_workspace.crate_for_example(example)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
                crate, dev=True
            )

        shared = super().inputs()
        return shared | {inp for dep in deps for inp in dep.inputs()}

A PreImage action that builds one or more binaries or examples with Cargo.

See doc/developer/mzbuild.md for an explanation of the user-facing parameters.

CargoBuild( rd: RepositoryDetails, path: pathlib.Path, config: dict[str, typing.Any])
def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
    """Read the action's settings out of `config`.

    The keys `bin`, `example`, `strip`, `extract` and `features` are popped
    from `config` (so the caller's dict is modified). `bin`, `example` and
    `features` may each be a single value or a list.

    Raises:
        ValueError: If neither a `bin` nor an `example` is configured.
    """
    super().__init__(rd, path)

    def as_list(value: Any) -> list[Any]:
        # A scalar in the config is shorthand for a one-element list.
        return value if isinstance(value, list) else [value]

    self.bins = as_list(config.pop("bin", []))
    self.examples = as_list(config.pop("example", []))
    self.strip = config.pop("strip", True)
    self.extract = config.pop("extract", {})
    self.features = as_list(config.pop("features", []))

    if not self.bins and not self.examples:
        raise ValueError("mzbuild config is missing pre-build target")
bins
examples
strip
extract
features
@staticmethod
def generate_cargo_build_command(
    rd: RepositoryDetails,
    bins: list[str],
    examples: list[str],
    features: list[str] | None = None,
) -> list[str]:
    """Assemble the `cargo build` command line for the given targets.

    Args:
        rd: Repository details supplying the profile, coverage and sanitizer
            settings, the architecture and the Cargo workspace.
        bins: Binary targets, each passed as `--bin`.
        examples: Example targets, each passed as `--example`.
        features: Optional feature names, passed as a single comma-separated
            `--features=` argument.

    Returns:
        The command returned by `rd.build(...)` with the target, package,
        profile, sanitizer and feature arguments appended.
    """
    sanitizing = rd.sanitizer != Sanitizer.none

    # Coverage flags take precedence over sanitizer flags.
    if rd.coverage:
        rustflags = rustc_flags.coverage
    elif sanitizing:
        rustflags = rustc_flags.sanitizer[rd.sanitizer]
    else:
        rustflags = ["--cfg=tokio_unstable"]

    extra_env: dict[str, str] = {}
    if sanitizing:
        triple = target(rd.arch)
        toolchain = f"/opt/x-tools/{triple}"
        cflags = " ".join(
            [
                f"--target={triple}",
                f"--gcc-toolchain={toolchain}/",
                "-fuse-ld=lld",
                f"--sysroot={toolchain}/{triple}/sysroot",
                f"-L{toolchain}/{triple}/lib64",
            ]
            + rustc_flags.sanitizer_cflags[rd.sanitizer]
        )
        extra_env = {
            "CFLAGS": cflags,
            "CXXFLAGS": cflags,
            "LDFLAGS": cflags,
            "CXXSTDLIB": "stdc++",
            "CC": "cc",
            "CXX": "c++",
            "CPP": "clang-cpp-18",
            "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
            "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
            "PATH": f"/sanshim:{toolchain}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
        }

    cargo_build = rd.build(
        "build", channel=None, rustflags=rustflags, extra_env=extra_env
    )

    packages: set[str] = set()
    for bin_name in bins:
        cargo_build.extend(["--bin", bin_name])
        packages.add(rd.cargo_workspace.crate_for_bin(bin_name).name)
    for example in examples:
        cargo_build.extend(["--example", example])
        packages.add(rd.cargo_workspace.crate_for_example(example).name)
    # Sorted so the command line is identical from run to run; iterating a
    # set of strings directly depends on per-process hash randomization.
    cargo_build.extend(f"--package={p}" for p in sorted(packages))

    if rd.profile == Profile.RELEASE:
        cargo_build.append("--release")
    if rd.profile == Profile.OPTIMIZED:
        cargo_build.extend(["--profile", "optimized"])
    if sanitizing:
        # ASan doesn't work with jemalloc
        cargo_build.append("--no-default-features")
        # Uses more memory, so reduce the number of jobs
        cargo_build.extend(
            ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
        )
    if features:
        cargo_build.append(f"--features={','.join(features)}")

    return cargo_build
@classmethod
def prepare_batch(cls, instances: list["PreImage"]) -> dict[str, Any]:
    """Build every target of every given action in one Cargo invocation.

    Args:
        instances: The actions to prepare; treated as `CargoBuild` instances.

    Returns:
        An empty dict when `instances` is empty; otherwise a dict whose
        `"cargo"` entry holds the captured output of re-running the build
        with `--message-format=json`.
    """
    super().prepare_batch(instances)

    if not instances:
        return {}

    # Building all binaries and examples in the same `cargo build` command
    # allows Cargo to link in parallel with other work, which can
    # meaningfully speed up builds.

    rd: RepositoryDetails | None = None
    builds = cast(list[CargoBuild], instances)
    bins: set[str] = set()
    examples: set[str] = set()
    features: set[str] = set()
    for build in builds:
        if not rd:
            rd = build.rd
        bins.update(build.bins)
        examples.update(build.examples)
        features.update(build.features)
    assert rd

    # Everything derived from the sets is sorted so that the log line and the
    # cargo command are reproducible between runs; iterating a set of strings
    # directly depends on per-process hash randomization.
    ui.section(f"Common build for: {', '.join(sorted(bins | examples))}")

    cargo_build = cls.generate_cargo_build_command(
        rd,
        sorted(bins),
        sorted(examples),
        sorted(features) if features else None,
    )

    run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

    # Re-run with JSON-formatted messages and capture the output so we can
    # later analyze the build artifacts in `run`. This should be nearly
    # instantaneous since we just compiled above with the same crates and
    # features. (We don't want to do the compile above with JSON-formatted
    # messages because it wouldn't be human readable.)
    json_output = spawn.capture(
        cargo_build + ["--message-format=json"],
        cwd=rd.root,
    )
    return {"cargo": json_output}

Prepare a batch of actions.

This is useful for PreImage actions that are more efficient when their actions are applied to several images in bulk.

Returns an arbitrary output that is passed to PreImage.run.

def build(self, build_output: dict[str, Any]) -> None:
    """Copy the built artifacts into the mzbuild context.

    Each bin and example is copied from the Cargo target directory to
    `self.path` and post-processed with `strip` or `objcopy`. If `extract`
    is configured, files from matching build-script output directories are
    copied in as well.

    Args:
        build_output: The dict produced by `prepare_batch`; its `"cargo"`
            entry is read only when `extract` is configured.
    """
    if self.rd.profile == Profile.RELEASE:
        cargo_profile = "release"
    elif self.rd.profile == Profile.OPTIMIZED:
        cargo_profile = "optimized"
    else:
        cargo_profile = "debug"

    def copy(src: Path, relative_dst: Path) -> None:
        exe_path = self.path / relative_dst
        exe_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src, exe_path)

        if self.strip:
            # The debug information is large enough that it slows down CI,
            # since we're packaging these binaries up into Docker images and
            # shipping them around.
            command = [*self.rd.tool("strip"), "--strip-debug", exe_path]
        else:
            # Even if we've been asked not to strip the binary, remove the
            # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
            # indexes that speed up launching a debugger against the binary,
            # and we're happy to have slower debugger start up in exchange for
            # smaller binaries. Plus the sections have been obsoleted by a
            # `.debug_names` section in DWARF 5, and so debugger support for
            # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
            # See: https://github.com/rust-lang/rust/issues/46034
            command = [
                *self.rd.tool("objcopy"),
                "-R",
                ".debug_pubnames",
                "-R",
                ".debug_pubtypes",
                exe_path,
            ]
        spawn.runv(command, cwd=self.rd.root)

    for bin_name in self.bins:
        copy(self.rd.cargo_target_dir() / cargo_profile / bin_name, bin_name)
    for example in self.examples:
        copy(
            self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example,
            Path("examples") / example,
        )

    if self.extract:
        cargo_build_json_output = build_output["cargo"]

        target_dir = self.rd.cargo_target_dir()
        for line in cargo_build_json_output.split("\n"):
            # Only lines that look like JSON objects are cargo messages.
            if not (line.strip() and line.startswith("{")):
                continue
            message = json.loads(line)
            if message["reason"] != "build-script-executed":
                continue
            out_dir = self.rd.rewrite_builder_path_for_host(Path(message["out_dir"]))
            if not out_dir.is_relative_to(target_dir):
                # Some crates are built for both the host and the target.
                # Ignore the built-for-host out dir.
                continue
            # parse the package name from a package_id that looks like one of:
            # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
            # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
            # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
            # file:///path/to/my-package#0.1.0
            package_id = message["package_id"]
            if "@" in package_id:
                package = package_id.partition("@")[0].rpartition("#")[2]
            else:
                package = package_id.partition("#")[0].rpartition("/")[2]
            for src, dst in self.extract.get(package, {}).items():
                spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

    self.acquired = True
def run(self, prep: dict[str, Any]) -> None:
    """Perform the action.

    Runs the parent class's `run` first, then `build` with the same data.

    Args:
        prep: Any prep work returned by `prepare_batch`.
    """
    super().run(prep)
    self.build(prep)

Perform the action.

Args: prep: Any prep work returned by prepare_batch.

@cache
def inputs(self) -> set[str]:
    """Return the files which are considered inputs to the action.

    That is the inherited inputs plus the inputs of every crate reachable
    through path dependencies from the configured bins and examples (dev
    dependencies included for examples).
    """
    deps = set()

    for bin_name in self.bins:
        bin_crate = self.rd.cargo_workspace.crate_for_bin(bin_name)
        deps |= self.rd.cargo_workspace.transitive_path_dependencies(bin_crate)
    for example_name in self.examples:
        example_crate = self.rd.cargo_workspace.crate_for_example(example_name)
        deps |= self.rd.cargo_workspace.transitive_path_dependencies(
            example_crate, dev=True
        )

    shared = super().inputs()
    return shared | {inp for dep in deps for inp in dep.inputs()}

Return the files which are considered inputs to the action.

Inherited Members
CargoPreImage
extra
PreImage
rd
path
class Image:
    """A Docker image whose build and dependencies are managed by mzbuild.

    An image corresponds to a directory in a repository that contains a
    `mzbuild.yml` file. This directory is called an "mzbuild context."

    Attributes:
        rd: The `RepositoryDetails` the image was loaded with.
        path: The path to the directory containing the `mzbuild.yml`
            configuration file.
        name: The name of the image.
        publish: The `publish` setting from `mzbuild.yml` (default `True`).
        description: The optional `description` setting from `mzbuild.yml`.
        mainline: The `mainline` setting from `mzbuild.yml` (default `True`).
        pre_images: Optional actions to perform before running `docker build`.
        build_args: The `build-args` setting from `mzbuild.yml` (default `{}`).
        depends_on: The names of the images upon which this image depends,
            taken from the `MZFROM` lines of the Dockerfile.
    """

    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")

    _context_files_cache: set[str] | None

    def __init__(self, rd: RepositoryDetails, path: Path):
        """Load `mzbuild.yml` and the Dockerfile found in `path`.

        Raises:
            ValueError: If a pre-image has an unknown type, or if the image
                name contains anything but alphanumerics and hyphens.
        """
        self.rd = rd
        self.path = path
        self._context_files_cache = None
        self.pre_images: list[PreImage] = []

        with open(self.path / "mzbuild.yml") as f:
            data = yaml.safe_load(f)

        self.name: str = data.pop("name")
        self.publish: bool = data.pop("publish", True)
        self.description: str | None = data.pop("description", None)
        self.mainline: bool = data.pop("mainline", True)
        for spec in data.pop("pre-image", []):
            kind = spec.pop("type", None)
            if kind == "cargo-build":
                action = CargoBuild(self.rd, self.path, spec)
            elif kind == "copy":
                action = Copy(self.rd, self.path, spec)
            else:
                raise ValueError(
                    f"mzbuild config in {self.path} has unknown pre-image type"
                )
            self.pre_images.append(action)
        self.build_args = data.pop("build-args", {})

        if not re.fullmatch(r"[A-Za-z0-9\-]*", self.name):
            raise ValueError(
                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
            )

        # Every `MZFROM <image>` line in the Dockerfile names a dependency.
        with open(self.path / "Dockerfile", "rb") as f:
            matches = (self._DOCKERFILE_MZFROM_RE.match(line) for line in f)
            self.depends_on: list[str] = [m.group(1).decode() for m in matches if m]

    def sync_description(self) -> None:
        """Push the image's README.md with `docker pushrm`.

        Nothing is pushed when the image is not publishable or has no
        README.md; a message is printed instead.
        """
        if not self.publish:
            ui.say(f"{self.name} is not publishable")
            return

        readme_path = self.path / "README.md"
        if not readme_path.exists():
            ui.say(f"{self.name} has no README.md or description")
            return

        command = ["docker", "pushrm", f"--file={readme_path}"]
        docker_config = os.getenv("DOCKER_CONFIG")
        if docker_config:
            command.append(f"--config={docker_config}/config.json")
        if self.description:
            command.append(f"--short={self.description}")
        command.append(self.docker_name())
        spawn.runv(command)

    def docker_name(self, tag: str | None = None) -> str:
        """Return the registry-qualified name of the image.

        Args:
            tag: Optional tag; when non-empty it is appended as `:<tag>`.
        """
        repository = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
        return f"{repository}:{tag}" if tag else repository

A Docker image whose build and dependencies are managed by mzbuild.

An image corresponds to a directory in a repository that contains a mzbuild.yml file. This directory is called an "mzbuild context."

Attributes: name: The name of the image. publish: Whether the image should be pushed to Docker Hub. depends_on: The names of the images upon which this image depends. rd: The RepositoryDetails of the associated Repository. path: The path to the directory containing the mzbuild.yml configuration file. pre_images: Optional actions to perform before running docker build. build_args: An optional mapping of --build-arg values to pass to the Dockerfile.

Image( rd: RepositoryDetails, path: pathlib.Path)
def __init__(self, rd: RepositoryDetails, path: Path):
    """Load `mzbuild.yml` and the Dockerfile found in `path`.

    Raises:
        ValueError: If a pre-image has an unknown type, or if the image name
            contains anything but alphanumerics and hyphens.
    """
    self.rd = rd
    self.path = path
    self._context_files_cache = None
    self.pre_images: list[PreImage] = []

    with open(self.path / "mzbuild.yml") as f:
        data = yaml.safe_load(f)

    self.name: str = data.pop("name")
    self.publish: bool = data.pop("publish", True)
    self.description: str | None = data.pop("description", None)
    self.mainline: bool = data.pop("mainline", True)
    for spec in data.pop("pre-image", []):
        kind = spec.pop("type", None)
        if kind == "cargo-build":
            action = CargoBuild(self.rd, self.path, spec)
        elif kind == "copy":
            action = Copy(self.rd, self.path, spec)
        else:
            raise ValueError(
                f"mzbuild config in {self.path} has unknown pre-image type"
            )
        self.pre_images.append(action)
    self.build_args = data.pop("build-args", {})

    if not re.fullmatch(r"[A-Za-z0-9\-]*", self.name):
        raise ValueError(
            f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
        )

    # Every `MZFROM <image>` line in the Dockerfile names a dependency.
    with open(self.path / "Dockerfile", "rb") as f:
        matches = (self._DOCKERFILE_MZFROM_RE.match(line) for line in f)
        self.depends_on: list[str] = [m.group(1).decode() for m in matches if m]
rd
path
pre_images: list[PreImage]
depends_on: list[str]
def sync_description(self) -> None:
    """Push the image's README.md with `docker pushrm`.

    Nothing is pushed when the image is not publishable or has no README.md;
    a message is printed instead. The short description is passed along only
    when one is set, and `--config` only when `DOCKER_CONFIG` is set in the
    environment.
    """
    if not self.publish:
        ui.say(f"{self.name} is not publishable")
        return

    readme_path = self.path / "README.md"
    if not readme_path.exists():
        ui.say(f"{self.name} has no README.md or description")
        return

    command = ["docker", "pushrm", f"--file={readme_path}"]
    docker_config = os.getenv("DOCKER_CONFIG")
    if docker_config:
        command.append(f"--config={docker_config}/config.json")
    if self.description:
        command.append(f"--short={self.description}")
    command.append(self.docker_name())
    spawn.runv(command)

Sync the description to Docker Hub if the image is publishable and a README.md file exists.

def docker_name(self, tag: str | None = None) -> str:
849    def docker_name(self, tag: str | None = None) -> str:
850        """Return the name of the image on Docker Hub at the given tag."""
851        name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
852        if tag:
853            name += f":{tag}"
854        return name

Return the name of the image on Docker Hub at the given tag.

class ResolvedImage:
 857class ResolvedImage:
 858    """An `Image` whose dependencies have been resolved.
 859
 860    Attributes:
 861        image: The underlying `Image`.
 862        acquired: Whether the image is available locally.
 863        dependencies: A mapping from dependency name to `ResolvedImage` for
 864            each of the images that `image` depends upon.
 865    """
 866
 867    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
 868        self.image = image
 869        self.acquired = False
 870        self.dependencies = {}
 871        for d in dependencies:
 872            self.dependencies[d.name] = d
 873
 874    def __repr__(self) -> str:
 875        return f"ResolvedImage<{self.spec()}>"
 876
 877    @property
 878    def name(self) -> str:
 879        """The name of the underlying image."""
 880        return self.image.name
 881
 882    @property
 883    def publish(self) -> bool:
 884        """Whether the underlying image should be pushed to Docker Hub."""
 885        return self.image.publish
 886
 887    @cache
 888    def spec(self) -> str:
 889        """Return the "spec" for the image.
 890
 891        A spec is the unique identifier for the image given its current
 892        fingerprint. It is a valid Docker Hub name.
 893        """
 894        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")
 895
 896    def write_dockerfile(self) -> IO[bytes]:
 897        """Render the Dockerfile without mzbuild directives.
 898
 899        Returns:
 900            file: A handle to a temporary file containing the adjusted
 901                Dockerfile."""
 902        with open(self.image.path / "Dockerfile", "rb") as f:
 903            lines = f.readlines()
 904        f = TemporaryFile()
 905        for line in lines:
 906            match = Image._DOCKERFILE_MZFROM_RE.match(line)
 907            if match:
 908                image = match.group(1).decode()
 909                spec = self.dependencies[image].spec()
 910                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
 911            f.write(line)
 912        f.seek(0)
 913        return f
 914
 915    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
 916        """Build the image from source.
 917
 918        Requires that the caller has already acquired all dependencies and
 919        prepared all `PreImage` actions via `PreImage.prepare_batch`.
 920        """
 921        # Use a file lock to prevent parallel mzcompose processes from
 922        # racing on git clean / copy / strip for the same image directory.
 923        lock_dir = self.image.rd.root / "target" / "mzbuild-locks"
 924        lock_dir.mkdir(parents=True, exist_ok=True)
 925        profile = self.image.rd.profile.name.lower()
 926        lock_path = lock_dir / f"{self.image.name}-{profile}.lock"
 927        with open(lock_path, "w") as lock_file:
 928            ui.say(f"Acquiring lock for {self.spec()}")
 929            fcntl.flock(lock_file, fcntl.LOCK_EX)
 930            try:
 931                self._build_locked(prep, push)
 932            finally:
 933                fcntl.flock(lock_file, fcntl.LOCK_UN)
 934
 935    def _build_locked(
 936        self, prep: dict[type[PreImage], Any], push: bool = False
 937    ) -> None:
 938        ui.section(f"Building {self.spec()}")
 939        spawn.runv(["git", "clean", "-ffdX", self.image.path])
 940
 941        for pre_image in self.image.pre_images:
 942            pre_image.run(prep[type(pre_image)])
 943        build_args = {
 944            **self.image.build_args,
 945            "BUILD_PROFILE": self.image.rd.profile.name,
 946            "ARCH_GCC": str(self.image.rd.arch),
 947            "ARCH_GO": self.image.rd.arch.go_str(),
 948            "CI_SANITIZER": str(self.image.rd.sanitizer),
 949        }
 950        f = self.write_dockerfile()
 951
 952        try:
 953            spawn.capture(["docker", "buildx", "version"])
 954        except subprocess.CalledProcessError:
 955            if push:
 956                print(
 957                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
 958                )
 959                raise
 960            print(
 961                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
 962            )
 963            print("Falling back to docker build")
 964            cmd: Sequence[str] = [
 965                "docker",
 966                "build",
 967                "-f",
 968                "-",
 969                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
 970                "-t",
 971                self.spec(),
 972                f"--platform=linux/{self.image.rd.arch.go_str()}",
 973                str(self.image.path),
 974            ]
 975        else:
 976            docker_tag = f"docker.io/{self.spec()}"
 977            ghcr_tag = f"ghcr.io/materializeinc/{self.spec()}"
 978            cmd: Sequence[str] = [
 979                "docker",
 980                "buildx",
 981                "build",
 982                "--progress=plain",  # less noisy
 983                "-f",
 984                "-",
 985                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
 986                "-t",
 987                docker_tag,
 988                "-t",
 989                ghcr_tag,
 990                f"--platform=linux/{self.image.rd.arch.go_str()}",
 991                str(self.image.path),
 992                "--load",
 993            ]
 994
 995        if token := os.getenv("GITHUB_GHCR_TOKEN"):
 996            spawn.runv(
 997                [
 998                    "docker",
 999                    "login",
1000                    "ghcr.io",
1001                    "-u",
1002                    "materialize-bot",
1003                    "--password-stdin",
1004                ],
1005                stdin=token.encode(),
1006            )
1007
1008        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)
1009
1010        if push:
1011            # Push to both registries in parallel. With the docker driver,
1012            # the image is already in the local daemon after --load, so
1013            # docker push is the same mechanism buildx --push uses internally.
1014            pending = [docker_tag, ghcr_tag]
1015            for sleep_time in [5, 10, 20, 40, 60, None]:
1016                procs = [
1017                    subprocess.Popen(
1018                        ["docker", "push", tag],
1019                        stdout=sys.stderr,
1020                        stderr=sys.stderr,
1021                    )
1022                    for tag in pending
1023                ]
1024                pending = [tag for tag, proc in zip(pending, procs) if proc.wait() != 0]
1025                if not pending:
1026                    break
1027                if sleep_time is None:
1028                    raise subprocess.CalledProcessError(
1029                        1, ["docker", "push", pending[0]]
1030                    )
1031                print(f"docker push failed for {pending}, retrying in {sleep_time}s")
1032                time.sleep(sleep_time)
1033
1034    def try_pull(self, max_retries: int) -> bool:
1035        """Download the image if it does not exist locally. Returns whether it was found."""
1036        command = ["docker", "pull"]
1037        # --quiet skips printing the progress bar, which does not display well in CI.
1038        if ui.env_is_truthy("CI"):
1039            command.append("--quiet")
1040        command.append(self.spec())
1041        if not self.acquired:
1042            ui.header(f"Acquiring {self.spec()}")
1043            sleep_time = 1
1044            for retry in range(1, max_retries + 1):
1045                try:
1046                    spawn.runv(
1047                        command,
1048                        stdin=subprocess.DEVNULL,
1049                        stdout=sys.stderr.buffer,
1050                    )
1051                    self.acquired = True
1052                    break
1053                except subprocess.CalledProcessError:
1054                    if retry < max_retries:
1055                        # There seems to be no good way to tell what error
1056                        # happened based on error code
1057                        # (https://github.com/docker/cli/issues/538) and we
1058                        # want to print output directly to terminal.
1059                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
1060                            for retry in range(max_retries):
1061                                try:
1062                                    build_status = buildkite.get_build_status(build)
1063                                except subprocess.CalledProcessError:
1064                                    time.sleep(sleep_time)
1065                                    sleep_time = min(sleep_time * 2, 10)
1066                                    break
1067                                print(f"Build {build} status: {build_status}")
1068                                if build_status == "failed":
1069                                    print(
1070                                        f"Build {build} has been marked as failed, exiting hard"
1071                                    )
1072                                    sys.exit(1)
1073                                elif build_status == "success":
1074                                    break
1075                                assert (
1076                                    build_status == "pending"
1077                                ), f"Unknown build status {build_status}"
1078                                time.sleep(1)
1079                        else:
1080                            print(f"Retrying in {sleep_time}s ...")
1081                            time.sleep(sleep_time)
1082                            sleep_time = min(sleep_time * 2, 10)
1083                        continue
1084                    else:
1085                        break
1086        return self.acquired
1087
1088    def is_published_if_necessary(self) -> bool:
1089        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
1090        if not self.publish:
1091            return False
1092        spec = self.spec()
1093        if spec.startswith(GHCR_PREFIX):
1094            spec = spec.removeprefix(GHCR_PREFIX)
1095        ghcr_spec = f"{GHCR_PREFIX}{spec}"
1096        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
1097            ui.say(f"{spec} already exists")
1098            return True
1099        return False
1100
1101    def run(
1102        self,
1103        args: list[str] = [],
1104        docker_args: list[str] = [],
1105        env: dict[str, str] = {},
1106    ) -> None:
1107        """Run a command in the image.
1108
1109        Creates a container from the image and runs the command described by
1110        `args` in the image.
1111        """
1112        envs = []
1113        for key, val in env.items():
1114            envs.extend(["--env", f"{key}={val}"])
1115        spawn.runv(
1116            [
1117                "docker",
1118                "run",
1119                "--tty",
1120                "--rm",
1121                *envs,
1122                "--init",
1123                *docker_args,
1124                self.spec(),
1125                *args,
1126            ],
1127        )
1128
1129    def list_dependencies(self, transitive: bool = False) -> set[str]:
1130        out = set()
1131        for dep in self.dependencies.values():
1132            out.add(dep.name)
1133            if transitive:
1134                out |= dep.list_dependencies(transitive)
1135        return out
1136
1137    @cache
1138    def inputs(self, transitive: bool = False) -> set[str]:
1139        """List the files tracked as inputs to the image.
1140
1141        These files are used to compute the fingerprint for the image. See
1142        `ResolvedImage.fingerprint` for details.
1143
1144        Returns:
1145            inputs: A list of input files, relative to the root of the
1146                repository.
1147        """
1148        if self.image._context_files_cache is not None:
1149            paths = set(self.image._context_files_cache)
1150        else:
1151            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
1152        if not paths:
1153            # While we could find an `mzbuild.yml` file for this service, expland_globs didn't
1154            # return any files that matched this service. At the very least, the `mzbuild.yml`
1155            # file itself should have been returned. We have a bug if paths is empty.
1156            raise AssertionError(
1157                f"{self.image.name} mzbuild exists but its files are unknown to git"
1158            )
1159        for pre_image in self.image.pre_images:
1160            paths |= pre_image.inputs()
1161        if transitive:
1162            for dep in self.dependencies.values():
1163                paths |= dep.inputs(transitive)
1164        return paths
1165
1166    @cache
1167    def fingerprint(self) -> Fingerprint:
1168        """Fingerprint the inputs to the image.
1169
1170        Compute the fingerprint of the image. Changing the contents of any of
1171        the files or adding or removing files to the image will change the
1172        fingerprint, as will modifying the inputs to any of its dependencies.
1173
1174        The image considers all non-gitignored files in its mzbuild context to
1175        be inputs. If it has a pre-image action, that action may add additional
1176        inputs via `PreImage.inputs`.
1177        """
1178        self_hash = hashlib.sha1()
1179        # When inputs come from precomputed sources (crate and image context
1180        # batching + resolved CargoPreImage paths), they are already individual
1181        # file paths from git. Skip the expensive expand_globs subprocess calls.
1182        inputs = self.inputs()
1183        if self.image._context_files_cache is not None:
1184            resolved_inputs = sorted(inputs)
1185        else:
1186            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
1187        for rel_path in resolved_inputs:
1188            abs_path = self.image.rd.root / rel_path
1189            file_hash = hashlib.sha1()
1190            raw_file_mode = os.lstat(abs_path).st_mode
1191            # Compute a simplified file mode using the same rules as Git.
1192            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
1193            if stat.S_ISLNK(raw_file_mode):
1194                file_mode = 0o120000
1195            elif raw_file_mode & stat.S_IXUSR:
1196                file_mode = 0o100755
1197            else:
1198                file_mode = 0o100644
1199            with open(abs_path, "rb") as f:
1200                file_hash.update(f.read())
1201            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
1202            self_hash.update(rel_path.encode())
1203            self_hash.update(file_hash.digest())
1204            self_hash.update(b"\0")
1205
1206        for pre_image in self.image.pre_images:
1207            self_hash.update(pre_image.extra().encode())
1208            self_hash.update(b"\0")
1209
1210        self_hash.update(f"profile={self.image.rd.profile}".encode())
1211        self_hash.update(f"arch={self.image.rd.arch}".encode())
1212        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
1213        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
1214        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
1215        self_hash.update(b"mirror=ghcr")
1216
1217        full_hash = hashlib.sha1()
1218        full_hash.update(self_hash.digest())
1219        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
1220            full_hash.update(dep.name.encode())
1221            full_hash.update(dep.fingerprint())
1222            full_hash.update(b"\0")
1223
1224        return Fingerprint(full_hash.digest())

An Image whose dependencies have been resolved.

Attributes: image: The underlying Image. acquired: Whether the image is available locally. dependencies: A mapping from dependency name to ResolvedImage for each of the images that image depends upon.

ResolvedImage( image: Image, dependencies: Iterable[ResolvedImage])
867    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
868        self.image = image
869        self.acquired = False
870        self.dependencies = {}
871        for d in dependencies:
872            self.dependencies[d.name] = d
image
acquired
dependencies
name: str
877    @property
878    def name(self) -> str:
879        """The name of the underlying image."""
880        return self.image.name

The name of the underlying image.

publish: bool
882    @property
883    def publish(self) -> bool:
884        """Whether the underlying image should be pushed to Docker Hub."""
885        return self.image.publish

Whether the underlying image should be pushed to Docker Hub.

@cache
def spec(self) -> str:
887    @cache
888    def spec(self) -> str:
889        """Return the "spec" for the image.
890
891        A spec is the unique identifier for the image given its current
892        fingerprint. It is a valid Docker Hub name.
893        """
894        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")

Return the "spec" for the image.

A spec is the unique identifier for the image given its current fingerprint. It is a valid Docker Hub name.

def write_dockerfile(self) -> IO[bytes]:
896    def write_dockerfile(self) -> IO[bytes]:
897        """Render the Dockerfile without mzbuild directives.
898
899        Returns:
900            file: A handle to a temporary file containing the adjusted
901                Dockerfile."""
902        with open(self.image.path / "Dockerfile", "rb") as f:
903            lines = f.readlines()
904        f = TemporaryFile()
905        for line in lines:
906            match = Image._DOCKERFILE_MZFROM_RE.match(line)
907            if match:
908                image = match.group(1).decode()
909                spec = self.dependencies[image].spec()
910                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
911            f.write(line)
912        f.seek(0)
913        return f

Render the Dockerfile without mzbuild directives.

Returns: file: A handle to a temporary file containing the adjusted Dockerfile.

def build( self, prep: dict[type[PreImage], typing.Any], push: bool = False) -> None:
915    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
916        """Build the image from source.
917
918        Requires that the caller has already acquired all dependencies and
919        prepared all `PreImage` actions via `PreImage.prepare_batch`.
920        """
921        # Use a file lock to prevent parallel mzcompose processes from
922        # racing on git clean / copy / strip for the same image directory.
923        lock_dir = self.image.rd.root / "target" / "mzbuild-locks"
924        lock_dir.mkdir(parents=True, exist_ok=True)
925        profile = self.image.rd.profile.name.lower()
926        lock_path = lock_dir / f"{self.image.name}-{profile}.lock"
927        with open(lock_path, "w") as lock_file:
928            ui.say(f"Acquiring lock for {self.spec()}")
929            fcntl.flock(lock_file, fcntl.LOCK_EX)
930            try:
931                self._build_locked(prep, push)
932            finally:
933                fcntl.flock(lock_file, fcntl.LOCK_UN)

Build the image from source.

Requires that the caller has already acquired all dependencies and prepared all PreImage actions via PreImage.prepare_batch.

def try_pull(self, max_retries: int) -> bool:
1034    def try_pull(self, max_retries: int) -> bool:
1035        """Download the image if it does not exist locally. Returns whether it was found."""
1036        command = ["docker", "pull"]
1037        # --quiet skips printing the progress bar, which does not display well in CI.
1038        if ui.env_is_truthy("CI"):
1039            command.append("--quiet")
1040        command.append(self.spec())
1041        if not self.acquired:
1042            ui.header(f"Acquiring {self.spec()}")
1043            sleep_time = 1
1044            for retry in range(1, max_retries + 1):
1045                try:
1046                    spawn.runv(
1047                        command,
1048                        stdin=subprocess.DEVNULL,
1049                        stdout=sys.stderr.buffer,
1050                    )
1051                    self.acquired = True
1052                    break
1053                except subprocess.CalledProcessError:
1054                    if retry < max_retries:
1055                        # There seems to be no good way to tell what error
1056                        # happened based on error code
1057                        # (https://github.com/docker/cli/issues/538) and we
1058                        # want to print output directly to terminal.
1059                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
1060                            for retry in range(max_retries):
1061                                try:
1062                                    build_status = buildkite.get_build_status(build)
1063                                except subprocess.CalledProcessError:
1064                                    time.sleep(sleep_time)
1065                                    sleep_time = min(sleep_time * 2, 10)
1066                                    break
1067                                print(f"Build {build} status: {build_status}")
1068                                if build_status == "failed":
1069                                    print(
1070                                        f"Build {build} has been marked as failed, exiting hard"
1071                                    )
1072                                    sys.exit(1)
1073                                elif build_status == "success":
1074                                    break
1075                                assert (
1076                                    build_status == "pending"
1077                                ), f"Unknown build status {build_status}"
1078                                time.sleep(1)
1079                        else:
1080                            print(f"Retrying in {sleep_time}s ...")
1081                            time.sleep(sleep_time)
1082                            sleep_time = min(sleep_time * 2, 10)
1083                        continue
1084                    else:
1085                        break
1086        return self.acquired

Download the image if it does not exist locally. Returns whether it was found.

def is_published_if_necessary(self) -> bool:
1088    def is_published_if_necessary(self) -> bool:
1089        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
1090        if not self.publish:
1091            return False
1092        spec = self.spec()
1093        if spec.startswith(GHCR_PREFIX):
1094            spec = spec.removeprefix(GHCR_PREFIX)
1095        ghcr_spec = f"{GHCR_PREFIX}{spec}"
1096        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
1097            ui.say(f"{spec} already exists")
1098            return True
1099        return False

Report whether the image exists on both Docker Hub and GHCR. Always returns False for images that are not publishable.

def run( self, args: list[str] = [], docker_args: list[str] = [], env: dict[str, str] = {}) -> None:
1101    def run(
1102        self,
1103        args: list[str] = [],
1104        docker_args: list[str] = [],
1105        env: dict[str, str] = {},
1106    ) -> None:
1107        """Run a command in the image.
1108
1109        Creates a container from the image and runs the command described by
1110        `args` in the image.
1111        """
1112        envs = []
1113        for key, val in env.items():
1114            envs.extend(["--env", f"{key}={val}"])
1115        spawn.runv(
1116            [
1117                "docker",
1118                "run",
1119                "--tty",
1120                "--rm",
1121                *envs,
1122                "--init",
1123                *docker_args,
1124                self.spec(),
1125                *args,
1126            ],
1127        )

Run a command in the image.

Creates a container from the image and runs the command described by args in the image.

def list_dependencies(self, transitive: bool = False) -> set[str]:
1129    def list_dependencies(self, transitive: bool = False) -> set[str]:
1130        out = set()
1131        for dep in self.dependencies.values():
1132            out.add(dep.name)
1133            if transitive:
1134                out |= dep.list_dependencies(transitive)
1135        return out
@cache
def inputs(self, transitive: bool = False) -> set[str]:
1137    @cache
1138    def inputs(self, transitive: bool = False) -> set[str]:
1139        """List the files tracked as inputs to the image.
1140
1141        These files are used to compute the fingerprint for the image. See
1142        `ResolvedImage.fingerprint` for details.
1143
1144        Returns:
1145            inputs: A list of input files, relative to the root of the
1146                repository.
1147        """
1148        if self.image._context_files_cache is not None:
1149            paths = set(self.image._context_files_cache)
1150        else:
1151            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
1152        if not paths:
1153            # While we could find an `mzbuild.yml` file for this service, expland_globs didn't
1154            # return any files that matched this service. At the very least, the `mzbuild.yml`
1155            # file itself should have been returned. We have a bug if paths is empty.
1156            raise AssertionError(
1157                f"{self.image.name} mzbuild exists but its files are unknown to git"
1158            )
1159        for pre_image in self.image.pre_images:
1160            paths |= pre_image.inputs()
1161        if transitive:
1162            for dep in self.dependencies.values():
1163                paths |= dep.inputs(transitive)
1164        return paths

List the files tracked as inputs to the image.

These files are used to compute the fingerprint for the image. See ResolvedImage.fingerprint for details.

Returns: inputs: A set of input file paths, relative to the root of the repository.

@cache
def fingerprint(self) -> Fingerprint:
    @cache
    def fingerprint(self) -> Fingerprint:
        """Fingerprint the inputs to the image.

        Compute the fingerprint of the image. Changing the contents of any of
        the files or adding or removing files to the image will change the
        fingerprint, as will modifying the inputs to any of its dependencies.

        The image considers all non-gitignored files in its mzbuild context to
        be inputs. If it has a pre-image action, that action may add additional
        inputs via `PreImage.inputs`.

        The digest is SHA-1 over, in order: one record per input file
        (simplified mode, relative path, SHA-1 of contents, NUL), each
        pre-image's `extra()` string, the profile/arch/coverage/sanitizer
        settings, and finally the name and fingerprint of every dependency
        sorted by name. The order of these updates is part of the format, so
        reordering any statement below changes every fingerprint.
        """
        self_hash = hashlib.sha1()
        # When inputs come from precomputed sources (crate and image context
        # batching + resolved CargoPreImage paths), they are already individual
        # file paths from git. Skip the expensive expand_globs subprocess calls.
        inputs = self.inputs()
        if self.image._context_files_cache is not None:
            resolved_inputs = sorted(inputs)
        else:
            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
        for rel_path in resolved_inputs:
            abs_path = self.image.rd.root / rel_path
            file_hash = hashlib.sha1()
            # lstat: the mode of a symlink itself, not of its target.
            raw_file_mode = os.lstat(abs_path).st_mode
            # Compute a simplified file mode using the same rules as Git.
            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
            if stat.S_ISLNK(raw_file_mode):
                file_mode = 0o120000
            elif raw_file_mode & stat.S_IXUSR:
                file_mode = 0o100755
            else:
                file_mode = 0o100644
            # NOTE(review): open() follows symlinks, so for a symlink this
            # hashes the target's contents rather than the link text -- confirm
            # that this is intended.
            with open(abs_path, "rb") as f:
                file_hash.update(f.read())
            # Per-file record: mode, path, content digest, NUL terminator.
            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
            self_hash.update(rel_path.encode())
            self_hash.update(file_hash.digest())
            self_hash.update(b"\0")

        # Extra, non-file state contributed by each pre-image action.
        for pre_image in self.image.pre_images:
            self_hash.update(pre_image.extra().encode())
            self_hash.update(b"\0")

        # Build configuration that changes the produced image.
        self_hash.update(f"profile={self.image.rd.profile}".encode())
        self_hash.update(f"arch={self.image.rd.arch}".encode())
        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
        self_hash.update(b"mirror=ghcr")

        # Fold in dependencies in name order so the result does not depend on
        # the order in which they were supplied.
        full_hash = hashlib.sha1()
        full_hash.update(self_hash.digest())
        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
            full_hash.update(dep.name.encode())
            # Passed straight to update(), so presumably `Fingerprint` is
            # bytes-like -- verify against its definition.
            full_hash.update(dep.fingerprint())
            full_hash.update(b"\0")

        return Fingerprint(full_hash.digest())

Fingerprint the inputs to the image.

Compute the fingerprint of the image. Changing the contents of any of the files or adding or removing files to the image will change the fingerprint, as will modifying the inputs to any of its dependencies.

The image considers all non-gitignored files in its mzbuild context to be inputs. If it has a pre-image action, that action may add additional inputs via PreImage.inputs.

class DependencySet:
class DependencySet:
    """A set of `ResolvedImage`s.

    Iterating over a dependency set yields the contained images in an arbitrary
    order. Indexing a dependency set yields the image with the specified name.
    """

    def __init__(self, dependencies: Iterable[Image]):
        """Construct a new `DependencySet`.

        The provided `dependencies` must be topologically sorted: the
        dependencies of each image are looked up among the images that were
        resolved before it.
        """
        self._dependencies: dict[str, ResolvedImage] = {}
        dependencies = list(dependencies)
        # Skip the `docker_images()` call entirely when there is nothing to
        # resolve.
        known_images = docker_images() if dependencies else set()
        for d in dependencies:
            image = ResolvedImage(
                image=d,
                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
            )
            image.acquired = image.spec() in known_images
            self._dependencies[d.name] = image

    def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]:
        """Run the batch preparation step for the pre-images of `images`.

        Pre-image instances are grouped by their concrete class, and each class
        receives a single `prepare_batch` call with all of its instances.

        Returns:
            A mapping from pre-image class to the value returned by its
            `prepare_batch` call.
        """
        pre_images = collections.defaultdict(list)
        for image in images:
            for pre_image in image.image.pre_images:
                pre_images[type(pre_image)].append(pre_image)
        pre_image_prep = {}
        for cls, instances in pre_images.items():
            # `cls` is the class object itself; the cast only informs the type
            # checker.
            pre_image = cast(PreImage, cls)
            pre_image_prep[cls] = pre_image.prepare_batch(instances)
        return pre_image_prep

    def acquire(self, max_retries: int | None = None) -> None:
        """Download or build all of the images in the dependency set that do not
        already exist locally.

        Publishable images are pulled concurrently; images that are not
        publishable, or whose pull fails, are built locally. When `CI` is
        truthy and `CI_ALLOW_LOCAL_BUILD` is not, a publishable image that
        could not be pulled terminates the process with exit code 128 instead
        of being built.

        Args:
            max_retries: Number of retries on failure, passed to each image's
                `try_pull`. When `None` or `0`, defaults to 90 if
                `CI_WAITING_FOR_BUILD` is set, 5 in CI without
                `CI_ALLOW_LOCAL_BUILD`, and 1 otherwise.
        """

        # Only retry in CI runs since we struggle with flaky docker pulls there
        if not max_retries:
            max_retries = (
                90
                if os.getenv("CI_WAITING_FOR_BUILD")
                else (
                    5
                    if ui.env_is_truthy("CI")
                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
                    else 1
                )
            )
        assert max_retries > 0

        deps_to_check = [dep for dep in self if dep.publish]
        deps_to_build = [dep for dep in self if not dep.publish]
        if deps_to_check:
            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
                futures = [
                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
                ]
                for dep, future in zip(deps_to_check, futures):
                    try:
                        if not future.result():
                            deps_to_build.append(dep)
                    except Exception:
                        # Best effort: a failed pull falls back to a build.
                        deps_to_build.append(dep)

        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            expected_deps = [dep for dep in deps_to_build if dep.publish]
            if expected_deps:
                print(
                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
                )
                sys.exit(128)

        prep = self._prepare_batch(deps_to_build)
        for dep in deps_to_build:
            dep.build(prep)

    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
        """Ensure all publishable images in this dependency set exist on Docker
        Hub.

        Images are pushed using their spec as their tag.

        Every image for which `is_published_if_necessary()` is false is built
        on its own thread once all of its dependencies have been built.
        Publishable images get up to three build attempts. If a dependency
        fails to build, its dependents are aborted immediately rather than
        waiting for the ten-minute dependency timeout.

        Args:
            pre_build: A callback to invoke with all dependencies that are
                       going to be built locally, invoked after their cargo
                       build is done, but before the Docker images are built
                       and uploaded to DockerHub.

        Raises:
            TimeoutError: An image waited more than 600 seconds for its
                dependencies to be built.
            RuntimeError: A dependency of an image failed to build.
        """
        all_deps = list(self)
        if all_deps:
            with ThreadPoolExecutor(max_workers=len(all_deps)) as executor:
                checked = list(
                    executor.map(
                        lambda dep: (dep, not dep.is_published_if_necessary()),
                        all_deps,
                    )
                )
            deps_to_build = [dep for dep, should_build in checked if should_build]
        else:
            deps_to_build = []

        prep = self._prepare_batch(deps_to_build)
        if pre_build:
            pre_build(deps_to_build)
        lock = Lock()
        # Images that need no build count as already built.
        built_deps: set[str] = {dep.name for dep in all_deps} - {
            dep.name for dep in deps_to_build
        }
        # Name -> exception for every image whose build was given up on.
        failed_deps: dict[str, BaseException] = {}

        def wait_for_dependencies(dep):
            end_time = time.time() + 600
            while True:
                with lock:
                    missing = [
                        dep2 for dep2 in dep.dependencies if dep2 not in built_deps
                    ]
                    broken = [dep2 for dep2 in missing if dep2 in failed_deps]
                    cause = failed_deps[broken[0]] if broken else None
                if not missing:
                    return
                if broken:
                    # The dependency will never show up in `built_deps`, so
                    # waiting for the timeout would only waste time.
                    raise RuntimeError(
                        f"Not building {dep.name}, dependencies failed to build: {broken}"
                    ) from cause
                if time.time() > end_time:
                    raise TimeoutError(
                        f"Timed out in {dep.name} waiting for {missing}"
                    )
                time.sleep(0.01)

        def build_dep(dep):
            try:
                wait_for_dependencies(dep)
                for attempts_remaining in reversed(range(3)):
                    try:
                        dep.build(prep, push=dep.publish)
                        break
                    except Exception:
                        # Only publishable images are retried.
                        if not dep.publish or attempts_remaining == 0:
                            raise
            except Exception as e:
                with lock:
                    failed_deps[dep.name] = e
                raise
            with lock:
                built_deps.add(dep.name)

        if deps_to_build:
            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
                for future in as_completed(futures):
                    future.result()

    def check(self) -> bool:
        """Check all publishable images in this dependency set exist on Docker
        Hub. Don't try to download or build them.

        Returns:
            `True` if `is_published_if_necessary()` holds for every image (or
            the set is empty), `False` otherwise.
        """
        deps = list(self)
        if not deps:
            return True
        with ThreadPoolExecutor(max_workers=len(deps)) as executor:
            results = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), deps)
            )
        return all(results)

    def __iter__(self) -> Iterator[ResolvedImage]:
        return iter(self._dependencies.values())

    def __getitem__(self, key: str) -> ResolvedImage:
        return self._dependencies[key]

A set of ResolvedImages.

Iterating over a dependency set yields the contained images in an arbitrary order. Indexing a dependency set yields the image with the specified name.

DependencySet(dependencies: Iterable[Image])
1234    def __init__(self, dependencies: Iterable[Image]):
1235        """Construct a new `DependencySet`.
1236
1237        The provided `dependencies` must be topologically sorted.
1238        """
1239        self._dependencies: dict[str, ResolvedImage] = {}
1240        dependencies = list(dependencies)
1241        known_images = docker_images() if dependencies else set()
1242        for d in dependencies:
1243            image = ResolvedImage(
1244                image=d,
1245                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
1246            )
1247            image.acquired = image.spec() in known_images
1248            self._dependencies[d.name] = image

Construct a new DependencySet.

The provided dependencies must be topologically sorted.

def acquire(self, max_retries: int | None = None) -> None:
1261    def acquire(self, max_retries: int | None = None) -> None:
1262        """Download or build all of the images in the dependency set that do not
1263        already exist locally.
1264
1265        Args:
1266            max_retries: Number of retries on failure.
1267        """
1268
1269        # Only retry in CI runs since we struggle with flaky docker pulls there
1270        if not max_retries:
1271            max_retries = (
1272                90
1273                if os.getenv("CI_WAITING_FOR_BUILD")
1274                else (
1275                    5
1276                    if ui.env_is_truthy("CI")
1277                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
1278                    else 1
1279                )
1280            )
1281        assert max_retries > 0
1282
1283        deps_to_check = [dep for dep in self if dep.publish]
1284        deps_to_build = [dep for dep in self if not dep.publish]
1285        if len(deps_to_check):
1286            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
1287                futures = [
1288                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
1289                ]
1290                for dep, future in zip(deps_to_check, futures):
1291                    try:
1292                        if not future.result():
1293                            deps_to_build.append(dep)
1294                    except Exception:
1295                        deps_to_build.append(dep)
1296
1297        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
1298        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
1299            expected_deps = [dep for dep in deps_to_build if dep.publish]
1300            if expected_deps:
1301                print(
1302                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
1303                )
1304                sys.exit(128)
1305
1306        prep = self._prepare_batch(deps_to_build)
1307        for dep in deps_to_build:
1308            dep.build(prep)

Download or build all of the images in the dependency set that do not already exist locally.

Args: max_retries: Number of retries on failure.

def ensure( self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
1310    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
1311        """Ensure all publishable images in this dependency set exist on Docker
1312        Hub.
1313
1314        Images are pushed using their spec as their tag.
1315
1316        Args:
1317            pre_build: A callback to invoke with all dependency that are going
1318                       to be built locally, invoked after their cargo build is
1319                       done, but before the Docker images are build and
1320                       uploaded to DockerHub.
1321        """
1322        num_deps = len(list(self))
1323        if not num_deps:
1324            deps_to_build = []
1325        else:
1326            with ThreadPoolExecutor(max_workers=num_deps) as executor:
1327                futures = list(
1328                    executor.map(
1329                        lambda dep: (dep, not dep.is_published_if_necessary()), self
1330                    )
1331                )
1332
1333            deps_to_build = [dep for dep, should_build in futures if should_build]
1334
1335        prep = self._prepare_batch(deps_to_build)
1336        if pre_build:
1337            pre_build(deps_to_build)
1338        lock = Lock()
1339        built_deps: set[str] = set([dep.name for dep in self]) - set(
1340            [dep.name for dep in deps_to_build]
1341        )
1342
1343        def build_dep(dep):
1344            end_time = time.time() + 600
1345            while True:
1346                if time.time() > end_time:
1347                    raise TimeoutError(
1348                        f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}"
1349                    )
1350                with lock:
1351                    if all(dep2 in built_deps for dep2 in dep.dependencies):
1352                        break
1353                time.sleep(0.01)
1354            for attempts_remaining in reversed(range(3)):
1355                try:
1356                    dep.build(prep, push=dep.publish)
1357                    with lock:
1358                        built_deps.add(dep.name)
1359                    break
1360                except Exception:
1361                    if not dep.publish or attempts_remaining == 0:
1362                        raise
1363
1364        if deps_to_build:
1365            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
1366                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
1367                for future in as_completed(futures):
1368                    future.result()

Ensure all publishable images in this dependency set exist on Docker Hub.

Images are pushed using their spec as their tag.

Args: pre_build: A callback to invoke with all dependencies that are going to be built locally, invoked after their cargo build is done, but before the Docker images are built and uploaded to DockerHub.

def check(self) -> bool:
1370    def check(self) -> bool:
1371        """Check all publishable images in this dependency set exist on Docker
1372        Hub. Don't try to download or build them."""
1373        num_deps = len(list(self))
1374        if num_deps == 0:
1375            return True
1376        with ThreadPoolExecutor(max_workers=num_deps) as executor:
1377            results = list(
1378                executor.map(lambda dep: dep.is_published_if_necessary(), list(self))
1379            )
1380        return all(results)

Check all publishable images in this dependency set exist on Docker Hub. Don't try to download or build them.

class Repository:
class Repository:
    """A collection of mzbuild `Image`s.

    Creating a repository will walk the filesystem beneath `root` to
    automatically discover all contained `Image`s.

    Iterating over a repository yields the contained images in an arbitrary
    order.

    Args:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile to build the repository in.
        coverage: Whether to enable code coverage instrumentation.
        sanitizer: Which sanitizer to enable (address, thread, leak, memory,
            none).
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.

    Attributes:
        rd: The `RepositoryDetails` built from the constructor arguments.
        images: A mapping from image name to `Image` for all contained images.
        compositions: A mapping from composition name (the name of the
            directory containing a `mzcompose.py` file) to that directory.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch = Arch.host(),
        profile: Profile = (
            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
        ),
        coverage: bool = False,
        sanitizer: Sanitizer = Sanitizer.none,
        image_registry: str = image_registry(),
        image_prefix: str = "",
    ):
        """Discover all images and compositions beneath `root`.

        Raises:
            ValueError: An `mzbuild.yml` has no name, an image or composition
                name occurs twice, or an image depends on an unknown image.
        """
        self.rd = RepositoryDetails(
            root,
            arch,
            profile,
            coverage,
            sanitizer,
            image_registry,
            image_prefix,
        )
        self.images: dict[str, Image] = {}
        self.compositions: dict[str, Path] = {}
        for rel_path_s in sorted(
            git.expand_globs(self.root, "**/mzbuild.yml", "**/mzcompose.py")
        ):
            rel_path = Path(rel_path_s)
            # Anything beneath misc/python is never treated as an image or
            # composition.
            if rel_path.parts[:2] == ("misc", "python"):
                continue

            parent = self.root / rel_path.parent
            if rel_path.name == "mzbuild.yml":
                image = Image(self.rd, parent)
                if not image.name:
                    raise ValueError(f"config at {parent} missing name")
                if image.name in self.images:
                    raise ValueError(f"image {image.name} exists twice")
                self.images[image.name] = image
            elif rel_path.name == "mzcompose.py":
                # Compositions are keyed by the name of their directory.
                name = parent.name
                if name in self.compositions:
                    raise ValueError(f"composition {name} exists twice")
                self.compositions[name] = parent

        # Validate dependencies.
        for image in self.images.values():
            for d in image.depends_on:
                if d not in self.images:
                    raise ValueError(
                        f"image {image.name} depends on non-existent image {d}"
                    )

    @staticmethod
    def install_arguments(parser: argparse.ArgumentParser) -> None:
        """Install options to configure a repository into an argparse parser.

        This function installs the following options:

          * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the
            `profile` repository attribute.
          * The `--coverage` boolean option to control the `coverage` repository
            attribute.
          * The `--sanitizer` option to control the `sanitizer` repository
            attribute.
          * The `--arch` option to control the `arch` repository attribute.
          * The `--image-registry` and `--image-prefix` options to control the
            `image_registry` and `image_prefix` repository attributes.

        Use `Repository.from_arguments` to construct a repository from the
        parsed command-line arguments.
        """
        build_mode = parser.add_mutually_exclusive_group()
        build_mode.add_argument(
            "--dev",
            action="store_true",
            help="build Rust binaries with the dev profile",
        )
        build_mode.add_argument(
            "--release",
            action="store_true",
            help="build Rust binaries with the release profile (default when CI_LTO is set)",
        )
        build_mode.add_argument(
            "--optimized",
            action="store_true",
            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols; default unless CI_LTO is set)",
        )
        parser.add_argument(
            "--coverage",
            help="whether to enable code coverage compilation flags",
            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
            action="store_true",
        )
        parser.add_argument(
            "--sanitizer",
            help="whether to enable a sanitizer",
            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
            type=Sanitizer,
            choices=Sanitizer,
        )

        def _parse_arch(s: str) -> Arch:
            try:
                return Arch(s)
            except ValueError:
                valid = ", ".join(m.value for m in Arch)
                # The message already names the bad value; suppress the
                # redundant ValueError context.
                raise argparse.ArgumentTypeError(
                    f"invalid arch: {s!r} (choose from {valid})"
                ) from None

        parser.add_argument(
            "--arch",
            default=Arch.host(),
            help="the CPU architecture to build for",
            type=_parse_arch,
            metavar="{" + ",".join(m.value for m in Arch) + "}",
        )
        parser.add_argument(
            "--image-registry",
            default=image_registry(),
            help="the Docker image registry to pull images from and push images to",
        )
        parser.add_argument(
            "--image-prefix",
            default="",
            help="a prefix to apply to all Docker image names",
        )

    @classmethod
    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
        """Construct a repository from command-line arguments.

        The provided namespace must contain the options installed by
        `Repository.install_arguments`. When none of the profile flags is set,
        the profile is release if `CI_LTO` is truthy and optimized otherwise.
        """
        if args.release:
            profile = Profile.RELEASE
        elif args.optimized:
            profile = Profile.OPTIMIZED
        elif args.dev:
            profile = Profile.DEV
        else:
            profile = (
                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
            )

        return cls(
            root,
            profile=profile,
            coverage=args.coverage,
            sanitizer=args.sanitizer,
            image_registry=args.image_registry,
            image_prefix=args.image_prefix,
            arch=args.arch,
        )

    @property
    def root(self) -> Path:
        """The path to the root directory for the repository."""
        return self.rd.root

    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
        """Compute the dependency set necessary to build target images.

        The dependencies of `targets` will be crawled recursively until the
        complete set of transitive dependencies is determined or a circular
        dependency is discovered. The returned dependency set will be sorted
        in topological order.

        Raises:
           ValueError: A circular dependency was discovered in the images
               in the repository.
        """
        # Pre-fetch all crate input files in a single batched git call,
        # replacing ~118 individual subprocess pairs with one pair.
        self.rd.cargo_workspace.precompute_crate_inputs()
        # Pre-fetch all image context files in a single batched git call,
        # replacing ~41 individual subprocess pairs with one pair.
        self._precompute_image_context_files()

        resolved = OrderedDict()
        visiting = set()

        def visit(image: Image, path: tuple[str, ...] = ()) -> None:
            # `path` holds the chain of image names that led here and is only
            # used to render the cycle in the error message.
            if image.name in resolved:
                return
            if image.name in visiting:
                diagram = " -> ".join(path + (image.name,))
                raise ValueError(f"circular dependency in mzbuild: {diagram}")

            visiting.add(image.name)
            for d in sorted(image.depends_on):
                visit(self.images[d], path + (image.name,))
            # Recorded only after all of its dependencies, which yields the
            # topological order.
            resolved[image.name] = image

        for target_image in sorted(targets, key=lambda image: image.name):
            visit(target_image)

        return DependencySet(resolved.values())

    def _precompute_image_context_files(self) -> None:
        """Pre-fetch all image context files in a single batched git call.

        This replaces ~41 individual pairs of git subprocess calls (one per
        image) with a single pair, then partitions the results by image path
        and stores each partition on the image's `_context_files_cache`.
        """
        root = self.rd.root
        # Use paths relative to root for git specs and partitioning, since
        # git --relative outputs paths relative to cwd (root). Image paths
        # may be absolute when MZ_ROOT is an absolute path.
        image_rel_paths = sorted(
            {str(img.path.relative_to(root)) for img in self.images.values()}
        )
        specs = [f"{p}/**" for p in image_rel_paths]

        # Diffing against git's well-known empty tree lists every file that
        # differs from an empty checkout.
        empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        diff_files = spawn.capture(
            ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"]
            + specs,
            cwd=root,
        )
        # Untracked files that are not excluded by the standard ignore rules.
        ls_files = spawn.capture(
            ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs,
            cwd=root,
        )
        all_files = {
            f for f in (diff_files + ls_files).split("\0") if f.strip() != ""
        }

        # Partition files by image path (longest match first for nested paths)
        image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths}
        sorted_paths = sorted(image_rel_paths, key=len, reverse=True)
        for f in all_files:
            for ip in sorted_paths:
                if f.startswith(ip + "/"):
                    image_file_map[ip].add(f)
                    break

        for img in self.images.values():
            rel = str(img.path.relative_to(root))
            img._context_files_cache = image_file_map.get(rel, set())

    def __iter__(self) -> Iterator[Image]:
        return iter(self.images.values())

A collection of mzbuild Images.

Creating a repository will walk the filesystem beneath root to automatically discover all contained Images.

Iterating over a repository yields the contained images in an arbitrary order.

Args: root: The path to the root of the repository. arch: The CPU architecture to build for. profile: What profile to build the repository in. coverage: Whether to enable code coverage instrumentation. sanitizer: Which sanitizer to enable (address, thread, leak, memory, none). image_registry: The Docker image registry to pull images from and push images to. image_prefix: A prefix to apply to all Docker image names.

Attributes: images: A mapping from image name to Image for all contained images. compositions: A mapping from composition name (the name of the directory containing a mzcompose.py file) to that directory.

Repository( root: pathlib.Path, arch: materialize.xcompile.Arch = <Arch.X86_64: 'x86_64'>, profile: Profile = <Profile.OPTIMIZED: 2>, coverage: bool = False, sanitizer: materialize.rustc_flags.Sanitizer = <Sanitizer.none: 'none'>, image_registry: str = 'ghcr.io/materializeinc/materialize', image_prefix: str = '')
1413    def __init__(
1414        self,
1415        root: Path,
1416        arch: Arch = Arch.host(),
1417        profile: Profile = (
1418            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
1419        ),
1420        coverage: bool = False,
1421        sanitizer: Sanitizer = Sanitizer.none,
1422        image_registry: str = image_registry(),
1423        image_prefix: str = "",
1424    ):
1425        self.rd = RepositoryDetails(
1426            root,
1427            arch,
1428            profile,
1429            coverage,
1430            sanitizer,
1431            image_registry,
1432            image_prefix,
1433        )
1434        self.images: dict[str, Image] = {}
1435        self.compositions: dict[str, Path] = {}
1436        for rel_path_s in sorted(
1437            git.expand_globs(self.root, "**/mzbuild.yml", "**/mzcompose.py")
1438        ):
1439            rel_path = Path(rel_path_s)
1440            if rel_path.parts[:2] == ("misc", "python"):
1441                continue
1442
1443            parent = self.root / rel_path.parent
1444            if rel_path.name == "mzbuild.yml":
1445                image = Image(self.rd, parent)
1446                if not image.name:
1447                    raise ValueError(f"config at {parent} missing name")
1448                if image.name in self.images:
1449                    raise ValueError(f"image {image.name} exists twice")
1450                self.images[image.name] = image
1451            elif rel_path.name == "mzcompose.py":
1452                name = parent.name
1453                if name in self.compositions:
1454                    raise ValueError(f"composition {name} exists twice")
1455                self.compositions[name] = parent
1456
1457        # Validate dependencies.
1458        for image in self.images.values():
1459            for d in image.depends_on:
1460                if d not in self.images:
1461                    raise ValueError(
1462                        f"image {image.name} depends on non-existent image {d}"
1463                    )
rd
images: dict[str, Image]
compositions: dict[str, pathlib.Path]
@staticmethod
def install_arguments(parser: argparse.ArgumentParser) -> None:
1465    @staticmethod
1466    def install_arguments(parser: argparse.ArgumentParser) -> None:
1467        """Install options to configure a repository into an argparse parser.
1468
1469        This function installs the following options:
1470
1471          * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the
1472            `profile` repository attribute.
1473          * The `--coverage` boolean option to control the `coverage` repository
1474            attribute.
1475
1476        Use `Repository.from_arguments` to construct a repository from the
1477        parsed command-line arguments.
1478        """
1479        build_mode = parser.add_mutually_exclusive_group()
1480        build_mode.add_argument(
1481            "--dev",
1482            action="store_true",
1483            help="build Rust binaries with the dev profile",
1484        )
1485        build_mode.add_argument(
1486            "--release",
1487            action="store_true",
1488            help="build Rust binaries with the release profile (default)",
1489        )
1490        build_mode.add_argument(
1491            "--optimized",
1492            action="store_true",
1493            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
1494        )
1495        parser.add_argument(
1496            "--coverage",
1497            help="whether to enable code coverage compilation flags",
1498            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
1499            action="store_true",
1500        )
1501        parser.add_argument(
1502            "--sanitizer",
1503            help="whether to enable a sanitizer",
1504            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
1505            type=Sanitizer,
1506            choices=Sanitizer,
1507        )
1508
1509        def _parse_arch(s: str) -> Arch:
1510            try:
1511                return Arch(s)
1512            except ValueError:
1513                valid = ", ".join(m.value for m in Arch)
1514                raise argparse.ArgumentTypeError(
1515                    f"invalid arch: {s!r} (choose from {valid})"
1516                )
1517
1518        parser.add_argument(
1519            "--arch",
1520            default=Arch.host(),
1521            help="the CPU architecture to build for",
1522            type=_parse_arch,
1523            metavar="{" + ",".join(m.value for m in Arch) + "}",
1524        )
1525        parser.add_argument(
1526            "--image-registry",
1527            default=image_registry(),
1528            help="the Docker image registry to pull images from and push images to",
1529        )
1530        parser.add_argument(
1531            "--image-prefix",
1532            default="",
1533            help="a prefix to apply to all Docker image names",
1534        )

Install options to configure a repository into an argparse parser.

This function installs the following options:

  • The mutually-exclusive --dev/--optimized/--release options to control the profile repository attribute.
  • The --coverage boolean option to control the coverage repository attribute.

Use Repository.from_arguments to construct a repository from the parsed command-line arguments.

@classmethod
def from_arguments( cls, root: pathlib.Path, args: argparse.Namespace) -> Repository:
1536    @classmethod
1537    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
1538        """Construct a repository from command-line arguments.
1539
1540        The provided namespace must contain the options installed by
1541        `Repository.install_arguments`.
1542        """
1543        if args.release:
1544            profile = Profile.RELEASE
1545        elif args.optimized:
1546            profile = Profile.OPTIMIZED
1547        elif args.dev:
1548            profile = Profile.DEV
1549        else:
1550            profile = (
1551                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
1552            )
1553
1554        return cls(
1555            root,
1556            profile=profile,
1557            coverage=args.coverage,
1558            sanitizer=args.sanitizer,
1559            image_registry=args.image_registry,
1560            image_prefix=args.image_prefix,
1561            arch=args.arch,
1562        )

Construct a repository from command-line arguments.

The provided namespace must contain the options installed by Repository.install_arguments.

root: pathlib.Path
1564    @property
1565    def root(self) -> Path:
1566        """The path to the root directory for the repository."""
1567        return self.rd.root

The path to the root directory for the repository.

def resolve_dependencies( self, targets: Iterable[Image]) -> DependencySet:
1569    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
1570        """Compute the dependency set necessary to build target images.
1571
1572        The dependencies of `targets` will be crawled recursively until the
1573        complete set of transitive dependencies is determined or a circular
1574        dependency is discovered. The returned dependency set will be sorted
1575        in topological order.
1576
1577        Raises:
1578           ValueError: A circular dependency was discovered in the images
1579               in the repository.
1580        """
1581        # Pre-fetch all crate input files in a single batched git call,
1582        # replacing ~118 individual subprocess pairs with one pair.
1583        self.rd.cargo_workspace.precompute_crate_inputs()
1584        # Pre-fetch all image context files in a single batched git call,
1585        # replacing ~41 individual subprocess pairs with one pair.
1586        self._precompute_image_context_files()
1587
1588        resolved = OrderedDict()
1589        visiting = set()
1590
1591        def visit(image: Image, path: list[str] = []) -> None:
1592            if image.name in resolved:
1593                return
1594            if image.name in visiting:
1595                diagram = " -> ".join(path + [image.name])
1596                raise ValueError(f"circular dependency in mzbuild: {diagram}")
1597
1598            visiting.add(image.name)
1599            for d in sorted(image.depends_on):
1600                visit(self.images[d], path + [image.name])
1601            resolved[image.name] = image
1602
1603        for target_image in sorted(targets, key=lambda image: image.name):
1604            visit(target_image)
1605
1606        return DependencySet(resolved.values())

Compute the dependency set necessary to build target images.

The dependencies of targets will be crawled recursively until the complete set of transitive dependencies is determined or a circular dependency is discovered. The returned dependency set will be sorted in topological order.

Raises: ValueError: A circular dependency was discovered in the images in the repository.

def publish_multiarch_images( tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]) -> None:
def publish_multiarch_images(
    tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Publishes a set of docker images under a given tag.

    Each dependency set is expected to list the same images in the same
    order. The sets are walked in lockstep, and for every position a manifest
    list named after the image's `docker_name(tag)` is created from the
    `spec()` of each set's image and pushed. Iteration stops at the end of the
    shortest dependency set.

    If the `GITHUB_GHCR_TOKEN` environment variable is set, this also logs in
    to ghcr.io and publishes the same manifest under `GHCR_PREFIX`.

    A manifest is pushed only if it is not already present in the registry,
    except for the `latest` and `unstable` tags, which are always pushed.
    Finally, a Buildkite annotation recording the pushed tag is created.

    Args:
        tag: The Docker tag to publish the images under.
        dependency_sets: The resolved images to combine into manifest lists,
            one iterable per dependency set.

    Raises:
        AssertionError: The images at one position of the dependency sets do
            not all share the same name.
    """
    always_push_tags = ("latest", "unstable")

    def push_manifest(manifest: str, specs: list[str]) -> None:
        # Creating and pushing are retried as one unit, so a retry recreates
        # the manifest list before pushing it again. Taking the names as
        # arguments keeps the retried callable from closing over loop
        # variables.
        spawn.run_with_retries(
            lambda: (
                spawn.runv(
                    ["docker", "manifest", "create", "--amend", manifest, *specs]
                ),
                spawn.runv(["docker", "manifest", "push", manifest]),
            )
        )

    if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"):
        spawn.runv(
            [
                "docker",
                "login",
                "ghcr.io",
                "-u",
                "materialize-bot",
                "--password-stdin",
            ],
            stdin=ghcr_token.encode(),
        )

    for images in zip(*dependency_sets):
        names = {image.image.name for image in images}
        assert len(names) == 1, "dependency sets did not contain identical images"
        name = images[0].image.docker_name(tag)
        if tag in always_push_tags or not is_docker_image_pushed(name):
            push_manifest(name, [image.spec() for image in images])

        # Mirror to GHCR only when a token was provided (and login happened).
        ghcr_name = f"{GHCR_PREFIX}{name}"
        if ghcr_token and (
            tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name)
        ):
            push_manifest(
                ghcr_name, [f"{GHCR_PREFIX}{image.spec()}" for image in images]
            )

    print(f"--- Notifying for tag {tag}")
    markdown = f"""Pushed images with Docker tag `{tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{tag}",
        ],
        stdin=markdown.encode(),
    )

Publishes a set of docker images under a given tag.