misc.python.materialize.mzbuild

The implementation of the mzbuild system for Docker images.

For an overview of what mzbuild is and why it exists, see the user-facing documentation.

   1# Copyright Materialize, Inc. and contributors. All rights reserved.
   2#
   3# Use of this software is governed by the Business Source License
   4# included in the LICENSE file at the root of this repository.
   5#
   6# As of the Change Date specified in that file, in accordance with
   7# the Business Source License, use of this software will be governed
   8# by the Apache License, Version 2.0.
   9
  10"""The implementation of the mzbuild system for Docker images.
  11
  12For an overview of what mzbuild is and why it exists, see the [user-facing
  13documentation][user-docs].
  14
  15[user-docs]: https://github.com/MaterializeInc/materialize/blob/main/doc/developer/mzbuild.md
  16"""
  17
  18import argparse
  19import base64
  20import collections
  21import hashlib
  22import io
  23import json
  24import multiprocessing
  25import os
  26import platform
  27import re
  28import selectors
  29import shutil
  30import stat
  31import subprocess
  32import sys
  33import time
  34from collections import OrderedDict
  35from collections.abc import Callable, Iterable, Iterator, Sequence
  36from concurrent.futures import ThreadPoolExecutor, as_completed
  37from enum import Enum, auto
  38from functools import cache
  39from pathlib import Path
  40from tempfile import TemporaryFile
  41from threading import Lock
  42from typing import IO, Any, cast
  43
  44import requests
  45import yaml
  46from requests.auth import HTTPBasicAuth
  47
  48from materialize import MZ_ROOT, buildkite, cargo, git, rustc_flags, spawn, ui, xcompile
  49from materialize.docker import image_registry
  50from materialize.rustc_flags import Sanitizer
  51from materialize.xcompile import Arch, target
  52
# Prefix for image names hosted on the GitHub Container Registry.
GHCR_PREFIX = "ghcr.io/materializeinc/"
  54
  55
class RustIncrementalBuildFailure(Exception):
    """A Cargo build failure whose output matches a known incremental
    compilation bug signature (see
    `run_and_detect_rust_incremental_build_failure`)."""

    pass
  58
  59
  60def run_and_detect_rust_incremental_build_failure(
  61    cmd: list[str], cwd: str | Path
  62) -> subprocess.CompletedProcess:
  63    """This function is complex since it prints out each line immediately to
  64    stdout/stderr, but still records them at the same time so that we can scan
  65    for known incremental build failures."""
  66    stdout_result = io.StringIO()
  67    stderr_result = io.StringIO()
  68    p = subprocess.Popen(
  69        cmd,
  70        stdout=subprocess.PIPE,
  71        stderr=subprocess.PIPE,
  72        text=True,
  73        bufsize=1,
  74        env={**os.environ, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"},
  75    )
  76
  77    sel = selectors.DefaultSelector()
  78    sel.register(p.stdout, selectors.EVENT_READ)  # type: ignore
  79    sel.register(p.stderr, selectors.EVENT_READ)  # type: ignore
  80    assert p.stdout is not None
  81    assert p.stderr is not None
  82    os.set_blocking(p.stdout.fileno(), False)
  83    os.set_blocking(p.stderr.fileno(), False)
  84    running = True
  85    while running:
  86        for key, val in sel.select():
  87            output = io.StringIO()
  88            running = False
  89            while True:
  90                new_output = key.fileobj.read(1024)  # type: ignore
  91                if not new_output:
  92                    break
  93                output.write(new_output)
  94            contents = output.getvalue()
  95            output.close()
  96            if not contents:
  97                continue
  98            # Keep running as long as stdout or stderr have any content
  99            running = True
 100            if key.fileobj is p.stdout:
 101                print(
 102                    contents,
 103                    end="",
 104                    flush=True,
 105                )
 106                stdout_result.write(contents)
 107            else:
 108                print(
 109                    contents,
 110                    end="",
 111                    file=sys.stderr,
 112                    flush=True,
 113                )
 114                stderr_result.write(contents)
 115    p.wait()
 116    retcode = p.poll()
 117    assert retcode is not None
 118    stdout_contents = stdout_result.getvalue()
 119    stdout_result.close()
 120    stderr_contents = stderr_result.getvalue()
 121    stderr_result.close()
 122    if retcode:
 123        incremental_build_failure_msgs = [
 124            "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs",
 125            "ld.lld: error: undefined symbol",
 126            "signal: 11, SIGSEGV",
 127        ]
 128        combined = stdout_contents + stderr_contents
 129        if any(msg in combined for msg in incremental_build_failure_msgs):
 130            raise RustIncrementalBuildFailure()
 131
 132        raise subprocess.CalledProcessError(
 133            retcode, p.args, output=stdout_contents, stderr=stderr_contents
 134        )
 135    return subprocess.CompletedProcess(
 136        p.args, retcode, stdout_contents, stderr_contents
 137    )
 138
 139
 140class Fingerprint(bytes):
 141    """A SHA-1 hash of the inputs to an `Image`.
 142
 143    The string representation uses base32 encoding to distinguish mzbuild
 144    fingerprints from Git's hex encoded SHA-1 hashes while still being
 145    URL safe.
 146    """
 147
 148    def __str__(self) -> str:
 149        return base64.b32encode(self).decode()
 150
 151
 152class Profile(Enum):
 153    RELEASE = auto()
 154    OPTIMIZED = auto()
 155    DEV = auto()
 156
 157
 158class RepositoryDetails:
 159    """Immutable details about a `Repository`.
 160
 161    Used internally by mzbuild.
 162
 163    Attributes:
 164        root: The path to the root of the repository.
 165        arch: The CPU architecture to build for.
 166        profile: What profile the repository is being built with.
 167        coverage: Whether the repository has code coverage instrumentation
 168            enabled.
 169        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
 170        cargo_workspace: The `cargo.Workspace` associated with the repository.
 171        image_registry: The Docker image registry to pull images from and push
 172            images to.
 173        image_prefix: A prefix to apply to all Docker image names.
 174    """
 175
 176    def __init__(
 177        self,
 178        root: Path,
 179        arch: Arch,
 180        profile: Profile,
 181        coverage: bool,
 182        sanitizer: Sanitizer,
 183        image_registry: str,
 184        image_prefix: str,
 185    ):
 186        self.root = root
 187        self.arch = arch
 188        self.profile = profile
 189        self.coverage = coverage
 190        self.sanitizer = sanitizer
 191        self.cargo_workspace = cargo.Workspace(root)
 192        self.image_registry = image_registry
 193        self.image_prefix = image_prefix
 194
 195    def build(
 196        self,
 197        subcommand: str,
 198        rustflags: list[str],
 199        channel: str | None = None,
 200        extra_env: dict[str, str] = {},
 201    ) -> list[str]:
 202        """Start a build invocation for the configured architecture."""
 203        return xcompile.cargo(
 204            arch=self.arch,
 205            channel=channel,
 206            subcommand=subcommand,
 207            rustflags=rustflags,
 208            extra_env=extra_env,
 209        )
 210
 211    def tool(self, name: str) -> list[str]:
 212        """Start a binutils tool invocation for the configured architecture."""
 213        if platform.system() != "Linux":
 214            # We can't use the local tools from macOS to build a Linux executable
 215            return ["bin/ci-builder", "run", "stable", name]
 216        # If we're on Linux, trust that the tools are installed instead of
 217        # loading the slow ci-builder. If you don't have compilation tools
 218        # installed you can still run `bin/ci-builder run stable
 219        # bin/mzcompose ...`, and most likely the Cargo build will already
 220        # fail earlier if you don't have compilation tools installed and
 221        # run without the ci-builder.
 222        return [name]
 223
 224    def cargo_target_dir(self) -> Path:
 225        """Determine the path to the target directory for Cargo."""
 226        return self.root / "target-xcompile" / xcompile.target(self.arch)
 227
 228    def rewrite_builder_path_for_host(self, path: Path) -> Path:
 229        """Rewrite a path that is relative to the target directory inside the
 230        builder to a path that is relative to the target directory on the host.
 231
 232        If path does is not relative to the target directory inside the builder,
 233        it is returned unchanged.
 234        """
 235        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
 236        try:
 237            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
 238        except ValueError:
 239            return path
 240
 241
 242@cache
 243def docker_images() -> frozenset[str]:
 244    """List the Docker images available on the local machine."""
 245    return frozenset(
 246        spawn.capture(["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"])
 247        .strip()
 248        .split("\n")
 249    )
 250
 251
# On-disk cache of image names known to exist in a remote registry, so that
# repeated mzbuild invocations can skip the slow network check.
KNOWN_DOCKER_IMAGES_FILE = Path(MZ_ROOT / "known-docker-images.txt")
# In-memory mirror of the file above; None until lazily loaded by
# `is_docker_image_pushed`/`is_ghcr_image_pushed`.
_known_docker_images: set[str] | None = None
# Guards loading the set and appending new entries across threads.
_known_docker_images_lock = Lock()
 255
 256
def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Note that this operation requires a rather slow network request.

    Positive results are cached in memory and in `KNOWN_DOCKER_IMAGES_FILE`,
    so only the first check for a given image pays the network cost.
    Negative results are never cached.
    """
    global _known_docker_images

    # Lazily populate the in-memory cache from the on-disk cache file. Two
    # threads may both observe `None` and load the file, but the load is
    # idempotent, so that race is harmless.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if not KNOWN_DOCKER_IMAGES_FILE.exists():
                _known_docker_images = set()
            else:
                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                    _known_docker_images = set(line.strip() for line in f)

    if name in _known_docker_images:
        return True

    # Split "repository:tag" at the last colon, defaulting to "latest".
    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    # Prefer an authenticated manifest HEAD request when Docker Hub
    # credentials are available in the environment.
    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    # NOTE(review): these requests set no timeout; a hung registry would
    # stall the caller indefinitely — consider passing `timeout=`.
    try:
        if dockerhub_username and dockerhub_token:
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
            )
        else:
            # Anonymous access requires first fetching a short-lived pull
            # token for the repository.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
            )

        # On auth failures, throttling, or server errors, fall back to
        # `docker manifest inspect`.
        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: any network/JSON error is treated as "not pushed" so
        # callers can proceed.
        print(f"Error checking Docker image: {e}")
        return False

    # Record positive results for future calls and future processes.
    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
 333
 334
def is_ghcr_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to the GitHub Container
    Registry (ghcr.io).

    Mirrors `is_docker_image_pushed`, sharing the same positive-result cache
    in `_known_docker_images`/`KNOWN_DOCKER_IMAGES_FILE`.
    """
    global _known_docker_images

    # Lazily populate the in-memory cache from the on-disk cache file; the
    # benign double-load race matches `is_docker_image_pushed`.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if not KNOWN_DOCKER_IMAGES_FILE.exists():
                _known_docker_images = set()
            else:
                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                    _known_docker_images = set(line.strip() for line in f)

    # The registry API paths expect the name without the "ghcr.io/" host
    # prefix, but the cache lookup and the `docker manifest inspect`
    # fallback below use the full name.
    name_without_ghcr = name.removeprefix("ghcr.io/")
    if name in _known_docker_images:
        return True

    # Split "repository:tag" at the last colon, defaulting to "latest".
    if ":" not in name_without_ghcr:
        image, tag = name_without_ghcr, "latest"
    else:
        image, tag = name_without_ghcr.rsplit(":", 1)

    exists: bool = False

    try:
        # Fetch an anonymous pull token for the repository, then probe the
        # manifest with a HEAD request to see whether the tag exists.
        token = requests.get(
            "https://ghcr.io/token",
            params={
                "scope": f"repository:{image}:pull",
            },
        ).json()["token"]
        response = requests.head(
            f"https://ghcr.io/v2/{image}/manifests/{tag}",
            headers={"Authorization": f"Bearer {token}"},
        )

        # On auth failures, throttling, or server errors, fall back to
        # `docker manifest inspect`.
        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: any network/JSON error is treated as "not pushed".
        print(f"Error checking Docker image: {e}")
        return False

    # Record positive results for future calls and future processes.
    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
 392
 393
 394def chmod_x(path: Path) -> None:
 395    """Set the executable bit on a file or directory."""
 396    # https://stackoverflow.com/a/30463972/1122351
 397    mode = os.stat(path).st_mode
 398    mode |= (mode & 0o444) >> 2  # copy R bits to X
 399    os.chmod(path, mode)
 400
 401
 402class PreImage:
 403    """An action to run before building a Docker image.
 404
 405    Args:
 406        rd: The `RepositoryDetails` for the repository.
 407        path: The path to the `Image` associated with this action.
 408    """
 409
 410    def __init__(self, rd: RepositoryDetails, path: Path):
 411        self.rd = rd
 412        self.path = path
 413
 414    @classmethod
 415    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
 416        """Prepare a batch of actions.
 417
 418        This is useful for `PreImage` actions that are more efficient when
 419        their actions are applied to several images in bulk.
 420
 421        Returns an arbitrary output that is passed to `PreImage.run`.
 422        """
 423        pass
 424
 425    def run(self, prep: Any) -> None:
 426        """Perform the action.
 427
 428        Args:
 429            prep: Any prep work returned by `prepare_batch`.
 430        """
 431        pass
 432
 433    def inputs(self) -> set[str]:
 434        """Return the files which are considered inputs to the action."""
 435        raise NotImplementedError
 436
 437    def extra(self) -> str:
 438        """Returns additional data for incorporation in the fingerprint."""
 439        return ""
 440
 441
 442class Copy(PreImage):
 443    """A `PreImage` action which copies files from a directory.
 444
 445    See doc/developer/mzbuild.md for an explanation of the user-facing
 446    parameters.
 447    """
 448
 449    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
 450        super().__init__(rd, path)
 451
 452        self.source = config.pop("source", None)
 453        if self.source is None:
 454            raise ValueError("mzbuild config is missing 'source' argument")
 455
 456        self.destination = config.pop("destination", None)
 457        if self.destination is None:
 458            raise ValueError("mzbuild config is missing 'destination' argument")
 459
 460        self.matching = config.pop("matching", "*")
 461
 462    def run(self, prep: Any) -> None:
 463        super().run(prep)
 464        for src in self.inputs():
 465            dst = self.path / self.destination / src
 466            dst.parent.mkdir(parents=True, exist_ok=True)
 467            shutil.copy(self.rd.root / self.source / src, dst)
 468
 469    def inputs(self) -> set[str]:
 470        return set(git.expand_globs(self.rd.root / self.source, self.matching))
 471
 472
 473class CargoPreImage(PreImage):
 474    """A `PreImage` action that uses Cargo."""
 475
 476    @staticmethod
 477    @cache
 478    def _cargo_shared_inputs() -> frozenset[str]:
 479        """Resolve shared Cargo inputs once and cache the result.
 480
 481        This expands the 'ci/builder' directory glob and filters out
 482        non-existent files like '.cargo/config', avoiding repeated
 483        git subprocess calls in fingerprint().
 484        """
 485        inputs: set[str] = set()
 486        inputs |= git.expand_globs(Path("."), "ci/builder/**")
 487        inputs.add("Cargo.toml")
 488        inputs.add("Cargo.lock")
 489        if Path(".cargo/config").exists():
 490            inputs.add(".cargo/config")
 491        return frozenset(inputs)
 492
 493    def inputs(self) -> set[str]:
 494        return set(CargoPreImage._cargo_shared_inputs())
 495
 496    def extra(self) -> str:
 497        # Cargo images depend on the release mode and whether
 498        # coverage/sanitizer is enabled.
 499        flags: list[str] = []
 500        if self.rd.profile == Profile.RELEASE:
 501            flags += "release"
 502        if self.rd.profile == Profile.OPTIMIZED:
 503            flags += "optimized"
 504        if self.rd.coverage:
 505            flags += "coverage"
 506        if self.rd.sanitizer != Sanitizer.none:
 507            flags += self.rd.sanitizer.value
 508        flags.sort()
 509        return ",".join(flags)
 510
 511
class CargoBuild(CargoPreImage):
    """A `PreImage` action that builds a single binary with Cargo.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)
        # The "bin", "example", and "features" keys each accept either a
        # single string or a list of strings; normalize to lists.
        bin = config.pop("bin", [])
        self.bins = bin if isinstance(bin, list) else [bin]
        example = config.pop("example", [])
        self.examples = example if isinstance(example, list) else [example]
        # Whether to strip debug info from the built binaries (default True).
        self.strip = config.pop("strip", True)
        # Mapping from package name to {src: dst} pairs of files to copy out
        # of that package's build-script output directory (see `build`).
        self.extract = config.pop("extract", {})
        features = config.pop("features", [])
        self.features = features if isinstance(features, list) else [features]

        if len(self.bins) == 0 and len(self.examples) == 0:
            raise ValueError("mzbuild config is missing pre-build target")

    @staticmethod
    def generate_cargo_build_command(
        rd: RepositoryDetails,
        bins: list[str],
        examples: list[str],
        features: list[str] | None = None,
    ) -> list[str]:
        """Construct the `cargo build` invocation (as an argv list) that
        builds the given binaries and examples for `rd`'s configuration."""
        # Coverage and sanitizer builds need special rustc flags; an ordinary
        # build just enables tokio_unstable.
        rustflags = (
            rustc_flags.coverage
            if rd.coverage
            else (
                rustc_flags.sanitizer[rd.sanitizer]
                if rd.sanitizer != Sanitizer.none
                else ["--cfg=tokio_unstable"]
            )
        )
        # C/C++ cross-compilation flags, only needed for sanitizer builds.
        cflags = (
            [
                f"--target={target(rd.arch)}",
                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
                "-fuse-ld=lld",
                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
            ]
            + rustc_flags.sanitizer_cflags[rd.sanitizer]
            if rd.sanitizer != Sanitizer.none
            else []
        )
        # Sanitizer builds route the C toolchain through the cross toolchain
        # and /sanshim; ordinary builds need no extra environment.
        extra_env = (
            {
                "CFLAGS": " ".join(cflags),
                "CXXFLAGS": " ".join(cflags),
                "LDFLAGS": " ".join(cflags),
                "CXXSTDLIB": "stdc++",
                "CC": "cc",
                "CXX": "c++",
                "CPP": "clang-cpp-18",
                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
            }
            if rd.sanitizer != Sanitizer.none
            else {}
        )

        cargo_build = rd.build(
            "build", channel=None, rustflags=rustflags, extra_env=extra_env
        )

        # Pass --package for every crate that owns a requested bin/example so
        # Cargo builds only what is needed.
        packages = set()
        for bin in bins:
            cargo_build.extend(["--bin", bin])
            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
        for example in examples:
            cargo_build.extend(["--example", example])
            packages.add(rd.cargo_workspace.crate_for_example(example).name)
        cargo_build.extend(f"--package={p}" for p in packages)

        if rd.profile == Profile.RELEASE:
            cargo_build.append("--release")
        if rd.profile == Profile.OPTIMIZED:
            cargo_build.extend(["--profile", "optimized"])
        if rd.sanitizer != Sanitizer.none:
            # ASan doesn't work with jemalloc
            cargo_build.append("--no-default-features")
            # Uses more memory, so reduce the number of jobs
            cargo_build.extend(
                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
            )
        if features:
            cargo_build.append(f"--features={','.join(features)}")

        return cargo_build

    @classmethod
    def prepare_batch(cls, cargo_builds: list["PreImage"]) -> dict[str, Any]:
        """Run one combined `cargo build` covering all the given `CargoBuild`
        actions, returning the JSON build metadata consumed by `run`."""
        super().prepare_batch(cargo_builds)

        if not cargo_builds:
            return {}

        # Building all binaries and examples in the same `cargo build` command
        # allows Cargo to link in parallel with other work, which can
        # meaningfully speed up builds.

        rd: RepositoryDetails | None = None
        builds = cast(list[CargoBuild], cargo_builds)
        bins = set()
        examples = set()
        features = set()
        for build in builds:
            # All builds share one repository; remember the first one's rd.
            if not rd:
                rd = build.rd
            bins.update(build.bins)
            examples.update(build.examples)
            features.update(build.features)
        assert rd

        ui.section(f"Common build for: {', '.join(bins | examples)}")

        cargo_build = cls.generate_cargo_build_command(
            rd, list(bins), list(examples), list(features) if features else None
        )

        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

        # Re-run with JSON-formatted messages and capture the output so we can
        # later analyze the build artifacts in `run`. This should be nearly
        # instantaneous since we just compiled above with the same crates and
        # features. (We don't want to do the compile above with JSON-formatted
        # messages because it wouldn't be human readable.)
        json_output = spawn.capture(
            cargo_build + ["--message-format=json"],
            cwd=rd.root,
        )
        prep = {"cargo": json_output}

        return prep

    def build(self, build_output: dict[str, Any]) -> None:
        """Copy this action's built binaries/examples (and any `extract`
        files) from the Cargo target directory into the mzbuild context.

        Args:
            build_output: The prep dict from `prepare_batch`, whose "cargo"
                key holds the JSON-formatted Cargo build messages.
        """
        # The subdirectory of the target dir that Cargo used for this profile.
        cargo_profile = (
            "release"
            if self.rd.profile == Profile.RELEASE
            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
        )

        def copy(src: Path, relative_dst: Path) -> None:
            # Copy one executable into the image context, then shrink it.
            exe_path = self.path / relative_dst
            exe_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, exe_path)

            if self.strip:
                # The debug information is large enough that it slows down CI,
                # since we're packaging these binaries up into Docker images and
                # shipping them around.
                spawn.runv(
                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
                    cwd=self.rd.root,
                )
            else:
                # Even if we've been asked not to strip the binary, remove the
                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
                # indexes that speed up launching a debugger against the binary,
                # and we're happy to have slower debugger start up in exchange for
                # smaller binaries. Plus the sections have been obsoleted by a
                # `.debug_names` section in DWARF 5, and so debugger support for
                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
                # See: https://github.com/rust-lang/rust/issues/46034
                spawn.runv(
                    [
                        *self.rd.tool("objcopy"),
                        "-R",
                        ".debug_pubnames",
                        "-R",
                        ".debug_pubtypes",
                        exe_path,
                    ],
                    cwd=self.rd.root,
                )

        for bin in self.bins:
            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
            copy(src_path, bin)
        for example in self.examples:
            src_path = (
                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
            )
            copy(src_path, Path("examples") / example)

        if self.extract:
            cargo_build_json_output = build_output["cargo"]

            # Scan the JSON build messages for build-script output dirs and
            # copy the requested files out of them.
            target_dir = self.rd.cargo_target_dir()
            for line in cargo_build_json_output.split("\n"):
                if line.strip() == "" or not line.startswith("{"):
                    continue
                message = json.loads(line)
                if message["reason"] != "build-script-executed":
                    continue
                out_dir = self.rd.rewrite_builder_path_for_host(
                    Path(message["out_dir"])
                )
                if not out_dir.is_relative_to(target_dir):
                    # Some crates are built for both the host and the target.
                    # Ignore the built-for-host out dir.
                    continue
                # parse the package name from a package_id that looks like one of:
                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
                # file:///path/to/my-package#0.1.0
                package_id = message["package_id"]
                if "@" in package_id:
                    package = package_id.split("@")[0].split("#")[-1]
                else:
                    package = message["package_id"].split("#")[0].split("/")[-1]
                for src, dst in self.extract.get(package, {}).items():
                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

        # NOTE(review): `acquired` is set here but never read within this
        # class; presumably consumed elsewhere (cf. ResolvedImage.acquired) —
        # confirm before removing.
        self.acquired = True

    def run(self, prep: dict[str, Any]) -> None:
        """Perform the pre-image action: copy the artifacts built by
        `prepare_batch` into the image context."""
        super().run(prep)
        self.build(prep)

    # NOTE(review): `functools.cache` on an instance method keys on `self`
    # and keeps the instance alive for the cache's lifetime (ruff B019);
    # acceptable if CargoBuild instances live for the whole build — confirm.
    @cache
    def inputs(self) -> set[str]:
        """Return all files that feed into this build: the shared Cargo
        inputs plus every transitive path dependency of the targets."""
        deps = set()

        for bin in self.bins:
            crate = self.rd.cargo_workspace.crate_for_bin(bin)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
        for example in self.examples:
            crate = self.rd.cargo_workspace.crate_for_example(example)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
                crate, dev=True
            )

        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
        return inputs
 754
 755
class Image:
    """A Docker image whose build and dependencies are managed by mzbuild.

    An image corresponds to a directory in a repository that contains a
    `mzbuild.yml` file. This directory is called an "mzbuild context."

    Attributes:
        name: The name of the image.
        publish: Whether the image should be pushed to Docker Hub.
        depends_on: The names of the images upon which this image depends.
        root: The path to the root of the associated `Repository`.
        path: The path to the directory containing the `mzbuild.yml`
            configuration file.
        pre_images: Optional actions to perform before running `docker build`.
        build_args: An optional list of --build-arg to pass to the dockerfile
    """

    # Matches `MZFROM <image>` directives in Dockerfiles, which declare a
    # dependency on another mzbuild image.
    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")

    # Set to None in __init__; presumably a lazily-populated cache of the
    # image's build-context files (populated outside this view — confirm).
    _context_files_cache: set[str] | None

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path
        self._context_files_cache = None
        self.pre_images: list[PreImage] = []
        # Parse the image's `mzbuild.yml` configuration file.
        with open(self.path / "mzbuild.yml") as f:
            data = yaml.safe_load(f)
            self.name: str = data.pop("name")
            self.publish: bool = data.pop("publish", True)
            self.description: str | None = data.pop("description", None)
            self.mainline: bool = data.pop("mainline", True)
            for pre_image in data.pop("pre-image", []):
                typ = pre_image.pop("type", None)
                if typ == "cargo-build":
                    self.pre_images.append(CargoBuild(self.rd, self.path, pre_image))
                elif typ == "copy":
                    self.pre_images.append(Copy(self.rd, self.path, pre_image))
                else:
                    raise ValueError(
                        f"mzbuild config in {self.path} has unknown pre-image type"
                    )
            self.build_args = data.pop("build-args", {})

        # Image names must be safe for use in Docker references.
        if re.search(r"[^A-Za-z0-9\-]", self.name):
            raise ValueError(
                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
            )

        # Scan the Dockerfile for MZFROM directives to discover dependencies
        # on other mzbuild images.
        self.depends_on: list[str] = []
        with open(self.path / "Dockerfile", "rb") as f:
            for line in f:
                match = self._DOCKERFILE_MZFROM_RE.match(line)
                if match:
                    self.depends_on.append(match.group(1).decode())

    def sync_description(self) -> None:
        """Sync the description to Docker Hub if the image is publishable
        and a README.md file exists."""

        if not self.publish:
            ui.say(f"{self.name} is not publishable")
            return

        readme_path = self.path / "README.md"
        has_readme = readme_path.exists()
        if not has_readme:
            # NOTE(review): the message mentions "description", but only the
            # presence of README.md is actually checked here.
            ui.say(f"{self.name} has no README.md or description")
            return

        # `docker pushrm` pushes the README (and optional short description)
        # to the image's registry page.
        docker_config = os.getenv("DOCKER_CONFIG")
        spawn.runv(
            [
                "docker",
                "pushrm",
                f"--file={readme_path}",
                *([f"--config={docker_config}/config.json"] if docker_config else []),
                *([f"--short={self.description}"] if self.description else []),
                self.docker_name(),
            ]
        )

    def docker_name(self, tag: str | None = None) -> str:
        """Return the name of the image on Docker Hub at the given tag."""
        name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
        if tag:
            name += f":{tag}"
        return name
 844
 845
 846class ResolvedImage:
 847    """An `Image` whose dependencies have been resolved.
 848
 849    Attributes:
 850        image: The underlying `Image`.
 851        acquired: Whether the image is available locally.
 852        dependencies: A mapping from dependency name to `ResolvedImage` for
 853            each of the images that `image` depends upon.
 854    """
 855
 856    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
 857        self.image = image
 858        self.acquired = False
 859        self.dependencies = {}
 860        for d in dependencies:
 861            self.dependencies[d.name] = d
 862
 863    def __repr__(self) -> str:
 864        return f"ResolvedImage<{self.spec()}>"
 865
 866    @property
 867    def name(self) -> str:
 868        """The name of the underlying image."""
 869        return self.image.name
 870
 871    @property
 872    def publish(self) -> bool:
 873        """Whether the underlying image should be pushed to Docker Hub."""
 874        return self.image.publish
 875
 876    @cache
 877    def spec(self) -> str:
 878        """Return the "spec" for the image.
 879
 880        A spec is the unique identifier for the image given its current
 881        fingerprint. It is a valid Docker Hub name.
 882        """
 883        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")
 884
 885    def write_dockerfile(self) -> IO[bytes]:
 886        """Render the Dockerfile without mzbuild directives.
 887
 888        Returns:
 889            file: A handle to a temporary file containing the adjusted
 890                Dockerfile."""
 891        with open(self.image.path / "Dockerfile", "rb") as f:
 892            lines = f.readlines()
 893        f = TemporaryFile()
 894        for line in lines:
 895            match = Image._DOCKERFILE_MZFROM_RE.match(line)
 896            if match:
 897                image = match.group(1).decode()
 898                spec = self.dependencies[image].spec()
 899                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
 900            f.write(line)
 901        f.seek(0)
 902        return f
 903
 904    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
 905        """Build the image from source.
 906
 907        Requires that the caller has already acquired all dependencies and
 908        prepared all `PreImage` actions via `PreImage.prepare_batch`.
 909        """
 910        ui.section(f"Building {self.spec()}")
 911        spawn.runv(["git", "clean", "-ffdX", self.image.path])
 912
 913        for pre_image in self.image.pre_images:
 914            pre_image.run(prep[type(pre_image)])
 915        build_args = {
 916            **self.image.build_args,
 917            "BUILD_PROFILE": self.image.rd.profile.name,
 918            "ARCH_GCC": str(self.image.rd.arch),
 919            "ARCH_GO": self.image.rd.arch.go_str(),
 920            "CI_SANITIZER": str(self.image.rd.sanitizer),
 921        }
 922        f = self.write_dockerfile()
 923
 924        try:
 925            spawn.capture(["docker", "buildx", "version"])
 926        except subprocess.CalledProcessError:
 927            if push:
 928                print(
 929                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
 930                )
 931                raise
 932            print(
 933                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
 934            )
 935            print("Falling back to docker build")
 936            cmd: Sequence[str] = [
 937                "docker",
 938                "build",
 939                "-f",
 940                "-",
 941                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
 942                "-t",
 943                self.spec(),
 944                f"--platform=linux/{self.image.rd.arch.go_str()}",
 945                str(self.image.path),
 946            ]
 947        else:
 948            cmd: Sequence[str] = [
 949                "docker",
 950                "buildx",
 951                "build",
 952                "--progress=plain",  # less noisy
 953                "-f",
 954                "-",
 955                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
 956                "-t",
 957                f"docker.io/{self.spec()}",
 958                "-t",
 959                f"ghcr.io/materializeinc/{self.spec()}",
 960                f"--platform=linux/{self.image.rd.arch.go_str()}",
 961                str(self.image.path),
 962                *(["--push"] if push else ["--load"]),
 963            ]
 964
 965        if token := os.getenv("GITHUB_GHCR_TOKEN"):
 966            spawn.runv(
 967                [
 968                    "docker",
 969                    "login",
 970                    "ghcr.io",
 971                    "-u",
 972                    "materialize-bot",
 973                    "--password-stdin",
 974                ],
 975                stdin=token.encode(),
 976            )
 977
 978        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)
 979
 980    def try_pull(self, max_retries: int) -> bool:
 981        """Download the image if it does not exist locally. Returns whether it was found."""
 982        ui.header(f"Acquiring {self.spec()}")
 983        command = ["docker", "pull"]
 984        # --quiet skips printing the progress bar, which does not display well in CI.
 985        if ui.env_is_truthy("CI"):
 986            command.append("--quiet")
 987        command.append(self.spec())
 988        if not self.acquired:
 989            sleep_time = 1
 990            for retry in range(1, max_retries + 1):
 991                try:
 992                    spawn.runv(
 993                        command,
 994                        stdin=subprocess.DEVNULL,
 995                        stdout=sys.stderr.buffer,
 996                    )
 997                    self.acquired = True
 998                    break
 999                except subprocess.CalledProcessError:
1000                    if retry < max_retries:
1001                        # There seems to be no good way to tell what error
1002                        # happened based on error code
1003                        # (https://github.com/docker/cli/issues/538) and we
1004                        # want to print output directly to terminal.
1005                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
1006                            for retry in range(max_retries):
1007                                try:
1008                                    build_status = buildkite.get_build_status(build)
1009                                except subprocess.CalledProcessError:
1010                                    time.sleep(sleep_time)
1011                                    sleep_time = min(sleep_time * 2, 10)
1012                                    break
1013                                print(f"Build {build} status: {build_status}")
1014                                if build_status == "failed":
1015                                    print(
1016                                        f"Build {build} has been marked as failed, exiting hard"
1017                                    )
1018                                    sys.exit(1)
1019                                elif build_status == "success":
1020                                    break
1021                                assert (
1022                                    build_status == "pending"
1023                                ), f"Unknown build status {build_status}"
1024                                time.sleep(1)
1025                        else:
1026                            print(f"Retrying in {sleep_time}s ...")
1027                            time.sleep(sleep_time)
1028                            sleep_time = min(sleep_time * 2, 10)
1029                        continue
1030                    else:
1031                        break
1032        return self.acquired
1033
1034    def is_published_if_necessary(self) -> bool:
1035        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
1036        if not self.publish:
1037            return False
1038        spec = self.spec()
1039        if spec.startswith(GHCR_PREFIX):
1040            spec = spec.removeprefix(GHCR_PREFIX)
1041        ghcr_spec = f"{GHCR_PREFIX}{spec}"
1042        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
1043            ui.say(f"{spec} already exists")
1044            return True
1045        return False
1046
1047    def run(
1048        self,
1049        args: list[str] = [],
1050        docker_args: list[str] = [],
1051        env: dict[str, str] = {},
1052    ) -> None:
1053        """Run a command in the image.
1054
1055        Creates a container from the image and runs the command described by
1056        `args` in the image.
1057        """
1058        envs = []
1059        for key, val in env.items():
1060            envs.extend(["--env", f"{key}={val}"])
1061        spawn.runv(
1062            [
1063                "docker",
1064                "run",
1065                "--tty",
1066                "--rm",
1067                *envs,
1068                "--init",
1069                *docker_args,
1070                self.spec(),
1071                *args,
1072            ],
1073        )
1074
1075    def list_dependencies(self, transitive: bool = False) -> set[str]:
1076        out = set()
1077        for dep in self.dependencies.values():
1078            out.add(dep.name)
1079            if transitive:
1080                out |= dep.list_dependencies(transitive)
1081        return out
1082
1083    @cache
1084    def inputs(self, transitive: bool = False) -> set[str]:
1085        """List the files tracked as inputs to the image.
1086
1087        These files are used to compute the fingerprint for the image. See
1088        `ResolvedImage.fingerprint` for details.
1089
1090        Returns:
1091            inputs: A list of input files, relative to the root of the
1092                repository.
1093        """
1094        if self.image._context_files_cache is not None:
1095            paths = set(self.image._context_files_cache)
1096        else:
1097            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
1098        if not paths:
1099            # While we could find an `mzbuild.yml` file for this service, expland_globs didn't
1100            # return any files that matched this service. At the very least, the `mzbuild.yml`
1101            # file itself should have been returned. We have a bug if paths is empty.
1102            raise AssertionError(
1103                f"{self.image.name} mzbuild exists but its files are unknown to git"
1104            )
1105        for pre_image in self.image.pre_images:
1106            paths |= pre_image.inputs()
1107        if transitive:
1108            for dep in self.dependencies.values():
1109                paths |= dep.inputs(transitive)
1110        return paths
1111
1112    @cache
1113    def fingerprint(self) -> Fingerprint:
1114        """Fingerprint the inputs to the image.
1115
1116        Compute the fingerprint of the image. Changing the contents of any of
1117        the files or adding or removing files to the image will change the
1118        fingerprint, as will modifying the inputs to any of its dependencies.
1119
1120        The image considers all non-gitignored files in its mzbuild context to
1121        be inputs. If it has a pre-image action, that action may add additional
1122        inputs via `PreImage.inputs`.
1123        """
1124        self_hash = hashlib.sha1()
1125        # When inputs come from precomputed sources (crate and image context
1126        # batching + resolved CargoPreImage paths), they are already individual
1127        # file paths from git. Skip the expensive expand_globs subprocess calls.
1128        inputs = self.inputs()
1129        if self.image._context_files_cache is not None:
1130            resolved_inputs = sorted(inputs)
1131        else:
1132            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
1133        for rel_path in resolved_inputs:
1134            abs_path = self.image.rd.root / rel_path
1135            file_hash = hashlib.sha1()
1136            raw_file_mode = os.lstat(abs_path).st_mode
1137            # Compute a simplified file mode using the same rules as Git.
1138            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
1139            if stat.S_ISLNK(raw_file_mode):
1140                file_mode = 0o120000
1141            elif raw_file_mode & stat.S_IXUSR:
1142                file_mode = 0o100755
1143            else:
1144                file_mode = 0o100644
1145            with open(abs_path, "rb") as f:
1146                file_hash.update(f.read())
1147            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
1148            self_hash.update(rel_path.encode())
1149            self_hash.update(file_hash.digest())
1150            self_hash.update(b"\0")
1151
1152        for pre_image in self.image.pre_images:
1153            self_hash.update(pre_image.extra().encode())
1154            self_hash.update(b"\0")
1155
1156        self_hash.update(f"profile={self.image.rd.profile}".encode())
1157        self_hash.update(f"arch={self.image.rd.arch}".encode())
1158        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
1159        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
1160        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
1161        self_hash.update(b"mirror=ghcr")
1162
1163        full_hash = hashlib.sha1()
1164        full_hash.update(self_hash.digest())
1165        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
1166            full_hash.update(dep.name.encode())
1167            full_hash.update(dep.fingerprint())
1168            full_hash.update(b"\0")
1169
1170        return Fingerprint(full_hash.digest())
1171
1172
class DependencySet:
    """A set of `ResolvedImage`s.

    Iterating over a dependency set yields the contained images in an arbitrary
    order. Indexing a dependency set yields the image with the specified name.
    """

    def __init__(self, dependencies: Iterable[Image]):
        """Construct a new `DependencySet`.

        The provided `dependencies` must be topologically sorted.
        """
        self._dependencies: dict[str, ResolvedImage] = {}
        known_images = docker_images()
        for d in dependencies:
            image = ResolvedImage(
                image=d,
                # Topological sorting guarantees all of `d`'s dependencies
                # have already been resolved.
                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
            )
            image.acquired = image.spec() in known_images
            self._dependencies[d.name] = image

    def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]:
        """Run `PreImage.prepare_batch` once per pre-image type.

        Returns a mapping from pre-image type to the preparation state that
        `ResolvedImage.build` later passes to each instance's `run`.
        """
        pre_images = collections.defaultdict(list)
        for image in images:
            for pre_image in image.image.pre_images:
                pre_images[type(pre_image)].append(pre_image)
        pre_image_prep = {}
        for cls, instances in pre_images.items():
            # `prepare_batch` is invoked on the class; the cast only exists
            # to satisfy the type checker.
            pre_image = cast(PreImage, cls)
            pre_image_prep[cls] = pre_image.prepare_batch(instances)
        return pre_image_prep

    def acquire(self, max_retries: int | None = None) -> None:
        """Download or build all of the images in the dependency set that do not
        already exist locally.

        Args:
            max_retries: Number of retries on failure.
        """

        # Only retry in CI runs since we struggle with flaky docker pulls there
        if not max_retries:
            max_retries = (
                90
                if os.getenv("CI_WAITING_FOR_BUILD")
                else (
                    5
                    if ui.env_is_truthy("CI")
                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
                    else 1
                )
            )
        assert max_retries > 0

        # Publishable images may exist in a registry; try pulling those in
        # parallel and build whatever could not be pulled. Non-publishable
        # images are always built locally.
        deps_to_check = [dep for dep in self if dep.publish]
        deps_to_build = [dep for dep in self if not dep.publish]
        if len(deps_to_check):
            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
                futures = [
                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
                ]
                for dep, future in zip(deps_to_check, futures):
                    try:
                        if not future.result():
                            deps_to_build.append(dep)
                    except Exception:
                        deps_to_build.append(dep)

        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            expected_deps = [dep for dep in deps_to_build if dep.publish]
            if expected_deps:
                print(
                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
                )
                sys.exit(5)

        prep = self._prepare_batch(deps_to_build)
        for dep in deps_to_build:
            dep.build(prep)

    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
        """Ensure all publishable images in this dependency set exist on Docker
        Hub.

        Images are pushed using their spec as their tag.

        Args:
            pre_build: A callback to invoke with all dependency that are going
                       to be built locally, invoked after their cargo build is
                       done, but before the Docker images are build and
                       uploaded to DockerHub.
        """
        num_deps = len(list(self))
        if not num_deps:
            deps_to_build = []
        else:
            # Check publication status of all images in parallel.
            with ThreadPoolExecutor(max_workers=num_deps) as executor:
                futures = list(
                    executor.map(
                        lambda dep: (dep, not dep.is_published_if_necessary()), self
                    )
                )

            deps_to_build = [dep for dep, should_build in futures if should_build]

        prep = self._prepare_batch(deps_to_build)
        if pre_build:
            pre_build(deps_to_build)
        lock = Lock()
        # Images that are already published count as built from the start.
        built_deps: set[str] = {dep.name for dep in self} - {
            dep.name for dep in deps_to_build
        }

        def build_dep(dep):
            # Wait (up to 10 minutes) for all of `dep`'s dependencies to be
            # built by the other worker threads.
            end_time = time.time() + 600
            while True:
                if time.time() > end_time:
                    # `dep.dependencies` is keyed by image name, so iterate
                    # the names directly. (The previous code called `.name`
                    # on the string keys, which raised AttributeError
                    # instead of reporting the timeout.)
                    missing = [
                        name for name in dep.dependencies if name not in built_deps
                    ]
                    raise TimeoutError(
                        f"Timed out in {dep.name} waiting for {missing}"
                    )
                with lock:
                    if all(dep2 in built_deps for dep2 in dep.dependencies):
                        break
                time.sleep(0.01)
            # Retry pushes up to three times; a local-only build failure is
            # fatal immediately.
            for attempts_remaining in reversed(range(3)):
                try:
                    dep.build(prep, push=dep.publish)
                    with lock:
                        built_deps.add(dep.name)
                    break
                except Exception:
                    if not dep.publish or attempts_remaining == 0:
                        raise

        if deps_to_build:
            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
                for future in as_completed(futures):
                    future.result()

    def check(self) -> bool:
        """Check all publishable images in this dependency set exist on Docker
        Hub. Don't try to download or build them."""
        num_deps = len(list(self))
        if num_deps == 0:
            return True
        with ThreadPoolExecutor(max_workers=num_deps) as executor:
            results = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), list(self))
            )
        return all(results)

    def __iter__(self) -> Iterator[ResolvedImage]:
        return iter(self._dependencies.values())

    def __getitem__(self, key: str) -> ResolvedImage:
        return self._dependencies[key]
1332
1333
class Repository:
    """A collection of mzbuild `Image`s.

    Creating a repository will walk the filesystem beneath `root` to
    automatically discover all contained `Image`s.

    Iterating over a repository yields the contained images in an arbitrary
    order.

    Args:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile to build the repository in.
        coverage: Whether to enable code coverage instrumentation.
        sanitizer: Which sanitizer to enable, if any (address, thread, leak,
            memory, none).
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.

    Attributes:
        images: A mapping from image name to `Image` for all contained images.
        compositions: A mapping from composition name to the directory
            containing its `mzcompose.py` file.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch = Arch.host(),
        profile: Profile = (
            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
        ),
        coverage: bool = False,
        sanitizer: Sanitizer = Sanitizer.none,
        image_registry: str = image_registry(),
        image_prefix: str = "",
    ):
        self.rd = RepositoryDetails(
            root,
            arch,
            profile,
            coverage,
            sanitizer,
            image_registry,
            image_prefix,
        )
        self.images: dict[str, Image] = {}
        self.compositions: dict[str, Path] = {}
        for path, dirs, files in os.walk(self.root, topdown=True):
            if path == str(root / "misc"):
                # The Python support tree never contains images. Guard the
                # removal so a missing directory doesn't raise ValueError.
                if "python" in dirs:
                    dirs.remove("python")
            # Filter out some particularly massive ignored directories to keep
            # things snappy. Not required for correctness.
            dirs[:] = set(dirs) - {
                ".git",
                ".mypy_cache",
                "target",
                "target-ra",
                "target-xcompile",
                "mzdata",
                "node_modules",
                "venv",
            }
            if "mzbuild.yml" in files:
                image = Image(self.rd, Path(path))
                if not image.name:
                    raise ValueError(f"config at {path} missing name")
                if image.name in self.images:
                    raise ValueError(f"image {image.name} exists twice")
                self.images[image.name] = image
            if "mzcompose.py" in files:
                name = Path(path).name
                if name in self.compositions:
                    raise ValueError(f"composition {name} exists twice")
                self.compositions[name] = Path(path)

        # Validate dependencies.
        for image in self.images.values():
            for d in image.depends_on:
                if d not in self.images:
                    raise ValueError(
                        f"image {image.name} depends on non-existent image {d}"
                    )

    @staticmethod
    def install_arguments(parser: argparse.ArgumentParser) -> None:
        """Install options to configure a repository into an argparse parser.

        This function installs the following options:

          * The mutually-exclusive `--dev`/`--optimized`/`--release` options
            to control the `profile` repository attribute.
          * The `--coverage` boolean option to control the `coverage`
            repository attribute.
          * The `--sanitizer` option to control the `sanitizer` repository
            attribute.
          * The `--arch` option to control the CPU architecture to build for.
          * The `--image-registry` and `--image-prefix` options to control
            where images are pulled from and pushed to.

        Use `Repository.from_arguments` to construct a repository from the
        parsed command-line arguments.
        """
        build_mode = parser.add_mutually_exclusive_group()
        build_mode.add_argument(
            "--dev",
            action="store_true",
            help="build Rust binaries with the dev profile",
        )
        build_mode.add_argument(
            "--release",
            action="store_true",
            help="build Rust binaries with the release profile (default)",
        )
        build_mode.add_argument(
            "--optimized",
            action="store_true",
            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
        )
        parser.add_argument(
            "--coverage",
            help="whether to enable code coverage compilation flags",
            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
            action="store_true",
        )
        parser.add_argument(
            "--sanitizer",
            help="whether to enable a sanitizer",
            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
            type=Sanitizer,
            choices=Sanitizer,
        )
        parser.add_argument(
            "--arch",
            default=Arch.host(),
            help="the CPU architecture to build for",
            type=Arch,
            choices=Arch,
        )
        parser.add_argument(
            "--image-registry",
            default=image_registry(),
            help="the Docker image registry to pull images from and push images to",
        )
        parser.add_argument(
            "--image-prefix",
            default="",
            help="a prefix to apply to all Docker image names",
        )

    @classmethod
    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
        """Construct a repository from command-line arguments.

        The provided namespace must contain the options installed by
        `Repository.install_arguments`.
        """
        if args.release:
            profile = Profile.RELEASE
        elif args.optimized:
            profile = Profile.OPTIMIZED
        elif args.dev:
            profile = Profile.DEV
        else:
            # No explicit choice: mirror the default used by `__init__`.
            profile = (
                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
            )

        return cls(
            root,
            profile=profile,
            coverage=args.coverage,
            sanitizer=args.sanitizer,
            image_registry=args.image_registry,
            image_prefix=args.image_prefix,
            arch=args.arch,
        )

    @property
    def root(self) -> Path:
        """The path to the root directory for the repository."""
        return self.rd.root

    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
        """Compute the dependency set necessary to build target images.

        The dependencies of `targets` will be crawled recursively until the
        complete set of transitive dependencies is determined or a circular
        dependency is discovered. The returned dependency set will be sorted
        in topological order.

        Raises:
           ValueError: A circular dependency was discovered in the images
               in the repository.
        """
        # Pre-fetch all crate input files in a single batched git call,
        # replacing ~118 individual subprocess pairs with one pair.
        self.rd.cargo_workspace.precompute_crate_inputs()
        # Pre-fetch all image context files in a single batched git call,
        # replacing ~41 individual subprocess pairs with one pair.
        self._precompute_image_context_files()

        resolved = OrderedDict()
        visiting = set()

        # Depth-first traversal; `visiting` detects cycles, `path` is only
        # used to render the cycle diagnostic.
        def visit(image: Image, path: list[str] = []) -> None:
            if image.name in resolved:
                return
            if image.name in visiting:
                diagram = " -> ".join(path + [image.name])
                raise ValueError(f"circular dependency in mzbuild: {diagram}")

            visiting.add(image.name)
            for d in sorted(image.depends_on):
                visit(self.images[d], path + [image.name])
            resolved[image.name] = image

        for target_image in sorted(targets, key=lambda image: image.name):
            visit(target_image)

        return DependencySet(resolved.values())

    def _precompute_image_context_files(self) -> None:
        """Pre-fetch all image context files in a single batched git call.

        This replaces ~41 individual pairs of git subprocess calls (one per
        image) with a single pair, then partitions the results by image path.
        """
        root = self.rd.root
        # Use paths relative to root for git specs and partitioning, since
        # git --relative outputs paths relative to cwd (root). Image paths
        # may be absolute when MZ_ROOT is an absolute path.
        image_rel_paths = sorted(
            set(str(img.path.relative_to(root)) for img in self.images.values())
        )
        specs = [f"{p}/**" for p in image_rel_paths]

        # Diffing against git's well-known empty tree object lists every
        # tracked file; ls-files --others adds untracked, non-ignored files.
        empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        diff_files = spawn.capture(
            ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"]
            + specs,
            cwd=root,
        )
        ls_files = spawn.capture(
            ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs,
            cwd=root,
        )
        all_files = set(
            f for f in (diff_files + ls_files).split("\0") if f.strip() != ""
        )

        # Partition files by image path (longest match first for nested paths)
        image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths}
        sorted_paths = sorted(image_rel_paths, key=len, reverse=True)
        for f in all_files:
            for ip in sorted_paths:
                if f.startswith(ip + "/"):
                    image_file_map[ip].add(f)
                    break

        for img in self.images.values():
            rel = str(img.path.relative_to(root))
            img._context_files_cache = image_file_map.get(rel, set())

    def __iter__(self) -> Iterator[Image]:
        return iter(self.images.values())
1594
1595
def publish_multiarch_images(
    tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Publishes a set of multiarch Docker images under a given tag.

    For each image, a multiarch manifest combining the per-architecture
    images is created and pushed to Docker Hub and, when a GHCR token is
    available, to GHCR. Already-pushed images are skipped unless the tag is
    one that is always re-pushed. Finally a Buildkite annotation is posted.

    Args:
        tag: The Docker tag to publish under.
        dependency_sets: One resolved dependency set per architecture; the
            sets must contain the same images in the same order.
    """
    # Images with these tags are always (re)pushed, even if already present.
    always_push_tags = ("latest", "unstable")
    ghcr_token = os.getenv("GITHUB_GHCR_TOKEN")
    if ghcr_token:
        # Pass the token via stdin to avoid exposing it on the command line.
        spawn.runv(
            [
                "docker",
                "login",
                "ghcr.io",
                "-u",
                "materialize-bot",
                "--password-stdin",
            ],
            stdin=ghcr_token.encode(),
        )
    for images in zip(*dependency_sets):
        names = set(image.image.name for image in images)
        assert len(names) == 1, "dependency sets did not contain identical images"
        name = images[0].image.docker_name(tag)
        if tag in always_push_tags or not is_docker_image_pushed(name):
            spawn.runv(
                [
                    "docker",
                    "manifest",
                    "create",
                    name,
                    *(image.spec() for image in images),
                ]
            )
            spawn.runv(["docker", "manifest", "push", name])

        ghcr_name = f"{GHCR_PREFIX}{name}"
        if ghcr_token and (
            tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name)
        ):
            spawn.runv(
                [
                    "docker",
                    "manifest",
                    "create",
                    ghcr_name,
                    *(f"{GHCR_PREFIX}{image.spec()}" for image in images),
                ]
            )
            spawn.runv(["docker", "manifest", "push", ghcr_name])
    # Fix: message previously read "Nofifying".
    print(f"--- Notifying for tag {tag}")
    markdown = f"""Pushed images with Docker tag `{tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{tag}",
        ],
        stdin=markdown.encode(),
    )
1654
1655
def tag_multiarch_images(
    new_tag: str, previous_tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Retags a set of already-published Docker images under a new tag.

    Fix: the docstring previously claimed this function "publishes" images;
    it actually pulls each image at `previous_tag`, retags it as `new_tag`,
    and pushes the result, then posts a Buildkite annotation.

    Args:
        new_tag: The Docker tag to apply.
        previous_tag: The existing Docker tag to retag from.
        dependency_sets: One resolved dependency set per architecture; the
            sets must contain the same images in the same order.
    """
    for images in zip(*dependency_sets):
        names = set(image.image.name for image in images)
        assert len(names) == 1, "dependency sets did not contain identical images"
        new_name = images[0].image.docker_name(new_tag)

        # The `mz` image doesn't have tagged images to retag.
        if images[0].image.name == "mz":
            continue

        previous_name = images[0].image.docker_name(previous_tag)
        spawn.runv(["docker", "pull", previous_name])
        spawn.runv(["docker", "tag", previous_name, new_name])
        spawn.runv(["docker", "push", new_name])
    # Fix: message previously read "Nofifying".
    print(f"--- Notifying for tag {new_tag}")
    markdown = f"""Pushed images with Docker tag `{new_tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{new_tag}",
        ],
        stdin=markdown.encode(),
    )
GHCR_PREFIX = 'ghcr.io/materializeinc/'
class RustIncrementalBuildFailure(Exception):
    """Raised when a Rust build fails with output matching a known
    incremental-compilation failure signature."""

Common base class for all non-exit exceptions.

def run_and_detect_rust_incremental_build_failure(cmd: list[str], cwd: str | pathlib.Path) -> subprocess.CompletedProcess:
 61def run_and_detect_rust_incremental_build_failure(
 62    cmd: list[str], cwd: str | Path
 63) -> subprocess.CompletedProcess:
 64    """This function is complex since it prints out each line immediately to
 65    stdout/stderr, but still records them at the same time so that we can scan
 66    for known incremental build failures."""
 67    stdout_result = io.StringIO()
 68    stderr_result = io.StringIO()
 69    p = subprocess.Popen(
 70        cmd,
 71        stdout=subprocess.PIPE,
 72        stderr=subprocess.PIPE,
 73        text=True,
 74        bufsize=1,
 75        env={**os.environ, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"},
 76    )
 77
 78    sel = selectors.DefaultSelector()
 79    sel.register(p.stdout, selectors.EVENT_READ)  # type: ignore
 80    sel.register(p.stderr, selectors.EVENT_READ)  # type: ignore
 81    assert p.stdout is not None
 82    assert p.stderr is not None
 83    os.set_blocking(p.stdout.fileno(), False)
 84    os.set_blocking(p.stderr.fileno(), False)
 85    running = True
 86    while running:
 87        for key, val in sel.select():
 88            output = io.StringIO()
 89            running = False
 90            while True:
 91                new_output = key.fileobj.read(1024)  # type: ignore
 92                if not new_output:
 93                    break
 94                output.write(new_output)
 95            contents = output.getvalue()
 96            output.close()
 97            if not contents:
 98                continue
 99            # Keep running as long as stdout or stderr have any content
100            running = True
101            if key.fileobj is p.stdout:
102                print(
103                    contents,
104                    end="",
105                    flush=True,
106                )
107                stdout_result.write(contents)
108            else:
109                print(
110                    contents,
111                    end="",
112                    file=sys.stderr,
113                    flush=True,
114                )
115                stderr_result.write(contents)
116    p.wait()
117    retcode = p.poll()
118    assert retcode is not None
119    stdout_contents = stdout_result.getvalue()
120    stdout_result.close()
121    stderr_contents = stderr_result.getvalue()
122    stderr_result.close()
123    if retcode:
124        incremental_build_failure_msgs = [
125            "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs",
126            "ld.lld: error: undefined symbol",
127            "signal: 11, SIGSEGV",
128        ]
129        combined = stdout_contents + stderr_contents
130        if any(msg in combined for msg in incremental_build_failure_msgs):
131            raise RustIncrementalBuildFailure()
132
133        raise subprocess.CalledProcessError(
134            retcode, p.args, output=stdout_contents, stderr=stderr_contents
135        )
136    return subprocess.CompletedProcess(
137        p.args, retcode, stdout_contents, stderr_contents
138    )

This function is complex since it prints out each line immediately to stdout/stderr, but still records them at the same time so that we can scan for known incremental build failures.

class Fingerprint(bytes):
    """A SHA-1 hash of the inputs to an `Image`.

    The string representation uses base32 encoding to distinguish mzbuild
    fingerprints from Git's hex encoded SHA-1 hashes while still being
    URL safe.
    """

    def __str__(self) -> str:
        # base32 rather than hex keeps fingerprints visually distinct
        # from git SHAs.
        encoded = base64.b32encode(self)
        return encoded.decode()

A SHA-1 hash of the inputs to an Image.

The string representation uses base32 encoding to distinguish mzbuild fingerprints from Git's hex encoded SHA-1 hashes while still being URL safe.

class Profile(Enum):
    """The profile with which the repository is built."""

    RELEASE = auto()
    OPTIMIZED = auto()
    DEV = auto()
class RepositoryDetails:
    """Immutable details about a `Repository`.

    Used internally by mzbuild.

    Attributes:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile the repository is being built with.
        coverage: Whether the repository has code coverage instrumentation
            enabled.
        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi,
            thread, leak, memory, none).
        cargo_workspace: The `cargo.Workspace` associated with the repository.
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch,
        profile: Profile,
        coverage: bool,
        sanitizer: Sanitizer,
        image_registry: str,
        image_prefix: str,
    ):
        self.root = root
        self.arch = arch
        self.profile = profile
        self.coverage = coverage
        self.sanitizer = sanitizer
        self.cargo_workspace = cargo.Workspace(root)
        self.image_registry = image_registry
        self.image_prefix = image_prefix

    def build(
        self,
        subcommand: str,
        rustflags: list[str],
        channel: str | None = None,
        # Fix: was a mutable `{}` default argument; use a `None` sentinel.
        extra_env: dict[str, str] | None = None,
    ) -> list[str]:
        """Start a build invocation for the configured architecture.

        Args:
            subcommand: The cargo subcommand to run.
            rustflags: Flags to pass on to rustc.
            channel: The toolchain channel to use, if any.
            extra_env: Additional environment variables for the build.
        """
        return xcompile.cargo(
            arch=self.arch,
            channel=channel,
            subcommand=subcommand,
            rustflags=rustflags,
            extra_env=extra_env or {},
        )

    def tool(self, name: str) -> list[str]:
        """Start a binutils tool invocation for the configured architecture."""
        if platform.system() != "Linux":
            # We can't use the local tools from macOS to build a Linux executable
            return ["bin/ci-builder", "run", "stable", name]
        # If we're on Linux, trust that the tools are installed instead of
        # loading the slow ci-builder. If you don't have compilation tools
        # installed you can still run `bin/ci-builder run stable
        # bin/mzcompose ...`, and most likely the Cargo build will already
        # fail earlier if you don't have compilation tools installed and
        # run without the ci-builder.
        return [name]

    def cargo_target_dir(self) -> Path:
        """Determine the path to the target directory for Cargo."""
        return self.root / "target-xcompile" / xcompile.target(self.arch)

    def rewrite_builder_path_for_host(self, path: Path) -> Path:
        """Rewrite a path that is relative to the target directory inside the
        builder to a path that is relative to the target directory on the host.

        If `path` is not relative to the target directory inside the builder,
        it is returned unchanged.
        """
        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
        try:
            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
        except ValueError:
            # `path` was outside the builder's target directory.
            return path

Immutable details about a Repository.

Used internally by mzbuild.

Attributes: root: The path to the root of the repository. arch: The CPU architecture to build for. profile: What profile the repository is being built with. coverage: Whether the repository has code coverage instrumentation enabled. sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none) cargo_workspace: The cargo.Workspace associated with the repository. image_registry: The Docker image registry to pull images from and push images to. image_prefix: A prefix to apply to all Docker image names.

RepositoryDetails( root: pathlib.Path, arch: materialize.xcompile.Arch, profile: Profile, coverage: bool, sanitizer: materialize.rustc_flags.Sanitizer, image_registry: str, image_prefix: str)
177    def __init__(
178        self,
179        root: Path,
180        arch: Arch,
181        profile: Profile,
182        coverage: bool,
183        sanitizer: Sanitizer,
184        image_registry: str,
185        image_prefix: str,
186    ):
187        self.root = root
188        self.arch = arch
189        self.profile = profile
190        self.coverage = coverage
191        self.sanitizer = sanitizer
192        self.cargo_workspace = cargo.Workspace(root)
193        self.image_registry = image_registry
194        self.image_prefix = image_prefix
root
arch
profile
coverage
sanitizer
cargo_workspace
image_registry
image_prefix
def build( self, subcommand: str, rustflags: list[str], channel: str | None = None, extra_env: dict[str, str] = {}) -> list[str]:
196    def build(
197        self,
198        subcommand: str,
199        rustflags: list[str],
200        channel: str | None = None,
201        extra_env: dict[str, str] = {},
202    ) -> list[str]:
203        """Start a build invocation for the configured architecture."""
204        return xcompile.cargo(
205            arch=self.arch,
206            channel=channel,
207            subcommand=subcommand,
208            rustflags=rustflags,
209            extra_env=extra_env,
210        )

Start a build invocation for the configured architecture.

def tool(self, name: str) -> list[str]:
212    def tool(self, name: str) -> list[str]:
213        """Start a binutils tool invocation for the configured architecture."""
214        if platform.system() != "Linux":
215            # We can't use the local tools from macOS to build a Linux executable
216            return ["bin/ci-builder", "run", "stable", name]
217        # If we're on Linux, trust that the tools are installed instead of
218        # loading the slow ci-builder. If you don't have compilation tools
219        # installed you can still run `bin/ci-builder run stable
220        # bin/mzcompose ...`, and most likely the Cargo build will already
221        # fail earlier if you don't have compilation tools installed and
222        # run without the ci-builder.
223        return [name]

Start a binutils tool invocation for the configured architecture.

def cargo_target_dir(self) -> pathlib.Path:
225    def cargo_target_dir(self) -> Path:
226        """Determine the path to the target directory for Cargo."""
227        return self.root / "target-xcompile" / xcompile.target(self.arch)

Determine the path to the target directory for Cargo.

def rewrite_builder_path_for_host(self, path: pathlib.Path) -> pathlib.Path:
229    def rewrite_builder_path_for_host(self, path: Path) -> Path:
230        """Rewrite a path that is relative to the target directory inside the
231        builder to a path that is relative to the target directory on the host.
232
233        If path is not relative to the target directory inside the builder,
234        it is returned unchanged.
235        """
236        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
237        try:
238            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
239        except ValueError:
240            return path

Rewrite a path that is relative to the target directory inside the builder to a path that is relative to the target directory on the host.

If path is not relative to the target directory inside the builder, it is returned unchanged.

@cache
def docker_images() -> frozenset[str]:
    """List the Docker images available on the local machine.

    The result is cached for the lifetime of the process.
    """
    listing = spawn.capture(
        ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"]
    )
    return frozenset(listing.strip().split("\n"))

List the Docker images available on the local machine.

KNOWN_DOCKER_IMAGES_FILE = PosixPath('/var/lib/buildkite-agent/builds/buildkite-15f2293-i-0cb7253b04a60f4c6-1/materialize/deploy/known-docker-images.txt')
def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Note that this operation requires a rather slow network request.

    Args:
        name: The image name, optionally with a `:tag` suffix (defaults to
            `latest`).
    """
    global _known_docker_images

    # Lazily load the on-disk cache of known-pushed images. Fix: re-check the
    # sentinel *inside* the lock (double-checked locking); previously two
    # concurrent first callers could both reload the file.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = set(line.strip() for line in f)

    if name in _known_docker_images:
        return True

    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    try:
        # NOTE(review): these requests carry no `timeout`, so a hung registry
        # stalls indefinitely -- consider adding one.
        if dockerhub_username and dockerhub_token:
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
            )
        else:
            # Anonymous access requires fetching a short-lived pull token.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
            )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Treat any network/parse failure as "not pushed" so the caller
        # falls back to pushing the image.
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        # Persist positive results so future runs skip the network check.
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists

Check whether the named image is pushed to Docker Hub.

Note that this operation requires a rather slow network request.

def is_ghcr_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to GHCR.

    Note that this operation requires a rather slow network request.

    Args:
        name: The full image name including the `ghcr.io/` prefix,
            optionally with a `:tag` suffix (defaults to `latest`).
    """
    global _known_docker_images

    # Lazily load the on-disk cache of known-pushed images. Fix: re-check the
    # sentinel *inside* the lock (double-checked locking); previously two
    # concurrent first callers could both reload the file.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = set(line.strip() for line in f)

    # The registry API takes the repository path without the host prefix,
    # but the cache stores (and the fallback inspects) the full name.
    name_without_ghcr = name.removeprefix("ghcr.io/")
    if name in _known_docker_images:
        return True

    if ":" not in name_without_ghcr:
        image, tag = name_without_ghcr, "latest"
    else:
        image, tag = name_without_ghcr.rsplit(":", 1)

    exists: bool = False

    try:
        # NOTE(review): these requests carry no `timeout`, so a hung registry
        # stalls indefinitely -- consider adding one.
        token = requests.get(
            "https://ghcr.io/token",
            params={
                "scope": f"repository:{image}:pull",
            },
        ).json()["token"]
        response = requests.head(
            f"https://ghcr.io/v2/{image}/manifests/{tag}",
            headers={"Authorization": f"Bearer {token}"},
        )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Treat any network/parse failure as "not pushed" so the caller
        # falls back to pushing the image.
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        # Persist positive results so future runs skip the network check.
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
def chmod_x(path: Path) -> None:
    """Set the executable bit on a file or directory."""
    # Mirror each readable bit into the corresponding executable bit.
    # https://stackoverflow.com/a/30463972/1122351
    current = os.stat(path).st_mode
    os.chmod(path, current | ((current & 0o444) >> 2))

Set the executable bit on a file or directory.

class PreImage:
    """An action to run before building a Docker image.

    Args:
        rd: The `RepositoryDetails` for the repository.
        path: The path to the `Image` associated with this action.
    """

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
        """Prepare a batch of actions.

        This is useful for `PreImage` actions that are more efficient when
        their actions are applied to several images in bulk.

        Returns an arbitrary output that is passed to `PreImage.run`.
        """
        # The base implementation has no batch preparation to do.
        return None

    def run(self, prep: Any) -> None:
        """Perform the action.

        Args:
            prep: Any prep work returned by `prepare_batch`.
        """
        # The base implementation is a no-op; subclasses override this.
        return None

    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action."""
        raise NotImplementedError

    def extra(self) -> str:
        """Returns additional data for incorporation in the fingerprint."""
        return ""

An action to run before building a Docker image.

Args: rd: The RepositoryDetails for the repository. path: The path to the Image associated with this action.

PreImage( rd: RepositoryDetails, path: pathlib.Path)
411    def __init__(self, rd: RepositoryDetails, path: Path):
412        self.rd = rd
413        self.path = path
rd
path
@classmethod
def prepare_batch(cls, instances: list[PreImage]) -> Any:
415    @classmethod
416    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
417        """Prepare a batch of actions.
418
419        This is useful for `PreImage` actions that are more efficient when
420        their actions are applied to several images in bulk.
421
422        Returns an arbitrary output that is passed to `PreImage.run`.
423        """
424        pass

Prepare a batch of actions.

This is useful for PreImage actions that are more efficient when their actions are applied to several images in bulk.

Returns an arbitrary output that is passed to PreImage.run.

def run(self, prep: Any) -> None:
426    def run(self, prep: Any) -> None:
427        """Perform the action.
428
429        Args:
430            prep: Any prep work returned by `prepare_batch`.
431        """
432        pass

Perform the action.

Args: prep: Any prep work returned by prepare_batch.

def inputs(self) -> set[str]:
434    def inputs(self) -> set[str]:
435        """Return the files which are considered inputs to the action."""
436        raise NotImplementedError

Return the files which are considered inputs to the action.

def extra(self) -> str:
438    def extra(self) -> str:
439        """Returns additional data for incorporation in the fingerprint."""
440        return ""

Returns additional data for incorporation in the fingerprint.

class Copy(PreImage):
    """A `PreImage` action which copies files from a directory.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)

        def required(key: str) -> Any:
            # Pop a mandatory key from the mzbuild config.
            value = config.pop(key, None)
            if value is None:
                raise ValueError(f"mzbuild config is missing '{key}' argument")
            return value

        self.source = required("source")
        self.destination = required("destination")
        self.matching = config.pop("matching", "*")

    def run(self, prep: Any) -> None:
        """Copy each matching source file into the image's build context."""
        super().run(prep)
        for relative in self.inputs():
            target = self.path / self.destination / relative
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(self.rd.root / self.source / relative, target)

    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action."""
        return set(git.expand_globs(self.rd.root / self.source, self.matching))

A PreImage action which copies files from a directory.

See doc/developer/mzbuild.md for an explanation of the user-facing parameters.

Copy( rd: RepositoryDetails, path: pathlib.Path, config: dict[str, typing.Any])
450    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
451        super().__init__(rd, path)
452
453        self.source = config.pop("source", None)
454        if self.source is None:
455            raise ValueError("mzbuild config is missing 'source' argument")
456
457        self.destination = config.pop("destination", None)
458        if self.destination is None:
459            raise ValueError("mzbuild config is missing 'destination' argument")
460
461        self.matching = config.pop("matching", "*")
source
destination
matching
def run(self, prep: Any) -> None:
463    def run(self, prep: Any) -> None:
464        super().run(prep)
465        for src in self.inputs():
466            dst = self.path / self.destination / src
467            dst.parent.mkdir(parents=True, exist_ok=True)
468            shutil.copy(self.rd.root / self.source / src, dst)

Perform the action.

Args: prep: Any prep work returned by prepare_batch.

def inputs(self) -> set[str]:
470    def inputs(self) -> set[str]:
471        return set(git.expand_globs(self.rd.root / self.source, self.matching))

Return the files which are considered inputs to the action.

Inherited Members
PreImage
rd
path
prepare_batch
extra
class CargoPreImage(PreImage):
    """A `PreImage` action that uses Cargo."""

    @staticmethod
    @cache
    def _cargo_shared_inputs() -> frozenset[str]:
        """Resolve shared Cargo inputs once and cache the result.

        This expands the 'ci/builder' directory glob and filters out
        non-existent files like '.cargo/config', avoiding repeated
        git subprocess calls in fingerprint().
        """
        inputs: set[str] = set()
        inputs |= git.expand_globs(Path("."), "ci/builder/**")
        inputs.add("Cargo.toml")
        inputs.add("Cargo.lock")
        if Path(".cargo/config").exists():
            inputs.add(".cargo/config")
        return frozenset(inputs)

    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action."""
        return set(CargoPreImage._cargo_shared_inputs())

    def extra(self) -> str:
        """Returns additional data for incorporation in the fingerprint."""
        # Cargo images depend on the release mode and whether
        # coverage/sanitizer is enabled.
        flags: list[str] = []
        # Fix: the original used `flags += "release"`, which extends the list
        # with the *characters* of the string (yielding e.g. "a,e,e,e,l,r,s");
        # append whole words instead. NOTE(review): this changes existing
        # fingerprint extras, forcing a one-time image rebuild.
        if self.rd.profile == Profile.RELEASE:
            flags.append("release")
        if self.rd.profile == Profile.OPTIMIZED:
            flags.append("optimized")
        if self.rd.coverage:
            flags.append("coverage")
        if self.rd.sanitizer != Sanitizer.none:
            flags.append(self.rd.sanitizer.value)
        flags.sort()
        return ",".join(flags)

A PreImage action that uses Cargo.

def inputs(self) -> set[str]:
494    def inputs(self) -> set[str]:
495        return set(CargoPreImage._cargo_shared_inputs())

Return the files which are considered inputs to the action.

def extra(self) -> str:
497    def extra(self) -> str:
498        # Cargo images depend on the release mode and whether
499        # coverage/sanitizer is enabled.
500        flags: list[str] = []
501        if self.rd.profile == Profile.RELEASE:
502            flags += "release"
503        if self.rd.profile == Profile.OPTIMIZED:
504            flags += "optimized"
505        if self.rd.coverage:
506            flags += "coverage"
507        if self.rd.sanitizer != Sanitizer.none:
508            flags += self.rd.sanitizer.value
509        flags.sort()
510        return ",".join(flags)

Returns additional data for incorporation in the fingerprint.

class CargoBuild(CargoPreImage):
class CargoBuild(CargoPreImage):
    """A `PreImage` action that builds a single binary with Cargo.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)
        # `bin`, `example`, and `features` may each be given in mzbuild.yml as
        # a single string or a list of strings; normalize to lists.
        bin = config.pop("bin", [])
        self.bins = bin if isinstance(bin, list) else [bin]
        example = config.pop("example", [])
        self.examples = example if isinstance(example, list) else [example]
        self.strip = config.pop("strip", True)
        self.extract = config.pop("extract", {})
        features = config.pop("features", [])
        self.features = features if isinstance(features, list) else [features]

        # At least one binary or example target must be configured.
        if len(self.bins) == 0 and len(self.examples) == 0:
            raise ValueError("mzbuild config is missing pre-build target")

    @staticmethod
    def generate_cargo_build_command(
        rd: RepositoryDetails,
        bins: list[str],
        examples: list[str],
        features: list[str] | None = None,
    ) -> list[str]:
        """Construct the `cargo build` argument vector for the given targets.

        Args:
            rd: The repository details (profile, arch, coverage, sanitizer).
            bins: Binary targets to build.
            examples: Example targets to build.
            features: Optional Cargo features to enable.

        Returns:
            The full `cargo build` command as an argument list.
        """
        # Coverage and sanitizer builds each need dedicated rustc flags;
        # otherwise only the `tokio_unstable` cfg is enabled.
        rustflags = (
            rustc_flags.coverage
            if rd.coverage
            else (
                rustc_flags.sanitizer[rd.sanitizer]
                if rd.sanitizer != Sanitizer.none
                else ["--cfg=tokio_unstable"]
            )
        )
        # Sanitizer builds compile C/C++ dependencies against the x-tools
        # cross toolchain's sysroot; otherwise no extra C flags are needed.
        cflags = (
            [
                f"--target={target(rd.arch)}",
                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
                "-fuse-ld=lld",
                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
            ]
            + rustc_flags.sanitizer_cflags[rd.sanitizer]
            if rd.sanitizer != Sanitizer.none
            else []
        )
        # Environment overrides that point the C/C++ toolchain and linker at
        # the sanitizer-aware cross toolchain (only for sanitizer builds).
        extra_env = (
            {
                "CFLAGS": " ".join(cflags),
                "CXXFLAGS": " ".join(cflags),
                "LDFLAGS": " ".join(cflags),
                "CXXSTDLIB": "stdc++",
                "CC": "cc",
                "CXX": "c++",
                "CPP": "clang-cpp-18",
                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
            }
            if rd.sanitizer != Sanitizer.none
            else {}
        )

        cargo_build = rd.build(
            "build", channel=None, rustflags=rustflags, extra_env=extra_env
        )

        # Restrict the build to exactly the packages that own the requested
        # binaries/examples so Cargo skips unrelated workspace members.
        packages = set()
        for bin in bins:
            cargo_build.extend(["--bin", bin])
            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
        for example in examples:
            cargo_build.extend(["--example", example])
            packages.add(rd.cargo_workspace.crate_for_example(example).name)
        cargo_build.extend(f"--package={p}" for p in packages)

        if rd.profile == Profile.RELEASE:
            cargo_build.append("--release")
        if rd.profile == Profile.OPTIMIZED:
            cargo_build.extend(["--profile", "optimized"])
        if rd.sanitizer != Sanitizer.none:
            # ASan doesn't work with jemalloc
            cargo_build.append("--no-default-features")
            # Uses more memory, so reduce the number of jobs
            cargo_build.extend(
                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
            )
        if features:
            cargo_build.append(f"--features={','.join(features)}")

        return cargo_build

    @classmethod
    def prepare_batch(cls, cargo_builds: list["PreImage"]) -> dict[str, Any]:
        """Run one shared `cargo build` covering every build in the batch.

        Args:
            cargo_builds: The `CargoBuild` actions to prepare (typed as
                `PreImage` to match the base-class signature).

        Returns:
            A dict whose "cargo" key holds the JSON-formatted `cargo build`
            output; this is later passed to `run`/`build` to locate
            build-script artifacts.
        """
        super().prepare_batch(cargo_builds)

        if not cargo_builds:
            return {}

        # Building all binaries and examples in the same `cargo build` command
        # allows Cargo to link in parallel with other work, which can
        # meaningfully speed up builds.

        # All builds are assumed to share one repository; take the first.
        rd: RepositoryDetails | None = None
        builds = cast(list[CargoBuild], cargo_builds)
        bins = set()
        examples = set()
        features = set()
        for build in builds:
            if not rd:
                rd = build.rd
            bins.update(build.bins)
            examples.update(build.examples)
            features.update(build.features)
        assert rd

        ui.section(f"Common build for: {', '.join(bins | examples)}")

        cargo_build = cls.generate_cargo_build_command(
            rd, list(bins), list(examples), list(features) if features else None
        )

        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

        # Re-run with JSON-formatted messages and capture the output so we can
        # later analyze the build artifacts in `run`. This should be nearly
        # instantaneous since we just compiled above with the same crates and
        # features. (We don't want to do the compile above with JSON-formatted
        # messages because it wouldn't be human readable.)
        json_output = spawn.capture(
            cargo_build + ["--message-format=json"],
            cwd=rd.root,
        )
        prep = {"cargo": json_output}

        return prep

    def build(self, build_output: dict[str, Any]) -> None:
        """Copy this image's built artifacts into the mzbuild context.

        Args:
            build_output: The dict returned by `prepare_batch`; its "cargo"
                key holds JSON-formatted `cargo build` messages, used to
                locate build-script output directories for `extract`.
        """
        # Cargo's output directory name for the active profile.
        cargo_profile = (
            "release"
            if self.rd.profile == Profile.RELEASE
            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
        )

        def copy(src: Path, relative_dst: Path) -> None:
            # Copy an artifact into the mzbuild context and shrink its debug
            # info (either stripping it entirely or dropping obsolete index
            # sections).
            exe_path = self.path / relative_dst
            exe_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, exe_path)

            if self.strip:
                # The debug information is large enough that it slows down CI,
                # since we're packaging these binaries up into Docker images and
                # shipping them around.
                spawn.runv(
                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
                    cwd=self.rd.root,
                )
            else:
                # Even if we've been asked not to strip the binary, remove the
                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
                # indexes that speed up launching a debugger against the binary,
                # and we're happy to have slower debugger start up in exchange for
                # smaller binaries. Plus the sections have been obsoleted by a
                # `.debug_names` section in DWARF 5, and so debugger support for
                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
                # See: https://github.com/rust-lang/rust/issues/46034
                spawn.runv(
                    [
                        *self.rd.tool("objcopy"),
                        "-R",
                        ".debug_pubnames",
                        "-R",
                        ".debug_pubtypes",
                        exe_path,
                    ],
                    cwd=self.rd.root,
                )

        for bin in self.bins:
            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
            copy(src_path, bin)
        for example in self.examples:
            src_path = (
                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
            )
            copy(src_path, Path("examples") / example)

        # Optionally extract files produced by build scripts (e.g. generated
        # assets) into the mzbuild context, per the `extract` config.
        if self.extract:
            cargo_build_json_output = build_output["cargo"]

            target_dir = self.rd.cargo_target_dir()
            for line in cargo_build_json_output.split("\n"):
                # Skip blank lines and any non-JSON noise in the output.
                if line.strip() == "" or not line.startswith("{"):
                    continue
                message = json.loads(line)
                if message["reason"] != "build-script-executed":
                    continue
                out_dir = self.rd.rewrite_builder_path_for_host(
                    Path(message["out_dir"])
                )
                if not out_dir.is_relative_to(target_dir):
                    # Some crates are built for both the host and the target.
                    # Ignore the built-for-host out dir.
                    continue
                # parse the package name from a package_id that looks like one of:
                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
                # file:///path/to/my-package#0.1.0
                package_id = message["package_id"]
                if "@" in package_id:
                    package = package_id.split("@")[0].split("#")[-1]
                else:
                    package = message["package_id"].split("#")[0].split("/")[-1]
                for src, dst in self.extract.get(package, {}).items():
                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

        # NOTE(review): marks this action's artifacts as available locally —
        # presumably consumed by the image-acquisition machinery; confirm
        # against the `ResolvedImage.acquired` usage elsewhere in this file.
        self.acquired = True

    def run(self, prep: dict[str, Any]) -> None:
        """Perform the action.

        Args:
            prep: Any prep work returned by `prepare_batch`.
        """
        super().run(prep)
        self.build(prep)

    @cache
    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action.

        NOTE(review): `@cache` on an instance method keys on `self` and keeps
        the instance alive for the cache's lifetime; harmless if these objects
        live for the duration of the process — confirm.
        """
        deps = set()

        # Collect the transitive path dependencies of every crate that owns a
        # requested binary or example; their files are all fingerprint inputs.
        for bin in self.bins:
            crate = self.rd.cargo_workspace.crate_for_bin(bin)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
        for example in self.examples:
            crate = self.rd.cargo_workspace.crate_for_example(example)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
                crate, dev=True
            )

        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
        return inputs

A PreImage action that builds a single binary with Cargo.

See doc/developer/mzbuild.md for an explanation of the user-facing parameters.

CargoBuild( rd: RepositoryDetails, path: pathlib.Path, config: dict[str, typing.Any])
520    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
521        super().__init__(rd, path)
522        bin = config.pop("bin", [])
523        self.bins = bin if isinstance(bin, list) else [bin]
524        example = config.pop("example", [])
525        self.examples = example if isinstance(example, list) else [example]
526        self.strip = config.pop("strip", True)
527        self.extract = config.pop("extract", {})
528        features = config.pop("features", [])
529        self.features = features if isinstance(features, list) else [features]
530
531        if len(self.bins) == 0 and len(self.examples) == 0:
532            raise ValueError("mzbuild config is missing pre-build target")
bins
examples
strip
extract
features
@staticmethod
def generate_cargo_build_command( rd: RepositoryDetails, bins: list[str], examples: list[str], features: list[str] | None = None) -> list[str]:
534    @staticmethod
535    def generate_cargo_build_command(
536        rd: RepositoryDetails,
537        bins: list[str],
538        examples: list[str],
539        features: list[str] | None = None,
540    ) -> list[str]:
541        rustflags = (
542            rustc_flags.coverage
543            if rd.coverage
544            else (
545                rustc_flags.sanitizer[rd.sanitizer]
546                if rd.sanitizer != Sanitizer.none
547                else ["--cfg=tokio_unstable"]
548            )
549        )
550        cflags = (
551            [
552                f"--target={target(rd.arch)}",
553                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
554                "-fuse-ld=lld",
555                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
556                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
557            ]
558            + rustc_flags.sanitizer_cflags[rd.sanitizer]
559            if rd.sanitizer != Sanitizer.none
560            else []
561        )
562        extra_env = (
563            {
564                "CFLAGS": " ".join(cflags),
565                "CXXFLAGS": " ".join(cflags),
566                "LDFLAGS": " ".join(cflags),
567                "CXXSTDLIB": "stdc++",
568                "CC": "cc",
569                "CXX": "c++",
570                "CPP": "clang-cpp-18",
571                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
572                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
573                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
574                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
575            }
576            if rd.sanitizer != Sanitizer.none
577            else {}
578        )
579
580        cargo_build = rd.build(
581            "build", channel=None, rustflags=rustflags, extra_env=extra_env
582        )
583
584        packages = set()
585        for bin in bins:
586            cargo_build.extend(["--bin", bin])
587            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
588        for example in examples:
589            cargo_build.extend(["--example", example])
590            packages.add(rd.cargo_workspace.crate_for_example(example).name)
591        cargo_build.extend(f"--package={p}" for p in packages)
592
593        if rd.profile == Profile.RELEASE:
594            cargo_build.append("--release")
595        if rd.profile == Profile.OPTIMIZED:
596            cargo_build.extend(["--profile", "optimized"])
597        if rd.sanitizer != Sanitizer.none:
598            # ASan doesn't work with jemalloc
599            cargo_build.append("--no-default-features")
600            # Uses more memory, so reduce the number of jobs
601            cargo_build.extend(
602                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
603            )
604        if features:
605            cargo_build.append(f"--features={','.join(features)}")
606
607        return cargo_build
@classmethod
def prepare_batch( cls, cargo_builds: list[PreImage]) -> dict[str, typing.Any]:
609    @classmethod
610    def prepare_batch(cls, cargo_builds: list["PreImage"]) -> dict[str, Any]:
611        super().prepare_batch(cargo_builds)
612
613        if not cargo_builds:
614            return {}
615
616        # Building all binaries and examples in the same `cargo build` command
617        # allows Cargo to link in parallel with other work, which can
618        # meaningfully speed up builds.
619
620        rd: RepositoryDetails | None = None
621        builds = cast(list[CargoBuild], cargo_builds)
622        bins = set()
623        examples = set()
624        features = set()
625        for build in builds:
626            if not rd:
627                rd = build.rd
628            bins.update(build.bins)
629            examples.update(build.examples)
630            features.update(build.features)
631        assert rd
632
633        ui.section(f"Common build for: {', '.join(bins | examples)}")
634
635        cargo_build = cls.generate_cargo_build_command(
636            rd, list(bins), list(examples), list(features) if features else None
637        )
638
639        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)
640
641        # Re-run with JSON-formatted messages and capture the output so we can
642        # later analyze the build artifacts in `run`. This should be nearly
643        # instantaneous since we just compiled above with the same crates and
644        # features. (We don't want to do the compile above with JSON-formatted
645        # messages because it wouldn't be human readable.)
646        json_output = spawn.capture(
647            cargo_build + ["--message-format=json"],
648            cwd=rd.root,
649        )
650        prep = {"cargo": json_output}
651
652        return prep

Prepare a batch of actions.

This is useful for PreImage actions that are more efficient when their actions are applied to several images in bulk.

Returns an arbitrary output that is passed to PreImage.run.

def build(self, build_output: dict[str, typing.Any]) -> None:
654    def build(self, build_output: dict[str, Any]) -> None:
655        cargo_profile = (
656            "release"
657            if self.rd.profile == Profile.RELEASE
658            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
659        )
660
661        def copy(src: Path, relative_dst: Path) -> None:
662            exe_path = self.path / relative_dst
663            exe_path.parent.mkdir(parents=True, exist_ok=True)
664            shutil.copy(src, exe_path)
665
666            if self.strip:
667                # The debug information is large enough that it slows down CI,
668                # since we're packaging these binaries up into Docker images and
669                # shipping them around.
670                spawn.runv(
671                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
672                    cwd=self.rd.root,
673                )
674            else:
675                # Even if we've been asked not to strip the binary, remove the
676                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
677                # indexes that speed up launching a debugger against the binary,
678                # and we're happy to have slower debugger start up in exchange for
679                # smaller binaries. Plus the sections have been obsoleted by a
680                # `.debug_names` section in DWARF 5, and so debugger support for
681                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
682                # See: https://github.com/rust-lang/rust/issues/46034
683                spawn.runv(
684                    [
685                        *self.rd.tool("objcopy"),
686                        "-R",
687                        ".debug_pubnames",
688                        "-R",
689                        ".debug_pubtypes",
690                        exe_path,
691                    ],
692                    cwd=self.rd.root,
693                )
694
695        for bin in self.bins:
696            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
697            copy(src_path, bin)
698        for example in self.examples:
699            src_path = (
700                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
701            )
702            copy(src_path, Path("examples") / example)
703
704        if self.extract:
705            cargo_build_json_output = build_output["cargo"]
706
707            target_dir = self.rd.cargo_target_dir()
708            for line in cargo_build_json_output.split("\n"):
709                if line.strip() == "" or not line.startswith("{"):
710                    continue
711                message = json.loads(line)
712                if message["reason"] != "build-script-executed":
713                    continue
714                out_dir = self.rd.rewrite_builder_path_for_host(
715                    Path(message["out_dir"])
716                )
717                if not out_dir.is_relative_to(target_dir):
718                    # Some crates are built for both the host and the target.
719                    # Ignore the built-for-host out dir.
720                    continue
721                # parse the package name from a package_id that looks like one of:
722                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
723                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
724                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
725                # file:///path/to/my-package#0.1.0
726                package_id = message["package_id"]
727                if "@" in package_id:
728                    package = package_id.split("@")[0].split("#")[-1]
729                else:
730                    package = message["package_id"].split("#")[0].split("/")[-1]
731                for src, dst in self.extract.get(package, {}).items():
732                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])
733
734        self.acquired = True
def run(self, prep: dict[str, typing.Any]) -> None:
736    def run(self, prep: dict[str, Any]) -> None:
737        super().run(prep)
738        self.build(prep)

Perform the action.

Args:
    prep: Any prep work returned by `prepare_batch`.

@cache
def inputs(self) -> set[str]:
740    @cache
741    def inputs(self) -> set[str]:
742        deps = set()
743
744        for bin in self.bins:
745            crate = self.rd.cargo_workspace.crate_for_bin(bin)
746            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
747        for example in self.examples:
748            crate = self.rd.cargo_workspace.crate_for_example(example)
749            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
750                crate, dev=True
751            )
752
753        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
754        return inputs

Return the files which are considered inputs to the action.

Inherited Members
CargoPreImage
extra
PreImage
rd
path
class Image:
class Image:
    """A Docker image whose build and dependencies are managed by mzbuild.

    An image corresponds to a directory in a repository that contains a
    `mzbuild.yml` file. This directory is called an "mzbuild context."

    Attributes:
        name: The name of the image.
        publish: Whether the image should be pushed to Docker Hub.
        depends_on: The names of the images upon which this image depends.
        root: The path to the root of the associated `Repository`.
        path: The path to the directory containing the `mzbuild.yml`
            configuration file.
        pre_images: Optional actions to perform before running `docker build`.
        build_args: An optional list of --build-arg to pass to the dockerfile
    """

    # Matches `MZFROM <image>` directives at the start of a Dockerfile line.
    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")

    # Lazily populated cache of the image's build-context files; None until
    # first computed.
    _context_files_cache: set[str] | None

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path
        self._context_files_cache = None
        self.pre_images: list[PreImage] = []
        # Parse the user-facing configuration out of mzbuild.yml.
        with open(self.path / "mzbuild.yml") as f:
            data = yaml.safe_load(f)
            self.name: str = data.pop("name")
            self.publish: bool = data.pop("publish", True)
            self.description: str | None = data.pop("description", None)
            self.mainline: bool = data.pop("mainline", True)
            for pre_image in data.pop("pre-image", []):
                typ = pre_image.pop("type", None)
                if typ == "cargo-build":
                    self.pre_images.append(CargoBuild(self.rd, self.path, pre_image))
                elif typ == "copy":
                    self.pre_images.append(Copy(self.rd, self.path, pre_image))
                else:
                    raise ValueError(
                        f"mzbuild config in {self.path} has unknown pre-image type"
                    )
            self.build_args = data.pop("build-args", {})

        # Image names are embedded in Docker repository names, so restrict
        # them to a safe character set.
        if re.search(r"[^A-Za-z0-9\-]", self.name):
            raise ValueError(
                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
            )

        # Scan the Dockerfile for MZFROM directives to discover this image's
        # dependencies on other mzbuild images.
        self.depends_on: list[str] = []
        with open(self.path / "Dockerfile", "rb") as f:
            for line in f:
                match = self._DOCKERFILE_MZFROM_RE.match(line)
                if match:
                    self.depends_on.append(match.group(1).decode())

    def sync_description(self) -> None:
        """Sync the description to Docker Hub if the image is publishable
        and a README.md file exists."""

        if not self.publish:
            ui.say(f"{self.name} is not publishable")
            return

        readme_path = self.path / "README.md"
        has_readme = readme_path.exists()
        if not has_readme:
            ui.say(f"{self.name} has no README.md or description")
            return

        docker_config = os.getenv("DOCKER_CONFIG")
        # NOTE(review): `docker pushrm` is a third-party CLI plugin that
        # pushes a repository README/short description — assumes the plugin
        # is installed where this runs; confirm.
        spawn.runv(
            [
                "docker",
                "pushrm",
                f"--file={readme_path}",
                *([f"--config={docker_config}/config.json"] if docker_config else []),
                *([f"--short={self.description}"] if self.description else []),
                self.docker_name(),
            ]
        )

    def docker_name(self, tag: str | None = None) -> str:
        """Return the name of the image on Docker Hub at the given tag."""
        name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
        if tag:
            name += f":{tag}"
        return name

A Docker image whose build and dependencies are managed by mzbuild.

An image corresponds to a directory in a repository that contains a mzbuild.yml file. This directory is called an "mzbuild context."

Attributes:
- name: The name of the image.
- publish: Whether the image should be pushed to Docker Hub.
- depends_on: The names of the images upon which this image depends.
- root: The path to the root of the associated Repository.
- path: The path to the directory containing the mzbuild.yml configuration file.
- pre_images: Optional actions to perform before running docker build.
- build_args: An optional list of --build-arg to pass to the dockerfile.

Image( rd: RepositoryDetails, path: pathlib.Path)
778    def __init__(self, rd: RepositoryDetails, path: Path):
779        self.rd = rd
780        self.path = path
781        self._context_files_cache = None
782        self.pre_images: list[PreImage] = []
783        with open(self.path / "mzbuild.yml") as f:
784            data = yaml.safe_load(f)
785            self.name: str = data.pop("name")
786            self.publish: bool = data.pop("publish", True)
787            self.description: str | None = data.pop("description", None)
788            self.mainline: bool = data.pop("mainline", True)
789            for pre_image in data.pop("pre-image", []):
790                typ = pre_image.pop("type", None)
791                if typ == "cargo-build":
792                    self.pre_images.append(CargoBuild(self.rd, self.path, pre_image))
793                elif typ == "copy":
794                    self.pre_images.append(Copy(self.rd, self.path, pre_image))
795                else:
796                    raise ValueError(
797                        f"mzbuild config in {self.path} has unknown pre-image type"
798                    )
799            self.build_args = data.pop("build-args", {})
800
801        if re.search(r"[^A-Za-z0-9\-]", self.name):
802            raise ValueError(
803                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
804            )
805
806        self.depends_on: list[str] = []
807        with open(self.path / "Dockerfile", "rb") as f:
808            for line in f:
809                match = self._DOCKERFILE_MZFROM_RE.match(line)
810                if match:
811                    self.depends_on.append(match.group(1).decode())
rd
path
pre_images: list[PreImage]
depends_on: list[str]
def sync_description(self) -> None:
813    def sync_description(self) -> None:
814        """Sync the description to Docker Hub if the image is publishable
815        and a README.md file exists."""
816
817        if not self.publish:
818            ui.say(f"{self.name} is not publishable")
819            return
820
821        readme_path = self.path / "README.md"
822        has_readme = readme_path.exists()
823        if not has_readme:
824            ui.say(f"{self.name} has no README.md or description")
825            return
826
827        docker_config = os.getenv("DOCKER_CONFIG")
828        spawn.runv(
829            [
830                "docker",
831                "pushrm",
832                f"--file={readme_path}",
833                *([f"--config={docker_config}/config.json"] if docker_config else []),
834                *([f"--short={self.description}"] if self.description else []),
835                self.docker_name(),
836            ]
837        )

Sync the description to Docker Hub if the image is publishable and a README.md file exists.

def docker_name(self, tag: str | None = None) -> str:
839    def docker_name(self, tag: str | None = None) -> str:
840        """Return the name of the image on Docker Hub at the given tag."""
841        name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
842        if tag:
843            name += f":{tag}"
844        return name

Return the name of the image on Docker Hub at the given tag.

class ResolvedImage:
    """An `Image` whose dependencies have been resolved.

    Attributes:
        image: The underlying `Image`.
        acquired: Whether the image is available locally.
        dependencies: A mapping from dependency name to `ResolvedImage` for
            each of the images that `image` depends upon.
    """

    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
        self.image = image
        self.acquired = False
        # Keyed by dependency name for O(1) lookup when rendering Dockerfiles.
        self.dependencies: dict[str, "ResolvedImage"] = {}
        for d in dependencies:
            self.dependencies[d.name] = d

    def __repr__(self) -> str:
        return f"ResolvedImage<{self.spec()}>"

    @property
    def name(self) -> str:
        """The name of the underlying image."""
        return self.image.name

    @property
    def publish(self) -> bool:
        """Whether the underlying image should be pushed to Docker Hub."""
        return self.image.publish

    # NOTE(review): `@cache` on instance methods keeps `self` alive for the
    # cache's lifetime (ruff B019). That is acceptable here because resolved
    # images live for the whole build and fingerprinting is expensive, so the
    # memoization is deliberate.
    @cache
    def spec(self) -> str:
        """Return the "spec" for the image.

        A spec is the unique identifier for the image given its current
        fingerprint. It is a valid Docker Hub name.
        """
        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")

    def write_dockerfile(self) -> IO[bytes]:
        """Render the Dockerfile without mzbuild directives.

        Returns:
            file: A handle to a temporary file containing the adjusted
                Dockerfile."""
        with open(self.image.path / "Dockerfile", "rb") as f:
            lines = f.readlines()
        f = TemporaryFile()
        for line in lines:
            match = Image._DOCKERFILE_MZFROM_RE.match(line)
            if match:
                # Rewrite `MZFROM <name>` into a `FROM` line pinned to the
                # dependency's fingerprinted spec.
                image = match.group(1).decode()
                spec = self.dependencies[image].spec()
                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
            f.write(line)
        f.seek(0)
        return f

    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
        """Build the image from source.

        Requires that the caller has already acquired all dependencies and
        prepared all `PreImage` actions via `PreImage.prepare_batch`.

        Args:
            prep: The per-`PreImage`-class preparation state produced by
                `PreImage.prepare_batch`.
            push: Whether to push the built image (requires docker buildx).
        """
        ui.section(f"Building {self.spec()}")
        # Remove gitignored files from the context so they do not leak into
        # the image or affect the build.
        spawn.runv(["git", "clean", "-ffdX", self.image.path])

        for pre_image in self.image.pre_images:
            pre_image.run(prep[type(pre_image)])
        build_args = {
            **self.image.build_args,
            "BUILD_PROFILE": self.image.rd.profile.name,
            "ARCH_GCC": str(self.image.rd.arch),
            "ARCH_GO": self.image.rd.arch.go_str(),
            "CI_SANITIZER": str(self.image.rd.sanitizer),
        }
        f = self.write_dockerfile()

        cmd: Sequence[str]
        try:
            spawn.capture(["docker", "buildx", "version"])
        except subprocess.CalledProcessError:
            # buildx is unavailable. Pushing requires buildx, so fail hard in
            # that case; otherwise fall back to the legacy builder.
            if push:
                print(
                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
                )
                raise
            print(
                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
            )
            print("Falling back to docker build")
            cmd = [
                "docker",
                "build",
                "-f",
                "-",
                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                "-t",
                self.spec(),
                f"--platform=linux/{self.image.rd.arch.go_str()}",
                str(self.image.path),
            ]
        else:
            cmd = [
                "docker",
                "buildx",
                "build",
                "--progress=plain",  # less noisy
                "-f",
                "-",
                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                # Tag for both Docker Hub and the GHCR mirror in one build.
                "-t",
                f"docker.io/{self.spec()}",
                "-t",
                f"ghcr.io/materializeinc/{self.spec()}",
                f"--platform=linux/{self.image.rd.arch.go_str()}",
                str(self.image.path),
                *(["--push"] if push else ["--load"]),
            ]

        if token := os.getenv("GITHUB_GHCR_TOKEN"):
            spawn.runv(
                [
                    "docker",
                    "login",
                    "ghcr.io",
                    "-u",
                    "materialize-bot",
                    "--password-stdin",
                ],
                stdin=token.encode(),
            )

        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)

    def try_pull(self, max_retries: int) -> bool:
        """Download the image if it does not exist locally. Returns whether it was found."""
        ui.header(f"Acquiring {self.spec()}")
        command = ["docker", "pull"]
        # --quiet skips printing the progress bar, which does not display well in CI.
        if ui.env_is_truthy("CI"):
            command.append("--quiet")
        command.append(self.spec())
        if not self.acquired:
            sleep_time = 1
            for retry in range(1, max_retries + 1):
                try:
                    spawn.runv(
                        command,
                        stdin=subprocess.DEVNULL,
                        stdout=sys.stderr.buffer,
                    )
                    self.acquired = True
                    break
                except subprocess.CalledProcessError:
                    if retry < max_retries:
                        # There seems to be no good way to tell what error
                        # happened based on error code
                        # (https://github.com/docker/cli/issues/538) and we
                        # want to print output directly to terminal.
                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
                            # Poll the build that is expected to produce this
                            # image. Use a separate loop variable so we do not
                            # shadow the outer `retry` counter.
                            for _ in range(max_retries):
                                try:
                                    build_status = buildkite.get_build_status(build)
                                except subprocess.CalledProcessError:
                                    time.sleep(sleep_time)
                                    sleep_time = min(sleep_time * 2, 10)
                                    break
                                print(f"Build {build} status: {build_status}")
                                if build_status == "failed":
                                    print(
                                        f"Build {build} has been marked as failed, exiting hard"
                                    )
                                    sys.exit(1)
                                elif build_status == "success":
                                    break
                                assert (
                                    build_status == "pending"
                                ), f"Unknown build status {build_status}"
                                time.sleep(1)
                        else:
                            # Exponential backoff, capped at 10 seconds.
                            print(f"Retrying in {sleep_time}s ...")
                            time.sleep(sleep_time)
                            sleep_time = min(sleep_time * 2, 10)
                        continue
                    else:
                        break
        return self.acquired

    def is_published_if_necessary(self) -> bool:
        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
        if not self.publish:
            return False
        spec = self.spec()
        # The spec may already be GHCR-prefixed; normalize it to the bare
        # Docker Hub name and derive the GHCR mirror name from it.
        if spec.startswith(GHCR_PREFIX):
            spec = spec.removeprefix(GHCR_PREFIX)
        ghcr_spec = f"{GHCR_PREFIX}{spec}"
        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
            ui.say(f"{spec} already exists")
            return True
        return False

    def run(
        self,
        args: list[str] | None = None,
        docker_args: list[str] | None = None,
        env: dict[str, str] | None = None,
    ) -> None:
        """Run a command in the image.

        Creates a container from the image and runs the command described by
        `args` in the image.

        Args:
            args: The command and its arguments, if any.
            docker_args: Extra arguments passed to `docker run`.
            env: Environment variables to set in the container.
        """
        # `None` sentinels instead of mutable defaults, which would be shared
        # across calls.
        args = args if args is not None else []
        docker_args = docker_args if docker_args is not None else []
        env = env if env is not None else {}
        envs = []
        for key, val in env.items():
            envs.extend(["--env", f"{key}={val}"])
        spawn.runv(
            [
                "docker",
                "run",
                "--tty",
                "--rm",
                *envs,
                "--init",
                *docker_args,
                self.spec(),
                *args,
            ],
        )

    def list_dependencies(self, transitive: bool = False) -> set[str]:
        """Return the names of this image's dependencies.

        Args:
            transitive: Whether to include indirect dependencies too.
        """
        out = set()
        for dep in self.dependencies.values():
            out.add(dep.name)
            if transitive:
                out |= dep.list_dependencies(transitive)
        return out

    @cache
    def inputs(self, transitive: bool = False) -> set[str]:
        """List the files tracked as inputs to the image.

        These files are used to compute the fingerprint for the image. See
        `ResolvedImage.fingerprint` for details.

        Returns:
            inputs: The set of input files, relative to the root of the
                repository.
        """
        if self.image._context_files_cache is not None:
            paths = set(self.image._context_files_cache)
        else:
            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
        if not paths:
            # While we could find an `mzbuild.yml` file for this service, expand_globs didn't
            # return any files that matched this service. At the very least, the `mzbuild.yml`
            # file itself should have been returned. We have a bug if paths is empty.
            raise AssertionError(
                f"{self.image.name} mzbuild exists but its files are unknown to git"
            )
        for pre_image in self.image.pre_images:
            paths |= pre_image.inputs()
        if transitive:
            for dep in self.dependencies.values():
                paths |= dep.inputs(transitive)
        return paths

    @cache
    def fingerprint(self) -> Fingerprint:
        """Fingerprint the inputs to the image.

        Compute the fingerprint of the image. Changing the contents of any of
        the files or adding or removing files to the image will change the
        fingerprint, as will modifying the inputs to any of its dependencies.

        The image considers all non-gitignored files in its mzbuild context to
        be inputs. If it has a pre-image action, that action may add additional
        inputs via `PreImage.inputs`.
        """
        self_hash = hashlib.sha1()
        # When inputs come from precomputed sources (crate and image context
        # batching + resolved CargoPreImage paths), they are already individual
        # file paths from git. Skip the expensive expand_globs subprocess calls.
        inputs = self.inputs()
        if self.image._context_files_cache is not None:
            resolved_inputs = sorted(inputs)
        else:
            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
        for rel_path in resolved_inputs:
            abs_path = self.image.rd.root / rel_path
            file_hash = hashlib.sha1()
            raw_file_mode = os.lstat(abs_path).st_mode
            # Compute a simplified file mode using the same rules as Git.
            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
            if stat.S_ISLNK(raw_file_mode):
                file_mode = 0o120000
            elif raw_file_mode & stat.S_IXUSR:
                file_mode = 0o100755
            else:
                file_mode = 0o100644
            with open(abs_path, "rb") as f:
                file_hash.update(f.read())
            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
            self_hash.update(rel_path.encode())
            self_hash.update(file_hash.digest())
            self_hash.update(b"\0")

        for pre_image in self.image.pre_images:
            self_hash.update(pre_image.extra().encode())
            self_hash.update(b"\0")

        # Build configuration is part of the identity: the same sources built
        # with a different profile/arch/coverage/sanitizer are different images.
        self_hash.update(f"profile={self.image.rd.profile}".encode())
        self_hash.update(f"arch={self.image.rd.arch}".encode())
        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
        self_hash.update(b"mirror=ghcr")

        # Fold in the dependencies' fingerprints, in a deterministic order.
        full_hash = hashlib.sha1()
        full_hash.update(self_hash.digest())
        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
            full_hash.update(dep.name.encode())
            full_hash.update(dep.fingerprint())
            full_hash.update(b"\0")

        return Fingerprint(full_hash.digest())

An Image whose dependencies have been resolved.

Attributes: image: The underlying Image. acquired: Whether the image is available locally. dependencies: A mapping from dependency name to ResolvedImage for each of the images that image depends upon.

ResolvedImage( image: Image, dependencies: Iterable[ResolvedImage])
857    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
858        self.image = image
859        self.acquired = False
860        self.dependencies = {}
861        for d in dependencies:
862            self.dependencies[d.name] = d
image
acquired
dependencies
name: str
867    @property
868    def name(self) -> str:
869        """The name of the underlying image."""
870        return self.image.name

The name of the underlying image.

publish: bool
872    @property
873    def publish(self) -> bool:
874        """Whether the underlying image should be pushed to Docker Hub."""
875        return self.image.publish

Whether the underlying image should be pushed to Docker Hub.

@cache
def spec(self) -> str:
877    @cache
878    def spec(self) -> str:
879        """Return the "spec" for the image.
880
881        A spec is the unique identifier for the image given its current
882        fingerprint. It is a valid Docker Hub name.
883        """
884        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")

Return the "spec" for the image.

A spec is the unique identifier for the image given its current fingerprint. It is a valid Docker Hub name.

def write_dockerfile(self) -> IO[bytes]:
886    def write_dockerfile(self) -> IO[bytes]:
887        """Render the Dockerfile without mzbuild directives.
888
889        Returns:
890            file: A handle to a temporary file containing the adjusted
891                Dockerfile."""
892        with open(self.image.path / "Dockerfile", "rb") as f:
893            lines = f.readlines()
894        f = TemporaryFile()
895        for line in lines:
896            match = Image._DOCKERFILE_MZFROM_RE.match(line)
897            if match:
898                image = match.group(1).decode()
899                spec = self.dependencies[image].spec()
900                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
901            f.write(line)
902        f.seek(0)
903        return f

Render the Dockerfile without mzbuild directives.

Returns: file: A handle to a temporary file containing the adjusted Dockerfile.

def build( self, prep: dict[type[PreImage], typing.Any], push: bool = False) -> None:
905    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
906        """Build the image from source.
907
908        Requires that the caller has already acquired all dependencies and
909        prepared all `PreImage` actions via `PreImage.prepare_batch`.
910        """
911        ui.section(f"Building {self.spec()}")
912        spawn.runv(["git", "clean", "-ffdX", self.image.path])
913
914        for pre_image in self.image.pre_images:
915            pre_image.run(prep[type(pre_image)])
916        build_args = {
917            **self.image.build_args,
918            "BUILD_PROFILE": self.image.rd.profile.name,
919            "ARCH_GCC": str(self.image.rd.arch),
920            "ARCH_GO": self.image.rd.arch.go_str(),
921            "CI_SANITIZER": str(self.image.rd.sanitizer),
922        }
923        f = self.write_dockerfile()
924
925        try:
926            spawn.capture(["docker", "buildx", "version"])
927        except subprocess.CalledProcessError:
928            if push:
929                print(
930                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
931                )
932                raise
933            print(
934                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
935            )
936            print("Falling back to docker build")
937            cmd: Sequence[str] = [
938                "docker",
939                "build",
940                "-f",
941                "-",
942                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
943                "-t",
944                self.spec(),
945                f"--platform=linux/{self.image.rd.arch.go_str()}",
946                str(self.image.path),
947            ]
948        else:
949            cmd: Sequence[str] = [
950                "docker",
951                "buildx",
952                "build",
953                "--progress=plain",  # less noisy
954                "-f",
955                "-",
956                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
957                "-t",
958                f"docker.io/{self.spec()}",
959                "-t",
960                f"ghcr.io/materializeinc/{self.spec()}",
961                f"--platform=linux/{self.image.rd.arch.go_str()}",
962                str(self.image.path),
963                *(["--push"] if push else ["--load"]),
964            ]
965
966        if token := os.getenv("GITHUB_GHCR_TOKEN"):
967            spawn.runv(
968                [
969                    "docker",
970                    "login",
971                    "ghcr.io",
972                    "-u",
973                    "materialize-bot",
974                    "--password-stdin",
975                ],
976                stdin=token.encode(),
977            )
978
979        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)

Build the image from source.

Requires that the caller has already acquired all dependencies and prepared all PreImage actions via PreImage.prepare_batch.

def try_pull(self, max_retries: int) -> bool:
 981    def try_pull(self, max_retries: int) -> bool:
 982        """Download the image if it does not exist locally. Returns whether it was found."""
 983        ui.header(f"Acquiring {self.spec()}")
 984        command = ["docker", "pull"]
 985        # --quiet skips printing the progress bar, which does not display well in CI.
 986        if ui.env_is_truthy("CI"):
 987            command.append("--quiet")
 988        command.append(self.spec())
 989        if not self.acquired:
 990            sleep_time = 1
 991            for retry in range(1, max_retries + 1):
 992                try:
 993                    spawn.runv(
 994                        command,
 995                        stdin=subprocess.DEVNULL,
 996                        stdout=sys.stderr.buffer,
 997                    )
 998                    self.acquired = True
 999                    break
1000                except subprocess.CalledProcessError:
1001                    if retry < max_retries:
1002                        # There seems to be no good way to tell what error
1003                        # happened based on error code
1004                        # (https://github.com/docker/cli/issues/538) and we
1005                        # want to print output directly to terminal.
1006                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
1007                            for retry in range(max_retries):
1008                                try:
1009                                    build_status = buildkite.get_build_status(build)
1010                                except subprocess.CalledProcessError:
1011                                    time.sleep(sleep_time)
1012                                    sleep_time = min(sleep_time * 2, 10)
1013                                    break
1014                                print(f"Build {build} status: {build_status}")
1015                                if build_status == "failed":
1016                                    print(
1017                                        f"Build {build} has been marked as failed, exiting hard"
1018                                    )
1019                                    sys.exit(1)
1020                                elif build_status == "success":
1021                                    break
1022                                assert (
1023                                    build_status == "pending"
1024                                ), f"Unknown build status {build_status}"
1025                                time.sleep(1)
1026                        else:
1027                            print(f"Retrying in {sleep_time}s ...")
1028                            time.sleep(sleep_time)
1029                            sleep_time = min(sleep_time * 2, 10)
1030                        continue
1031                    else:
1032                        break
1033        return self.acquired

Download the image if it does not exist locally. Returns whether it was found.

def is_published_if_necessary(self) -> bool:
1035    def is_published_if_necessary(self) -> bool:
1036        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
1037        if not self.publish:
1038            return False
1039        spec = self.spec()
1040        if spec.startswith(GHCR_PREFIX):
1041            spec = spec.removeprefix(GHCR_PREFIX)
1042        ghcr_spec = f"{GHCR_PREFIX}{spec}"
1043        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
1044            ui.say(f"{spec} already exists")
1045            return True
1046        return False

Report whether the image exists on DockerHub & GHCR if it is publishable.

def run( self, args: list[str] = [], docker_args: list[str] = [], env: dict[str, str] = {}) -> None:
1048    def run(
1049        self,
1050        args: list[str] = [],
1051        docker_args: list[str] = [],
1052        env: dict[str, str] = {},
1053    ) -> None:
1054        """Run a command in the image.
1055
1056        Creates a container from the image and runs the command described by
1057        `args` in the image.
1058        """
1059        envs = []
1060        for key, val in env.items():
1061            envs.extend(["--env", f"{key}={val}"])
1062        spawn.runv(
1063            [
1064                "docker",
1065                "run",
1066                "--tty",
1067                "--rm",
1068                *envs,
1069                "--init",
1070                *docker_args,
1071                self.spec(),
1072                *args,
1073            ],
1074        )

Run a command in the image.

Creates a container from the image and runs the command described by args in the image.

def list_dependencies(self, transitive: bool = False) -> set[str]:
1076    def list_dependencies(self, transitive: bool = False) -> set[str]:
1077        out = set()
1078        for dep in self.dependencies.values():
1079            out.add(dep.name)
1080            if transitive:
1081                out |= dep.list_dependencies(transitive)
1082        return out
@cache
def inputs(self, transitive: bool = False) -> set[str]:
1084    @cache
1085    def inputs(self, transitive: bool = False) -> set[str]:
1086        """List the files tracked as inputs to the image.
1087
1088        These files are used to compute the fingerprint for the image. See
1089        `ResolvedImage.fingerprint` for details.
1090
1091        Returns:
1092            inputs: A list of input files, relative to the root of the
1093                repository.
1094        """
1095        if self.image._context_files_cache is not None:
1096            paths = set(self.image._context_files_cache)
1097        else:
1098            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
1099        if not paths:
1100        # While we could find an `mzbuild.yml` file for this service, expand_globs didn't
1101            # return any files that matched this service. At the very least, the `mzbuild.yml`
1102            # file itself should have been returned. We have a bug if paths is empty.
1103            raise AssertionError(
1104                f"{self.image.name} mzbuild exists but its files are unknown to git"
1105            )
1106        for pre_image in self.image.pre_images:
1107            paths |= pre_image.inputs()
1108        if transitive:
1109            for dep in self.dependencies.values():
1110                paths |= dep.inputs(transitive)
1111        return paths

List the files tracked as inputs to the image.

These files are used to compute the fingerprint for the image. See ResolvedImage.fingerprint for details.

Returns: inputs: A list of input files, relative to the root of the repository.

@cache
def fingerprint(self) -> Fingerprint:
1113    @cache
1114    def fingerprint(self) -> Fingerprint:
1115        """Fingerprint the inputs to the image.
1116
1117        Compute the fingerprint of the image. Changing the contents of any of
1118        the files or adding or removing files to the image will change the
1119        fingerprint, as will modifying the inputs to any of its dependencies.
1120
1121        The image considers all non-gitignored files in its mzbuild context to
1122        be inputs. If it has a pre-image action, that action may add additional
1123        inputs via `PreImage.inputs`.
1124        """
1125        self_hash = hashlib.sha1()
1126        # When inputs come from precomputed sources (crate and image context
1127        # batching + resolved CargoPreImage paths), they are already individual
1128        # file paths from git. Skip the expensive expand_globs subprocess calls.
1129        inputs = self.inputs()
1130        if self.image._context_files_cache is not None:
1131            resolved_inputs = sorted(inputs)
1132        else:
1133            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
1134        for rel_path in resolved_inputs:
1135            abs_path = self.image.rd.root / rel_path
1136            file_hash = hashlib.sha1()
1137            raw_file_mode = os.lstat(abs_path).st_mode
1138            # Compute a simplified file mode using the same rules as Git.
1139            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
1140            if stat.S_ISLNK(raw_file_mode):
1141                file_mode = 0o120000
1142            elif raw_file_mode & stat.S_IXUSR:
1143                file_mode = 0o100755
1144            else:
1145                file_mode = 0o100644
1146            with open(abs_path, "rb") as f:
1147                file_hash.update(f.read())
1148            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
1149            self_hash.update(rel_path.encode())
1150            self_hash.update(file_hash.digest())
1151            self_hash.update(b"\0")
1152
1153        for pre_image in self.image.pre_images:
1154            self_hash.update(pre_image.extra().encode())
1155            self_hash.update(b"\0")
1156
1157        self_hash.update(f"profile={self.image.rd.profile}".encode())
1158        self_hash.update(f"arch={self.image.rd.arch}".encode())
1159        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
1160        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
1161        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
1162        self_hash.update(b"mirror=ghcr")
1163
1164        full_hash = hashlib.sha1()
1165        full_hash.update(self_hash.digest())
1166        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
1167            full_hash.update(dep.name.encode())
1168            full_hash.update(dep.fingerprint())
1169            full_hash.update(b"\0")
1170
1171        return Fingerprint(full_hash.digest())

Fingerprint the inputs to the image.

Compute the fingerprint of the image. Changing the contents of any of the files or adding or removing files to the image will change the fingerprint, as will modifying the inputs to any of its dependencies.

The image considers all non-gitignored files in its mzbuild context to be inputs. If it has a pre-image action, that action may add additional inputs via PreImage.inputs.

class DependencySet:
class DependencySet:
    """A set of `ResolvedImage`s.

    Iterating over a dependency set yields the contained images in an arbitrary
    order. Indexing a dependency set yields the image with the specified name.
    """

    def __init__(self, dependencies: Iterable[Image]):
        """Construct a new `DependencySet`.

        The provided `dependencies` must be topologically sorted, i.e., every
        image must appear after all of the images it depends on.
        """
        self._dependencies: dict[str, ResolvedImage] = {}
        known_images = docker_images()
        for d in dependencies:
            image = ResolvedImage(
                image=d,
                # Topological sorting guarantees every dependency has already
                # been resolved into `self._dependencies`.
                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
            )
            image.acquired = image.spec() in known_images
            self._dependencies[d.name] = image

    def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]:
        """Run batch preparation for the pre-image actions of `images`.

        Groups all pre-image actions by their concrete type and invokes each
        type's `prepare_batch` once with every instance of that type, returning
        a mapping from pre-image type to its preparation result.
        """
        pre_images = collections.defaultdict(list)
        for image in images:
            for pre_image in image.image.pre_images:
                pre_images[type(pre_image)].append(pre_image)
        pre_image_prep = {}
        for cls, instances in pre_images.items():
            # The cast satisfies the type checker: `prepare_batch` is invoked
            # on the class itself, not on an instance.
            pre_image = cast(PreImage, cls)
            pre_image_prep[cls] = pre_image.prepare_batch(instances)
        return pre_image_prep

    def acquire(self, max_retries: int | None = None) -> None:
        """Download or build all of the images in the dependency set that do not
        already exist locally.

        Args:
            max_retries: Number of retries on failure.
        """

        # Only retry in CI runs since we struggle with flaky docker pulls
        # there. Note that a falsy `max_retries` (None or 0) falls through to
        # the defaults; the assertion guarantees at least one attempt.
        if not max_retries:
            max_retries = (
                90
                if os.getenv("CI_WAITING_FOR_BUILD")
                else (
                    5
                    if ui.env_is_truthy("CI")
                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
                    else 1
                )
            )
        assert max_retries > 0

        # Publishable images may already exist in the registry: try pulling
        # them in parallel, and fall back to building any that cannot be
        # pulled. Non-publishable images are always built locally.
        deps_to_check = [dep for dep in self if dep.publish]
        deps_to_build = [dep for dep in self if not dep.publish]
        if deps_to_check:
            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
                futures = [
                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
                ]
                for dep, future in zip(deps_to_check, futures):
                    try:
                        if not future.result():
                            deps_to_build.append(dep)
                    except Exception:
                        # A failed pull is not fatal; build locally instead.
                        deps_to_build.append(dep)

        # Don't attempt to build in CI, as our timeouts and small machines
        # won't allow it anyway.
        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            expected_deps = [dep for dep in deps_to_build if dep.publish]
            if expected_deps:
                print(
                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
                )
                sys.exit(5)

        prep = self._prepare_batch(deps_to_build)
        for dep in deps_to_build:
            dep.build(prep)

    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
        """Ensure all publishable images in this dependency set exist on Docker
        Hub.

        Images are pushed using their spec as their tag.

        Args:
            pre_build: A callback to invoke with all dependencies that are
                       going to be built locally, invoked after their cargo
                       build is done, but before the Docker images are built
                       and uploaded to Docker Hub.
        """
        num_deps = len(list(self))
        if not num_deps:
            deps_to_build = []
        else:
            # Check the publication status of every image in parallel.
            with ThreadPoolExecutor(max_workers=num_deps) as executor:
                futures = list(
                    executor.map(
                        lambda dep: (dep, not dep.is_published_if_necessary()), self
                    )
                )

            deps_to_build = [dep for dep, should_build in futures if should_build]

        prep = self._prepare_batch(deps_to_build)
        if pre_build:
            pre_build(deps_to_build)
        lock = Lock()
        # Images that need no build count as already built so their dependents
        # don't wait on them below.
        built_deps: set[str] = {dep.name for dep in self} - {
            dep.name for dep in deps_to_build
        }

        def build_dep(dep):
            # Poll (with a 10-minute deadline) until every dependency of `dep`
            # has been built by some other worker thread.
            end_time = time.time() + 600
            while True:
                if time.time() > end_time:
                    # `dep.dependencies` yields image *names* (strings), as in
                    # the membership check below, so report them directly.
                    raise TimeoutError(
                        f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}"
                    )
                with lock:
                    if all(dep2 in built_deps for dep2 in dep.dependencies):
                        break
                time.sleep(0.01)
            # Retry publishable builds up to three times to paper over
            # transient registry failures; local-only builds fail fast.
            for attempts_remaining in reversed(range(3)):
                try:
                    dep.build(prep, push=dep.publish)
                    with lock:
                        built_deps.add(dep.name)
                    break
                except Exception:
                    if not dep.publish or attempts_remaining == 0:
                        raise

        if deps_to_build:
            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
                for future in as_completed(futures):
                    future.result()

    def check(self) -> bool:
        """Check all publishable images in this dependency set exist on Docker
        Hub. Don't try to download or build them."""
        num_deps = len(list(self))
        if num_deps == 0:
            return True
        with ThreadPoolExecutor(max_workers=num_deps) as executor:
            results = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), list(self))
            )
        return all(results)

    def __iter__(self) -> Iterator[ResolvedImage]:
        return iter(self._dependencies.values())

    def __getitem__(self, key: str) -> ResolvedImage:
        return self._dependencies[key]

A set of ResolvedImages.

Iterating over a dependency set yields the contained images in an arbitrary order. Indexing a dependency set yields the image with the specified name.

DependencySet(dependencies: Iterable[Image])
1181    def __init__(self, dependencies: Iterable[Image]):
1182        """Construct a new `DependencySet`.
1183
1184        The provided `dependencies` must be topologically sorted.
1185        """
1186        self._dependencies: dict[str, ResolvedImage] = {}
1187        known_images = docker_images()
1188        for d in dependencies:
1189            image = ResolvedImage(
1190                image=d,
1191                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
1192            )
1193            image.acquired = image.spec() in known_images
1194            self._dependencies[d.name] = image

Construct a new DependencySet.

The provided dependencies must be topologically sorted.

def acquire(self, max_retries: int | None = None) -> None:
1207    def acquire(self, max_retries: int | None = None) -> None:
1208        """Download or build all of the images in the dependency set that do not
1209        already exist locally.
1210
1211        Args:
1212            max_retries: Number of retries on failure.
1213        """
1214
1215        # Only retry in CI runs since we struggle with flaky docker pulls there
1216        if not max_retries:
1217            max_retries = (
1218                90
1219                if os.getenv("CI_WAITING_FOR_BUILD")
1220                else (
1221                    5
1222                    if ui.env_is_truthy("CI")
1223                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
1224                    else 1
1225                )
1226            )
1227        assert max_retries > 0
1228
1229        deps_to_check = [dep for dep in self if dep.publish]
1230        deps_to_build = [dep for dep in self if not dep.publish]
1231        if len(deps_to_check):
1232            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
1233                futures = [
1234                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
1235                ]
1236                for dep, future in zip(deps_to_check, futures):
1237                    try:
1238                        if not future.result():
1239                            deps_to_build.append(dep)
1240                    except Exception:
1241                        deps_to_build.append(dep)
1242
1243        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
1244        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
1245            expected_deps = [dep for dep in deps_to_build if dep.publish]
1246            if expected_deps:
1247                print(
1248                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
1249                )
1250                sys.exit(5)
1251
1252        prep = self._prepare_batch(deps_to_build)
1253        for dep in deps_to_build:
1254            dep.build(prep)

Download or build all of the images in the dependency set that do not already exist locally.

Args: max_retries: Number of retries on failure.

def ensure( self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
1256    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
1257        """Ensure all publishable images in this dependency set exist on Docker
1258        Hub.
1259
1260        Images are pushed using their spec as their tag.
1261
1262        Args:
1263            pre_build: A callback to invoke with all dependency that are going
1264                       to be built locally, invoked after their cargo build is
1265                       done, but before the Docker images are build and
1266                       uploaded to DockerHub.
1267        """
1268        num_deps = len(list(self))
1269        if not num_deps:
1270            deps_to_build = []
1271        else:
1272            with ThreadPoolExecutor(max_workers=num_deps) as executor:
1273                futures = list(
1274                    executor.map(
1275                        lambda dep: (dep, not dep.is_published_if_necessary()), self
1276                    )
1277                )
1278
1279            deps_to_build = [dep for dep, should_build in futures if should_build]
1280
1281        prep = self._prepare_batch(deps_to_build)
1282        if pre_build:
1283            pre_build(deps_to_build)
1284        lock = Lock()
1285        built_deps: set[str] = set([dep.name for dep in self]) - set(
1286            [dep.name for dep in deps_to_build]
1287        )
1288
1289        def build_dep(dep):
1290            end_time = time.time() + 600
1291            while True:
1292                if time.time() > end_time:
1293                    raise TimeoutError(
1294                        f"Timed out in {dep.name} waiting for {[dep2.name for dep2 in dep.dependencies if dep2 not in built_deps]}"
1295                    )
1296                with lock:
1297                    if all(dep2 in built_deps for dep2 in dep.dependencies):
1298                        break
1299                time.sleep(0.01)
1300            for attempts_remaining in reversed(range(3)):
1301                try:
1302                    dep.build(prep, push=dep.publish)
1303                    with lock:
1304                        built_deps.add(dep.name)
1305                    break
1306                except Exception:
1307                    if not dep.publish or attempts_remaining == 0:
1308                        raise
1309
1310        if deps_to_build:
1311            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
1312                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
1313                for future in as_completed(futures):
1314                    future.result()

Ensure all publishable images in this dependency set exist on Docker Hub.

Images are pushed using their spec as their tag.

Args: pre_build: A callback to invoke with all dependencies that are going to be built locally, invoked after their cargo build is done, but before the Docker images are built and uploaded to Docker Hub.

def check(self) -> bool:
1316    def check(self) -> bool:
1317        """Check all publishable images in this dependency set exist on Docker
1318        Hub. Don't try to download or build them."""
1319        num_deps = len(list(self))
1320        if num_deps == 0:
1321            return True
1322        with ThreadPoolExecutor(max_workers=num_deps) as executor:
1323            results = list(
1324                executor.map(lambda dep: dep.is_published_if_necessary(), list(self))
1325            )
1326        return all(results)

Check all publishable images in this dependency set exist on Docker Hub. Don't try to download or build them.

class Repository:
class Repository:
    """A collection of mzbuild `Image`s.

    Creating a repository will walk the filesystem beneath `root` to
    automatically discover all contained `Image`s.

    Iterating over a repository yields the contained images in an arbitrary
    order.

    Args:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile to build the repository in.
        coverage: Whether to enable code coverage instrumentation.
        sanitizer: Whether to use a sanitizer (address, thread, leak, memory,
            none).
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.

    Attributes:
        images: A mapping from image name to `Image` for all contained images.
        compositions: A mapping from composition name to the directory
            containing its `mzcompose.py` file.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch = Arch.host(),
        profile: Profile = (
            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
        ),
        coverage: bool = False,
        sanitizer: Sanitizer = Sanitizer.none,
        image_registry: str = image_registry(),
        image_prefix: str = "",
    ):
        """Walk the filesystem beneath `root`, discovering images and
        compositions and validating inter-image dependencies."""
        self.rd = RepositoryDetails(
            root,
            arch,
            profile,
            coverage,
            sanitizer,
            image_registry,
            image_prefix,
        )
        self.images: dict[str, Image] = {}
        self.compositions: dict[str, Path] = {}
        for path, dirs, files in os.walk(self.root, topdown=True):
            # Don't descend into misc/python. NOTE(review): presumably skipped
            # because no images live there and it is large — confirm intent.
            if path == str(root / "misc"):
                dirs.remove("python")
            # Filter out some particularly massive ignored directories to keep
            # things snappy. Not required for correctness.
            dirs[:] = set(dirs) - {
                ".git",
                ".mypy_cache",
                "target",
                "target-ra",
                "target-xcompile",
                "mzdata",
                "node_modules",
                "venv",
            }
            # A directory containing an mzbuild.yml defines an image.
            if "mzbuild.yml" in files:
                image = Image(self.rd, Path(path))
                if not image.name:
                    raise ValueError(f"config at {path} missing name")
                if image.name in self.images:
                    raise ValueError(f"image {image.name} exists twice")
                self.images[image.name] = image
            # A directory containing an mzcompose.py defines a composition,
            # keyed by its directory name.
            if "mzcompose.py" in files:
                name = Path(path).name
                if name in self.compositions:
                    raise ValueError(f"composition {name} exists twice")
                self.compositions[name] = Path(path)

        # Validate dependencies.
        for image in self.images.values():
            for d in image.depends_on:
                if d not in self.images:
                    raise ValueError(
                        f"image {image.name} depends on non-existent image {d}"
                    )

    @staticmethod
    def install_arguments(parser: argparse.ArgumentParser) -> None:
        """Install options to configure a repository into an argparse parser.

        This function installs the following options:

          * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the
            `profile` repository attribute.
          * The `--coverage` boolean option to control the `coverage` repository
            attribute.

        Use `Repository.from_arguments` to construct a repository from the
        parsed command-line arguments.
        """
        build_mode = parser.add_mutually_exclusive_group()
        build_mode.add_argument(
            "--dev",
            action="store_true",
            help="build Rust binaries with the dev profile",
        )
        build_mode.add_argument(
            "--release",
            action="store_true",
            help="build Rust binaries with the release profile (default)",
        )
        build_mode.add_argument(
            "--optimized",
            action="store_true",
            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
        )
        parser.add_argument(
            "--coverage",
            help="whether to enable code coverage compilation flags",
            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
            action="store_true",
        )
        parser.add_argument(
            "--sanitizer",
            help="whether to enable a sanitizer",
            # Raises KeyError at parser-construction time if CI_SANITIZER
            # names an unknown sanitizer.
            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
            type=Sanitizer,
            choices=Sanitizer,
        )
        parser.add_argument(
            "--arch",
            default=Arch.host(),
            help="the CPU architecture to build for",
            type=Arch,
            choices=Arch,
        )
        parser.add_argument(
            "--image-registry",
            default=image_registry(),
            help="the Docker image registry to pull images from and push images to",
        )
        parser.add_argument(
            "--image-prefix",
            default="",
            help="a prefix to apply to all Docker image names",
        )

    @classmethod
    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
        """Construct a repository from command-line arguments.

        The provided namespace must contain the options installed by
        `Repository.install_arguments`.
        """
        if args.release:
            profile = Profile.RELEASE
        elif args.optimized:
            profile = Profile.OPTIMIZED
        elif args.dev:
            profile = Profile.DEV
        else:
            # No explicit profile flag: mirror the constructor's default.
            profile = (
                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
            )

        return cls(
            root,
            profile=profile,
            coverage=args.coverage,
            sanitizer=args.sanitizer,
            image_registry=args.image_registry,
            image_prefix=args.image_prefix,
            arch=args.arch,
        )

    @property
    def root(self) -> Path:
        """The path to the root directory for the repository."""
        return self.rd.root

    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
        """Compute the dependency set necessary to build target images.

        The dependencies of `targets` will be crawled recursively until the
        complete set of transitive dependencies is determined or a circular
        dependency is discovered. The returned dependency set will be sorted
        in topological order.

        Raises:
           ValueError: A circular dependency was discovered in the images
               in the repository.
        """
        # Pre-fetch all crate input files in a single batched git call,
        # replacing ~118 individual subprocess pairs with one pair.
        self.rd.cargo_workspace.precompute_crate_inputs()
        # Pre-fetch all image context files in a single batched git call,
        # replacing ~41 individual subprocess pairs with one pair.
        self._precompute_image_context_files()

        resolved = OrderedDict()
        visiting = set()

        # Depth-first post-order traversal; `path` records the chain of image
        # names for the circular-dependency error message. The mutable default
        # is safe here because `path` is never mutated (only `path + [...]`).
        def visit(image: Image, path: list[str] = []) -> None:
            if image.name in resolved:
                return
            if image.name in visiting:
                diagram = " -> ".join(path + [image.name])
                raise ValueError(f"circular dependency in mzbuild: {diagram}")

            visiting.add(image.name)
            for d in sorted(image.depends_on):
                visit(self.images[d], path + [image.name])
            resolved[image.name] = image

        for target_image in sorted(targets, key=lambda image: image.name):
            visit(target_image)

        return DependencySet(resolved.values())

    def _precompute_image_context_files(self) -> None:
        """Pre-fetch all image context files in a single batched git call.

        This replaces ~41 individual pairs of git subprocess calls (one per
        image) with a single pair, then partitions the results by image path.
        """
        root = self.rd.root
        # Use paths relative to root for git specs and partitioning, since
        # git --relative outputs paths relative to cwd (root). Image paths
        # may be absolute when MZ_ROOT is an absolute path.
        image_rel_paths = sorted(
            set(str(img.path.relative_to(root)) for img in self.images.values())
        )
        specs = [f"{p}/**" for p in image_rel_paths]

        # Git's well-known empty-tree hash: diffing against it lists every
        # tracked file under the given pathspecs.
        empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        diff_files = spawn.capture(
            ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"]
            + specs,
            cwd=root,
        )
        # Also pick up untracked (but not gitignored) files.
        ls_files = spawn.capture(
            ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs,
            cwd=root,
        )
        all_files = set(
            f for f in (diff_files + ls_files).split("\0") if f.strip() != ""
        )

        # Partition files by image path (longest match first for nested paths)
        image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths}
        sorted_paths = sorted(image_rel_paths, key=len, reverse=True)
        for f in all_files:
            for ip in sorted_paths:
                if f.startswith(ip + "/"):
                    image_file_map[ip].add(f)
                    break

        for img in self.images.values():
            rel = str(img.path.relative_to(root))
            # Populate each image's context-file cache so later fingerprinting
            # need not shell out to git again.
            img._context_files_cache = image_file_map.get(rel, set())

    def __iter__(self) -> Iterator[Image]:
        return iter(self.images.values())

A collection of mzbuild Images.

Creating a repository will walk the filesystem beneath root to automatically discover all contained Images.

Iterating over a repository yields the contained images in an arbitrary order.

Args: root: The path to the root of the repository. arch: The CPU architecture to build for. profile: What profile to build the repository in. coverage: Whether to enable code coverage instrumentation. sanitizer: Whether to use a sanitizer (address, thread, leak, memory, none). image_registry: The Docker image registry to pull images from and push images to. image_prefix: A prefix to apply to all Docker image names.

Attributes: images: A mapping from image name to Image for all contained images. compositions: A mapping from composition name to the directory containing its mzcompose.py file.

Repository( root: pathlib.Path, arch: materialize.xcompile.Arch = <Arch.X86_64: 'x86_64'>, profile: Profile = <Profile.OPTIMIZED: 2>, coverage: bool = False, sanitizer: materialize.rustc_flags.Sanitizer = <Sanitizer.none: 'none'>, image_registry: str = 'ghcr.io/materializeinc/materialize', image_prefix: str = '')
1359    def __init__(
1360        self,
1361        root: Path,
1362        arch: Arch = Arch.host(),
1363        profile: Profile = (
1364            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
1365        ),
1366        coverage: bool = False,
1367        sanitizer: Sanitizer = Sanitizer.none,
1368        image_registry: str = image_registry(),
1369        image_prefix: str = "",
1370    ):
1371        self.rd = RepositoryDetails(
1372            root,
1373            arch,
1374            profile,
1375            coverage,
1376            sanitizer,
1377            image_registry,
1378            image_prefix,
1379        )
1380        self.images: dict[str, Image] = {}
1381        self.compositions: dict[str, Path] = {}
1382        for path, dirs, files in os.walk(self.root, topdown=True):
1383            if path == str(root / "misc"):
1384                dirs.remove("python")
1385            # Filter out some particularly massive ignored directories to keep
1386            # things snappy. Not required for correctness.
1387            dirs[:] = set(dirs) - {
1388                ".git",
1389                ".mypy_cache",
1390                "target",
1391                "target-ra",
1392                "target-xcompile",
1393                "mzdata",
1394                "node_modules",
1395                "venv",
1396            }
1397            if "mzbuild.yml" in files:
1398                image = Image(self.rd, Path(path))
1399                if not image.name:
1400                    raise ValueError(f"config at {path} missing name")
1401                if image.name in self.images:
1402                    raise ValueError(f"image {image.name} exists twice")
1403                self.images[image.name] = image
1404            if "mzcompose.py" in files:
1405                name = Path(path).name
1406                if name in self.compositions:
1407                    raise ValueError(f"composition {name} exists twice")
1408                self.compositions[name] = Path(path)
1409
1410        # Validate dependencies.
1411        for image in self.images.values():
1412            for d in image.depends_on:
1413                if d not in self.images:
1414                    raise ValueError(
1415                        f"image {image.name} depends on non-existent image {d}"
1416                    )
rd
images: dict[str, Image]
compositions: dict[str, pathlib.Path]
@staticmethod
def install_arguments(parser: argparse.ArgumentParser) -> None:
1418    @staticmethod
1419    def install_arguments(parser: argparse.ArgumentParser) -> None:
1420        """Install options to configure a repository into an argparse parser.
1421
1422        This function installs the following options:
1423
1424          * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the
1425            `profile` repository attribute.
1426          * The `--coverage` boolean option to control the `coverage` repository
1427            attribute.
1428
1429        Use `Repository.from_arguments` to construct a repository from the
1430        parsed command-line arguments.
1431        """
1432        build_mode = parser.add_mutually_exclusive_group()
1433        build_mode.add_argument(
1434            "--dev",
1435            action="store_true",
1436            help="build Rust binaries with the dev profile",
1437        )
1438        build_mode.add_argument(
1439            "--release",
1440            action="store_true",
1441            help="build Rust binaries with the release profile (default)",
1442        )
1443        build_mode.add_argument(
1444            "--optimized",
1445            action="store_true",
1446            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
1447        )
1448        parser.add_argument(
1449            "--coverage",
1450            help="whether to enable code coverage compilation flags",
1451            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
1452            action="store_true",
1453        )
1454        parser.add_argument(
1455            "--sanitizer",
1456            help="whether to enable a sanitizer",
1457            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
1458            type=Sanitizer,
1459            choices=Sanitizer,
1460        )
1461        parser.add_argument(
1462            "--arch",
1463            default=Arch.host(),
1464            help="the CPU architecture to build for",
1465            type=Arch,
1466            choices=Arch,
1467        )
1468        parser.add_argument(
1469            "--image-registry",
1470            default=image_registry(),
1471            help="the Docker image registry to pull images from and push images to",
1472        )
1473        parser.add_argument(
1474            "--image-prefix",
1475            default="",
1476            help="a prefix to apply to all Docker image names",
1477        )

Install options to configure a repository into an argparse parser.

This function installs the following options:

  • The mutually-exclusive --dev/--optimized/--release options to control the profile repository attribute.
  • The --coverage boolean option to control the coverage repository attribute.

Use Repository.from_arguments to construct a repository from the parsed command-line arguments.

@classmethod
def from_arguments(cls, root: pathlib.Path, args: argparse.Namespace) -> Repository:
1479    @classmethod
1480    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
1481        """Construct a repository from command-line arguments.
1482
1483        The provided namespace must contain the options installed by
1484        `Repository.install_arguments`.
1485        """
1486        if args.release:
1487            profile = Profile.RELEASE
1488        elif args.optimized:
1489            profile = Profile.OPTIMIZED
1490        elif args.dev:
1491            profile = Profile.DEV
1492        else:
1493            profile = (
1494                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
1495            )
1496
1497        return cls(
1498            root,
1499            profile=profile,
1500            coverage=args.coverage,
1501            sanitizer=args.sanitizer,
1502            image_registry=args.image_registry,
1503            image_prefix=args.image_prefix,
1504            arch=args.arch,
1505        )

Construct a repository from command-line arguments.

The provided namespace must contain the options installed by Repository.install_arguments.

root: pathlib.Path
1507    @property
1508    def root(self) -> Path:
1509        """The path to the root directory for the repository."""
1510        return self.rd.root

The path to the root directory for the repository.

def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
1512    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
1513        """Compute the dependency set necessary to build target images.
1514
1515        The dependencies of `targets` will be crawled recursively until the
1516        complete set of transitive dependencies is determined or a circular
1517        dependency is discovered. The returned dependency set will be sorted
1518        in topological order.
1519
1520        Raises:
1521           ValueError: A circular dependency was discovered in the images
1522               in the repository.
1523        """
1524        # Pre-fetch all crate input files in a single batched git call,
1525        # replacing ~118 individual subprocess pairs with one pair.
1526        self.rd.cargo_workspace.precompute_crate_inputs()
1527        # Pre-fetch all image context files in a single batched git call,
1528        # replacing ~41 individual subprocess pairs with one pair.
1529        self._precompute_image_context_files()
1530
1531        resolved = OrderedDict()
1532        visiting = set()
1533
1534        def visit(image: Image, path: list[str] = []) -> None:
1535            if image.name in resolved:
1536                return
1537            if image.name in visiting:
1538                diagram = " -> ".join(path + [image.name])
1539                raise ValueError(f"circular dependency in mzbuild: {diagram}")
1540
1541            visiting.add(image.name)
1542            for d in sorted(image.depends_on):
1543                visit(self.images[d], path + [image.name])
1544            resolved[image.name] = image
1545
1546        for target_image in sorted(targets, key=lambda image: image.name):
1547            visit(target_image)
1548
1549        return DependencySet(resolved.values())

Compute the dependency set necessary to build target images.

The dependencies of targets will be crawled recursively until the complete set of transitive dependencies is determined or a circular dependency is discovered. The returned dependency set will be sorted in topological order.

Raises: ValueError: A circular dependency was discovered in the images in the repository.

def publish_multiarch_images(tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]) -> None:
def publish_multiarch_images(
    tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Publishes a set of docker images under a given tag.

    Creates and pushes a multi-arch Docker manifest under `tag` for every
    image position across `dependency_sets` (one dependency set per
    architecture), optionally mirrors the manifests to GHCR when a
    `GITHUB_GHCR_TOKEN` is available, and finally posts a Buildkite
    annotation recording the pushed tag.

    Args:
        tag: The Docker tag to publish the manifests under.
        dependency_sets: One resolved dependency set per architecture; the
            sets must contain identical images in identical order.
    """
    # Mutable tags are always re-pushed so they track the latest build;
    # other tags are skipped when already present in the registry.
    always_push_tags = ("latest", "unstable")
    if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"):
        spawn.runv(
            [
                "docker",
                "login",
                "ghcr.io",
                "-u",
                "materialize-bot",
                "--password-stdin",
            ],
            stdin=ghcr_token.encode(),
        )
    for images in zip(*dependency_sets):
        names = {image.image.name for image in images}
        assert len(names) == 1, "dependency sets did not contain identical images"
        name = images[0].image.docker_name(tag)
        if tag in always_push_tags or not is_docker_image_pushed(name):
            spawn.runv(
                [
                    "docker",
                    "manifest",
                    "create",
                    name,
                    *(image.spec() for image in images),
                ]
            )
            spawn.runv(["docker", "manifest", "push", name])

        ghcr_name = f"{GHCR_PREFIX}{name}"
        if ghcr_token and (
            tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name)
        ):
            spawn.runv(
                [
                    "docker",
                    "manifest",
                    "create",
                    ghcr_name,
                    *(f"{GHCR_PREFIX}{image.spec()}" for image in images),
                ]
            )
            spawn.runv(["docker", "manifest", "push", ghcr_name])
    # Fixed typo in log message: "Nofifying" -> "Notifying".
    print(f"--- Notifying for tag {tag}")
    markdown = f"""Pushed images with Docker tag `{tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{tag}",
        ],
        stdin=markdown.encode(),
    )

Publishes a set of docker images under a given tag.

def tag_multiarch_images(new_tag: str, previous_tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]) -> None:
def tag_multiarch_images(
    new_tag: str, previous_tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Retags an already-published set of docker images with a new tag.

    For every image position across `dependency_sets`, pulls the image
    published under `previous_tag`, tags it as `new_tag`, pushes the
    result, and finally posts a Buildkite annotation recording the pushed
    tag. (The previous docstring incorrectly described this as publishing.)

    Args:
        new_tag: The Docker tag to apply.
        previous_tag: The existing Docker tag to copy images from.
        dependency_sets: One resolved dependency set per architecture; the
            sets must contain identical images in identical order.
    """
    for images in zip(*dependency_sets):
        names = {image.image.name for image in images}
        assert len(names) == 1, "dependency sets did not contain identical images"
        new_name = images[0].image.docker_name(new_tag)

        # Doesn't have tagged images
        if images[0].image.name == "mz":
            continue

        previous_name = images[0].image.docker_name(previous_tag)
        spawn.runv(["docker", "pull", previous_name])
        spawn.runv(["docker", "tag", previous_name, new_name])
        spawn.runv(["docker", "push", new_name])
    # Fixed typo in log message: "Nofifying" -> "Notifying".
    print(f"--- Notifying for tag {new_tag}")
    markdown = f"""Pushed images with Docker tag `{new_tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{new_tag}",
        ],
        stdin=markdown.encode(),
    )

Tags an already-published set of docker images with a new tag.