misc/python/materialize/mzbuild — the implementation of the mzbuild system for Docker images.
For an overview of what mzbuild is and why it exists, see the user-facing
documentation in doc/developer/mzbuild.md.
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

"""The implementation of the mzbuild system for Docker images.

For an overview of what mzbuild is and why it exists, see the [user-facing
documentation][user-docs].

[user-docs]: https://github.com/MaterializeInc/materialize/blob/main/doc/developer/mzbuild.md
"""

import argparse
import base64
import collections
import hashlib
import io
import json
import multiprocessing
import os
import platform
import re
import selectors
import shutil
import stat
import subprocess
import sys
import time
from collections import OrderedDict
from collections.abc import Callable, Iterable, Iterator, Sequence
from concurrent.futures import ThreadPoolExecutor, as_completed
from enum import Enum, auto
from functools import cache
from pathlib import Path
from tempfile import TemporaryFile
from threading import Lock
from typing import IO, Any, cast

import requests
import yaml
from requests.auth import HTTPBasicAuth

from materialize import MZ_ROOT, buildkite, cargo, git, rustc_flags, spawn, ui, xcompile
from materialize.docker import image_registry
from materialize.rustc_flags import Sanitizer
from materialize.xcompile import Arch, target

GHCR_PREFIX = "ghcr.io/materializeinc/"


class RustIncrementalBuildFailure(Exception):
    """Raised when a Cargo build fails with output matching a known
    incremental-compilation failure, as opposed to a genuine compile error."""


def run_and_detect_rust_incremental_build_failure(
    cmd: list[str], cwd: str | Path
) -> subprocess.CompletedProcess:
    """Run `cmd` in `cwd`, streaming its output while scanning for known
    Rust incremental build failures.

    This function is complex since it prints out each line immediately to
    stdout/stderr, but still records them at the same time so that we can scan
    for known incremental build failures.

    Raises:
        RustIncrementalBuildFailure: the command failed and its output matched
            a known incremental-compilation failure signature.
        subprocess.CalledProcessError: the command failed for any other reason.
    """
    stdout_result = io.StringIO()
    stderr_result = io.StringIO()
    p = subprocess.Popen(
        cmd,
        # BUG FIX: `cwd` was accepted but never forwarded to Popen, so the
        # command ran in the caller's working directory instead.
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
        env={**os.environ, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"},
    )
    assert p.stdout is not None
    assert p.stderr is not None

    sel = selectors.DefaultSelector()
    try:
        sel.register(p.stdout, selectors.EVENT_READ)
        sel.register(p.stderr, selectors.EVENT_READ)
        # Non-blocking reads so one quiet stream cannot stall the other.
        os.set_blocking(p.stdout.fileno(), False)
        os.set_blocking(p.stderr.fileno(), False)
        running = True
        while running:
            for key, _ in sel.select():
                output = io.StringIO()
                running = False
                while True:
                    new_output = key.fileobj.read(1024)  # type: ignore
                    if not new_output:
                        break
                    output.write(new_output)
                contents = output.getvalue()
                output.close()
                if not contents:
                    continue
                # Keep running as long as stdout or stderr have any content
                running = True
                if key.fileobj is p.stdout:
                    print(contents, end="", flush=True)
                    stdout_result.write(contents)
                else:
                    print(contents, end="", file=sys.stderr, flush=True)
                    stderr_result.write(contents)
    finally:
        # BUG FIX: release the selector's underlying epoll/kqueue descriptor;
        # previously it was never closed.
        sel.close()

    p.wait()
    retcode = p.poll()
    assert retcode is not None
    stdout_contents = stdout_result.getvalue()
    stdout_result.close()
    stderr_contents = stderr_result.getvalue()
    stderr_result.close()
    if retcode:
        # Signatures of a corrupted incremental-compilation cache.
        incremental_build_failure_msgs = [
            "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs",
            "ld.lld: error: undefined symbol",
            "signal: 11, SIGSEGV",
        ]
        combined = stdout_contents + stderr_contents
        if any(msg in combined for msg in incremental_build_failure_msgs):
            raise RustIncrementalBuildFailure()

        raise subprocess.CalledProcessError(
            retcode, p.args, output=stdout_contents, stderr=stderr_contents
        )
    return subprocess.CompletedProcess(
        p.args, retcode, stdout_contents, stderr_contents
    )
class Fingerprint(bytes):
    """A SHA-1 hash of the inputs to an `Image`.

    Rendered as base32 so mzbuild fingerprints are visually distinct from
    Git's hex-encoded SHA-1 hashes while remaining URL safe.
    """

    def __str__(self) -> str:
        encoded = base64.b32encode(bytes(self))
        return encoded.decode("ascii")


class Profile(Enum):
    """The Cargo build profile a repository is built with."""

    RELEASE = auto()
    OPTIMIZED = auto()
    DEV = auto()
class RepositoryDetails:
    """Immutable details about a `Repository`.

    Used internally by mzbuild.

    Attributes:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile the repository is being built with.
        coverage: Whether the repository has code coverage instrumentation
            enabled.
        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
        cargo_workspace: The `cargo.Workspace` associated with the repository.
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch,
        profile: Profile,
        coverage: bool,
        sanitizer: Sanitizer,
        image_registry: str,
        image_prefix: str,
    ):
        self.root = root
        self.arch = arch
        self.profile = profile
        self.coverage = coverage
        self.sanitizer = sanitizer
        self.cargo_workspace = cargo.Workspace(root)
        self.image_registry = image_registry
        self.image_prefix = image_prefix

    def build(
        self,
        subcommand: str,
        rustflags: list[str],
        channel: str | None = None,
        extra_env: dict[str, str] | None = None,
    ) -> list[str]:
        """Start a build invocation for the configured architecture.

        `extra_env` defaults to no extra environment; a `None` default is
        used instead of `{}` to avoid the shared-mutable-default pitfall.
        """
        return xcompile.cargo(
            arch=self.arch,
            channel=channel,
            subcommand=subcommand,
            rustflags=rustflags,
            extra_env=extra_env if extra_env is not None else {},
        )

    def tool(self, name: str) -> list[str]:
        """Start a binutils tool invocation for the configured architecture."""
        if platform.system() != "Linux":
            # We can't use the local tools from macOS to build a Linux executable
            return ["bin/ci-builder", "run", "stable", name]
        # If we're on Linux, trust that the tools are installed instead of
        # loading the slow ci-builder. If you don't have compilation tools
        # installed you can still run `bin/ci-builder run stable
        # bin/mzcompose ...`, and most likely the Cargo build will already
        # fail earlier if you don't have compilation tools installed and
        # run without the ci-builder.
        return [name]

    def cargo_target_dir(self) -> Path:
        """Determine the path to the target directory for Cargo."""
        return self.root / "target-xcompile" / xcompile.target(self.arch)

    def rewrite_builder_path_for_host(self, path: Path) -> Path:
        """Rewrite a path that is relative to the target directory inside the
        builder to a path that is relative to the target directory on the host.

        If the path is not relative to the target directory inside the
        builder, it is returned unchanged.
        """
        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
        try:
            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
        except ValueError:
            return path
@cache
def docker_images() -> frozenset[str]:
    """List the Docker images available on the local machine.

    Returns `repository:tag` strings. Cached for the lifetime of the process,
    so images pulled or built after the first call are not reflected.
    """
    return frozenset(
        spawn.capture(["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"])
        .strip()
        .split("\n")
    )


# On-disk cache of image names known to already exist in a remote registry.
# Shared by `is_docker_image_pushed` and `is_ghcr_image_pushed`.
KNOWN_DOCKER_IMAGES_FILE = Path(MZ_ROOT / "known-docker-images.txt")
# Lazily-loaded in-memory copy of the file above; `None` until first use.
_known_docker_images: set[str] | None = None
# Guards loading of, and appends to, `_known_docker_images` and its file.
_known_docker_images_lock = Lock()


def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Note that this operation requires a rather slow network request.
    """
    global _known_docker_images

    # Lazily populate the known-images cache from disk.
    # NOTE(review): the `None` check happens outside the lock, so two threads
    # may both perform the load; looks benign since both compute the same set
    # — confirm this is intentional.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if not KNOWN_DOCKER_IMAGES_FILE.exists():
                _known_docker_images = set()
            else:
                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                    _known_docker_images = set(line.strip() for line in f)

    if name in _known_docker_images:
        return True

    # Split `name` into repository and tag; an untagged name means "latest".
    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    try:
        if dockerhub_username and dockerhub_token:
            # Authenticated HEAD request directly against the registry API.
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
            )
        else:
            # Anonymous access: fetch a short-lived pull token first, then
            # probe the manifest with it.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
            )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: any network/parse error is treated as "not pushed".
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        # Persist the positive result for future processes.
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
def is_ghcr_image_pushed(name: str) -> bool:
    """Check whether the named image exists in the GitHub Container Registry.

    Consults (and updates) the shared on-disk cache of known-pushed images
    before making a network request.
    """
    global _known_docker_images

    # Lazily populate the shared known-images cache from disk.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if KNOWN_DOCKER_IMAGES_FILE.exists():
                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                    _known_docker_images = {line.strip() for line in f}
            else:
                _known_docker_images = set()

    if name in _known_docker_images:
        return True

    # The GHCR API wants the repository without the registry host prefix.
    stripped = name.removeprefix("ghcr.io/")
    repository, sep, tag = stripped.rpartition(":")
    if not sep:
        repository, tag = stripped, "latest"

    found: bool = False

    try:
        # Anonymous pull token, then a HEAD probe of the manifest.
        token = requests.get(
            "https://ghcr.io/token",
            params={
                "scope": f"repository:{repository}:pull",
            },
        ).json()["token"]
        response = requests.head(
            f"https://ghcr.io/v2/{repository}/manifests/{tag}",
            headers={"Authorization": f"Bearer {token}"},
        )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            found = proc.returncode == 0
        else:
            found = response.status_code == 200

    except Exception as e:
        print(f"Error checking Docker image: {e}")
        return False

    if found:
        # Remember the positive result, in memory and on disk.
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return found
def chmod_x(path: Path) -> None:
    """Set the executable bit on a file or directory."""
    # Mirror each readable bit into the corresponding executable bit, as
    # `chmod +x` does. https://stackoverflow.com/a/30463972/1122351
    current = os.stat(path).st_mode
    os.chmod(path, current | ((current & 0o444) >> 2))


class PreImage:
    """An action to run before building a Docker image.

    Args:
        rd: The `RepositoryDetails` for the repository.
        path: The path to the `Image` associated with this action.
    """

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
        """Prepare a batch of actions.

        This is useful for `PreImage` actions that are more efficient when
        their actions are applied to several images in bulk.

        Returns an arbitrary output that is passed to `PreImage.run`.
        """

    def run(self, prep: Any) -> None:
        """Perform the action.

        Args:
            prep: Any prep work returned by `prepare_batch`.
        """

    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action."""
        raise NotImplementedError

    def extra(self) -> str:
        """Returns additional data for incorporation in the fingerprint."""
        return ""
class Copy(PreImage):
    """A `PreImage` action which copies files from a directory.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)

        # 'source' and 'destination' are required; 'matching' defaults to
        # every file under 'source'.
        self.source = config.pop("source", None)
        if self.source is None:
            raise ValueError("mzbuild config is missing 'source' argument")

        self.destination = config.pop("destination", None)
        if self.destination is None:
            raise ValueError("mzbuild config is missing 'destination' argument")

        self.matching = config.pop("matching", "*")

    def run(self, prep: Any) -> None:
        """Copy each matching file into the image's build context."""
        super().run(prep)
        for src in self.inputs():
            dst = self.path / self.destination / src
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(self.rd.root / self.source / src, dst)

    def inputs(self) -> set[str]:
        """Return the git-tracked files under `source` matching `matching`."""
        return set(git.expand_globs(self.rd.root / self.source, self.matching))


class CargoPreImage(PreImage):
    """A `PreImage` action that uses Cargo."""

    @staticmethod
    @cache
    def _cargo_shared_inputs() -> frozenset[str]:
        """Resolve shared Cargo inputs once and cache the result.

        This expands the 'ci/builder' directory glob and filters out
        non-existent files like '.cargo/config', avoiding repeated
        git subprocess calls in fingerprint().
        """
        inputs: set[str] = set()
        inputs |= git.expand_globs(Path("."), "ci/builder/**")
        inputs.add("Cargo.toml")
        inputs.add("Cargo.lock")
        if Path(".cargo/config").exists():
            inputs.add(".cargo/config")
        return frozenset(inputs)

    def inputs(self) -> set[str]:
        return set(CargoPreImage._cargo_shared_inputs())

    def extra(self) -> str:
        # Cargo images depend on the release mode and whether
        # coverage/sanitizer is enabled.
        flags: list[str] = []
        # BUG FIX: these previously used `flags += "release"` etc., which
        # extends the list with the *characters* of the string (producing
        # e.g. "a,e,e,e,l,r,s") rather than appending the flag name. Note
        # that fixing this changes the fingerprint input, forcing a one-time
        # rebuild of cargo-based images.
        if self.rd.profile == Profile.RELEASE:
            flags.append("release")
        if self.rd.profile == Profile.OPTIMIZED:
            flags.append("optimized")
        if self.rd.coverage:
            flags.append("coverage")
        if self.rd.sanitizer != Sanitizer.none:
            flags.append(self.rd.sanitizer.value)
        flags.sort()
        return ",".join(flags)
class CargoBuild(CargoPreImage):
    """A `PreImage` action that builds a single binary with Cargo.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)
        # 'bin', 'example', and 'features' accept either a scalar or a list;
        # normalize to lists.
        bin = config.pop("bin", [])
        self.bins = bin if isinstance(bin, list) else [bin]
        example = config.pop("example", [])
        self.examples = example if isinstance(example, list) else [example]
        self.strip = config.pop("strip", True)
        self.extract = config.pop("extract", {})
        features = config.pop("features", [])
        self.features = features if isinstance(features, list) else [features]
        # Memoized result of `inputs()`. This replaces `@cache` on the
        # method, which keys on `self` and therefore keeps every CargoBuild
        # instance alive for the lifetime of the process (ruff B019).
        self._inputs_cache: set[str] | None = None

        if len(self.bins) == 0 and len(self.examples) == 0:
            raise ValueError("mzbuild config is missing pre-build target")

    @staticmethod
    def generate_cargo_build_command(
        rd: RepositoryDetails,
        bins: list[str],
        examples: list[str],
        features: list[str] | None = None,
    ) -> list[str]:
        """Construct the `cargo build` invocation for the given targets.

        All `bins` and `examples` are built in one invocation so Cargo can
        parallelize across them.
        """
        # Coverage and sanitizer builds need dedicated rustc flags; plain
        # builds just enable tokio's unstable cfg.
        rustflags = (
            rustc_flags.coverage
            if rd.coverage
            else (
                rustc_flags.sanitizer[rd.sanitizer]
                if rd.sanitizer != Sanitizer.none
                else ["--cfg=tokio_unstable"]
            )
        )
        # Sanitizer builds cross-compile C dependencies against the x-tools
        # toolchain sysroot.
        cflags = (
            [
                f"--target={target(rd.arch)}",
                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
                "-fuse-ld=lld",
                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
            ]
            + rustc_flags.sanitizer_cflags[rd.sanitizer]
            if rd.sanitizer != Sanitizer.none
            else []
        )
        extra_env = (
            {
                "CFLAGS": " ".join(cflags),
                "CXXFLAGS": " ".join(cflags),
                "LDFLAGS": " ".join(cflags),
                "CXXSTDLIB": "stdc++",
                "CC": "cc",
                "CXX": "c++",
                "CPP": "clang-cpp-18",
                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
            }
            if rd.sanitizer != Sanitizer.none
            else {}
        )

        cargo_build = rd.build(
            "build", channel=None, rustflags=rustflags, extra_env=extra_env
        )

        # Pass both the targets and their owning packages so Cargo builds
        # exactly what is needed.
        packages = set()
        for bin in bins:
            cargo_build.extend(["--bin", bin])
            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
        for example in examples:
            cargo_build.extend(["--example", example])
            packages.add(rd.cargo_workspace.crate_for_example(example).name)
        cargo_build.extend(f"--package={p}" for p in packages)

        if rd.profile == Profile.RELEASE:
            cargo_build.append("--release")
        if rd.profile == Profile.OPTIMIZED:
            cargo_build.extend(["--profile", "optimized"])
        if rd.sanitizer != Sanitizer.none:
            # ASan doesn't work with jemalloc
            cargo_build.append("--no-default-features")
            # Uses more memory, so reduce the number of jobs
            cargo_build.extend(
                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
            )
        if features:
            cargo_build.append(f"--features={','.join(features)}")

        return cargo_build

    @classmethod
    def prepare_batch(cls, cargo_builds: list["PreImage"]) -> dict[str, Any]:
        """Compile all targets for a batch of `CargoBuild` actions at once.

        Returns a dict whose "cargo" key holds the JSON-formatted build
        output, later consumed by `build` to locate build-script artifacts.
        """
        super().prepare_batch(cargo_builds)

        if not cargo_builds:
            return {}

        # Building all binaries and examples in the same `cargo build` command
        # allows Cargo to link in parallel with other work, which can
        # meaningfully speed up builds.

        rd: RepositoryDetails | None = None
        builds = cast(list[CargoBuild], cargo_builds)
        bins = set()
        examples = set()
        features = set()
        for build in builds:
            # All builds in a batch share one RepositoryDetails; take the first.
            if not rd:
                rd = build.rd
            bins.update(build.bins)
            examples.update(build.examples)
            features.update(build.features)
        assert rd

        ui.section(f"Common build for: {', '.join(bins | examples)}")

        cargo_build = cls.generate_cargo_build_command(
            rd, list(bins), list(examples), list(features) if features else None
        )

        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

        # Re-run with JSON-formatted messages and capture the output so we can
        # later analyze the build artifacts in `run`. This should be nearly
        # instantaneous since we just compiled above with the same crates and
        # features. (We don't want to do the compile above with JSON-formatted
        # messages because it wouldn't be human readable.)
        json_output = spawn.capture(
            cargo_build + ["--message-format=json"],
            cwd=rd.root,
        )
        prep = {"cargo": json_output}

        return prep

    def build(self, build_output: dict[str, Any]) -> None:
        """Copy compiled binaries/examples into the mzbuild context and
        extract any configured build-script outputs.

        Args:
            build_output: the prep dict produced by `prepare_batch`.
        """
        cargo_profile = (
            "release"
            if self.rd.profile == Profile.RELEASE
            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
        )

        def copy(src: Path, relative_dst: Path) -> None:
            # Copy one artifact into the image context, then strip or slim
            # its debug info as configured.
            exe_path = self.path / relative_dst
            exe_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, exe_path)

            if self.strip:
                # The debug information is large enough that it slows down CI,
                # since we're packaging these binaries up into Docker images and
                # shipping them around.
                spawn.runv(
                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
                    cwd=self.rd.root,
                )
            else:
                # Even if we've been asked not to strip the binary, remove the
                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
                # indexes that speed up launching a debugger against the binary,
                # and we're happy to have slower debugger start up in exchange for
                # smaller binaries. Plus the sections have been obsoleted by a
                # `.debug_names` section in DWARF 5, and so debugger support for
                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
                # See: https://github.com/rust-lang/rust/issues/46034
                spawn.runv(
                    [
                        *self.rd.tool("objcopy"),
                        "-R",
                        ".debug_pubnames",
                        "-R",
                        ".debug_pubtypes",
                        exe_path,
                    ],
                    cwd=self.rd.root,
                )

        for bin in self.bins:
            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
            copy(src_path, bin)
        for example in self.examples:
            src_path = (
                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
            )
            copy(src_path, Path("examples") / example)

        if self.extract:
            cargo_build_json_output = build_output["cargo"]

            target_dir = self.rd.cargo_target_dir()
            for line in cargo_build_json_output.split("\n"):
                if line.strip() == "" or not line.startswith("{"):
                    continue
                message = json.loads(line)
                if message["reason"] != "build-script-executed":
                    continue
                out_dir = self.rd.rewrite_builder_path_for_host(
                    Path(message["out_dir"])
                )
                if not out_dir.is_relative_to(target_dir):
                    # Some crates are built for both the host and the target.
                    # Ignore the built-for-host out dir.
                    continue
                # parse the package name from a package_id that looks like one of:
                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
                # file:///path/to/my-package#0.1.0
                package_id = message["package_id"]
                if "@" in package_id:
                    package = package_id.split("@")[0].split("#")[-1]
                else:
                    package = message["package_id"].split("#")[0].split("/")[-1]
                for src, dst in self.extract.get(package, {}).items():
                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

        # NOTE(review): `acquired` otherwise lives on `ResolvedImage`; setting
        # it on the CargoBuild action looks vestigial — confirm before removing.
        self.acquired = True

    def run(self, prep: dict[str, Any]) -> None:
        """Perform the action: copy the artifacts built by `prepare_batch`."""
        super().run(prep)
        self.build(prep)

    def inputs(self) -> set[str]:
        """Return the source files this build depends on.

        Memoized per instance via `_inputs_cache` (see `__init__`).
        """
        if self._inputs_cache is not None:
            return self._inputs_cache

        deps = set()

        for bin in self.bins:
            crate = self.rd.cargo_workspace.crate_for_bin(bin)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
        for example in self.examples:
            crate = self.rd.cargo_workspace.crate_for_example(example)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
                crate, dev=True
            )

        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
        self._inputs_cache = inputs
        return inputs
class Image:
    """A Docker image whose build and dependencies are managed by mzbuild.

    An image corresponds to a directory in a repository that contains a
    `mzbuild.yml` file. This directory is called an "mzbuild context."

    Attributes:
        name: The name of the image.
        publish: Whether the image should be pushed to Docker Hub.
        depends_on: The names of the images upon which this image depends.
        path: The path to the directory containing the `mzbuild.yml`
            configuration file.
        pre_images: Optional actions to perform before running `docker build`.
        build_args: An optional list of --build-arg to pass to the dockerfile
    """

    # Matches an `MZFROM <image>` directive at the start of a Dockerfile line.
    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")

    _context_files_cache: set[str] | None

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path
        self._context_files_cache = None
        self.pre_images: list[PreImage] = []

        with open(self.path / "mzbuild.yml") as f:
            config = yaml.safe_load(f)

        self.name: str = config.pop("name")
        self.publish: bool = config.pop("publish", True)
        self.description: str | None = config.pop("description", None)
        self.mainline: bool = config.pop("mainline", True)
        for pre_image in config.pop("pre-image", []):
            typ = pre_image.pop("type", None)
            if typ == "cargo-build":
                action: PreImage = CargoBuild(self.rd, self.path, pre_image)
            elif typ == "copy":
                action = Copy(self.rd, self.path, pre_image)
            else:
                raise ValueError(
                    f"mzbuild config in {self.path} has unknown pre-image type"
                )
            self.pre_images.append(action)
        self.build_args = config.pop("build-args", {})

        if re.search(r"[^A-Za-z0-9\-]", self.name):
            raise ValueError(
                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
            )

        # Dependencies are declared via MZFROM directives in the Dockerfile.
        with open(self.path / "Dockerfile", "rb") as f:
            self.depends_on: list[str] = [
                m.group(1).decode()
                for line in f
                if (m := self._DOCKERFILE_MZFROM_RE.match(line))
            ]

    def sync_description(self) -> None:
        """Sync the description to Docker Hub if the image is publishable
        and a README.md file exists."""

        if not self.publish:
            ui.say(f"{self.name} is not publishable")
            return

        readme_path = self.path / "README.md"
        if not readme_path.exists():
            ui.say(f"{self.name} has no README.md or description")
            return

        docker_config = os.getenv("DOCKER_CONFIG")
        cmd = ["docker", "pushrm", f"--file={readme_path}"]
        if docker_config:
            cmd.append(f"--config={docker_config}/config.json")
        if self.description:
            cmd.append(f"--short={self.description}")
        cmd.append(self.docker_name())
        spawn.runv(cmd)

    def docker_name(self, tag: str | None = None) -> str:
        """Return the name of the image on Docker Hub at the given tag."""
        base = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
        return f"{base}:{tag}" if tag else base
class ResolvedImage:
    """An `Image` whose dependencies have been resolved.

    Attributes:
        image: The underlying `Image`.
        acquired: Whether the image is available locally.
        dependencies: A mapping from dependency name to `ResolvedImage` for
            each of the images that `image` depends upon.
    """

    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
        self.image = image
        self.acquired = False
        self.dependencies = {}
        for d in dependencies:
            self.dependencies[d.name] = d

    def __repr__(self) -> str:
        return f"ResolvedImage<{self.spec()}>"

    @property
    def name(self) -> str:
        """The name of the underlying image."""
        return self.image.name

    @property
    def publish(self) -> bool:
        """Whether the underlying image should be pushed to Docker Hub."""
        return self.image.publish

    # NOTE(review): `@cache` on instance methods (`spec`, `inputs`,
    # `fingerprint`) keys on `self` and keeps every ResolvedImage alive for
    # the process lifetime (ruff B019) — presumably acceptable for this
    # short-lived CLI; confirm.
    @cache
    def spec(self) -> str:
        """Return the "spec" for the image.

        A spec is the unique identifier for the image given its current
        fingerprint. It is a valid Docker Hub name.
        """
        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")

    def write_dockerfile(self) -> IO[bytes]:
        """Render the Dockerfile without mzbuild directives.

        Returns:
            file: A handle to a temporary file containing the adjusted
            Dockerfile."""
        with open(self.image.path / "Dockerfile", "rb") as f:
            lines = f.readlines()
        f = TemporaryFile()
        # Rewrite each MZFROM directive into a plain FROM that points at the
        # dependency's fingerprinted spec.
        for line in lines:
            match = Image._DOCKERFILE_MZFROM_RE.match(line)
            if match:
                image = match.group(1).decode()
                spec = self.dependencies[image].spec()
                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
            f.write(line)
        f.seek(0)
        return f

    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
        """Build the image from source.

        Requires that the caller has already acquired all dependencies and
        prepared all `PreImage` actions via `PreImage.prepare_batch`.
        """
        ui.section(f"Building {self.spec()}")
        # Scrub gitignored files from the context so they can't leak into the
        # image (or the fingerprint's notion of the context).
        spawn.runv(["git", "clean", "-ffdX", self.image.path])

        for pre_image in self.image.pre_images:
            pre_image.run(prep[type(pre_image)])
        build_args = {
            **self.image.build_args,
            "BUILD_PROFILE": self.image.rd.profile.name,
            "ARCH_GCC": str(self.image.rd.arch),
            "ARCH_GO": self.image.rd.arch.go_str(),
            "CI_SANITIZER": str(self.image.rd.sanitizer),
        }
        f = self.write_dockerfile()

        # Prefer buildx (needed for pushing); fall back to plain `docker
        # build` when it is unavailable and we don't need to push.
        try:
            spawn.capture(["docker", "buildx", "version"])
        except subprocess.CalledProcessError:
            if push:
                print(
                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
                )
                raise
            print(
                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
            )
            print("Falling back to docker build")
            cmd: Sequence[str] = [
                "docker",
                "build",
                "-f",
                "-",
                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                "-t",
                self.spec(),
                f"--platform=linux/{self.image.rd.arch.go_str()}",
                str(self.image.path),
            ]
        else:
            cmd: Sequence[str] = [
                "docker",
                "buildx",
                "build",
                "--progress=plain",  # less noisy
                "-f",
                "-",
                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                # Tag for both Docker Hub and the GHCR mirror.
                "-t",
                f"docker.io/{self.spec()}",
                "-t",
                f"ghcr.io/materializeinc/{self.spec()}",
                f"--platform=linux/{self.image.rd.arch.go_str()}",
                str(self.image.path),
                *(["--push"] if push else ["--load"]),
            ]

        # Authenticate against GHCR when a token is available (CI).
        if token := os.getenv("GITHUB_GHCR_TOKEN"):
            spawn.runv(
                [
                    "docker",
                    "login",
                    "ghcr.io",
                    "-u",
                    "materialize-bot",
                    "--password-stdin",
                ],
                stdin=token.encode(),
            )

        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)

    def try_pull(self, max_retries: int) -> bool:
        """Download the image if it does not exist locally. Returns whether it was found."""
        ui.header(f"Acquiring {self.spec()}")
        command = ["docker", "pull"]
        # --quiet skips printing the progress bar, which does not display well in CI.
        if ui.env_is_truthy("CI"):
            command.append("--quiet")
        command.append(self.spec())
        if not self.acquired:
            sleep_time = 1
            for retry in range(1, max_retries + 1):
                try:
                    spawn.runv(
                        command,
                        stdin=subprocess.DEVNULL,
                        stdout=sys.stderr.buffer,
                    )
                    self.acquired = True
                    break
                except subprocess.CalledProcessError:
                    if retry < max_retries:
                        # There seems to be no good way to tell what error
                        # happened based on error code
                        # (https://github.com/docker/cli/issues/538) and we
                        # want to print output directly to terminal.
                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
                            # Poll the upstream Buildkite build that should
                            # eventually push this image.
                            # NOTE(review): this inner loop reuses (shadows)
                            # the outer `retry` variable, and `build_status`
                            # may be unbound when `get_build_status` raises on
                            # the first iteration — confirm intended.
                            for retry in range(max_retries):
                                try:
                                    build_status = buildkite.get_build_status(build)
                                except subprocess.CalledProcessError:
                                    time.sleep(sleep_time)
                                    sleep_time = min(sleep_time * 2, 10)
                                    break
                                print(f"Build {build} status: {build_status}")
                                if build_status == "failed":
                                    print(
                                        f"Build {build} has been marked as failed, exiting hard"
                                    )
                                    sys.exit(1)
                                elif build_status == "success":
                                    break
                                assert (
                                    build_status == "pending"
                                ), f"Unknown build status {build_status}"
                                time.sleep(1)
                        else:
                            # Plain exponential backoff, capped at 10s.
                            print(f"Retrying in {sleep_time}s ...")
                            time.sleep(sleep_time)
                            sleep_time = min(sleep_time * 2, 10)
                        continue
                    else:
                        break
        return self.acquired

    def is_published_if_necessary(self) -> bool:
        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
        if not self.publish:
            return False
        spec = self.spec()
        # Normalize to the Docker Hub name, then derive the GHCR mirror name.
        if spec.startswith(GHCR_PREFIX):
            spec = spec.removeprefix(GHCR_PREFIX)
        ghcr_spec = f"{GHCR_PREFIX}{spec}"
        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
            ui.say(f"{spec} already exists")
            return True
        return False

    # NOTE(review): mutable default arguments are shared across calls; safe
    # here only because they are never mutated — confirm.
    def run(
        self,
        args: list[str] = [],
        docker_args: list[str] = [],
        env: dict[str, str] = {},
    ) -> None:
        """Run a command in the image.

        Creates a container from the image and runs the command described by
        `args` in the image.
        """
        envs = []
        for key, val in env.items():
            envs.extend(["--env", f"{key}={val}"])
        spawn.runv(
            [
                "docker",
                "run",
                "--tty",
                "--rm",
                *envs,
                "--init",
                *docker_args,
                self.spec(),
                *args,
            ],
        )

    def list_dependencies(self, transitive: bool = False) -> set[str]:
        """Return the names of this image's dependencies, optionally
        including transitive ones."""
        out = set()
        for dep in self.dependencies.values():
            out.add(dep.name)
            if transitive:
                out |= dep.list_dependencies(transitive)
        return out

    @cache
    def inputs(self, transitive: bool = False) -> set[str]:
        """List the files tracked as inputs to the image.

        These files are used to compute the fingerprint for the image. See
        `ResolvedImage.fingerprint` for details.

        Returns:
            inputs: A list of input files, relative to the root of the
                repository.
        """
        if self.image._context_files_cache is not None:
            paths = set(self.image._context_files_cache)
        else:
            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
        if not paths:
            # While we could find an `mzbuild.yml` file for this service, expland_globs didn't
            # return any files that matched this service. At the very least, the `mzbuild.yml`
            # file itself should have been returned. We have a bug if paths is empty.
            raise AssertionError(
                f"{self.image.name} mzbuild exists but its files are unknown to git"
            )
        for pre_image in self.image.pre_images:
            paths |= pre_image.inputs()
        if transitive:
            for dep in self.dependencies.values():
                paths |= dep.inputs(transitive)
        return paths

    @cache
    def fingerprint(self) -> Fingerprint:
        """Fingerprint the inputs to the image.

        Compute the fingerprint of the image. Changing the contents of any of
        the files or adding or removing files to the image will change the
        fingerprint, as will modifying the inputs to any of its dependencies.

        The image considers all non-gitignored files in its mzbuild context to
        be inputs. If it has a pre-image action, that action may add additional
        inputs via `PreImage.inputs`.
        """
        self_hash = hashlib.sha1()
        # When inputs come from precomputed sources (crate and image context
        # batching + resolved CargoPreImage paths), they are already individual
        # file paths from git. Skip the expensive expand_globs subprocess calls.
        inputs = self.inputs()
        if self.image._context_files_cache is not None:
            resolved_inputs = sorted(inputs)
        else:
            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
        for rel_path in resolved_inputs:
            abs_path = self.image.rd.root / rel_path
            file_hash = hashlib.sha1()
            raw_file_mode = os.lstat(abs_path).st_mode
            # Compute a simplified file mode using the same rules as Git.
            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
            if stat.S_ISLNK(raw_file_mode):
                file_mode = 0o120000
            elif raw_file_mode & stat.S_IXUSR:
                file_mode = 0o100755
            else:
                file_mode = 0o100644
            with open(abs_path, "rb") as f:
                file_hash.update(f.read())
            # Hash mode, path, and content for each input, NUL-separated.
            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
            self_hash.update(rel_path.encode())
            self_hash.update(file_hash.digest())
            self_hash.update(b"\0")

        for pre_image in self.image.pre_images:
            self_hash.update(pre_image.extra().encode())
            self_hash.update(b"\0")

        # Build configuration is also a fingerprint input, so the same tree
        # built differently yields a different image.
        self_hash.update(f"profile={self.image.rd.profile}".encode())
        self_hash.update(f"arch={self.image.rd.arch}".encode())
        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
        self_hash.update(b"mirror=ghcr")
1162 1163 full_hash = hashlib.sha1() 1164 full_hash.update(self_hash.digest()) 1165 for dep in sorted(self.dependencies.values(), key=lambda d: d.name): 1166 full_hash.update(dep.name.encode()) 1167 full_hash.update(dep.fingerprint()) 1168 full_hash.update(b"\0") 1169 1170 return Fingerprint(full_hash.digest()) 1171 1172 1173class DependencySet: 1174 """A set of `ResolvedImage`s. 1175 1176 Iterating over a dependency set yields the contained images in an arbitrary 1177 order. Indexing a dependency set yields the image with the specified name. 1178 """ 1179 1180 def __init__(self, dependencies: Iterable[Image]): 1181 """Construct a new `DependencySet`. 1182 1183 The provided `dependencies` must be topologically sorted. 1184 """ 1185 self._dependencies: dict[str, ResolvedImage] = {} 1186 known_images = docker_images() 1187 for d in dependencies: 1188 image = ResolvedImage( 1189 image=d, 1190 dependencies=(self._dependencies[d0] for d0 in d.depends_on), 1191 ) 1192 image.acquired = image.spec() in known_images 1193 self._dependencies[d.name] = image 1194 1195 def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]: 1196 pre_images = collections.defaultdict(list) 1197 for image in images: 1198 for pre_image in image.image.pre_images: 1199 pre_images[type(pre_image)].append(pre_image) 1200 pre_image_prep = {} 1201 for cls, instances in pre_images.items(): 1202 pre_image = cast(PreImage, cls) 1203 pre_image_prep[cls] = pre_image.prepare_batch(instances) 1204 return pre_image_prep 1205 1206 def acquire(self, max_retries: int | None = None) -> None: 1207 """Download or build all of the images in the dependency set that do not 1208 already exist locally. 1209 1210 Args: 1211 max_retries: Number of retries on failure. 
1212 """ 1213 1214 # Only retry in CI runs since we struggle with flaky docker pulls there 1215 if not max_retries: 1216 max_retries = ( 1217 90 1218 if os.getenv("CI_WAITING_FOR_BUILD") 1219 else ( 1220 5 1221 if ui.env_is_truthy("CI") 1222 and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD") 1223 else 1 1224 ) 1225 ) 1226 assert max_retries > 0 1227 1228 deps_to_check = [dep for dep in self if dep.publish] 1229 deps_to_build = [dep for dep in self if not dep.publish] 1230 if len(deps_to_check): 1231 with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor: 1232 futures = [ 1233 executor.submit(dep.try_pull, max_retries) for dep in deps_to_check 1234 ] 1235 for dep, future in zip(deps_to_check, futures): 1236 try: 1237 if not future.result(): 1238 deps_to_build.append(dep) 1239 except Exception: 1240 deps_to_build.append(dep) 1241 1242 # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway 1243 if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"): 1244 expected_deps = [dep for dep in deps_to_build if dep.publish] 1245 if expected_deps: 1246 print( 1247 f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}" 1248 ) 1249 sys.exit(5) 1250 1251 prep = self._prepare_batch(deps_to_build) 1252 for dep in deps_to_build: 1253 dep.build(prep) 1254 1255 def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None): 1256 """Ensure all publishable images in this dependency set exist on Docker 1257 Hub. 1258 1259 Images are pushed using their spec as their tag. 1260 1261 Args: 1262 pre_build: A callback to invoke with all dependency that are going 1263 to be built locally, invoked after their cargo build is 1264 done, but before the Docker images are build and 1265 uploaded to DockerHub. 
1266 """ 1267 num_deps = len(list(self)) 1268 if not num_deps: 1269 deps_to_build = [] 1270 else: 1271 with ThreadPoolExecutor(max_workers=num_deps) as executor: 1272 futures = list( 1273 executor.map( 1274 lambda dep: (dep, not dep.is_published_if_necessary()), self 1275 ) 1276 ) 1277 1278 deps_to_build = [dep for dep, should_build in futures if should_build] 1279 1280 prep = self._prepare_batch(deps_to_build) 1281 if pre_build: 1282 pre_build(deps_to_build) 1283 lock = Lock() 1284 built_deps: set[str] = set([dep.name for dep in self]) - set( 1285 [dep.name for dep in deps_to_build] 1286 ) 1287 1288 def build_dep(dep): 1289 end_time = time.time() + 600 1290 while True: 1291 if time.time() > end_time: 1292 raise TimeoutError( 1293 f"Timed out in {dep.name} waiting for {[dep2.name for dep2 in dep.dependencies if dep2 not in built_deps]}" 1294 ) 1295 with lock: 1296 if all(dep2 in built_deps for dep2 in dep.dependencies): 1297 break 1298 time.sleep(0.01) 1299 for attempts_remaining in reversed(range(3)): 1300 try: 1301 dep.build(prep, push=dep.publish) 1302 with lock: 1303 built_deps.add(dep.name) 1304 break 1305 except Exception: 1306 if not dep.publish or attempts_remaining == 0: 1307 raise 1308 1309 if deps_to_build: 1310 with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor: 1311 futures = [executor.submit(build_dep, dep) for dep in deps_to_build] 1312 for future in as_completed(futures): 1313 future.result() 1314 1315 def check(self) -> bool: 1316 """Check all publishable images in this dependency set exist on Docker 1317 Hub. 
Don't try to download or build them.""" 1318 num_deps = len(list(self)) 1319 if num_deps == 0: 1320 return True 1321 with ThreadPoolExecutor(max_workers=num_deps) as executor: 1322 results = list( 1323 executor.map(lambda dep: dep.is_published_if_necessary(), list(self)) 1324 ) 1325 return all(results) 1326 1327 def __iter__(self) -> Iterator[ResolvedImage]: 1328 return iter(self._dependencies.values()) 1329 1330 def __getitem__(self, key: str) -> ResolvedImage: 1331 return self._dependencies[key] 1332 1333 1334class Repository: 1335 """A collection of mzbuild `Image`s. 1336 1337 Creating a repository will walk the filesystem beneath `root` to 1338 automatically discover all contained `Image`s. 1339 1340 Iterating over a repository yields the contained images in an arbitrary 1341 order. 1342 1343 Args: 1344 root: The path to the root of the repository. 1345 arch: The CPU architecture to build for. 1346 profile: What profile to build the repository in. 1347 coverage: Whether to enable code coverage instrumentation. 1348 sanitizer: Whether to a sanitizer (address, thread, leak, memory, none) 1349 image_registry: The Docker image registry to pull images from and push 1350 images to. 1351 image_prefix: A prefix to apply to all Docker image names. 1352 1353 Attributes: 1354 images: A mapping from image name to `Image` for all contained images. 1355 compose_dirs: The set of directories containing a `mzcompose.py` file. 
1356 """ 1357 1358 def __init__( 1359 self, 1360 root: Path, 1361 arch: Arch = Arch.host(), 1362 profile: Profile = ( 1363 Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED 1364 ), 1365 coverage: bool = False, 1366 sanitizer: Sanitizer = Sanitizer.none, 1367 image_registry: str = image_registry(), 1368 image_prefix: str = "", 1369 ): 1370 self.rd = RepositoryDetails( 1371 root, 1372 arch, 1373 profile, 1374 coverage, 1375 sanitizer, 1376 image_registry, 1377 image_prefix, 1378 ) 1379 self.images: dict[str, Image] = {} 1380 self.compositions: dict[str, Path] = {} 1381 for path, dirs, files in os.walk(self.root, topdown=True): 1382 if path == str(root / "misc"): 1383 dirs.remove("python") 1384 # Filter out some particularly massive ignored directories to keep 1385 # things snappy. Not required for correctness. 1386 dirs[:] = set(dirs) - { 1387 ".git", 1388 ".mypy_cache", 1389 "target", 1390 "target-ra", 1391 "target-xcompile", 1392 "mzdata", 1393 "node_modules", 1394 "venv", 1395 } 1396 if "mzbuild.yml" in files: 1397 image = Image(self.rd, Path(path)) 1398 if not image.name: 1399 raise ValueError(f"config at {path} missing name") 1400 if image.name in self.images: 1401 raise ValueError(f"image {image.name} exists twice") 1402 self.images[image.name] = image 1403 if "mzcompose.py" in files: 1404 name = Path(path).name 1405 if name in self.compositions: 1406 raise ValueError(f"composition {name} exists twice") 1407 self.compositions[name] = Path(path) 1408 1409 # Validate dependencies. 1410 for image in self.images.values(): 1411 for d in image.depends_on: 1412 if d not in self.images: 1413 raise ValueError( 1414 f"image {image.name} depends on non-existent image {d}" 1415 ) 1416 1417 @staticmethod 1418 def install_arguments(parser: argparse.ArgumentParser) -> None: 1419 """Install options to configure a repository into an argparse parser. 
1420 1421 This function installs the following options: 1422 1423 * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the 1424 `profile` repository attribute. 1425 * The `--coverage` boolean option to control the `coverage` repository 1426 attribute. 1427 1428 Use `Repository.from_arguments` to construct a repository from the 1429 parsed command-line arguments. 1430 """ 1431 build_mode = parser.add_mutually_exclusive_group() 1432 build_mode.add_argument( 1433 "--dev", 1434 action="store_true", 1435 help="build Rust binaries with the dev profile", 1436 ) 1437 build_mode.add_argument( 1438 "--release", 1439 action="store_true", 1440 help="build Rust binaries with the release profile (default)", 1441 ) 1442 build_mode.add_argument( 1443 "--optimized", 1444 action="store_true", 1445 help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)", 1446 ) 1447 parser.add_argument( 1448 "--coverage", 1449 help="whether to enable code coverage compilation flags", 1450 default=ui.env_is_truthy("CI_COVERAGE_ENABLED"), 1451 action="store_true", 1452 ) 1453 parser.add_argument( 1454 "--sanitizer", 1455 help="whether to enable a sanitizer", 1456 default=Sanitizer[os.getenv("CI_SANITIZER", "none")], 1457 type=Sanitizer, 1458 choices=Sanitizer, 1459 ) 1460 parser.add_argument( 1461 "--arch", 1462 default=Arch.host(), 1463 help="the CPU architecture to build for", 1464 type=Arch, 1465 choices=Arch, 1466 ) 1467 parser.add_argument( 1468 "--image-registry", 1469 default=image_registry(), 1470 help="the Docker image registry to pull images from and push images to", 1471 ) 1472 parser.add_argument( 1473 "--image-prefix", 1474 default="", 1475 help="a prefix to apply to all Docker image names", 1476 ) 1477 1478 @classmethod 1479 def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository": 1480 """Construct a repository from command-line arguments. 
1481 1482 The provided namespace must contain the options installed by 1483 `Repository.install_arguments`. 1484 """ 1485 if args.release: 1486 profile = Profile.RELEASE 1487 elif args.optimized: 1488 profile = Profile.OPTIMIZED 1489 elif args.dev: 1490 profile = Profile.DEV 1491 else: 1492 profile = ( 1493 Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED 1494 ) 1495 1496 return cls( 1497 root, 1498 profile=profile, 1499 coverage=args.coverage, 1500 sanitizer=args.sanitizer, 1501 image_registry=args.image_registry, 1502 image_prefix=args.image_prefix, 1503 arch=args.arch, 1504 ) 1505 1506 @property 1507 def root(self) -> Path: 1508 """The path to the root directory for the repository.""" 1509 return self.rd.root 1510 1511 def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet: 1512 """Compute the dependency set necessary to build target images. 1513 1514 The dependencies of `targets` will be crawled recursively until the 1515 complete set of transitive dependencies is determined or a circular 1516 dependency is discovered. The returned dependency set will be sorted 1517 in topological order. 1518 1519 Raises: 1520 ValueError: A circular dependency was discovered in the images 1521 in the repository. 1522 """ 1523 # Pre-fetch all crate input files in a single batched git call, 1524 # replacing ~118 individual subprocess pairs with one pair. 1525 self.rd.cargo_workspace.precompute_crate_inputs() 1526 # Pre-fetch all image context files in a single batched git call, 1527 # replacing ~41 individual subprocess pairs with one pair. 
1528 self._precompute_image_context_files() 1529 1530 resolved = OrderedDict() 1531 visiting = set() 1532 1533 def visit(image: Image, path: list[str] = []) -> None: 1534 if image.name in resolved: 1535 return 1536 if image.name in visiting: 1537 diagram = " -> ".join(path + [image.name]) 1538 raise ValueError(f"circular dependency in mzbuild: {diagram}") 1539 1540 visiting.add(image.name) 1541 for d in sorted(image.depends_on): 1542 visit(self.images[d], path + [image.name]) 1543 resolved[image.name] = image 1544 1545 for target_image in sorted(targets, key=lambda image: image.name): 1546 visit(target_image) 1547 1548 return DependencySet(resolved.values()) 1549 1550 def _precompute_image_context_files(self) -> None: 1551 """Pre-fetch all image context files in a single batched git call. 1552 1553 This replaces ~41 individual pairs of git subprocess calls (one per 1554 image) with a single pair, then partitions the results by image path. 1555 """ 1556 root = self.rd.root 1557 # Use paths relative to root for git specs and partitioning, since 1558 # git --relative outputs paths relative to cwd (root). Image paths 1559 # may be absolute when MZ_ROOT is an absolute path. 
1560 image_rel_paths = sorted( 1561 set(str(img.path.relative_to(root)) for img in self.images.values()) 1562 ) 1563 specs = [f"{p}/**" for p in image_rel_paths] 1564 1565 empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" 1566 diff_files = spawn.capture( 1567 ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"] 1568 + specs, 1569 cwd=root, 1570 ) 1571 ls_files = spawn.capture( 1572 ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs, 1573 cwd=root, 1574 ) 1575 all_files = set( 1576 f for f in (diff_files + ls_files).split("\0") if f.strip() != "" 1577 ) 1578 1579 # Partition files by image path (longest match first for nested paths) 1580 image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths} 1581 sorted_paths = sorted(image_rel_paths, key=len, reverse=True) 1582 for f in all_files: 1583 for ip in sorted_paths: 1584 if f.startswith(ip + "/"): 1585 image_file_map[ip].add(f) 1586 break 1587 1588 for img in self.images.values(): 1589 rel = str(img.path.relative_to(root)) 1590 img._context_files_cache = image_file_map.get(rel, set()) 1591 1592 def __iter__(self) -> Iterator[Image]: 1593 return iter(self.images.values()) 1594 1595 1596def publish_multiarch_images( 1597 tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]] 1598) -> None: 1599 """Publishes a set of docker images under a given tag.""" 1600 always_push_tags = ("latest", "unstable") 1601 if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"): 1602 spawn.runv( 1603 [ 1604 "docker", 1605 "login", 1606 "ghcr.io", 1607 "-u", 1608 "materialize-bot", 1609 "--password-stdin", 1610 ], 1611 stdin=ghcr_token.encode(), 1612 ) 1613 for images in zip(*dependency_sets): 1614 names = set(image.image.name for image in images) 1615 assert len(names) == 1, "dependency sets did not contain identical images" 1616 name = images[0].image.docker_name(tag) 1617 if tag in always_push_tags or not is_docker_image_pushed(name): 1618 spawn.runv( 1619 [ 1620 "docker", 1621 
"manifest", 1622 "create", 1623 name, 1624 *(image.spec() for image in images), 1625 ] 1626 ) 1627 spawn.runv(["docker", "manifest", "push", name]) 1628 1629 ghcr_name = f"{GHCR_PREFIX}{name}" 1630 if ghcr_token and ( 1631 tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name) 1632 ): 1633 spawn.runv( 1634 [ 1635 "docker", 1636 "manifest", 1637 "create", 1638 ghcr_name, 1639 *(f"{GHCR_PREFIX}{image.spec()}" for image in images), 1640 ] 1641 ) 1642 spawn.runv(["docker", "manifest", "push", ghcr_name]) 1643 print(f"--- Nofifying for tag {tag}") 1644 markdown = f"""Pushed images with Docker tag `{tag}`""" 1645 spawn.runv( 1646 [ 1647 "buildkite-agent", 1648 "annotate", 1649 "--style=info", 1650 f"--context=build-tags-{tag}", 1651 ], 1652 stdin=markdown.encode(), 1653 ) 1654 1655 1656def tag_multiarch_images( 1657 new_tag: str, previous_tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]] 1658) -> None: 1659 """Publishes a set of docker images under a given tag.""" 1660 for images in zip(*dependency_sets): 1661 names = set(image.image.name for image in images) 1662 assert len(names) == 1, "dependency sets did not contain identical images" 1663 new_name = images[0].image.docker_name(new_tag) 1664 1665 # Doesn't have tagged images 1666 if images[0].image.name == "mz": 1667 continue 1668 1669 previous_name = images[0].image.docker_name(previous_tag) 1670 spawn.runv(["docker", "pull", previous_name]) 1671 spawn.runv(["docker", "tag", previous_name, new_name]) 1672 spawn.runv(["docker", "push", new_name]) 1673 print(f"--- Nofifying for tag {new_tag}") 1674 markdown = f"""Pushed images with Docker tag `{new_tag}`""" 1675 spawn.runv( 1676 [ 1677 "buildkite-agent", 1678 "annotate", 1679 "--style=info", 1680 f"--context=build-tags-{new_tag}", 1681 ], 1682 stdin=markdown.encode(), 1683 )
Raised when a Rust build fails with output matching a known incremental-compilation failure signature, signaling that a clean rebuild should be attempted.
61def run_and_detect_rust_incremental_build_failure( 62 cmd: list[str], cwd: str | Path 63) -> subprocess.CompletedProcess: 64 """This function is complex since it prints out each line immediately to 65 stdout/stderr, but still records them at the same time so that we can scan 66 for known incremental build failures.""" 67 stdout_result = io.StringIO() 68 stderr_result = io.StringIO() 69 p = subprocess.Popen( 70 cmd, 71 stdout=subprocess.PIPE, 72 stderr=subprocess.PIPE, 73 text=True, 74 bufsize=1, 75 env={**os.environ, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"}, 76 ) 77 78 sel = selectors.DefaultSelector() 79 sel.register(p.stdout, selectors.EVENT_READ) # type: ignore 80 sel.register(p.stderr, selectors.EVENT_READ) # type: ignore 81 assert p.stdout is not None 82 assert p.stderr is not None 83 os.set_blocking(p.stdout.fileno(), False) 84 os.set_blocking(p.stderr.fileno(), False) 85 running = True 86 while running: 87 for key, val in sel.select(): 88 output = io.StringIO() 89 running = False 90 while True: 91 new_output = key.fileobj.read(1024) # type: ignore 92 if not new_output: 93 break 94 output.write(new_output) 95 contents = output.getvalue() 96 output.close() 97 if not contents: 98 continue 99 # Keep running as long as stdout or stderr have any content 100 running = True 101 if key.fileobj is p.stdout: 102 print( 103 contents, 104 end="", 105 flush=True, 106 ) 107 stdout_result.write(contents) 108 else: 109 print( 110 contents, 111 end="", 112 file=sys.stderr, 113 flush=True, 114 ) 115 stderr_result.write(contents) 116 p.wait() 117 retcode = p.poll() 118 assert retcode is not None 119 stdout_contents = stdout_result.getvalue() 120 stdout_result.close() 121 stderr_contents = stderr_result.getvalue() 122 stderr_result.close() 123 if retcode: 124 incremental_build_failure_msgs = [ 125 "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs", 126 "ld.lld: error: undefined symbol", 127 "signal: 11, SIGSEGV", 128 ] 129 combined = 
stdout_contents + stderr_contents 130 if any(msg in combined for msg in incremental_build_failure_msgs): 131 raise RustIncrementalBuildFailure() 132 133 raise subprocess.CalledProcessError( 134 retcode, p.args, output=stdout_contents, stderr=stderr_contents 135 ) 136 return subprocess.CompletedProcess( 137 p.args, retcode, stdout_contents, stderr_contents 138 )
This function is complex since it prints out each line immediately to stdout/stderr, but still records them at the same time so that we can scan for known incremental build failures.
class Fingerprint(bytes):
    """A SHA-1 digest of the inputs to an `Image`.

    Rendered as base32 rather than hex so that mzbuild fingerprints are
    visually distinct from Git's hex-encoded SHA-1 hashes while remaining
    URL safe.
    """

    def __str__(self) -> str:
        encoded = base64.b32encode(self)
        return encoded.decode()
A SHA-1 hash of the inputs to an Image.
The string representation uses base32 encoding to distinguish mzbuild fingerprints from Git's hex encoded SHA-1 hashes while still being URL safe.
class RepositoryDetails:
    """Immutable details about a `Repository`.

    Used internally by mzbuild.

    Attributes:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile the repository is being built with.
        coverage: Whether the repository has code coverage instrumentation
            enabled.
        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
        cargo_workspace: The `cargo.Workspace` associated with the repository.
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch,
        profile: Profile,
        coverage: bool,
        sanitizer: Sanitizer,
        image_registry: str,
        image_prefix: str,
    ):
        self.root = root
        self.arch = arch
        self.profile = profile
        self.coverage = coverage
        self.sanitizer = sanitizer
        self.cargo_workspace = cargo.Workspace(root)
        self.image_registry = image_registry
        self.image_prefix = image_prefix

    def build(
        self,
        subcommand: str,
        rustflags: list[str],
        channel: str | None = None,
        extra_env: dict[str, str] | None = None,
    ) -> list[str]:
        """Start a build invocation for the configured architecture.

        Args:
            subcommand: The Cargo subcommand to invoke.
            rustflags: Extra flags to pass to the Rust compiler.
            channel: The Rust toolchain channel to use, if not the default.
            extra_env: Additional environment variables to set for the build.
        """
        return xcompile.cargo(
            arch=self.arch,
            channel=channel,
            subcommand=subcommand,
            rustflags=rustflags,
            # Default to a fresh empty dict here rather than using a mutable
            # default argument, which would be shared across all calls.
            extra_env=extra_env if extra_env is not None else {},
        )

    def tool(self, name: str) -> list[str]:
        """Start a binutils tool invocation for the configured architecture."""
        if platform.system() != "Linux":
            # We can't use the local tools from macOS to build a Linux executable
            return ["bin/ci-builder", "run", "stable", name]
        # If we're on Linux, trust that the tools are installed instead of
        # loading the slow ci-builder. If you don't have compilation tools
        # installed you can still run `bin/ci-builder run stable
        # bin/mzcompose ...`, and most likely the Cargo build will already
        # fail earlier if you don't have compilation tools installed and
        # run without the ci-builder.
        return [name]

    def cargo_target_dir(self) -> Path:
        """Determine the path to the target directory for Cargo."""
        return self.root / "target-xcompile" / xcompile.target(self.arch)

    def rewrite_builder_path_for_host(self, path: Path) -> Path:
        """Rewrite a path that is relative to the target directory inside the
        builder to a path that is relative to the target directory on the host.

        If `path` is not relative to the target directory inside the builder,
        it is returned unchanged.
        """
        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
        try:
            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
        except ValueError:
            return path
Immutable details about a Repository.
Used internally by mzbuild.
Attributes:
root: The path to the root of the repository.
arch: The CPU architecture to build for.
profile: What profile the repository is being built with.
coverage: Whether the repository has code coverage instrumentation
enabled.
sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
cargo_workspace: The cargo.Workspace associated with the repository.
image_registry: The Docker image registry to pull images from and push
images to.
image_prefix: A prefix to apply to all Docker image names.
177 def __init__( 178 self, 179 root: Path, 180 arch: Arch, 181 profile: Profile, 182 coverage: bool, 183 sanitizer: Sanitizer, 184 image_registry: str, 185 image_prefix: str, 186 ): 187 self.root = root 188 self.arch = arch 189 self.profile = profile 190 self.coverage = coverage 191 self.sanitizer = sanitizer 192 self.cargo_workspace = cargo.Workspace(root) 193 self.image_registry = image_registry 194 self.image_prefix = image_prefix
196 def build( 197 self, 198 subcommand: str, 199 rustflags: list[str], 200 channel: str | None = None, 201 extra_env: dict[str, str] = {}, 202 ) -> list[str]: 203 """Start a build invocation for the configured architecture.""" 204 return xcompile.cargo( 205 arch=self.arch, 206 channel=channel, 207 subcommand=subcommand, 208 rustflags=rustflags, 209 extra_env=extra_env, 210 )
Start a build invocation for the configured architecture.
212 def tool(self, name: str) -> list[str]: 213 """Start a binutils tool invocation for the configured architecture.""" 214 if platform.system() != "Linux": 215 # We can't use the local tools from macOS to build a Linux executable 216 return ["bin/ci-builder", "run", "stable", name] 217 # If we're on Linux, trust that the tools are installed instead of 218 # loading the slow ci-builder. If you don't have compilation tools 219 # installed you can still run `bin/ci-builder run stable 220 # bin/mzcompose ...`, and most likely the Cargo build will already 221 # fail earlier if you don't have compilation tools installed and 222 # run without the ci-builder. 223 return [name]
Start a binutils tool invocation for the configured architecture.
225 def cargo_target_dir(self) -> Path: 226 """Determine the path to the target directory for Cargo.""" 227 return self.root / "target-xcompile" / xcompile.target(self.arch)
Determine the path to the target directory for Cargo.
229 def rewrite_builder_path_for_host(self, path: Path) -> Path: 230 """Rewrite a path that is relative to the target directory inside the 231 builder to a path that is relative to the target directory on the host. 232 233 If path does is not relative to the target directory inside the builder, 234 it is returned unchanged. 235 """ 236 builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch) 237 try: 238 return self.cargo_target_dir() / path.relative_to(builder_target_dir) 239 except ValueError: 240 return path
Rewrite a path that is relative to the target directory inside the builder to a path that is relative to the target directory on the host.
If path is not relative to the target directory inside the builder, it is returned unchanged.
@cache
def docker_images() -> frozenset[str]:
    """List the Docker images available on the local machine."""
    listing = spawn.capture(
        ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"]
    )
    return frozenset(listing.strip().split("\n"))
List the Docker images available on the local machine.
def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Note that this operation requires a rather slow network request.
    """
    global _known_docker_images

    # Lazily load the on-disk cache of images already known to exist,
    # guarding against concurrent initialization from multiple threads.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if not KNOWN_DOCKER_IMAGES_FILE.exists():
                _known_docker_images = set()
            else:
                with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                    _known_docker_images = set(line.strip() for line in f)

    # Fast path: a previously-confirmed image never needs re-checking.
    if name in _known_docker_images:
        return True

    # Split `repository:tag`, defaulting the tag like Docker itself does.
    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    try:
        if dockerhub_username and dockerhub_token:
            # Authenticated manifest HEAD request avoids anonymous rate limits.
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
            )
        else:
            # Anonymous access still requires a pull-scoped bearer token.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
            )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: any network or parsing error is treated as "not
        # pushed" so the caller falls back to building locally.
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        # Record the positive result in the shared cache and persist it so
        # subsequent runs skip the network round trip.
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
Check whether the named image is pushed to Docker Hub.
Note that this operation requires a rather slow network request.
def is_ghcr_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to the GitHub Container Registry.

    Note that this operation requires a rather slow network request.

    Args:
        name: The image name, optionally prefixed with `ghcr.io/` and
            optionally suffixed with `:tag` (defaults to `latest`).

    Returns:
        True if a manifest for the image exists on ghcr.io (or, on registry
        errors, if `docker manifest inspect` succeeds); False otherwise.
    """
    global _known_docker_images

    if _known_docker_images is None:
        with _known_docker_images_lock:
            # Re-check under the lock so that two threads racing past the
            # unlocked check don't both (re)load the cache file.
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = set(line.strip() for line in f)

    # The registry API wants the bare repository path, without the host.
    name_without_ghcr = name.removeprefix("ghcr.io/")
    # The cache is keyed on the full name as given by the caller.
    if name in _known_docker_images:
        return True

    if ":" not in name_without_ghcr:
        image, tag = name_without_ghcr, "latest"
    else:
        image, tag = name_without_ghcr.rsplit(":", 1)

    exists: bool = False

    try:
        # Anonymous pull access still requires a short-lived bearer token.
        token = requests.get(
            "https://ghcr.io/token",
            params={
                "scope": f"repository:{image}:pull",
            },
            # Without a timeout a stuck connection hangs forever.
            timeout=60,
        ).json()["token"]
        response = requests.head(
            f"https://ghcr.io/v2/{image}/manifests/{tag}",
            headers={"Authorization": f"Bearer {token}"},
            timeout=60,
        )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best-effort check: on any network/parse failure, report "not
        # pushed" rather than aborting the build.
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
def chmod_x(path: Path) -> None:
    """Set the executable bit on a file or directory.

    For each of user/group/other, the execute bit is switched on exactly
    when the corresponding read bit is already set, leaving all other
    permission bits untouched.
    """
    # Technique from https://stackoverflow.com/a/30463972/1122351
    current = os.stat(path).st_mode
    read_bits = current & (stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
    # Shifting the read bits right by two positions lands on the
    # matching execute bits (r=4 -> x=1 in each octal digit).
    os.chmod(path, current | (read_bits >> 2))
Set the executable bit on a file or directory.
class PreImage:
    """Base class for actions that run before a Docker image is built.

    Subclasses override `run` (and optionally `prepare_batch`, `inputs`,
    and `extra`) to do their work.

    Args:
        rd: The `RepositoryDetails` for the repository.
        path: The path to the `Image` associated with this action.
    """

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
        """Prepare a batch of actions at once.

        Subclasses that can amortize work across several images override
        this; the base implementation does nothing.

        Returns an arbitrary output that is passed to `PreImage.run`.
        """
        return None

    def run(self, prep: Any) -> None:
        """Perform the action.

        Args:
            prep: Any prep work returned by `prepare_batch`.
        """
        return None

    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action."""
        raise NotImplementedError

    def extra(self) -> str:
        """Returns additional data for incorporation in the fingerprint."""
        return ""
An action to run before building a Docker image.
Args:
rd: The RepositoryDetails for the repository.
path: The path to the Image associated with this action.
415 @classmethod 416 def prepare_batch(cls, instances: list["PreImage"]) -> Any: 417 """Prepare a batch of actions. 418 419 This is useful for `PreImage` actions that are more efficient when 420 their actions are applied to several images in bulk. 421 422 Returns an arbitrary output that is passed to `PreImage.run`. 423 """ 424 pass
Prepare a batch of actions.
This is useful for PreImage actions that are more efficient when
their actions are applied to several images in bulk.
Returns an arbitrary output that is passed to PreImage.run.
426 def run(self, prep: Any) -> None: 427 """Perform the action. 428 429 Args: 430 prep: Any prep work returned by `prepare_batch`. 431 """ 432 pass
Perform the action.
Args:
prep: Any prep work returned by prepare_batch.
class Copy(PreImage):
    """A `PreImage` action which copies files from a directory.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)

        # Both `source` and `destination` are mandatory; `matching` is an
        # optional glob that defaults to everything.
        self.source = config.pop("source", None)
        if self.source is None:
            raise ValueError("mzbuild config is missing 'source' argument")

        self.destination = config.pop("destination", None)
        if self.destination is None:
            raise ValueError("mzbuild config is missing 'destination' argument")

        self.matching = config.pop("matching", "*")

    def run(self, prep: Any) -> None:
        """Copy every matching file into the image's build context."""
        super().run(prep)
        for rel in self.inputs():
            target = self.path / self.destination / rel
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(self.rd.root / self.source / rel, target)

    def inputs(self) -> set[str]:
        """Return the source-relative paths of the files to copy."""
        return {f for f in git.expand_globs(self.rd.root / self.source, self.matching)}
A PreImage action which copies files from a directory.
See doc/developer/mzbuild.md for an explanation of the user-facing parameters.
450 def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]): 451 super().__init__(rd, path) 452 453 self.source = config.pop("source", None) 454 if self.source is None: 455 raise ValueError("mzbuild config is missing 'source' argument") 456 457 self.destination = config.pop("destination", None) 458 if self.destination is None: 459 raise ValueError("mzbuild config is missing 'destination' argument") 460 461 self.matching = config.pop("matching", "*")
463 def run(self, prep: Any) -> None: 464 super().run(prep) 465 for src in self.inputs(): 466 dst = self.path / self.destination / src 467 dst.parent.mkdir(parents=True, exist_ok=True) 468 shutil.copy(self.rd.root / self.source / src, dst)
Perform the action.
Args:
prep: Any prep work returned by prepare_batch.
470 def inputs(self) -> set[str]: 471 return set(git.expand_globs(self.rd.root / self.source, self.matching))
Return the files which are considered inputs to the action.
Inherited Members
class CargoPreImage(PreImage):
    """A `PreImage` action that uses Cargo."""

    @staticmethod
    @cache
    def _cargo_shared_inputs() -> frozenset[str]:
        """Resolve shared Cargo inputs once and cache the result.

        This expands the 'ci/builder' directory glob and filters out
        non-existent files like '.cargo/config', avoiding repeated
        git subprocess calls in fingerprint().
        """
        inputs: set[str] = set()
        inputs |= git.expand_globs(Path("."), "ci/builder/**")
        inputs.add("Cargo.toml")
        inputs.add("Cargo.lock")
        if Path(".cargo/config").exists():
            inputs.add(".cargo/config")
        return frozenset(inputs)

    def inputs(self) -> set[str]:
        """Return the repo-relative files that affect every Cargo build."""
        return set(CargoPreImage._cargo_shared_inputs())

    def extra(self) -> str:
        """Returns additional data for incorporation in the fingerprint."""
        # Cargo images depend on the release mode and whether
        # coverage/sanitizer is enabled.
        #
        # Bug fix: the previous code used `flags += "release"`, which
        # extends the list with the *characters* of the string
        # (['r', 'e', 'l', ...]) rather than appending the flag itself.
        # `append` adds the whole flag string, as intended. Note that this
        # changes the fingerprint input, so cached images will rebuild once.
        flags: list[str] = []
        if self.rd.profile == Profile.RELEASE:
            flags.append("release")
        if self.rd.profile == Profile.OPTIMIZED:
            flags.append("optimized")
        if self.rd.coverage:
            flags.append("coverage")
        if self.rd.sanitizer != Sanitizer.none:
            flags.append(self.rd.sanitizer.value)
        # Sort for a deterministic fingerprint regardless of flag order.
        flags.sort()
        return ",".join(flags)
A PreImage action that uses Cargo.
497 def extra(self) -> str: 498 # Cargo images depend on the release mode and whether 499 # coverage/sanitizer is enabled. 500 flags: list[str] = [] 501 if self.rd.profile == Profile.RELEASE: 502 flags += "release" 503 if self.rd.profile == Profile.OPTIMIZED: 504 flags += "optimized" 505 if self.rd.coverage: 506 flags += "coverage" 507 if self.rd.sanitizer != Sanitizer.none: 508 flags += self.rd.sanitizer.value 509 flags.sort() 510 return ",".join(flags)
Returns additional data for incorporation in the fingerprint.
class CargoBuild(CargoPreImage):
    """A `PreImage` action that builds a single binary with Cargo.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)
        # `bin`, `example`, and `features` each accept either a single string
        # or a list; normalize everything to lists.
        bin = config.pop("bin", [])
        self.bins = bin if isinstance(bin, list) else [bin]
        example = config.pop("example", [])
        self.examples = example if isinstance(example, list) else [example]
        self.strip = config.pop("strip", True)
        # Mapping of package name -> {src: dst} files to copy out of the
        # package's build-script OUT_DIR after the build.
        self.extract = config.pop("extract", {})
        features = config.pop("features", [])
        self.features = features if isinstance(features, list) else [features]

        # At least one binary or example must be requested, or there is
        # nothing to build.
        if len(self.bins) == 0 and len(self.examples) == 0:
            raise ValueError("mzbuild config is missing pre-build target")

    @staticmethod
    def generate_cargo_build_command(
        rd: RepositoryDetails,
        bins: list[str],
        examples: list[str],
        features: list[str] | None = None,
    ) -> list[str]:
        """Assemble the `cargo build` argv for the given binaries/examples.

        The command reflects the repository's build configuration: coverage
        instrumentation, sanitizer flags and cross-compilation environment,
        profile selection, and requested Cargo features.
        """
        # Coverage and sanitizers are mutually exclusive here: coverage wins,
        # then sanitizer flags, otherwise just the tokio_unstable cfg.
        rustflags = (
            rustc_flags.coverage
            if rd.coverage
            else (
                rustc_flags.sanitizer[rd.sanitizer]
                if rd.sanitizer != Sanitizer.none
                else ["--cfg=tokio_unstable"]
            )
        )
        # C/C++ flags pointing native-code builds at the cross toolchain;
        # only needed when a sanitizer is enabled.
        cflags = (
            [
                f"--target={target(rd.arch)}",
                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
                "-fuse-ld=lld",
                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
            ]
            + rustc_flags.sanitizer_cflags[rd.sanitizer]
            if rd.sanitizer != Sanitizer.none
            else []
        )
        # Extra environment for sanitizer builds: route compilers/linkers
        # through the cross toolchain and the sanitizer shim on PATH.
        extra_env = (
            {
                "CFLAGS": " ".join(cflags),
                "CXXFLAGS": " ".join(cflags),
                "LDFLAGS": " ".join(cflags),
                "CXXSTDLIB": "stdc++",
                "CC": "cc",
                "CXX": "c++",
                "CPP": "clang-cpp-18",
                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
            }
            if rd.sanitizer != Sanitizer.none
            else {}
        )

        cargo_build = rd.build(
            "build", channel=None, rustflags=rustflags, extra_env=extra_env
        )

        # Restrict the build to exactly the packages owning the requested
        # targets, so unrelated workspace members are not compiled.
        packages = set()
        for bin in bins:
            cargo_build.extend(["--bin", bin])
            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
        for example in examples:
            cargo_build.extend(["--example", example])
            packages.add(rd.cargo_workspace.crate_for_example(example).name)
        cargo_build.extend(f"--package={p}" for p in packages)

        if rd.profile == Profile.RELEASE:
            cargo_build.append("--release")
        if rd.profile == Profile.OPTIMIZED:
            cargo_build.extend(["--profile", "optimized"])
        if rd.sanitizer != Sanitizer.none:
            # ASan doesn't work with jemalloc
            cargo_build.append("--no-default-features")
            # Uses more memory, so reduce the number of jobs
            cargo_build.extend(
                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
            )
        if features:
            cargo_build.append(f"--features={','.join(features)}")

        return cargo_build

    @classmethod
    def prepare_batch(cls, cargo_builds: list["PreImage"]) -> dict[str, Any]:
        """Compile all requested targets in one shared `cargo build`.

        Returns a dict with the JSON-formatted cargo output under the
        "cargo" key, which `run`/`build` later uses to locate build-script
        OUT_DIRs for `extract`.
        """
        super().prepare_batch(cargo_builds)

        if not cargo_builds:
            return {}

        # Building all binaries and examples in the same `cargo build` command
        # allows Cargo to link in parallel with other work, which can
        # meaningfully speed up builds.

        # All instances are assumed to share one RepositoryDetails; the first
        # one seen is used for the whole batch.
        rd: RepositoryDetails | None = None
        builds = cast(list[CargoBuild], cargo_builds)
        bins = set()
        examples = set()
        features = set()
        for build in builds:
            if not rd:
                rd = build.rd
            bins.update(build.bins)
            examples.update(build.examples)
            features.update(build.features)
        assert rd

        ui.section(f"Common build for: {', '.join(bins | examples)}")

        cargo_build = cls.generate_cargo_build_command(
            rd, list(bins), list(examples), list(features) if features else None
        )

        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

        # Re-run with JSON-formatted messages and capture the output so we can
        # later analyze the build artifacts in `run`. This should be nearly
        # instantaneous since we just compiled above with the same crates and
        # features. (We don't want to do the compile above with JSON-formatted
        # messages because it wouldn't be human readable.)
        json_output = spawn.capture(
            cargo_build + ["--message-format=json"],
            cwd=rd.root,
        )
        prep = {"cargo": json_output}

        return prep

    def build(self, build_output: dict[str, Any]) -> None:
        """Copy the compiled artifacts into the image's build context.

        Args:
            build_output: The dict returned by `prepare_batch` (the JSON
                cargo output is read from its "cargo" key when `extract`
                is configured).
        """
        # Map the mzbuild profile to the corresponding cargo target
        # subdirectory name.
        cargo_profile = (
            "release"
            if self.rd.profile == Profile.RELEASE
            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
        )

        def copy(src: Path, relative_dst: Path) -> None:
            # Copy one artifact into the build context, then post-process
            # its debug sections.
            exe_path = self.path / relative_dst
            exe_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, exe_path)

            if self.strip:
                # The debug information is large enough that it slows down CI,
                # since we're packaging these binaries up into Docker images and
                # shipping them around.
                spawn.runv(
                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
                    cwd=self.rd.root,
                )
            else:
                # Even if we've been asked not to strip the binary, remove the
                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
                # indexes that speed up launching a debugger against the binary,
                # and we're happy to have slower debugger start up in exchange for
                # smaller binaries. Plus the sections have been obsoleted by a
                # `.debug_names` section in DWARF 5, and so debugger support for
                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
                # See: https://github.com/rust-lang/rust/issues/46034
                spawn.runv(
                    [
                        *self.rd.tool("objcopy"),
                        "-R",
                        ".debug_pubnames",
                        "-R",
                        ".debug_pubtypes",
                        exe_path,
                    ],
                    cwd=self.rd.root,
                )

        for bin in self.bins:
            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
            copy(src_path, bin)
        for example in self.examples:
            src_path = (
                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
            )
            copy(src_path, Path("examples") / example)

        if self.extract:
            cargo_build_json_output = build_output["cargo"]

            target_dir = self.rd.cargo_target_dir()
            for line in cargo_build_json_output.split("\n"):
                # Skip blank lines and any non-JSON noise in the output.
                if line.strip() == "" or not line.startswith("{"):
                    continue
                message = json.loads(line)
                if message["reason"] != "build-script-executed":
                    continue
                out_dir = self.rd.rewrite_builder_path_for_host(
                    Path(message["out_dir"])
                )
                if not out_dir.is_relative_to(target_dir):
                    # Some crates are built for both the host and the target.
                    # Ignore the built-for-host out dir.
                    continue
                # parse the package name from a package_id that looks like one of:
                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
                # file:///path/to/my-package#0.1.0
                package_id = message["package_id"]
                if "@" in package_id:
                    package = package_id.split("@")[0].split("#")[-1]
                else:
                    package = message["package_id"].split("#")[0].split("/")[-1]
                for src, dst in self.extract.get(package, {}).items():
                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

        # NOTE(review): nothing in this class reads `acquired`; it looks like
        # it mirrors `ResolvedImage.acquired` — confirm whether this
        # assignment is still needed.
        self.acquired = True

    def run(self, prep: dict[str, Any]) -> None:
        """Perform the action: copy artifacts built by `prepare_batch`.

        Args:
            prep: The prep dict returned by `prepare_batch`.
        """
        super().run(prep)
        self.build(prep)

    @cache
    def inputs(self) -> set[str]:
        """Return the files which are considered inputs to the action.

        Combines the shared Cargo inputs with the inputs of every path
        dependency (transitively) of the requested binaries and examples.
        """
        # NOTE(review): `@cache` on an instance method keys on `self` and
        # keeps the instance alive for the cache's lifetime; presumably
        # acceptable here since images live for the whole process.
        deps = set()

        for bin in self.bins:
            crate = self.rd.cargo_workspace.crate_for_bin(bin)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
        for example in self.examples:
            crate = self.rd.cargo_workspace.crate_for_example(example)
            # Examples may depend on dev-dependencies, so include them.
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
                crate, dev=True
            )

        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
        return inputs
A PreImage action that builds a single binary with Cargo.
See doc/developer/mzbuild.md for an explanation of the user-facing parameters.
520 def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]): 521 super().__init__(rd, path) 522 bin = config.pop("bin", []) 523 self.bins = bin if isinstance(bin, list) else [bin] 524 example = config.pop("example", []) 525 self.examples = example if isinstance(example, list) else [example] 526 self.strip = config.pop("strip", True) 527 self.extract = config.pop("extract", {}) 528 features = config.pop("features", []) 529 self.features = features if isinstance(features, list) else [features] 530 531 if len(self.bins) == 0 and len(self.examples) == 0: 532 raise ValueError("mzbuild config is missing pre-build target")
534 @staticmethod 535 def generate_cargo_build_command( 536 rd: RepositoryDetails, 537 bins: list[str], 538 examples: list[str], 539 features: list[str] | None = None, 540 ) -> list[str]: 541 rustflags = ( 542 rustc_flags.coverage 543 if rd.coverage 544 else ( 545 rustc_flags.sanitizer[rd.sanitizer] 546 if rd.sanitizer != Sanitizer.none 547 else ["--cfg=tokio_unstable"] 548 ) 549 ) 550 cflags = ( 551 [ 552 f"--target={target(rd.arch)}", 553 f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/", 554 "-fuse-ld=lld", 555 f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot", 556 f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64", 557 ] 558 + rustc_flags.sanitizer_cflags[rd.sanitizer] 559 if rd.sanitizer != Sanitizer.none 560 else [] 561 ) 562 extra_env = ( 563 { 564 "CFLAGS": " ".join(cflags), 565 "CXXFLAGS": " ".join(cflags), 566 "LDFLAGS": " ".join(cflags), 567 "CXXSTDLIB": "stdc++", 568 "CC": "cc", 569 "CXX": "c++", 570 "CPP": "clang-cpp-18", 571 "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc", 572 "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc", 573 "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 574 "TSAN_OPTIONS": "report_bugs=0", # build-scripts fail 575 } 576 if rd.sanitizer != Sanitizer.none 577 else {} 578 ) 579 580 cargo_build = rd.build( 581 "build", channel=None, rustflags=rustflags, extra_env=extra_env 582 ) 583 584 packages = set() 585 for bin in bins: 586 cargo_build.extend(["--bin", bin]) 587 packages.add(rd.cargo_workspace.crate_for_bin(bin).name) 588 for example in examples: 589 cargo_build.extend(["--example", example]) 590 packages.add(rd.cargo_workspace.crate_for_example(example).name) 591 cargo_build.extend(f"--package={p}" for p in packages) 592 593 if rd.profile == Profile.RELEASE: 594 cargo_build.append("--release") 595 if rd.profile == Profile.OPTIMIZED: 596 cargo_build.extend(["--profile", "optimized"]) 597 if rd.sanitizer != 
Sanitizer.none: 598 # ASan doesn't work with jemalloc 599 cargo_build.append("--no-default-features") 600 # Uses more memory, so reduce the number of jobs 601 cargo_build.extend( 602 ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))] 603 ) 604 if features: 605 cargo_build.append(f"--features={','.join(features)}") 606 607 return cargo_build
609 @classmethod 610 def prepare_batch(cls, cargo_builds: list["PreImage"]) -> dict[str, Any]: 611 super().prepare_batch(cargo_builds) 612 613 if not cargo_builds: 614 return {} 615 616 # Building all binaries and examples in the same `cargo build` command 617 # allows Cargo to link in parallel with other work, which can 618 # meaningfully speed up builds. 619 620 rd: RepositoryDetails | None = None 621 builds = cast(list[CargoBuild], cargo_builds) 622 bins = set() 623 examples = set() 624 features = set() 625 for build in builds: 626 if not rd: 627 rd = build.rd 628 bins.update(build.bins) 629 examples.update(build.examples) 630 features.update(build.features) 631 assert rd 632 633 ui.section(f"Common build for: {', '.join(bins | examples)}") 634 635 cargo_build = cls.generate_cargo_build_command( 636 rd, list(bins), list(examples), list(features) if features else None 637 ) 638 639 run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root) 640 641 # Re-run with JSON-formatted messages and capture the output so we can 642 # later analyze the build artifacts in `run`. This should be nearly 643 # instantaneous since we just compiled above with the same crates and 644 # features. (We don't want to do the compile above with JSON-formatted 645 # messages because it wouldn't be human readable.) 646 json_output = spawn.capture( 647 cargo_build + ["--message-format=json"], 648 cwd=rd.root, 649 ) 650 prep = {"cargo": json_output} 651 652 return prep
Prepare a batch of actions.
This is useful for PreImage actions that are more efficient when
their actions are applied to several images in bulk.
Returns an arbitrary output that is passed to PreImage.run.
654 def build(self, build_output: dict[str, Any]) -> None: 655 cargo_profile = ( 656 "release" 657 if self.rd.profile == Profile.RELEASE 658 else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug" 659 ) 660 661 def copy(src: Path, relative_dst: Path) -> None: 662 exe_path = self.path / relative_dst 663 exe_path.parent.mkdir(parents=True, exist_ok=True) 664 shutil.copy(src, exe_path) 665 666 if self.strip: 667 # The debug information is large enough that it slows down CI, 668 # since we're packaging these binaries up into Docker images and 669 # shipping them around. 670 spawn.runv( 671 [*self.rd.tool("strip"), "--strip-debug", exe_path], 672 cwd=self.rd.root, 673 ) 674 else: 675 # Even if we've been asked not to strip the binary, remove the 676 # `.debug_pubnames` and `.debug_pubtypes` sections. These are just 677 # indexes that speed up launching a debugger against the binary, 678 # and we're happy to have slower debugger start up in exchange for 679 # smaller binaries. Plus the sections have been obsoleted by a 680 # `.debug_names` section in DWARF 5, and so debugger support for 681 # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway. 
682 # See: https://github.com/rust-lang/rust/issues/46034 683 spawn.runv( 684 [ 685 *self.rd.tool("objcopy"), 686 "-R", 687 ".debug_pubnames", 688 "-R", 689 ".debug_pubtypes", 690 exe_path, 691 ], 692 cwd=self.rd.root, 693 ) 694 695 for bin in self.bins: 696 src_path = self.rd.cargo_target_dir() / cargo_profile / bin 697 copy(src_path, bin) 698 for example in self.examples: 699 src_path = ( 700 self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example 701 ) 702 copy(src_path, Path("examples") / example) 703 704 if self.extract: 705 cargo_build_json_output = build_output["cargo"] 706 707 target_dir = self.rd.cargo_target_dir() 708 for line in cargo_build_json_output.split("\n"): 709 if line.strip() == "" or not line.startswith("{"): 710 continue 711 message = json.loads(line) 712 if message["reason"] != "build-script-executed": 713 continue 714 out_dir = self.rd.rewrite_builder_path_for_host( 715 Path(message["out_dir"]) 716 ) 717 if not out_dir.is_relative_to(target_dir): 718 # Some crates are built for both the host and the target. 719 # Ignore the built-for-host out dir. 720 continue 721 # parse the package name from a package_id that looks like one of: 722 # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0 723 # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0 724 # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0 725 # file:///path/to/my-package#0.1.0 726 package_id = message["package_id"] 727 if "@" in package_id: 728 package = package_id.split("@")[0].split("#")[-1] 729 else: 730 package = message["package_id"].split("#")[0].split("/")[-1] 731 for src, dst in self.extract.get(package, {}).items(): 732 spawn.runv(["cp", "-R", out_dir / src, self.path / dst]) 733 734 self.acquired = True
Perform the action.
Args:
prep: Any prep work returned by prepare_batch.
740 @cache 741 def inputs(self) -> set[str]: 742 deps = set() 743 744 for bin in self.bins: 745 crate = self.rd.cargo_workspace.crate_for_bin(bin) 746 deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate) 747 for example in self.examples: 748 crate = self.rd.cargo_workspace.crate_for_example(example) 749 deps |= self.rd.cargo_workspace.transitive_path_dependencies( 750 crate, dev=True 751 ) 752 753 inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs()) 754 return inputs
Return the files which are considered inputs to the action.
Inherited Members
class Image:
    """A Docker image whose build and dependencies are managed by mzbuild.

    An image corresponds to a directory in a repository that contains a
    `mzbuild.yml` file. This directory is called an "mzbuild context."

    Attributes:
        name: The name of the image.
        publish: Whether the image should be pushed to Docker Hub.
        depends_on: The names of the images upon which this image depends.
        root: The path to the root of the associated `Repository`.
        path: The path to the directory containing the `mzbuild.yml`
            configuration file.
        pre_images: Optional actions to perform before running `docker build`.
        build_args: An optional list of --build-arg to pass to the dockerfile
    """

    # Matches `MZFROM <image>` directives at the start of a Dockerfile line.
    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")

    _context_files_cache: set[str] | None

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.rd = rd
        self.path = path
        self._context_files_cache = None
        self.pre_images: list[PreImage] = []

        # Parse the mzbuild.yml configuration file.
        with open(self.path / "mzbuild.yml") as f:
            data = yaml.safe_load(f)
            self.name: str = data.pop("name")
            self.publish: bool = data.pop("publish", True)
            self.description: str | None = data.pop("description", None)
            self.mainline: bool = data.pop("mainline", True)
            for pre_image in data.pop("pre-image", []):
                typ = pre_image.pop("type", None)
                # Dispatch on the declared pre-image type.
                ctor = {"cargo-build": CargoBuild, "copy": Copy}.get(typ)
                if ctor is None:
                    raise ValueError(
                        f"mzbuild config in {self.path} has unknown pre-image type"
                    )
                self.pre_images.append(ctor(self.rd, self.path, pre_image))
            self.build_args = data.pop("build-args", {})

        # Image names become Docker repository components, so restrict them
        # to a safe character set.
        if re.search(r"[^A-Za-z0-9\-]", self.name):
            raise ValueError(
                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
            )

        # Scan the Dockerfile for MZFROM directives to discover dependencies.
        self.depends_on: list[str] = []
        with open(self.path / "Dockerfile", "rb") as f:
            for line in f:
                if m := self._DOCKERFILE_MZFROM_RE.match(line):
                    self.depends_on.append(m.group(1).decode())

    def sync_description(self) -> None:
        """Sync the description to Docker Hub if the image is publishable
        and a README.md file exists."""

        if not self.publish:
            ui.say(f"{self.name} is not publishable")
            return

        readme = self.path / "README.md"
        if not readme.exists():
            ui.say(f"{self.name} has no README.md or description")
            return

        docker_config = os.getenv("DOCKER_CONFIG")
        spawn.runv(
            [
                "docker",
                "pushrm",
                f"--file={readme}",
                *([f"--config={docker_config}/config.json"] if docker_config else []),
                *([f"--short={self.description}"] if self.description else []),
                self.docker_name(),
            ]
        )

    def docker_name(self, tag: str | None = None) -> str:
        """Return the name of the image on Docker Hub at the given tag."""
        base = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
        return f"{base}:{tag}" if tag else base
A Docker image whose build and dependencies are managed by mzbuild.
An image corresponds to a directory in a repository that contains a
mzbuild.yml file. This directory is called an "mzbuild context."
Attributes:
name: The name of the image.
publish: Whether the image should be pushed to Docker Hub.
depends_on: The names of the images upon which this image depends.
root: The path to the root of the associated Repository.
path: The path to the directory containing the mzbuild.yml
configuration file.
pre_images: Optional actions to perform before running docker build.
build_args: An optional list of --build-arg to pass to the dockerfile
778 def __init__(self, rd: RepositoryDetails, path: Path): 779 self.rd = rd 780 self.path = path 781 self._context_files_cache = None 782 self.pre_images: list[PreImage] = [] 783 with open(self.path / "mzbuild.yml") as f: 784 data = yaml.safe_load(f) 785 self.name: str = data.pop("name") 786 self.publish: bool = data.pop("publish", True) 787 self.description: str | None = data.pop("description", None) 788 self.mainline: bool = data.pop("mainline", True) 789 for pre_image in data.pop("pre-image", []): 790 typ = pre_image.pop("type", None) 791 if typ == "cargo-build": 792 self.pre_images.append(CargoBuild(self.rd, self.path, pre_image)) 793 elif typ == "copy": 794 self.pre_images.append(Copy(self.rd, self.path, pre_image)) 795 else: 796 raise ValueError( 797 f"mzbuild config in {self.path} has unknown pre-image type" 798 ) 799 self.build_args = data.pop("build-args", {}) 800 801 if re.search(r"[^A-Za-z0-9\-]", self.name): 802 raise ValueError( 803 f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed" 804 ) 805 806 self.depends_on: list[str] = [] 807 with open(self.path / "Dockerfile", "rb") as f: 808 for line in f: 809 match = self._DOCKERFILE_MZFROM_RE.match(line) 810 if match: 811 self.depends_on.append(match.group(1).decode())
813 def sync_description(self) -> None: 814 """Sync the description to Docker Hub if the image is publishable 815 and a README.md file exists.""" 816 817 if not self.publish: 818 ui.say(f"{self.name} is not publishable") 819 return 820 821 readme_path = self.path / "README.md" 822 has_readme = readme_path.exists() 823 if not has_readme: 824 ui.say(f"{self.name} has no README.md or description") 825 return 826 827 docker_config = os.getenv("DOCKER_CONFIG") 828 spawn.runv( 829 [ 830 "docker", 831 "pushrm", 832 f"--file={readme_path}", 833 *([f"--config={docker_config}/config.json"] if docker_config else []), 834 *([f"--short={self.description}"] if self.description else []), 835 self.docker_name(), 836 ] 837 )
Sync the description to Docker Hub if the image is publishable and a README.md file exists.
839 def docker_name(self, tag: str | None = None) -> str: 840 """Return the name of the image on Docker Hub at the given tag.""" 841 name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}" 842 if tag: 843 name += f":{tag}" 844 return name
Return the name of the image on Docker Hub at the given tag.
class ResolvedImage:
    """An `Image` whose dependencies have been resolved.

    Attributes:
        image: The underlying `Image`.
        acquired: Whether the image is available locally.
        dependencies: A mapping from dependency name to `ResolvedImage` for
            each of the images that `image` depends upon.
    """

    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
        self.image = image
        # Pessimistically assume the image is not available locally until a
        # pull or build proves otherwise (see `try_pull` / `build`).
        self.acquired = False
        # Index dependencies by name so `write_dockerfile` can resolve
        # `MZFROM <name>` directives with a dict lookup.
        self.dependencies = {}
        for d in dependencies:
            self.dependencies[d.name] = d

    def __repr__(self) -> str:
        # Note: `spec()` triggers fingerprinting on first use (it is cached).
        return f"ResolvedImage<{self.spec()}>"

    @property
    def name(self) -> str:
        """The name of the underlying image."""
        return self.image.name

    @property
    def publish(self) -> bool:
        """Whether the underlying image should be pushed to Docker Hub."""
        return self.image.publish

    @cache
    def spec(self) -> str:
        """Return the "spec" for the image.

        A spec is the unique identifier for the image given its current
        fingerprint. It is a valid Docker Hub name.
        """
        # NOTE(review): `@cache` on an instance method keeps `self` alive for
        # the cache's lifetime -- acceptable if images live for the whole
        # program run, but worth confirming.
        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")

    def write_dockerfile(self) -> IO[bytes]:
        """Render the Dockerfile without mzbuild directives.

        Returns:
            file: A handle to a temporary file containing the adjusted
            Dockerfile."""
        with open(self.image.path / "Dockerfile", "rb") as f:
            lines = f.readlines()
        f = TemporaryFile()
        for line in lines:
            # Replace `MZFROM <image>` lines with a standard `FROM` line that
            # pins the dependency at its resolved, fingerprinted spec.
            match = Image._DOCKERFILE_MZFROM_RE.match(line)
            if match:
                image = match.group(1).decode()
                spec = self.dependencies[image].spec()
                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
            f.write(line)
        # Rewind so callers can stream the file as stdin to `docker build`.
        f.seek(0)
        return f

    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
        """Build the image from source.

        Requires that the caller has already acquired all dependencies and
        prepared all `PreImage` actions via `PreImage.prepare_batch`.
        """
        ui.section(f"Building {self.spec()}")
        # Remove gitignored files from the build context so stale artifacts
        # cannot leak into the image.
        spawn.runv(["git", "clean", "-ffdX", self.image.path])

        for pre_image in self.image.pre_images:
            pre_image.run(prep[type(pre_image)])
        build_args = {
            **self.image.build_args,
            "BUILD_PROFILE": self.image.rd.profile.name,
            "ARCH_GCC": str(self.image.rd.arch),
            "ARCH_GO": self.image.rd.arch.go_str(),
            "CI_SANITIZER": str(self.image.rd.sanitizer),
        }
        f = self.write_dockerfile()

        # Prefer `docker buildx`; fall back to plain `docker build` when it is
        # unavailable. Pushing strictly requires buildx.
        try:
            spawn.capture(["docker", "buildx", "version"])
        except subprocess.CalledProcessError:
            if push:
                print(
                    "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
                )
                raise
            print(
                "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
            )
            print("Falling back to docker build")
            cmd: Sequence[str] = [
                "docker",
                "build",
                "-f",
                "-",
                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                "-t",
                self.spec(),
                f"--platform=linux/{self.image.rd.arch.go_str()}",
                str(self.image.path),
            ]
        else:
            cmd: Sequence[str] = [
                "docker",
                "buildx",
                "build",
                "--progress=plain",  # less noisy
                "-f",
                "-",
                *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                # Tag for both registries so the GHCR mirror stays in sync
                # with Docker Hub.
                "-t",
                f"docker.io/{self.spec()}",
                "-t",
                f"ghcr.io/materializeinc/{self.spec()}",
                f"--platform=linux/{self.image.rd.arch.go_str()}",
                str(self.image.path),
                *(["--push"] if push else ["--load"]),
            ]

        # NOTE(review): placement reconstructed -- assumes the GHCR login
        # applies regardless of which build path was taken; confirm upstream.
        if token := os.getenv("GITHUB_GHCR_TOKEN"):
            spawn.runv(
                [
                    "docker",
                    "login",
                    "ghcr.io",
                    "-u",
                    "materialize-bot",
                    "--password-stdin",
                ],
                stdin=token.encode(),
            )

        # The rendered Dockerfile is streamed on stdin (`-f -` above).
        spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)

    def try_pull(self, max_retries: int) -> bool:
        """Download the image if it does not exist locally. Returns whether it was found."""
        ui.header(f"Acquiring {self.spec()}")
        command = ["docker", "pull"]
        # --quiet skips printing the progress bar, which does not display well in CI.
        if ui.env_is_truthy("CI"):
            command.append("--quiet")
        command.append(self.spec())
        if not self.acquired:
            # Exponential backoff between attempts, capped at 10 seconds.
            sleep_time = 1
            for retry in range(1, max_retries + 1):
                try:
                    spawn.runv(
                        command,
                        stdin=subprocess.DEVNULL,
                        stdout=sys.stderr.buffer,
                    )
                    self.acquired = True
                    break
                except subprocess.CalledProcessError:
                    if retry < max_retries:
                        # There seems to be no good way to tell what error
                        # happened based on error code
                        # (https://github.com/docker/cli/issues/538) and we
                        # want to print output directly to terminal.
                        if build := os.getenv("CI_WAITING_FOR_BUILD"):
                            # Poll the upstream build we are waiting on: exit
                            # hard if it failed, stop polling once it
                            # succeeded, otherwise keep waiting while pending.
                            # NOTE(review): this inner loop shadows the outer
                            # `retry` variable -- harmless since the outer
                            # `for` reassigns it, but confusing to read.
                            for retry in range(max_retries):
                                try:
                                    build_status = buildkite.get_build_status(build)
                                except subprocess.CalledProcessError:
                                    time.sleep(sleep_time)
                                    sleep_time = min(sleep_time * 2, 10)
                                    break
                                print(f"Build {build} status: {build_status}")
                                if build_status == "failed":
                                    print(
                                        f"Build {build} has been marked as failed, exiting hard"
                                    )
                                    sys.exit(1)
                                elif build_status == "success":
                                    break
                                assert (
                                    build_status == "pending"
                                ), f"Unknown build status {build_status}"
                                time.sleep(1)
                        else:
                            print(f"Retrying in {sleep_time}s ...")
                            time.sleep(sleep_time)
                            sleep_time = min(sleep_time * 2, 10)
                        continue
                    else:
                        break
        return self.acquired

    def is_published_if_necessary(self) -> bool:
        """Report whether the image exists on DockerHub & GHCR if it is publishable."""
        if not self.publish:
            # Non-publishable images are never expected on a registry.
            return False
        spec = self.spec()
        # Normalize to the bare (Docker Hub) spec, then derive the GHCR
        # mirror's name from it.
        if spec.startswith(GHCR_PREFIX):
            spec = spec.removeprefix(GHCR_PREFIX)
        ghcr_spec = f"{GHCR_PREFIX}{spec}"
        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
            ui.say(f"{spec} already exists")
            return True
        return False

    def run(
        self,
        args: list[str] = [],
        docker_args: list[str] = [],
        env: dict[str, str] = {},
    ) -> None:
        """Run a command in the image.

        Creates a container from the image and runs the command described by
        `args` in the image.
        """
        # NOTE(review): mutable default arguments -- safe here because they
        # are never mutated, but `None` sentinels would be more conventional.
        envs = []
        for key, val in env.items():
            envs.extend(["--env", f"{key}={val}"])
        spawn.runv(
            [
                "docker",
                "run",
                "--tty",
                "--rm",
                *envs,
                "--init",
                *docker_args,
                self.spec(),
                *args,
            ],
        )

    def list_dependencies(self, transitive: bool = False) -> set[str]:
        """Return the names of this image's dependencies.

        Args:
            transitive: Whether to include indirect dependencies as well.
        """
        out = set()
        for dep in self.dependencies.values():
            out.add(dep.name)
            if transitive:
                out |= dep.list_dependencies(transitive)
        return out

    @cache
    def inputs(self, transitive: bool = False) -> set[str]:
        """List the files tracked as inputs to the image.

        These files are used to compute the fingerprint for the image. See
        `ResolvedImage.fingerprint` for details.

        Returns:
            inputs: A list of input files, relative to the root of the
            repository.
        """
        if self.image._context_files_cache is not None:
            # A precomputed context listing is available; skip the git call.
            paths = set(self.image._context_files_cache)
        else:
            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
        if not paths:
            # While we could find an `mzbuild.yml` file for this service, expand_globs didn't
            # return any files that matched this service. At the very least, the `mzbuild.yml`
            # file itself should have been returned. We have a bug if paths is empty.
            raise AssertionError(
                f"{self.image.name} mzbuild exists but its files are unknown to git"
            )
        for pre_image in self.image.pre_images:
            paths |= pre_image.inputs()
        if transitive:
            for dep in self.dependencies.values():
                paths |= dep.inputs(transitive)
        return paths

    @cache
    def fingerprint(self) -> Fingerprint:
        """Fingerprint the inputs to the image.

        Compute the fingerprint of the image. Changing the contents of any of
        the files or adding or removing files to the image will change the
        fingerprint, as will modifying the inputs to any of its dependencies.

        The image considers all non-gitignored files in its mzbuild context to
        be inputs. If it has a pre-image action, that action may add additional
        inputs via `PreImage.inputs`.
        """
        self_hash = hashlib.sha1()
        # When inputs come from precomputed sources (crate and image context
        # batching + resolved CargoPreImage paths), they are already individual
        # file paths from git. Skip the expensive expand_globs subprocess calls.
        inputs = self.inputs()
        if self.image._context_files_cache is not None:
            resolved_inputs = sorted(inputs)
        else:
            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
        for rel_path in resolved_inputs:
            abs_path = self.image.rd.root / rel_path
            file_hash = hashlib.sha1()
            raw_file_mode = os.lstat(abs_path).st_mode
            # Compute a simplified file mode using the same rules as Git.
            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
            if stat.S_ISLNK(raw_file_mode):
                file_mode = 0o120000
            elif raw_file_mode & stat.S_IXUSR:
                file_mode = 0o100755
            else:
                file_mode = 0o100644
            with open(abs_path, "rb") as f:
                file_hash.update(f.read())
            # Mode, path, and content all participate; `\0` separates entries.
            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
            self_hash.update(rel_path.encode())
            self_hash.update(file_hash.digest())
            self_hash.update(b"\0")

        for pre_image in self.image.pre_images:
            self_hash.update(pre_image.extra().encode())
            self_hash.update(b"\0")

        # Build configuration also changes the produced image, so fold it in.
        self_hash.update(f"profile={self.image.rd.profile}".encode())
        self_hash.update(f"arch={self.image.rd.arch}".encode())
        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
        self_hash.update(b"mirror=ghcr")

        # The full fingerprint folds in every dependency's fingerprint in a
        # deterministic (name-sorted) order.
        full_hash = hashlib.sha1()
        full_hash.update(self_hash.digest())
        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
            full_hash.update(dep.name.encode())
            full_hash.update(dep.fingerprint())
            full_hash.update(b"\0")

        return Fingerprint(full_hash.digest())
An Image whose dependencies have been resolved.
Attributes:
image: The underlying Image.
acquired: Whether the image is available locally.
dependencies: A mapping from dependency name to ResolvedImage for
each of the images that image depends upon.
867 @property 868 def name(self) -> str: 869 """The name of the underlying image.""" 870 return self.image.name
The name of the underlying image.
872 @property 873 def publish(self) -> bool: 874 """Whether the underlying image should be pushed to Docker Hub.""" 875 return self.image.publish
Whether the underlying image should be pushed to Docker Hub.
877 @cache 878 def spec(self) -> str: 879 """Return the "spec" for the image. 880 881 A spec is the unique identifier for the image given its current 882 fingerprint. It is a valid Docker Hub name. 883 """ 884 return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")
Return the "spec" for the image.
A spec is the unique identifier for the image given its current fingerprint. It is a valid Docker Hub name.
886 def write_dockerfile(self) -> IO[bytes]: 887 """Render the Dockerfile without mzbuild directives. 888 889 Returns: 890 file: A handle to a temporary file containing the adjusted 891 Dockerfile.""" 892 with open(self.image.path / "Dockerfile", "rb") as f: 893 lines = f.readlines() 894 f = TemporaryFile() 895 for line in lines: 896 match = Image._DOCKERFILE_MZFROM_RE.match(line) 897 if match: 898 image = match.group(1).decode() 899 spec = self.dependencies[image].spec() 900 line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line) 901 f.write(line) 902 f.seek(0) 903 return f
Render the Dockerfile without mzbuild directives.
Returns: file: A handle to a temporary file containing the adjusted Dockerfile.
905 def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None: 906 """Build the image from source. 907 908 Requires that the caller has already acquired all dependencies and 909 prepared all `PreImage` actions via `PreImage.prepare_batch`. 910 """ 911 ui.section(f"Building {self.spec()}") 912 spawn.runv(["git", "clean", "-ffdX", self.image.path]) 913 914 for pre_image in self.image.pre_images: 915 pre_image.run(prep[type(pre_image)]) 916 build_args = { 917 **self.image.build_args, 918 "BUILD_PROFILE": self.image.rd.profile.name, 919 "ARCH_GCC": str(self.image.rd.arch), 920 "ARCH_GO": self.image.rd.arch.go_str(), 921 "CI_SANITIZER": str(self.image.rd.sanitizer), 922 } 923 f = self.write_dockerfile() 924 925 try: 926 spawn.capture(["docker", "buildx", "version"]) 927 except subprocess.CalledProcessError: 928 if push: 929 print( 930 "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing" 931 ) 932 raise 933 print( 934 "docker buildx not found, you can install it to build faster. 
Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing" 935 ) 936 print("Falling back to docker build") 937 cmd: Sequence[str] = [ 938 "docker", 939 "build", 940 "-f", 941 "-", 942 *(f"--build-arg={k}={v}" for k, v in build_args.items()), 943 "-t", 944 self.spec(), 945 f"--platform=linux/{self.image.rd.arch.go_str()}", 946 str(self.image.path), 947 ] 948 else: 949 cmd: Sequence[str] = [ 950 "docker", 951 "buildx", 952 "build", 953 "--progress=plain", # less noisy 954 "-f", 955 "-", 956 *(f"--build-arg={k}={v}" for k, v in build_args.items()), 957 "-t", 958 f"docker.io/{self.spec()}", 959 "-t", 960 f"ghcr.io/materializeinc/{self.spec()}", 961 f"--platform=linux/{self.image.rd.arch.go_str()}", 962 str(self.image.path), 963 *(["--push"] if push else ["--load"]), 964 ] 965 966 if token := os.getenv("GITHUB_GHCR_TOKEN"): 967 spawn.runv( 968 [ 969 "docker", 970 "login", 971 "ghcr.io", 972 "-u", 973 "materialize-bot", 974 "--password-stdin", 975 ], 976 stdin=token.encode(), 977 ) 978 979 spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)
Build the image from source.
Requires that the caller has already acquired all dependencies and
prepared all PreImage actions via PreImage.prepare_batch.
981 def try_pull(self, max_retries: int) -> bool: 982 """Download the image if it does not exist locally. Returns whether it was found.""" 983 ui.header(f"Acquiring {self.spec()}") 984 command = ["docker", "pull"] 985 # --quiet skips printing the progress bar, which does not display well in CI. 986 if ui.env_is_truthy("CI"): 987 command.append("--quiet") 988 command.append(self.spec()) 989 if not self.acquired: 990 sleep_time = 1 991 for retry in range(1, max_retries + 1): 992 try: 993 spawn.runv( 994 command, 995 stdin=subprocess.DEVNULL, 996 stdout=sys.stderr.buffer, 997 ) 998 self.acquired = True 999 break 1000 except subprocess.CalledProcessError: 1001 if retry < max_retries: 1002 # There seems to be no good way to tell what error 1003 # happened based on error code 1004 # (https://github.com/docker/cli/issues/538) and we 1005 # want to print output directly to terminal. 1006 if build := os.getenv("CI_WAITING_FOR_BUILD"): 1007 for retry in range(max_retries): 1008 try: 1009 build_status = buildkite.get_build_status(build) 1010 except subprocess.CalledProcessError: 1011 time.sleep(sleep_time) 1012 sleep_time = min(sleep_time * 2, 10) 1013 break 1014 print(f"Build {build} status: {build_status}") 1015 if build_status == "failed": 1016 print( 1017 f"Build {build} has been marked as failed, exiting hard" 1018 ) 1019 sys.exit(1) 1020 elif build_status == "success": 1021 break 1022 assert ( 1023 build_status == "pending" 1024 ), f"Unknown build status {build_status}" 1025 time.sleep(1) 1026 else: 1027 print(f"Retrying in {sleep_time}s ...") 1028 time.sleep(sleep_time) 1029 sleep_time = min(sleep_time * 2, 10) 1030 continue 1031 else: 1032 break 1033 return self.acquired
Download the image if it does not exist locally. Returns whether it was found.
1035 def is_published_if_necessary(self) -> bool: 1036 """Report whether the image exists on DockerHub & GHCR if it is publishable.""" 1037 if not self.publish: 1038 return False 1039 spec = self.spec() 1040 if spec.startswith(GHCR_PREFIX): 1041 spec = spec.removeprefix(GHCR_PREFIX) 1042 ghcr_spec = f"{GHCR_PREFIX}{spec}" 1043 if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec): 1044 ui.say(f"{spec} already exists") 1045 return True 1046 return False
Report whether the image exists on DockerHub & GHCR if it is publishable.
1048 def run( 1049 self, 1050 args: list[str] = [], 1051 docker_args: list[str] = [], 1052 env: dict[str, str] = {}, 1053 ) -> None: 1054 """Run a command in the image. 1055 1056 Creates a container from the image and runs the command described by 1057 `args` in the image. 1058 """ 1059 envs = [] 1060 for key, val in env.items(): 1061 envs.extend(["--env", f"{key}={val}"]) 1062 spawn.runv( 1063 [ 1064 "docker", 1065 "run", 1066 "--tty", 1067 "--rm", 1068 *envs, 1069 "--init", 1070 *docker_args, 1071 self.spec(), 1072 *args, 1073 ], 1074 )
Run a command in the image.
Creates a container from the image and runs the command described by
args in the image.
1084 @cache 1085 def inputs(self, transitive: bool = False) -> set[str]: 1086 """List the files tracked as inputs to the image. 1087 1088 These files are used to compute the fingerprint for the image. See 1089 `ResolvedImage.fingerprint` for details. 1090 1091 Returns: 1092 inputs: A list of input files, relative to the root of the 1093 repository. 1094 """ 1095 if self.image._context_files_cache is not None: 1096 paths = set(self.image._context_files_cache) 1097 else: 1098 paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**")) 1099 if not paths: 1100 # While we could find an `mzbuild.yml` file for this service, expland_globs didn't 1101 # return any files that matched this service. At the very least, the `mzbuild.yml` 1102 # file itself should have been returned. We have a bug if paths is empty. 1103 raise AssertionError( 1104 f"{self.image.name} mzbuild exists but its files are unknown to git" 1105 ) 1106 for pre_image in self.image.pre_images: 1107 paths |= pre_image.inputs() 1108 if transitive: 1109 for dep in self.dependencies.values(): 1110 paths |= dep.inputs(transitive) 1111 return paths
List the files tracked as inputs to the image.
These files are used to compute the fingerprint for the image. See
ResolvedImage.fingerprint for details.
Returns: inputs: A list of input files, relative to the root of the repository.
1113 @cache 1114 def fingerprint(self) -> Fingerprint: 1115 """Fingerprint the inputs to the image. 1116 1117 Compute the fingerprint of the image. Changing the contents of any of 1118 the files or adding or removing files to the image will change the 1119 fingerprint, as will modifying the inputs to any of its dependencies. 1120 1121 The image considers all non-gitignored files in its mzbuild context to 1122 be inputs. If it has a pre-image action, that action may add additional 1123 inputs via `PreImage.inputs`. 1124 """ 1125 self_hash = hashlib.sha1() 1126 # When inputs come from precomputed sources (crate and image context 1127 # batching + resolved CargoPreImage paths), they are already individual 1128 # file paths from git. Skip the expensive expand_globs subprocess calls. 1129 inputs = self.inputs() 1130 if self.image._context_files_cache is not None: 1131 resolved_inputs = sorted(inputs) 1132 else: 1133 resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs))) 1134 for rel_path in resolved_inputs: 1135 abs_path = self.image.rd.root / rel_path 1136 file_hash = hashlib.sha1() 1137 raw_file_mode = os.lstat(abs_path).st_mode 1138 # Compute a simplified file mode using the same rules as Git. 
1139 # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616 1140 if stat.S_ISLNK(raw_file_mode): 1141 file_mode = 0o120000 1142 elif raw_file_mode & stat.S_IXUSR: 1143 file_mode = 0o100755 1144 else: 1145 file_mode = 0o100644 1146 with open(abs_path, "rb") as f: 1147 file_hash.update(f.read()) 1148 self_hash.update(file_mode.to_bytes(2, byteorder="big")) 1149 self_hash.update(rel_path.encode()) 1150 self_hash.update(file_hash.digest()) 1151 self_hash.update(b"\0") 1152 1153 for pre_image in self.image.pre_images: 1154 self_hash.update(pre_image.extra().encode()) 1155 self_hash.update(b"\0") 1156 1157 self_hash.update(f"profile={self.image.rd.profile}".encode()) 1158 self_hash.update(f"arch={self.image.rd.arch}".encode()) 1159 self_hash.update(f"coverage={self.image.rd.coverage}".encode()) 1160 self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode()) 1161 # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet 1162 self_hash.update(b"mirror=ghcr") 1163 1164 full_hash = hashlib.sha1() 1165 full_hash.update(self_hash.digest()) 1166 for dep in sorted(self.dependencies.values(), key=lambda d: d.name): 1167 full_hash.update(dep.name.encode()) 1168 full_hash.update(dep.fingerprint()) 1169 full_hash.update(b"\0") 1170 1171 return Fingerprint(full_hash.digest())
Fingerprint the inputs to the image.
Compute the fingerprint of the image. Changing the contents of any of the files or adding or removing files to the image will change the fingerprint, as will modifying the inputs to any of its dependencies.
The image considers all non-gitignored files in its mzbuild context to
be inputs. If it has a pre-image action, that action may add additional
inputs via PreImage.inputs.
class DependencySet:
    """A set of `ResolvedImage`s.

    Iterating over a dependency set yields the contained images in an arbitrary
    order. Indexing a dependency set yields the image with the specified name.
    """

    def __init__(self, dependencies: Iterable[Image]):
        """Construct a new `DependencySet`.

        The provided `dependencies` must be topologically sorted.
        """
        self._dependencies: dict[str, ResolvedImage] = {}
        # Snapshot the locally available images once so each dependency's
        # `acquired` flag can be set without a per-image `docker` call.
        known_images = docker_images()
        for d in dependencies:
            image = ResolvedImage(
                image=d,
                # Topological order guarantees every name in `depends_on` has
                # already been resolved.
                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
            )
            image.acquired = image.spec() in known_images
            self._dependencies[d.name] = image

    def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]:
        """Group pre-image actions by type and run each type's batch prepare.

        Returns a mapping from `PreImage` subclass to whatever that class's
        `prepare_batch` produced; `ResolvedImage.build` indexes into it.
        """
        pre_images = collections.defaultdict(list)
        for image in images:
            for pre_image in image.image.pre_images:
                pre_images[type(pre_image)].append(pre_image)
        pre_image_prep = {}
        for cls, instances in pre_images.items():
            pre_image = cast(PreImage, cls)
            pre_image_prep[cls] = pre_image.prepare_batch(instances)
        return pre_image_prep

    def acquire(self, max_retries: int | None = None) -> None:
        """Download or build all of the images in the dependency set that do not
        already exist locally.

        Args:
            max_retries: Number of retries on failure.
        """

        # Only retry in CI runs since we struggle with flaky docker pulls there
        if not max_retries:
            max_retries = (
                90
                if os.getenv("CI_WAITING_FOR_BUILD")
                else (
                    5
                    if ui.env_is_truthy("CI")
                    and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD")
                    else 1
                )
            )
        assert max_retries > 0

        # Publishable images may exist on a registry; everything else must be
        # built locally.
        deps_to_check = [dep for dep in self if dep.publish]
        deps_to_build = [dep for dep in self if not dep.publish]
        if len(deps_to_check):
            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
                futures = [
                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
                ]
                for dep, future in zip(deps_to_check, futures):
                    try:
                        if not future.result():
                            deps_to_build.append(dep)
                    except Exception:
                        # A failed pull is not fatal; fall back to building.
                        deps_to_build.append(dep)

        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            expected_deps = [dep for dep in deps_to_build if dep.publish]
            if expected_deps:
                print(
                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
                )
                sys.exit(5)

        prep = self._prepare_batch(deps_to_build)
        for dep in deps_to_build:
            dep.build(prep)

    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
        """Ensure all publishable images in this dependency set exist on Docker
        Hub.

        Images are pushed using their spec as their tag.

        Args:
            pre_build: A callback to invoke with all dependencies that are
                going to be built locally, invoked after their cargo build
                is done, but before the Docker images are built and
                uploaded to DockerHub.
        """
        num_deps = len(list(self))
        if not num_deps:
            deps_to_build = []
        else:
            # Check publication status of every image in parallel.
            with ThreadPoolExecutor(max_workers=num_deps) as executor:
                futures = list(
                    executor.map(
                        lambda dep: (dep, not dep.is_published_if_necessary()), self
                    )
                )

            deps_to_build = [dep for dep, should_build in futures if should_build]

        prep = self._prepare_batch(deps_to_build)
        if pre_build:
            pre_build(deps_to_build)
        lock = Lock()
        # Names of images that are already done; an image may be built once
        # all of its dependencies' names appear here.
        built_deps: set[str] = set([dep.name for dep in self]) - set(
            [dep.name for dep in deps_to_build]
        )

        def build_dep(dep):
            # Wait (up to 10 minutes) until all of this image's dependencies
            # have been built by sibling workers.
            end_time = time.time() + 600
            while True:
                if time.time() > end_time:
                    # Fix: `dep.dependencies` is a dict keyed by name, so
                    # iterating it yields strings; the previous `dep2.name`
                    # raised AttributeError while formatting this message.
                    raise TimeoutError(
                        f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}"
                    )
                with lock:
                    if all(dep2 in built_deps for dep2 in dep.dependencies):
                        break
                time.sleep(0.01)
            # Pushes are retried up to 3 times; local-only builds fail fast.
            for attempts_remaining in reversed(range(3)):
                try:
                    dep.build(prep, push=dep.publish)
                    with lock:
                        built_deps.add(dep.name)
                    break
                except Exception:
                    if not dep.publish or attempts_remaining == 0:
                        raise

        if deps_to_build:
            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
                for future in as_completed(futures):
                    future.result()

    def check(self) -> bool:
        """Check all publishable images in this dependency set exist on Docker
        Hub. Don't try to download or build them."""
        num_deps = len(list(self))
        if num_deps == 0:
            return True
        # One worker per image; each check is network-bound.
        with ThreadPoolExecutor(max_workers=num_deps) as executor:
            results = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), list(self))
            )
        return all(results)

    def __iter__(self) -> Iterator[ResolvedImage]:
        return iter(self._dependencies.values())

    def __getitem__(self, key: str) -> ResolvedImage:
        return self._dependencies[key]
A set of ResolvedImages.
Iterating over a dependency set yields the contained images in an arbitrary order. Indexing a dependency set yields the image with the specified name.
1181 def __init__(self, dependencies: Iterable[Image]): 1182 """Construct a new `DependencySet`. 1183 1184 The provided `dependencies` must be topologically sorted. 1185 """ 1186 self._dependencies: dict[str, ResolvedImage] = {} 1187 known_images = docker_images() 1188 for d in dependencies: 1189 image = ResolvedImage( 1190 image=d, 1191 dependencies=(self._dependencies[d0] for d0 in d.depends_on), 1192 ) 1193 image.acquired = image.spec() in known_images 1194 self._dependencies[d.name] = image
Construct a new DependencySet.
The provided dependencies must be topologically sorted.
1207 def acquire(self, max_retries: int | None = None) -> None: 1208 """Download or build all of the images in the dependency set that do not 1209 already exist locally. 1210 1211 Args: 1212 max_retries: Number of retries on failure. 1213 """ 1214 1215 # Only retry in CI runs since we struggle with flaky docker pulls there 1216 if not max_retries: 1217 max_retries = ( 1218 90 1219 if os.getenv("CI_WAITING_FOR_BUILD") 1220 else ( 1221 5 1222 if ui.env_is_truthy("CI") 1223 and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD") 1224 else 1 1225 ) 1226 ) 1227 assert max_retries > 0 1228 1229 deps_to_check = [dep for dep in self if dep.publish] 1230 deps_to_build = [dep for dep in self if not dep.publish] 1231 if len(deps_to_check): 1232 with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor: 1233 futures = [ 1234 executor.submit(dep.try_pull, max_retries) for dep in deps_to_check 1235 ] 1236 for dep, future in zip(deps_to_check, futures): 1237 try: 1238 if not future.result(): 1239 deps_to_build.append(dep) 1240 except Exception: 1241 deps_to_build.append(dep) 1242 1243 # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway 1244 if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"): 1245 expected_deps = [dep for dep in deps_to_build if dep.publish] 1246 if expected_deps: 1247 print( 1248 f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}" 1249 ) 1250 sys.exit(5) 1251 1252 prep = self._prepare_batch(deps_to_build) 1253 for dep in deps_to_build: 1254 dep.build(prep)
Download or build all of the images in the dependency set that do not already exist locally.
Args: max_retries: Number of retries on failure.
1256 def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None): 1257 """Ensure all publishable images in this dependency set exist on Docker 1258 Hub. 1259 1260 Images are pushed using their spec as their tag. 1261 1262 Args: 1263 pre_build: A callback to invoke with all dependency that are going 1264 to be built locally, invoked after their cargo build is 1265 done, but before the Docker images are build and 1266 uploaded to DockerHub. 1267 """ 1268 num_deps = len(list(self)) 1269 if not num_deps: 1270 deps_to_build = [] 1271 else: 1272 with ThreadPoolExecutor(max_workers=num_deps) as executor: 1273 futures = list( 1274 executor.map( 1275 lambda dep: (dep, not dep.is_published_if_necessary()), self 1276 ) 1277 ) 1278 1279 deps_to_build = [dep for dep, should_build in futures if should_build] 1280 1281 prep = self._prepare_batch(deps_to_build) 1282 if pre_build: 1283 pre_build(deps_to_build) 1284 lock = Lock() 1285 built_deps: set[str] = set([dep.name for dep in self]) - set( 1286 [dep.name for dep in deps_to_build] 1287 ) 1288 1289 def build_dep(dep): 1290 end_time = time.time() + 600 1291 while True: 1292 if time.time() > end_time: 1293 raise TimeoutError( 1294 f"Timed out in {dep.name} waiting for {[dep2.name for dep2 in dep.dependencies if dep2 not in built_deps]}" 1295 ) 1296 with lock: 1297 if all(dep2 in built_deps for dep2 in dep.dependencies): 1298 break 1299 time.sleep(0.01) 1300 for attempts_remaining in reversed(range(3)): 1301 try: 1302 dep.build(prep, push=dep.publish) 1303 with lock: 1304 built_deps.add(dep.name) 1305 break 1306 except Exception: 1307 if not dep.publish or attempts_remaining == 0: 1308 raise 1309 1310 if deps_to_build: 1311 with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor: 1312 futures = [executor.submit(build_dep, dep) for dep in deps_to_build] 1313 for future in as_completed(futures): 1314 future.result()
Ensure all publishable images in this dependency set exist on Docker Hub.
Images are pushed using their spec as their tag.
Args: pre_build: A callback to invoke with all dependencies that are going to be built locally, invoked after their cargo build is done, but before the Docker images are built and uploaded to DockerHub.
1316 def check(self) -> bool: 1317 """Check all publishable images in this dependency set exist on Docker 1318 Hub. Don't try to download or build them.""" 1319 num_deps = len(list(self)) 1320 if num_deps == 0: 1321 return True 1322 with ThreadPoolExecutor(max_workers=num_deps) as executor: 1323 results = list( 1324 executor.map(lambda dep: dep.is_published_if_necessary(), list(self)) 1325 ) 1326 return all(results)
Check all publishable images in this dependency set exist on Docker Hub. Don't try to download or build them.
class Repository:
    """A collection of mzbuild `Image`s.

    Creating a repository will walk the filesystem beneath `root` to
    automatically discover all contained `Image`s.

    Iterating over a repository yields the contained images in an arbitrary
    order.

    Args:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile to build the repository in.
        coverage: Whether to enable code coverage instrumentation.
        sanitizer: Whether to use a sanitizer (address, thread, leak, memory,
            none).
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.

    Attributes:
        images: A mapping from image name to `Image` for all contained images.
        compositions: A mapping from composition name to the directory
            containing its `mzcompose.py` file.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch = Arch.host(),
        profile: Profile = (
            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
        ),
        coverage: bool = False,
        sanitizer: Sanitizer = Sanitizer.none,
        image_registry: str = image_registry(),
        image_prefix: str = "",
    ):
        self.rd = RepositoryDetails(
            root,
            arch,
            profile,
            coverage,
            sanitizer,
            image_registry,
            image_prefix,
        )
        self.images: dict[str, Image] = {}
        self.compositions: dict[str, Path] = {}
        # Filter out some particularly massive ignored directories to keep
        # things snappy. Not required for correctness.
        ignored_dirs = {
            ".git",
            ".mypy_cache",
            "target",
            "target-ra",
            "target-xcompile",
            "mzdata",
            "node_modules",
            "venv",
        }
        for path, dirs, files in os.walk(self.root, topdown=True):
            # The Python support code under misc/python is not an image
            # source; skip the whole subtree.
            if path == str(root / "misc"):
                dirs.remove("python")
            # Filter with a list comprehension rather than set subtraction so
            # the walk order stays deterministic (sets have arbitrary order).
            dirs[:] = [d for d in dirs if d not in ignored_dirs]
            if "mzbuild.yml" in files:
                image = Image(self.rd, Path(path))
                if not image.name:
                    raise ValueError(f"config at {path} missing name")
                if image.name in self.images:
                    raise ValueError(f"image {image.name} exists twice")
                self.images[image.name] = image
            if "mzcompose.py" in files:
                name = Path(path).name
                if name in self.compositions:
                    raise ValueError(f"composition {name} exists twice")
                self.compositions[name] = Path(path)

        # Validate dependencies.
        for image in self.images.values():
            for d in image.depends_on:
                if d not in self.images:
                    raise ValueError(
                        f"image {image.name} depends on non-existent image {d}"
                    )

    @staticmethod
    def install_arguments(parser: argparse.ArgumentParser) -> None:
        """Install options to configure a repository into an argparse parser.

        This function installs the following options:

        * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the
          `profile` repository attribute.
        * The `--coverage` boolean option to control the `coverage` repository
          attribute.

        Use `Repository.from_arguments` to construct a repository from the
        parsed command-line arguments.
        """
        build_mode = parser.add_mutually_exclusive_group()
        build_mode.add_argument(
            "--dev",
            action="store_true",
            help="build Rust binaries with the dev profile",
        )
        build_mode.add_argument(
            "--release",
            action="store_true",
            help="build Rust binaries with the release profile (default)",
        )
        build_mode.add_argument(
            "--optimized",
            action="store_true",
            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
        )
        parser.add_argument(
            "--coverage",
            help="whether to enable code coverage compilation flags",
            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
            action="store_true",
        )
        parser.add_argument(
            "--sanitizer",
            help="whether to enable a sanitizer",
            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
            type=Sanitizer,
            choices=Sanitizer,
        )
        parser.add_argument(
            "--arch",
            default=Arch.host(),
            help="the CPU architecture to build for",
            type=Arch,
            choices=Arch,
        )
        parser.add_argument(
            "--image-registry",
            default=image_registry(),
            help="the Docker image registry to pull images from and push images to",
        )
        parser.add_argument(
            "--image-prefix",
            default="",
            help="a prefix to apply to all Docker image names",
        )

    @classmethod
    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
        """Construct a repository from command-line arguments.

        The provided namespace must contain the options installed by
        `Repository.install_arguments`.
        """
        if args.release:
            profile = Profile.RELEASE
        elif args.optimized:
            profile = Profile.OPTIMIZED
        elif args.dev:
            profile = Profile.DEV
        else:
            # No explicit profile flag: fall back to the environment-driven
            # default (mirrors the `Repository.__init__` default).
            profile = (
                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
            )

        return cls(
            root,
            profile=profile,
            coverage=args.coverage,
            sanitizer=args.sanitizer,
            image_registry=args.image_registry,
            image_prefix=args.image_prefix,
            arch=args.arch,
        )

    @property
    def root(self) -> Path:
        """The path to the root directory for the repository."""
        return self.rd.root

    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
        """Compute the dependency set necessary to build target images.

        The dependencies of `targets` will be crawled recursively until the
        complete set of transitive dependencies is determined or a circular
        dependency is discovered. The returned dependency set will be sorted
        in topological order.

        Raises:
            ValueError: A circular dependency was discovered in the images
                in the repository.
        """
        # Pre-fetch all crate input files in a single batched git call,
        # replacing ~118 individual subprocess pairs with one pair.
        self.rd.cargo_workspace.precompute_crate_inputs()
        # Pre-fetch all image context files in a single batched git call,
        # replacing ~41 individual subprocess pairs with one pair.
        self._precompute_image_context_files()

        resolved = OrderedDict()
        visiting = set()

        def visit(image: Image, path: list[str] | None = None) -> None:
            # `path` is the chain of image names leading to `image`, used to
            # render the circular-dependency diagram. Use None rather than a
            # mutable default argument.
            if path is None:
                path = []
            if image.name in resolved:
                return
            if image.name in visiting:
                diagram = " -> ".join(path + [image.name])
                raise ValueError(f"circular dependency in mzbuild: {diagram}")

            visiting.add(image.name)
            # Visit dependencies in sorted order for deterministic output.
            for d in sorted(image.depends_on):
                visit(self.images[d], path + [image.name])
            resolved[image.name] = image

        for target_image in sorted(targets, key=lambda image: image.name):
            visit(target_image)

        return DependencySet(resolved.values())

    def _precompute_image_context_files(self) -> None:
        """Pre-fetch all image context files in a single batched git call.

        This replaces ~41 individual pairs of git subprocess calls (one per
        image) with a single pair, then partitions the results by image path.
        """
        root = self.rd.root
        # Use paths relative to root for git specs and partitioning, since
        # git --relative outputs paths relative to cwd (root). Image paths
        # may be absolute when MZ_ROOT is an absolute path.
        image_rel_paths = sorted(
            set(str(img.path.relative_to(root)) for img in self.images.values())
        )
        specs = [f"{p}/**" for p in image_rel_paths]

        # The well-known hash of git's empty tree; diffing against it lists
        # every tracked file under the specs.
        empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        diff_files = spawn.capture(
            ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"]
            + specs,
            cwd=root,
        )
        # Also pick up untracked (but not ignored) files.
        ls_files = spawn.capture(
            ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs,
            cwd=root,
        )
        all_files = set(
            f for f in (diff_files + ls_files).split("\0") if f.strip() != ""
        )

        # Partition files by image path (longest match first for nested paths)
        image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths}
        sorted_paths = sorted(image_rel_paths, key=len, reverse=True)
        for f in all_files:
            for ip in sorted_paths:
                if f.startswith(ip + "/"):
                    image_file_map[ip].add(f)
                    break

        for img in self.images.values():
            rel = str(img.path.relative_to(root))
            img._context_files_cache = image_file_map.get(rel, set())

    def __iter__(self) -> Iterator[Image]:
        return iter(self.images.values())
A collection of mzbuild Images.
Creating a repository will walk the filesystem beneath root to
automatically discover all contained Images.
Iterating over a repository yields the contained images in an arbitrary order.
Args: root: The path to the root of the repository. arch: The CPU architecture to build for. profile: What profile to build the repository in. coverage: Whether to enable code coverage instrumentation. sanitizer: Whether to use a sanitizer (address, thread, leak, memory, none). image_registry: The Docker image registry to pull images from and push images to. image_prefix: A prefix to apply to all Docker image names.
Attributes:
images: A mapping from image name to Image for all contained images.
compositions: A mapping from composition name to the directory containing its mzcompose.py file.
def __init__(
    self,
    root: Path,
    arch: Arch = Arch.host(),
    profile: Profile = (
        Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
    ),
    coverage: bool = False,
    sanitizer: Sanitizer = Sanitizer.none,
    image_registry: str = image_registry(),
    image_prefix: str = "",
):
    """Discover all images and compositions beneath `root`.

    Raises:
        ValueError: An image config is missing a name, an image or
            composition name is duplicated, or an image depends on an
            image that does not exist.
    """
    self.rd = RepositoryDetails(
        root,
        arch,
        profile,
        coverage,
        sanitizer,
        image_registry,
        image_prefix,
    )
    self.images: dict[str, Image] = {}
    self.compositions: dict[str, Path] = {}
    # Filter out some particularly massive ignored directories to keep
    # things snappy. Not required for correctness.
    ignored_dirs = {
        ".git",
        ".mypy_cache",
        "target",
        "target-ra",
        "target-xcompile",
        "mzdata",
        "node_modules",
        "venv",
    }
    for path, dirs, files in os.walk(self.root, topdown=True):
        # The Python support code under misc/python is not an image
        # source; skip the whole subtree.
        if path == str(root / "misc"):
            dirs.remove("python")
        # Filter with a list comprehension rather than set subtraction so
        # the walk order stays deterministic (sets have arbitrary order).
        dirs[:] = [d for d in dirs if d not in ignored_dirs]
        if "mzbuild.yml" in files:
            image = Image(self.rd, Path(path))
            if not image.name:
                raise ValueError(f"config at {path} missing name")
            if image.name in self.images:
                raise ValueError(f"image {image.name} exists twice")
            self.images[image.name] = image
        if "mzcompose.py" in files:
            name = Path(path).name
            if name in self.compositions:
                raise ValueError(f"composition {name} exists twice")
            self.compositions[name] = Path(path)

    # Validate dependencies.
    for image in self.images.values():
        for d in image.depends_on:
            if d not in self.images:
                raise ValueError(
                    f"image {image.name} depends on non-existent image {d}"
                )
@staticmethod
def install_arguments(parser: argparse.ArgumentParser) -> None:
    """Install options to configure a repository into an argparse parser.

    The installed options are:

    * The mutually-exclusive `--dev`/`--optimized`/`--release` flags, which
      select the `profile` repository attribute.
    * The `--coverage` boolean flag, which sets the `coverage` repository
      attribute.
    * The `--sanitizer`, `--arch`, `--image-registry`, and `--image-prefix`
      options for the remaining repository attributes.

    Use `Repository.from_arguments` to construct a repository from the
    parsed command-line arguments.
    """
    profile_flags = parser.add_mutually_exclusive_group()
    # The three profile flags share everything but their name and help text.
    for flag, description in [
        ("--dev", "build Rust binaries with the dev profile"),
        ("--release", "build Rust binaries with the release profile (default)"),
        (
            "--optimized",
            "build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)",
        ),
    ]:
        profile_flags.add_argument(flag, action="store_true", help=description)
    parser.add_argument(
        "--coverage",
        action="store_true",
        default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
        help="whether to enable code coverage compilation flags",
    )
    parser.add_argument(
        "--sanitizer",
        type=Sanitizer,
        choices=Sanitizer,
        default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
        help="whether to enable a sanitizer",
    )
    parser.add_argument(
        "--arch",
        type=Arch,
        choices=Arch,
        default=Arch.host(),
        help="the CPU architecture to build for",
    )
    parser.add_argument(
        "--image-registry",
        default=image_registry(),
        help="the Docker image registry to pull images from and push images to",
    )
    parser.add_argument(
        "--image-prefix",
        default="",
        help="a prefix to apply to all Docker image names",
    )
Install options to configure a repository into an argparse parser.
This function installs the following options:
- The mutually-exclusive --dev/--optimized/--release options to control the profile repository attribute.
- The --coverage boolean option to control the coverage repository attribute.
Use Repository.from_arguments to construct a repository from the
parsed command-line arguments.
@classmethod
def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
    """Construct a repository from command-line arguments.

    The provided namespace must contain the options installed by
    `Repository.install_arguments`.
    """
    # The first explicitly-requested profile flag wins; if none were given,
    # fall back to the environment-driven default.
    for requested, chosen in (
        (args.release, Profile.RELEASE),
        (args.optimized, Profile.OPTIMIZED),
        (args.dev, Profile.DEV),
    ):
        if requested:
            profile = chosen
            break
    else:
        profile = (
            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
        )

    return cls(
        root,
        profile=profile,
        coverage=args.coverage,
        sanitizer=args.sanitizer,
        image_registry=args.image_registry,
        image_prefix=args.image_prefix,
        arch=args.arch,
    )
Construct a repository from command-line arguments.
The provided namespace must contain the options installed by
Repository.install_arguments.
@property
def root(self) -> Path:
    """The path to the root directory for the repository."""
    # The path is stored on the shared RepositoryDetails record.
    return self.rd.root
The path to the root directory for the repository.
def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
    """Compute the dependency set necessary to build target images.

    The dependencies of `targets` will be crawled recursively until the
    complete set of transitive dependencies is determined or a circular
    dependency is discovered. The returned dependency set will be sorted
    in topological order.

    Raises:
        ValueError: A circular dependency was discovered in the images
            in the repository.
    """
    # Pre-fetch all crate input files in a single batched git call,
    # replacing ~118 individual subprocess pairs with one pair.
    self.rd.cargo_workspace.precompute_crate_inputs()
    # Pre-fetch all image context files in a single batched git call,
    # replacing ~41 individual subprocess pairs with one pair.
    self._precompute_image_context_files()

    resolved = OrderedDict()
    visiting = set()

    def visit(image: Image, path: list[str] | None = None) -> None:
        # `path` is the chain of image names leading to `image`, used to
        # render the circular-dependency diagram. Use None rather than a
        # mutable default argument (the original `path=[]` anti-pattern).
        if path is None:
            path = []
        if image.name in resolved:
            return
        if image.name in visiting:
            diagram = " -> ".join(path + [image.name])
            raise ValueError(f"circular dependency in mzbuild: {diagram}")

        visiting.add(image.name)
        # Visit dependencies in sorted order for deterministic output.
        for d in sorted(image.depends_on):
            visit(self.images[d], path + [image.name])
        resolved[image.name] = image

    for target_image in sorted(targets, key=lambda image: image.name):
        visit(target_image)

    return DependencySet(resolved.values())
Compute the dependency set necessary to build target images.
The dependencies of targets will be crawled recursively until the
complete set of transitive dependencies is determined or a circular
dependency is discovered. The returned dependency set will be sorted
in topological order.
Raises: ValueError: A circular dependency was discovered in the images in the repository.
def publish_multiarch_images(
    tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Publishes a set of docker images under a given tag.

    For each image, assembles a multi-arch manifest from the per-arch
    entries in `dependency_sets` and pushes it to the primary registry
    and, when a GHCR token is available, to GitHub Container Registry.
    Finally annotates the Buildkite build with the published tag.
    """
    # Manifests at these tags are always re-pushed, even when a manifest
    # with the same name already exists.
    always_push_tags = ("latest", "unstable")
    if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"):
        spawn.runv(
            [
                "docker",
                "login",
                "ghcr.io",
                "-u",
                "materialize-bot",
                "--password-stdin",
            ],
            stdin=ghcr_token.encode(),
        )
    # Each element of the zip is the same logical image built per-arch.
    for images in zip(*dependency_sets):
        names = set(image.image.name for image in images)
        assert len(names) == 1, "dependency sets did not contain identical images"
        name = images[0].image.docker_name(tag)
        if tag in always_push_tags or not is_docker_image_pushed(name):
            spawn.runv(
                [
                    "docker",
                    "manifest",
                    "create",
                    name,
                    *(image.spec() for image in images),
                ]
            )
            spawn.runv(["docker", "manifest", "push", name])

        ghcr_name = f"{GHCR_PREFIX}{name}"
        if ghcr_token and (
            tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name)
        ):
            spawn.runv(
                [
                    "docker",
                    "manifest",
                    "create",
                    ghcr_name,
                    *(f"{GHCR_PREFIX}{image.spec()}" for image in images),
                ]
            )
            spawn.runv(["docker", "manifest", "push", ghcr_name])
    # Fixed typo in the user-facing Buildkite message: "Nofifying".
    print(f"--- Notifying for tag {tag}")
    markdown = f"""Pushed images with Docker tag `{tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{tag}",
        ],
        stdin=markdown.encode(),
    )
Publishes a set of docker images under a given tag.
def tag_multiarch_images(
    new_tag: str, previous_tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Retags a set of already-published docker images under a new tag.

    For each image, pulls the image published at `previous_tag`, retags it
    as `new_tag`, and pushes it. Finally annotates the Buildkite build with
    the new tag. (The original docstring was a copy-paste from
    `publish_multiarch_images`.)
    """
    for images in zip(*dependency_sets):
        names = set(image.image.name for image in images)
        assert len(names) == 1, "dependency sets did not contain identical images"
        new_name = images[0].image.docker_name(new_tag)

        # The `mz` image doesn't have tagged images to retag.
        if images[0].image.name == "mz":
            continue

        previous_name = images[0].image.docker_name(previous_tag)
        spawn.runv(["docker", "pull", previous_name])
        spawn.runv(["docker", "tag", previous_name, new_name])
        spawn.runv(["docker", "push", new_name])
    # Fixed typo in the user-facing Buildkite message: "Nofifying".
    print(f"--- Notifying for tag {new_tag}")
    markdown = f"""Pushed images with Docker tag `{new_tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{new_tag}",
        ],
        stdin=markdown.encode(),
    )
Retags a set of already-published docker images: pulls each image at a previous tag and pushes it under a new tag.