misc.python.materialize.mzbuild
The implementation of the mzbuild system for Docker images.
For an overview of what mzbuild is and why it exists, see the user-facing documentation.
1# Copyright Materialize, Inc. and contributors. All rights reserved. 2# 3# Use of this software is governed by the Business Source License 4# included in the LICENSE file at the root of this repository. 5# 6# As of the Change Date specified in that file, in accordance with 7# the Business Source License, use of this software will be governed 8# by the Apache License, Version 2.0. 9 10"""The implementation of the mzbuild system for Docker images. 11 12For an overview of what mzbuild is and why it exists, see the [user-facing 13documentation][user-docs]. 14 15[user-docs]: https://github.com/MaterializeInc/materialize/blob/main/doc/developer/mzbuild.md 16""" 17 18import argparse 19import base64 20import collections 21import fcntl 22import hashlib 23import io 24import json 25import multiprocessing 26import os 27import platform 28import re 29import selectors 30import shutil 31import stat 32import subprocess 33import sys 34import time 35from collections import OrderedDict 36from collections.abc import Callable, Iterable, Iterator, Sequence 37from concurrent.futures import ThreadPoolExecutor, as_completed 38from enum import Enum, auto 39from functools import cache 40from pathlib import Path 41from tempfile import TemporaryFile 42from threading import Lock 43from typing import IO, Any, cast 44 45import requests 46import yaml 47from requests.auth import HTTPBasicAuth 48 49from materialize import MZ_ROOT, buildkite, cargo, git, rustc_flags, spawn, ui, xcompile 50from materialize.docker import image_registry 51from materialize.rustc_flags import Sanitizer 52from materialize.xcompile import Arch, target 53 54GHCR_PREFIX = "ghcr.io/materializeinc/" 55 56 57class RustIncrementalBuildFailure(Exception): 58 pass 59 60 61def run_and_detect_rust_incremental_build_failure( 62 cmd: list[str], 63 cwd: str | Path, 64 env: dict[str, str] | None = None, 65) -> subprocess.CompletedProcess: 66 """This function is complex since it prints out each line immediately to 67 stdout/stderr, but 
still records them at the same time so that we can scan 68 for known incremental build failures.""" 69 stdout_result = io.StringIO() 70 stderr_result = io.StringIO() 71 base_env = env if env is not None else os.environ 72 p = subprocess.Popen( 73 cmd, 74 stdout=subprocess.PIPE, 75 stderr=subprocess.PIPE, 76 text=True, 77 bufsize=1, 78 env={**base_env, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"}, 79 ) 80 81 sel = selectors.DefaultSelector() 82 sel.register(p.stdout, selectors.EVENT_READ) # type: ignore 83 sel.register(p.stderr, selectors.EVENT_READ) # type: ignore 84 assert p.stdout is not None 85 assert p.stderr is not None 86 os.set_blocking(p.stdout.fileno(), False) 87 os.set_blocking(p.stderr.fileno(), False) 88 running = True 89 while running: 90 for key, val in sel.select(): 91 output = io.StringIO() 92 running = False 93 while True: 94 new_output = key.fileobj.read(1024) # type: ignore 95 if not new_output: 96 break 97 output.write(new_output) 98 contents = output.getvalue() 99 output.close() 100 if not contents: 101 continue 102 # Keep running as long as stdout or stderr have any content 103 running = True 104 if key.fileobj is p.stdout: 105 print( 106 contents, 107 end="", 108 flush=True, 109 ) 110 stdout_result.write(contents) 111 else: 112 print( 113 contents, 114 end="", 115 file=sys.stderr, 116 flush=True, 117 ) 118 stderr_result.write(contents) 119 p.wait() 120 retcode = p.poll() 121 assert retcode is not None 122 stdout_contents = stdout_result.getvalue() 123 stdout_result.close() 124 stderr_contents = stderr_result.getvalue() 125 stderr_result.close() 126 if retcode: 127 incremental_build_failure_msgs = [ 128 "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs", 129 "Found unstable fingerprints for", 130 "ld.lld: error: undefined symbol", 131 "signal: 11, SIGSEGV", 132 ] 133 combined = stdout_contents + stderr_contents 134 if any(msg in combined for msg in incremental_build_failure_msgs): 135 raise 
RustIncrementalBuildFailure() 136 137 raise subprocess.CalledProcessError( 138 retcode, p.args, output=stdout_contents, stderr=stderr_contents 139 ) 140 return subprocess.CompletedProcess( 141 p.args, retcode, stdout_contents, stderr_contents 142 ) 143 144 145class Fingerprint(bytes): 146 """A SHA-1 hash of the inputs to an `Image`. 147 148 The string representation uses base32 encoding to distinguish mzbuild 149 fingerprints from Git's hex encoded SHA-1 hashes while still being 150 URL safe. 151 """ 152 153 def __str__(self) -> str: 154 return base64.b32encode(self).decode() 155 156 157class Profile(Enum): 158 RELEASE = auto() 159 OPTIMIZED = auto() 160 DEV = auto() 161 162 163class RepositoryDetails: 164 """Immutable details about a `Repository`. 165 166 Used internally by mzbuild. 167 168 Attributes: 169 root: The path to the root of the repository. 170 arch: The CPU architecture to build for. 171 profile: What profile the repository is being built with. 172 coverage: Whether the repository has code coverage instrumentation 173 enabled. 174 sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none) 175 cargo_workspace: The `cargo.Workspace` associated with the repository. 176 image_registry: The Docker image registry to pull images from and push 177 images to. 178 image_prefix: A prefix to apply to all Docker image names. 
179 """ 180 181 def __init__( 182 self, 183 root: Path, 184 arch: Arch, 185 profile: Profile, 186 coverage: bool, 187 sanitizer: Sanitizer, 188 image_registry: str, 189 image_prefix: str, 190 ): 191 self.root = root 192 self.arch = arch 193 self.profile = profile 194 self.coverage = coverage 195 self.sanitizer = sanitizer 196 self.cargo_workspace = cargo.Workspace(root) 197 self.image_registry = image_registry 198 self.image_prefix = image_prefix 199 200 def build( 201 self, 202 subcommand: str, 203 rustflags: list[str], 204 channel: str | None = None, 205 extra_env: dict[str, str] = {}, 206 ) -> list[str]: 207 """Start a build invocation for the configured architecture.""" 208 return xcompile.cargo( 209 arch=self.arch, 210 channel=channel, 211 subcommand=subcommand, 212 rustflags=rustflags, 213 extra_env=extra_env, 214 ) 215 216 def tool(self, name: str) -> list[str]: 217 """Start a binutils tool invocation for the configured architecture.""" 218 if platform.system() != "Linux": 219 # We can't use the local tools from macOS to build a Linux executable 220 return ["bin/ci-builder", "run", "stable", name] 221 # If we're on Linux, trust that the tools are installed instead of 222 # loading the slow ci-builder. If you don't have compilation tools 223 # installed you can still run `bin/ci-builder run stable 224 # bin/mzcompose ...`, and most likely the Cargo build will already 225 # fail earlier if you don't have compilation tools installed and 226 # run without the ci-builder. 227 return [name] 228 229 def cargo_target_dir(self) -> Path: 230 """Determine the path to the target directory for Cargo.""" 231 return self.root / "target-xcompile" / xcompile.target(self.arch) 232 233 def rewrite_builder_path_for_host(self, path: Path) -> Path: 234 """Rewrite a path that is relative to the target directory inside the 235 builder to a path that is relative to the target directory on the host. 
def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Positive answers are remembered in `_known_docker_images` and appended to
    `KNOWN_DOCKER_IMAGES_FILE`, so only the first lookup of an image pays for
    the network request. Negative answers are not cached.

    Note that this operation requires a rather slow network request.

    Args:
        name: The image reference, optionally suffixed with `:tag`. The tag
            defaults to `latest`.

    Returns:
        True if the image was found. False if it was not found or if the
        check itself raised an error (the error is printed).
    """
    global _known_docker_images

    if _known_docker_images is None:
        with _known_docker_images_lock:
            # Re-check now that we hold the lock: another thread may have
            # populated the cache while we were waiting, and reloading would
            # only repeat the file read and replace the shared set.
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = {line.strip() for line in f}

    if name in _known_docker_images:
        return True

    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    try:
        if dockerhub_username and dockerhub_token:
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
                timeout=10,
            )
        else:
            # Without credentials, fetch a pull-scoped token first.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
                timeout=10,
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
                timeout=10,
            )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: a failed check is reported as "not pushed".
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
def chmod_x(path: Path) -> None:
    """Make `path` executable for every class of user that can read it."""
    # Technique from https://stackoverflow.com/a/30463972/1122351: each read
    # bit sits two positions above the matching execute bit.
    current = os.stat(path).st_mode
    readable_bits = current & 0o444
    os.chmod(path, current | (readable_bits >> 2))
class Copy(PreImage):
    """A `PreImage` action that copies files into the image's directory.

    The `source`, `destination` and `matching` settings are read from the
    mzbuild config; see doc/developer/mzbuild.md for an explanation of the
    user-facing parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        super().__init__(rd, path)

        source = config.pop("source", None)
        if source is None:
            raise ValueError("mzbuild config is missing 'source' argument")
        self.source = source

        destination = config.pop("destination", None)
        if destination is None:
            raise ValueError("mzbuild config is missing 'destination' argument")
        self.destination = destination

        # Every file under `source` is copied unless a pattern is given.
        self.matching = config.pop("matching", "*")

    def run(self, prep: Any) -> None:
        """Copy every input file to the destination, creating directories."""
        super().run(prep)
        source_root = self.rd.root / self.source
        target_root = self.path / self.destination
        for relative in self.inputs():
            target = target_root / relative
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(source_root / relative, target)

    def inputs(self) -> set[str]:
        """Return the files under `source` selected by the `matching` glob."""
        source_root = self.rd.root / self.source
        return set(git.expand_globs(source_root, self.matching))
def extra(self) -> str:
    """Return the build configuration as a sorted, comma-separated flag list.

    Cargo images depend on the release mode and whether coverage or a
    sanitizer is enabled, so each of those contributes one flag.

    Returns:
        For example `"coverage,release"`, or the empty string when none of
        the options apply.
    """
    flags: list[str] = []
    # `list += str` would extend the list with the string's individual
    # characters, so each flag must be appended as a whole word.
    # NOTE(review): this changes the returned string for every non-default
    # configuration; presumably fingerprints derived from it change once as
    # a result. Confirm that is acceptable.
    if self.rd.profile == Profile.RELEASE:
        flags.append("release")
    if self.rd.profile == Profile.OPTIMIZED:
        flags.append("optimized")
    if self.rd.coverage:
        flags.append("coverage")
    if self.rd.sanitizer != Sanitizer.none:
        flags.append(self.rd.sanitizer.value)
    flags.sort()
    return ",".join(flags)
def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
    """Read the `cargo-build` settings out of `config`.

    `bin`, `example` and `features` may each be a single value or a list;
    they are always stored as lists.

    Raises:
        ValueError: Neither a `bin` nor an `example` target was configured.
    """
    super().__init__(rd, path)

    def as_list(value: Any) -> list[Any]:
        # Lists are kept as the very same object; anything else is wrapped.
        if isinstance(value, list):
            return value
        return [value]

    self.bins = as_list(config.pop("bin", []))
    self.examples = as_list(config.pop("example", []))
    self.strip = config.pop("strip", True)
    self.extract = config.pop("extract", {})
    self.features = as_list(config.pop("features", []))

    if not self.bins and not self.examples:
        raise ValueError("mzbuild config is missing pre-build target")
@classmethod
def prepare_batch(cls, instances: list["PreImage"]) -> dict[str, Any]:
    """Build the targets of all given `CargoBuild` actions in one Cargo run.

    Args:
        instances: The `CargoBuild` actions to prepare.

    Returns:
        An empty dict if there are no instances. Otherwise a dict whose
        `"cargo"` entry holds the captured output of re-running the build
        with `--message-format=json`.
    """
    super().prepare_batch(instances)

    if not instances:
        return {}

    # Building all binaries and examples in the same `cargo build` command
    # allows Cargo to link in parallel with other work, which can
    # meaningfully speed up builds.

    rd: RepositoryDetails | None = None
    builds = cast(list[CargoBuild], instances)
    bins: set[str] = set()
    examples: set[str] = set()
    features: set[str] = set()
    for build in builds:
        if not rd:
            rd = build.rd
        bins.update(build.bins)
        examples.update(build.examples)
        features.update(build.features)
    assert rd

    # Set iteration order for strings differs between interpreter runs, so
    # sort to keep the banner and the cargo command line reproducible.
    ui.section(f"Common build for: {', '.join(sorted(bins | examples))}")

    cargo_build = cls.generate_cargo_build_command(
        rd, sorted(bins), sorted(examples), sorted(features) or None
    )

    run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

    # Re-run with JSON-formatted messages and capture the output so we can
    # later analyze the build artifacts in `run`. This should be nearly
    # instantaneous since we just compiled above with the same crates and
    # features. (We don't want to do the compile above with JSON-formatted
    # messages because it wouldn't be human readable.)
    json_output = spawn.capture(
        cargo_build + ["--message-format=json"],
        cwd=rd.root,
    )
    return {"cargo": json_output}
These are just 686 # indexes that speed up launching a debugger against the binary, 687 # and we're happy to have slower debugger start up in exchange for 688 # smaller binaries. Plus the sections have been obsoleted by a 689 # `.debug_names` section in DWARF 5, and so debugger support for 690 # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway. 691 # See: https://github.com/rust-lang/rust/issues/46034 692 spawn.runv( 693 [ 694 *self.rd.tool("objcopy"), 695 "-R", 696 ".debug_pubnames", 697 "-R", 698 ".debug_pubtypes", 699 exe_path, 700 ], 701 cwd=self.rd.root, 702 ) 703 704 for bin in self.bins: 705 src_path = self.rd.cargo_target_dir() / cargo_profile / bin 706 copy(src_path, bin) 707 for example in self.examples: 708 src_path = ( 709 self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example 710 ) 711 copy(src_path, Path("examples") / example) 712 713 if self.extract: 714 cargo_build_json_output = build_output["cargo"] 715 716 target_dir = self.rd.cargo_target_dir() 717 for line in cargo_build_json_output.split("\n"): 718 if line.strip() == "" or not line.startswith("{"): 719 continue 720 message = json.loads(line) 721 if message["reason"] != "build-script-executed": 722 continue 723 out_dir = self.rd.rewrite_builder_path_for_host( 724 Path(message["out_dir"]) 725 ) 726 if not out_dir.is_relative_to(target_dir): 727 # Some crates are built for both the host and the target. 728 # Ignore the built-for-host out dir. 
729 continue 730 # parse the package name from a package_id that looks like one of: 731 # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0 732 # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0 733 # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0 734 # file:///path/to/my-package#0.1.0 735 package_id = message["package_id"] 736 if "@" in package_id: 737 package = package_id.split("@")[0].split("#")[-1] 738 else: 739 package = message["package_id"].split("#")[0].split("/")[-1] 740 for src, dst in self.extract.get(package, {}).items(): 741 spawn.runv(["cp", "-R", out_dir / src, self.path / dst]) 742 743 self.acquired = True 744 745 def run(self, prep: dict[str, Any]) -> None: 746 super().run(prep) 747 self.build(prep) 748 749 @cache 750 def inputs(self) -> set[str]: 751 deps = set() 752 753 for bin in self.bins: 754 crate = self.rd.cargo_workspace.crate_for_bin(bin) 755 deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate) 756 for example in self.examples: 757 crate = self.rd.cargo_workspace.crate_for_example(example) 758 deps |= self.rd.cargo_workspace.transitive_path_dependencies( 759 crate, dev=True 760 ) 761 762 inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs()) 763 return inputs 764 765 766class Image: 767 """A Docker image whose build and dependencies are managed by mzbuild. 768 769 An image corresponds to a directory in a repository that contains a 770 `mzbuild.yml` file. This directory is called an "mzbuild context." 771 772 Attributes: 773 name: The name of the image. 774 publish: Whether the image should be pushed to Docker Hub. 775 depends_on: The names of the images upon which this image depends. 776 root: The path to the root of the associated `Repository`. 777 path: The path to the directory containing the `mzbuild.yml` 778 configuration file. 779 pre_images: Optional actions to perform before running `docker build`. 
780 build_args: An optional list of --build-arg to pass to the dockerfile 781 """ 782 783 _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)") 784 785 _context_files_cache: set[str] | None 786 787 def __init__(self, rd: RepositoryDetails, path: Path): 788 self.rd = rd 789 self.path = path 790 self._context_files_cache = None 791 self.pre_images: list[PreImage] = [] 792 with open(self.path / "mzbuild.yml") as f: 793 data = yaml.safe_load(f) 794 self.name: str = data.pop("name") 795 self.publish: bool = data.pop("publish", True) 796 self.description: str | None = data.pop("description", None) 797 self.mainline: bool = data.pop("mainline", True) 798 for pre_image in data.pop("pre-image", []): 799 typ = pre_image.pop("type", None) 800 if typ == "cargo-build": 801 self.pre_images.append(CargoBuild(self.rd, self.path, pre_image)) 802 elif typ == "copy": 803 self.pre_images.append(Copy(self.rd, self.path, pre_image)) 804 else: 805 raise ValueError( 806 f"mzbuild config in {self.path} has unknown pre-image type" 807 ) 808 self.build_args = data.pop("build-args", {}) 809 810 if re.search(r"[^A-Za-z0-9\-]", self.name): 811 raise ValueError( 812 f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed" 813 ) 814 815 self.depends_on: list[str] = [] 816 with open(self.path / "Dockerfile", "rb") as f: 817 for line in f: 818 match = self._DOCKERFILE_MZFROM_RE.match(line) 819 if match: 820 self.depends_on.append(match.group(1).decode()) 821 822 def sync_description(self) -> None: 823 """Sync the description to Docker Hub if the image is publishable 824 and a README.md file exists.""" 825 826 if not self.publish: 827 ui.say(f"{self.name} is not publishable") 828 return 829 830 readme_path = self.path / "README.md" 831 has_readme = readme_path.exists() 832 if not has_readme: 833 ui.say(f"{self.name} has no README.md or description") 834 return 835 836 docker_config = os.getenv("DOCKER_CONFIG") 837 spawn.runv( 838 [ 839 "docker", 840 
"pushrm", 841 f"--file={readme_path}", 842 *([f"--config={docker_config}/config.json"] if docker_config else []), 843 *([f"--short={self.description}"] if self.description else []), 844 self.docker_name(), 845 ] 846 ) 847 848 def docker_name(self, tag: str | None = None) -> str: 849 """Return the name of the image on Docker Hub at the given tag.""" 850 name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}" 851 if tag: 852 name += f":{tag}" 853 return name 854 855 856class ResolvedImage: 857 """An `Image` whose dependencies have been resolved. 858 859 Attributes: 860 image: The underlying `Image`. 861 acquired: Whether the image is available locally. 862 dependencies: A mapping from dependency name to `ResolvedImage` for 863 each of the images that `image` depends upon. 864 """ 865 866 def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]): 867 self.image = image 868 self.acquired = False 869 self.dependencies = {} 870 for d in dependencies: 871 self.dependencies[d.name] = d 872 873 def __repr__(self) -> str: 874 return f"ResolvedImage<{self.spec()}>" 875 876 @property 877 def name(self) -> str: 878 """The name of the underlying image.""" 879 return self.image.name 880 881 @property 882 def publish(self) -> bool: 883 """Whether the underlying image should be pushed to Docker Hub.""" 884 return self.image.publish 885 886 @cache 887 def spec(self) -> str: 888 """Return the "spec" for the image. 889 890 A spec is the unique identifier for the image given its current 891 fingerprint. It is a valid Docker Hub name. 892 """ 893 return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}") 894 895 def write_dockerfile(self) -> IO[bytes]: 896 """Render the Dockerfile without mzbuild directives. 
897 898 Returns: 899 file: A handle to a temporary file containing the adjusted 900 Dockerfile.""" 901 with open(self.image.path / "Dockerfile", "rb") as f: 902 lines = f.readlines() 903 f = TemporaryFile() 904 for line in lines: 905 match = Image._DOCKERFILE_MZFROM_RE.match(line) 906 if match: 907 image = match.group(1).decode() 908 spec = self.dependencies[image].spec() 909 line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line) 910 f.write(line) 911 f.seek(0) 912 return f 913 914 def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None: 915 """Build the image from source. 916 917 Requires that the caller has already acquired all dependencies and 918 prepared all `PreImage` actions via `PreImage.prepare_batch`. 919 """ 920 # Use a file lock to prevent parallel mzcompose processes from 921 # racing on git clean / copy / strip for the same image directory. 922 lock_dir = self.image.rd.root / "target" / "mzbuild-locks" 923 lock_dir.mkdir(parents=True, exist_ok=True) 924 profile = self.image.rd.profile.name.lower() 925 lock_path = lock_dir / f"{self.image.name}-{profile}.lock" 926 with open(lock_path, "w") as lock_file: 927 ui.say(f"Acquiring lock for {self.spec()}") 928 fcntl.flock(lock_file, fcntl.LOCK_EX) 929 try: 930 self._build_locked(prep, push) 931 finally: 932 fcntl.flock(lock_file, fcntl.LOCK_UN) 933 934 def _build_locked( 935 self, prep: dict[type[PreImage], Any], push: bool = False 936 ) -> None: 937 ui.section(f"Building {self.spec()}") 938 spawn.runv(["git", "clean", "-ffdX", self.image.path]) 939 940 for pre_image in self.image.pre_images: 941 pre_image.run(prep[type(pre_image)]) 942 build_args = { 943 **self.image.build_args, 944 "BUILD_PROFILE": self.image.rd.profile.name, 945 "ARCH_GCC": str(self.image.rd.arch), 946 "ARCH_GO": self.image.rd.arch.go_str(), 947 "CI_SANITIZER": str(self.image.rd.sanitizer), 948 } 949 f = self.write_dockerfile() 950 951 try: 952 spawn.capture(["docker", "buildx", "version"]) 953 
except subprocess.CalledProcessError: 954 if push: 955 print( 956 "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing" 957 ) 958 raise 959 print( 960 "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing" 961 ) 962 print("Falling back to docker build") 963 cmd: Sequence[str] = [ 964 "docker", 965 "build", 966 "-f", 967 "-", 968 *(f"--build-arg={k}={v}" for k, v in build_args.items()), 969 "-t", 970 self.spec(), 971 f"--platform=linux/{self.image.rd.arch.go_str()}", 972 str(self.image.path), 973 ] 974 else: 975 docker_tag = f"docker.io/{self.spec()}" 976 ghcr_tag = f"ghcr.io/materializeinc/{self.spec()}" 977 cmd: Sequence[str] = [ 978 "docker", 979 "buildx", 980 "build", 981 "--progress=plain", # less noisy 982 "-f", 983 "-", 984 *(f"--build-arg={k}={v}" for k, v in build_args.items()), 985 "-t", 986 docker_tag, 987 "-t", 988 ghcr_tag, 989 f"--platform=linux/{self.image.rd.arch.go_str()}", 990 str(self.image.path), 991 "--load", 992 ] 993 994 if token := os.getenv("GITHUB_GHCR_TOKEN"): 995 spawn.runv( 996 [ 997 "docker", 998 "login", 999 "ghcr.io", 1000 "-u", 1001 "materialize-bot", 1002 "--password-stdin", 1003 ], 1004 stdin=token.encode(), 1005 ) 1006 1007 spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer) 1008 1009 if push: 1010 # Push to both registries in parallel. With the docker driver, 1011 # the image is already in the local daemon after --load, so 1012 # docker push is the same mechanism buildx --push uses internally. 
1013 pending = [docker_tag, ghcr_tag] 1014 for sleep_time in [5, 10, 20, 40, 60, None]: 1015 procs = [ 1016 subprocess.Popen( 1017 ["docker", "push", tag], 1018 stdout=sys.stderr, 1019 stderr=sys.stderr, 1020 ) 1021 for tag in pending 1022 ] 1023 pending = [tag for tag, proc in zip(pending, procs) if proc.wait() != 0] 1024 if not pending: 1025 break 1026 if sleep_time is None: 1027 raise subprocess.CalledProcessError( 1028 1, ["docker", "push", pending[0]] 1029 ) 1030 print(f"docker push failed for {pending}, retrying in {sleep_time}s") 1031 time.sleep(sleep_time) 1032 1033 def try_pull(self, max_retries: int) -> bool: 1034 """Download the image if it does not exist locally. Returns whether it was found.""" 1035 command = ["docker", "pull"] 1036 # --quiet skips printing the progress bar, which does not display well in CI. 1037 if ui.env_is_truthy("CI"): 1038 command.append("--quiet") 1039 command.append(self.spec()) 1040 if not self.acquired: 1041 ui.header(f"Acquiring {self.spec()}") 1042 sleep_time = 1 1043 for retry in range(1, max_retries + 1): 1044 try: 1045 spawn.runv( 1046 command, 1047 stdin=subprocess.DEVNULL, 1048 stdout=sys.stderr.buffer, 1049 ) 1050 self.acquired = True 1051 break 1052 except subprocess.CalledProcessError: 1053 if retry < max_retries: 1054 # There seems to be no good way to tell what error 1055 # happened based on error code 1056 # (https://github.com/docker/cli/issues/538) and we 1057 # want to print output directly to terminal. 
1058 if build := os.getenv("CI_WAITING_FOR_BUILD"): 1059 for retry in range(max_retries): 1060 try: 1061 build_status = buildkite.get_build_status(build) 1062 except subprocess.CalledProcessError: 1063 time.sleep(sleep_time) 1064 sleep_time = min(sleep_time * 2, 10) 1065 break 1066 print(f"Build {build} status: {build_status}") 1067 if build_status == "failed": 1068 print( 1069 f"Build {build} has been marked as failed, exiting hard" 1070 ) 1071 sys.exit(1) 1072 elif build_status == "success": 1073 break 1074 assert ( 1075 build_status == "pending" 1076 ), f"Unknown build status {build_status}" 1077 time.sleep(1) 1078 else: 1079 print(f"Retrying in {sleep_time}s ...") 1080 time.sleep(sleep_time) 1081 sleep_time = min(sleep_time * 2, 10) 1082 continue 1083 else: 1084 break 1085 return self.acquired 1086 1087 def is_published_if_necessary(self) -> bool: 1088 """Report whether the image exists on DockerHub & GHCR if it is publishable.""" 1089 if not self.publish: 1090 return False 1091 spec = self.spec() 1092 if spec.startswith(GHCR_PREFIX): 1093 spec = spec.removeprefix(GHCR_PREFIX) 1094 ghcr_spec = f"{GHCR_PREFIX}{spec}" 1095 if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec): 1096 ui.say(f"{spec} already exists") 1097 return True 1098 return False 1099 1100 def run( 1101 self, 1102 args: list[str] = [], 1103 docker_args: list[str] = [], 1104 env: dict[str, str] = {}, 1105 ) -> None: 1106 """Run a command in the image. 1107 1108 Creates a container from the image and runs the command described by 1109 `args` in the image. 
1110 """ 1111 envs = [] 1112 for key, val in env.items(): 1113 envs.extend(["--env", f"{key}={val}"]) 1114 spawn.runv( 1115 [ 1116 "docker", 1117 "run", 1118 "--tty", 1119 "--rm", 1120 *envs, 1121 "--init", 1122 *docker_args, 1123 self.spec(), 1124 *args, 1125 ], 1126 ) 1127 1128 def list_dependencies(self, transitive: bool = False) -> set[str]: 1129 out = set() 1130 for dep in self.dependencies.values(): 1131 out.add(dep.name) 1132 if transitive: 1133 out |= dep.list_dependencies(transitive) 1134 return out 1135 1136 @cache 1137 def inputs(self, transitive: bool = False) -> set[str]: 1138 """List the files tracked as inputs to the image. 1139 1140 These files are used to compute the fingerprint for the image. See 1141 `ResolvedImage.fingerprint` for details. 1142 1143 Returns: 1144 inputs: A list of input files, relative to the root of the 1145 repository. 1146 """ 1147 if self.image._context_files_cache is not None: 1148 paths = set(self.image._context_files_cache) 1149 else: 1150 paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**")) 1151 if not paths: 1152 # While we could find an `mzbuild.yml` file for this service, expland_globs didn't 1153 # return any files that matched this service. At the very least, the `mzbuild.yml` 1154 # file itself should have been returned. We have a bug if paths is empty. 1155 raise AssertionError( 1156 f"{self.image.name} mzbuild exists but its files are unknown to git" 1157 ) 1158 for pre_image in self.image.pre_images: 1159 paths |= pre_image.inputs() 1160 if transitive: 1161 for dep in self.dependencies.values(): 1162 paths |= dep.inputs(transitive) 1163 return paths 1164 1165 @cache 1166 def fingerprint(self) -> Fingerprint: 1167 """Fingerprint the inputs to the image. 1168 1169 Compute the fingerprint of the image. Changing the contents of any of 1170 the files or adding or removing files to the image will change the 1171 fingerprint, as will modifying the inputs to any of its dependencies. 
1172 1173 The image considers all non-gitignored files in its mzbuild context to 1174 be inputs. If it has a pre-image action, that action may add additional 1175 inputs via `PreImage.inputs`. 1176 """ 1177 self_hash = hashlib.sha1() 1178 # When inputs come from precomputed sources (crate and image context 1179 # batching + resolved CargoPreImage paths), they are already individual 1180 # file paths from git. Skip the expensive expand_globs subprocess calls. 1181 inputs = self.inputs() 1182 if self.image._context_files_cache is not None: 1183 resolved_inputs = sorted(inputs) 1184 else: 1185 resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs))) 1186 for rel_path in resolved_inputs: 1187 abs_path = self.image.rd.root / rel_path 1188 file_hash = hashlib.sha1() 1189 raw_file_mode = os.lstat(abs_path).st_mode 1190 # Compute a simplified file mode using the same rules as Git. 1191 # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616 1192 if stat.S_ISLNK(raw_file_mode): 1193 file_mode = 0o120000 1194 elif raw_file_mode & stat.S_IXUSR: 1195 file_mode = 0o100755 1196 else: 1197 file_mode = 0o100644 1198 with open(abs_path, "rb") as f: 1199 file_hash.update(f.read()) 1200 self_hash.update(file_mode.to_bytes(2, byteorder="big")) 1201 self_hash.update(rel_path.encode()) 1202 self_hash.update(file_hash.digest()) 1203 self_hash.update(b"\0") 1204 1205 for pre_image in self.image.pre_images: 1206 self_hash.update(pre_image.extra().encode()) 1207 self_hash.update(b"\0") 1208 1209 self_hash.update(f"profile={self.image.rd.profile}".encode()) 1210 self_hash.update(f"arch={self.image.rd.arch}".encode()) 1211 self_hash.update(f"coverage={self.image.rd.coverage}".encode()) 1212 self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode()) 1213 # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet 1214 self_hash.update(b"mirror=ghcr") 
1215 1216 full_hash = hashlib.sha1() 1217 full_hash.update(self_hash.digest()) 1218 for dep in sorted(self.dependencies.values(), key=lambda d: d.name): 1219 full_hash.update(dep.name.encode()) 1220 full_hash.update(dep.fingerprint()) 1221 full_hash.update(b"\0") 1222 1223 return Fingerprint(full_hash.digest()) 1224 1225 1226class DependencySet: 1227 """A set of `ResolvedImage`s. 1228 1229 Iterating over a dependency set yields the contained images in an arbitrary 1230 order. Indexing a dependency set yields the image with the specified name. 1231 """ 1232 1233 def __init__(self, dependencies: Iterable[Image]): 1234 """Construct a new `DependencySet`. 1235 1236 The provided `dependencies` must be topologically sorted. 1237 """ 1238 self._dependencies: dict[str, ResolvedImage] = {} 1239 dependencies = list(dependencies) 1240 known_images = docker_images() if dependencies else set() 1241 for d in dependencies: 1242 image = ResolvedImage( 1243 image=d, 1244 dependencies=(self._dependencies[d0] for d0 in d.depends_on), 1245 ) 1246 image.acquired = image.spec() in known_images 1247 self._dependencies[d.name] = image 1248 1249 def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]: 1250 pre_images = collections.defaultdict(list) 1251 for image in images: 1252 for pre_image in image.image.pre_images: 1253 pre_images[type(pre_image)].append(pre_image) 1254 pre_image_prep = {} 1255 for cls, instances in pre_images.items(): 1256 pre_image = cast(PreImage, cls) 1257 pre_image_prep[cls] = pre_image.prepare_batch(instances) 1258 return pre_image_prep 1259 1260 def acquire(self, max_retries: int | None = None) -> None: 1261 """Download or build all of the images in the dependency set that do not 1262 already exist locally. 1263 1264 Args: 1265 max_retries: Number of retries on failure. 
1266 """ 1267 1268 # Only retry in CI runs since we struggle with flaky docker pulls there 1269 if not max_retries: 1270 max_retries = ( 1271 90 1272 if os.getenv("CI_WAITING_FOR_BUILD") 1273 else ( 1274 5 1275 if ui.env_is_truthy("CI") 1276 and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD") 1277 else 1 1278 ) 1279 ) 1280 assert max_retries > 0 1281 1282 deps_to_check = [dep for dep in self if dep.publish] 1283 deps_to_build = [dep for dep in self if not dep.publish] 1284 if len(deps_to_check): 1285 with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor: 1286 futures = [ 1287 executor.submit(dep.try_pull, max_retries) for dep in deps_to_check 1288 ] 1289 for dep, future in zip(deps_to_check, futures): 1290 try: 1291 if not future.result(): 1292 deps_to_build.append(dep) 1293 except Exception: 1294 deps_to_build.append(dep) 1295 1296 # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway 1297 if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"): 1298 expected_deps = [dep for dep in deps_to_build if dep.publish] 1299 if expected_deps: 1300 print( 1301 f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}" 1302 ) 1303 sys.exit(128) 1304 1305 prep = self._prepare_batch(deps_to_build) 1306 for dep in deps_to_build: 1307 dep.build(prep) 1308 1309 def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None): 1310 """Ensure all publishable images in this dependency set exist on Docker 1311 Hub. 1312 1313 Images are pushed using their spec as their tag. 1314 1315 Args: 1316 pre_build: A callback to invoke with all dependency that are going 1317 to be built locally, invoked after their cargo build is 1318 done, but before the Docker images are build and 1319 uploaded to DockerHub. 
1320 """ 1321 num_deps = len(list(self)) 1322 if not num_deps: 1323 deps_to_build = [] 1324 else: 1325 with ThreadPoolExecutor(max_workers=num_deps) as executor: 1326 futures = list( 1327 executor.map( 1328 lambda dep: (dep, not dep.is_published_if_necessary()), self 1329 ) 1330 ) 1331 1332 deps_to_build = [dep for dep, should_build in futures if should_build] 1333 1334 prep = self._prepare_batch(deps_to_build) 1335 if pre_build: 1336 pre_build(deps_to_build) 1337 lock = Lock() 1338 built_deps: set[str] = set([dep.name for dep in self]) - set( 1339 [dep.name for dep in deps_to_build] 1340 ) 1341 1342 def build_dep(dep): 1343 end_time = time.time() + 600 1344 while True: 1345 if time.time() > end_time: 1346 raise TimeoutError( 1347 f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}" 1348 ) 1349 with lock: 1350 if all(dep2 in built_deps for dep2 in dep.dependencies): 1351 break 1352 time.sleep(0.01) 1353 for attempts_remaining in reversed(range(3)): 1354 try: 1355 dep.build(prep, push=dep.publish) 1356 with lock: 1357 built_deps.add(dep.name) 1358 break 1359 except Exception: 1360 if not dep.publish or attempts_remaining == 0: 1361 raise 1362 1363 if deps_to_build: 1364 with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor: 1365 futures = [executor.submit(build_dep, dep) for dep in deps_to_build] 1366 for future in as_completed(futures): 1367 future.result() 1368 1369 def check(self) -> bool: 1370 """Check all publishable images in this dependency set exist on Docker 1371 Hub. 
Don't try to download or build them.""" 1372 num_deps = len(list(self)) 1373 if num_deps == 0: 1374 return True 1375 with ThreadPoolExecutor(max_workers=num_deps) as executor: 1376 results = list( 1377 executor.map(lambda dep: dep.is_published_if_necessary(), list(self)) 1378 ) 1379 return all(results) 1380 1381 def __iter__(self) -> Iterator[ResolvedImage]: 1382 return iter(self._dependencies.values()) 1383 1384 def __getitem__(self, key: str) -> ResolvedImage: 1385 return self._dependencies[key] 1386 1387 1388class Repository: 1389 """A collection of mzbuild `Image`s. 1390 1391 Creating a repository will walk the filesystem beneath `root` to 1392 automatically discover all contained `Image`s. 1393 1394 Iterating over a repository yields the contained images in an arbitrary 1395 order. 1396 1397 Args: 1398 root: The path to the root of the repository. 1399 arch: The CPU architecture to build for. 1400 profile: What profile to build the repository in. 1401 coverage: Whether to enable code coverage instrumentation. 1402 sanitizer: Whether to a sanitizer (address, thread, leak, memory, none) 1403 image_registry: The Docker image registry to pull images from and push 1404 images to. 1405 image_prefix: A prefix to apply to all Docker image names. 1406 1407 Attributes: 1408 images: A mapping from image name to `Image` for all contained images. 1409 compose_dirs: The set of directories containing a `mzcompose.py` file. 
1410 """ 1411 1412 def __init__( 1413 self, 1414 root: Path, 1415 arch: Arch = Arch.host(), 1416 profile: Profile = ( 1417 Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED 1418 ), 1419 coverage: bool = False, 1420 sanitizer: Sanitizer = Sanitizer.none, 1421 image_registry: str = image_registry(), 1422 image_prefix: str = "", 1423 ): 1424 self.rd = RepositoryDetails( 1425 root, 1426 arch, 1427 profile, 1428 coverage, 1429 sanitizer, 1430 image_registry, 1431 image_prefix, 1432 ) 1433 self.images: dict[str, Image] = {} 1434 self.compositions: dict[str, Path] = {} 1435 for rel_path_s in sorted( 1436 git.expand_globs(self.root, "**/mzbuild.yml", "**/mzcompose.py") 1437 ): 1438 rel_path = Path(rel_path_s) 1439 if rel_path.parts[:2] == ("misc", "python"): 1440 continue 1441 1442 parent = self.root / rel_path.parent 1443 if rel_path.name == "mzbuild.yml": 1444 image = Image(self.rd, parent) 1445 if not image.name: 1446 raise ValueError(f"config at {parent} missing name") 1447 if image.name in self.images: 1448 raise ValueError(f"image {image.name} exists twice") 1449 self.images[image.name] = image 1450 elif rel_path.name == "mzcompose.py": 1451 name = parent.name 1452 if name in self.compositions: 1453 raise ValueError(f"composition {name} exists twice") 1454 self.compositions[name] = parent 1455 1456 # Validate dependencies. 1457 for image in self.images.values(): 1458 for d in image.depends_on: 1459 if d not in self.images: 1460 raise ValueError( 1461 f"image {image.name} depends on non-existent image {d}" 1462 ) 1463 1464 @staticmethod 1465 def install_arguments(parser: argparse.ArgumentParser) -> None: 1466 """Install options to configure a repository into an argparse parser. 1467 1468 This function installs the following options: 1469 1470 * The mutually-exclusive `--dev`/`--optimized`/`--release` options to control the 1471 `profile` repository attribute. 1472 * The `--coverage` boolean option to control the `coverage` repository 1473 attribute. 
1474 1475 Use `Repository.from_arguments` to construct a repository from the 1476 parsed command-line arguments. 1477 """ 1478 build_mode = parser.add_mutually_exclusive_group() 1479 build_mode.add_argument( 1480 "--dev", 1481 action="store_true", 1482 help="build Rust binaries with the dev profile", 1483 ) 1484 build_mode.add_argument( 1485 "--release", 1486 action="store_true", 1487 help="build Rust binaries with the release profile (default)", 1488 ) 1489 build_mode.add_argument( 1490 "--optimized", 1491 action="store_true", 1492 help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols)", 1493 ) 1494 parser.add_argument( 1495 "--coverage", 1496 help="whether to enable code coverage compilation flags", 1497 default=ui.env_is_truthy("CI_COVERAGE_ENABLED"), 1498 action="store_true", 1499 ) 1500 parser.add_argument( 1501 "--sanitizer", 1502 help="whether to enable a sanitizer", 1503 default=Sanitizer[os.getenv("CI_SANITIZER", "none")], 1504 type=Sanitizer, 1505 choices=Sanitizer, 1506 ) 1507 1508 def _parse_arch(s: str) -> Arch: 1509 try: 1510 return Arch(s) 1511 except ValueError: 1512 valid = ", ".join(m.value for m in Arch) 1513 raise argparse.ArgumentTypeError( 1514 f"invalid arch: {s!r} (choose from {valid})" 1515 ) 1516 1517 parser.add_argument( 1518 "--arch", 1519 default=Arch.host(), 1520 help="the CPU architecture to build for", 1521 type=_parse_arch, 1522 metavar="{" + ",".join(m.value for m in Arch) + "}", 1523 ) 1524 parser.add_argument( 1525 "--image-registry", 1526 default=image_registry(), 1527 help="the Docker image registry to pull images from and push images to", 1528 ) 1529 parser.add_argument( 1530 "--image-prefix", 1531 default="", 1532 help="a prefix to apply to all Docker image names", 1533 ) 1534 1535 @classmethod 1536 def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository": 1537 """Construct a repository from command-line arguments. 
1538 1539 The provided namespace must contain the options installed by 1540 `Repository.install_arguments`. 1541 """ 1542 if args.release: 1543 profile = Profile.RELEASE 1544 elif args.optimized: 1545 profile = Profile.OPTIMIZED 1546 elif args.dev: 1547 profile = Profile.DEV 1548 else: 1549 profile = ( 1550 Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED 1551 ) 1552 1553 return cls( 1554 root, 1555 profile=profile, 1556 coverage=args.coverage, 1557 sanitizer=args.sanitizer, 1558 image_registry=args.image_registry, 1559 image_prefix=args.image_prefix, 1560 arch=args.arch, 1561 ) 1562 1563 @property 1564 def root(self) -> Path: 1565 """The path to the root directory for the repository.""" 1566 return self.rd.root 1567 1568 def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet: 1569 """Compute the dependency set necessary to build target images. 1570 1571 The dependencies of `targets` will be crawled recursively until the 1572 complete set of transitive dependencies is determined or a circular 1573 dependency is discovered. The returned dependency set will be sorted 1574 in topological order. 1575 1576 Raises: 1577 ValueError: A circular dependency was discovered in the images 1578 in the repository. 1579 """ 1580 # Pre-fetch all crate input files in a single batched git call, 1581 # replacing ~118 individual subprocess pairs with one pair. 1582 self.rd.cargo_workspace.precompute_crate_inputs() 1583 # Pre-fetch all image context files in a single batched git call, 1584 # replacing ~41 individual subprocess pairs with one pair. 
1585 self._precompute_image_context_files() 1586 1587 resolved = OrderedDict() 1588 visiting = set() 1589 1590 def visit(image: Image, path: list[str] = []) -> None: 1591 if image.name in resolved: 1592 return 1593 if image.name in visiting: 1594 diagram = " -> ".join(path + [image.name]) 1595 raise ValueError(f"circular dependency in mzbuild: {diagram}") 1596 1597 visiting.add(image.name) 1598 for d in sorted(image.depends_on): 1599 visit(self.images[d], path + [image.name]) 1600 resolved[image.name] = image 1601 1602 for target_image in sorted(targets, key=lambda image: image.name): 1603 visit(target_image) 1604 1605 return DependencySet(resolved.values()) 1606 1607 def _precompute_image_context_files(self) -> None: 1608 """Pre-fetch all image context files in a single batched git call. 1609 1610 This replaces ~41 individual pairs of git subprocess calls (one per 1611 image) with a single pair, then partitions the results by image path. 1612 """ 1613 root = self.rd.root 1614 # Use paths relative to root for git specs and partitioning, since 1615 # git --relative outputs paths relative to cwd (root). Image paths 1616 # may be absolute when MZ_ROOT is an absolute path. 
1617 image_rel_paths = sorted( 1618 set(str(img.path.relative_to(root)) for img in self.images.values()) 1619 ) 1620 specs = [f"{p}/**" for p in image_rel_paths] 1621 1622 empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" 1623 diff_files = spawn.capture( 1624 ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"] 1625 + specs, 1626 cwd=root, 1627 ) 1628 ls_files = spawn.capture( 1629 ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs, 1630 cwd=root, 1631 ) 1632 all_files = set( 1633 f for f in (diff_files + ls_files).split("\0") if f.strip() != "" 1634 ) 1635 1636 # Partition files by image path (longest match first for nested paths) 1637 image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths} 1638 sorted_paths = sorted(image_rel_paths, key=len, reverse=True) 1639 for f in all_files: 1640 for ip in sorted_paths: 1641 if f.startswith(ip + "/"): 1642 image_file_map[ip].add(f) 1643 break 1644 1645 for img in self.images.values(): 1646 rel = str(img.path.relative_to(root)) 1647 img._context_files_cache = image_file_map.get(rel, set()) 1648 1649 def __iter__(self) -> Iterator[Image]: 1650 return iter(self.images.values()) 1651 1652 1653def publish_multiarch_images( 1654 tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]] 1655) -> None: 1656 """Publishes a set of docker images under a given tag.""" 1657 always_push_tags = ("latest", "unstable") 1658 if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"): 1659 spawn.runv( 1660 [ 1661 "docker", 1662 "login", 1663 "ghcr.io", 1664 "-u", 1665 "materialize-bot", 1666 "--password-stdin", 1667 ], 1668 stdin=ghcr_token.encode(), 1669 ) 1670 for images in zip(*dependency_sets): 1671 names = set(image.image.name for image in images) 1672 assert len(names) == 1, "dependency sets did not contain identical images" 1673 name = images[0].image.docker_name(tag) 1674 if tag in always_push_tags or not is_docker_image_pushed(name): 1675 spawn.run_with_retries( 1676 lambda: ( 
1677 spawn.runv( 1678 [ 1679 "docker", 1680 "manifest", 1681 "create", 1682 "--amend", 1683 name, 1684 *(image.spec() for image in images), 1685 ] 1686 ), 1687 spawn.runv(["docker", "manifest", "push", name]), 1688 ) 1689 ) 1690 1691 ghcr_name = f"{GHCR_PREFIX}{name}" 1692 if ghcr_token and ( 1693 tag in always_push_tags or not is_ghcr_image_pushed(ghcr_name) 1694 ): 1695 spawn.run_with_retries( 1696 lambda: ( 1697 spawn.runv( 1698 [ 1699 "docker", 1700 "manifest", 1701 "create", 1702 "--amend", 1703 ghcr_name, 1704 *(f"{GHCR_PREFIX}{image.spec()}" for image in images), 1705 ] 1706 ), 1707 spawn.runv(["docker", "manifest", "push", ghcr_name]), 1708 ) 1709 ) 1710 print(f"--- Nofifying for tag {tag}") 1711 markdown = f"""Pushed images with Docker tag `{tag}`""" 1712 spawn.runv( 1713 [ 1714 "buildkite-agent", 1715 "annotate", 1716 "--style=info", 1717 f"--context=build-tags-{tag}", 1718 ], 1719 stdin=markdown.encode(), 1720 )
Common base class for all non-exit exceptions.
62def run_and_detect_rust_incremental_build_failure( 63 cmd: list[str], 64 cwd: str | Path, 65 env: dict[str, str] | None = None, 66) -> subprocess.CompletedProcess: 67 """This function is complex since it prints out each line immediately to 68 stdout/stderr, but still records them at the same time so that we can scan 69 for known incremental build failures.""" 70 stdout_result = io.StringIO() 71 stderr_result = io.StringIO() 72 base_env = env if env is not None else os.environ 73 p = subprocess.Popen( 74 cmd, 75 stdout=subprocess.PIPE, 76 stderr=subprocess.PIPE, 77 text=True, 78 bufsize=1, 79 env={**base_env, "CARGO_TERM_COLOR": "always", "RUSTC_COLOR": "always"}, 80 ) 81 82 sel = selectors.DefaultSelector() 83 sel.register(p.stdout, selectors.EVENT_READ) # type: ignore 84 sel.register(p.stderr, selectors.EVENT_READ) # type: ignore 85 assert p.stdout is not None 86 assert p.stderr is not None 87 os.set_blocking(p.stdout.fileno(), False) 88 os.set_blocking(p.stderr.fileno(), False) 89 running = True 90 while running: 91 for key, val in sel.select(): 92 output = io.StringIO() 93 running = False 94 while True: 95 new_output = key.fileobj.read(1024) # type: ignore 96 if not new_output: 97 break 98 output.write(new_output) 99 contents = output.getvalue() 100 output.close() 101 if not contents: 102 continue 103 # Keep running as long as stdout or stderr have any content 104 running = True 105 if key.fileobj is p.stdout: 106 print( 107 contents, 108 end="", 109 flush=True, 110 ) 111 stdout_result.write(contents) 112 else: 113 print( 114 contents, 115 end="", 116 file=sys.stderr, 117 flush=True, 118 ) 119 stderr_result.write(contents) 120 p.wait() 121 retcode = p.poll() 122 assert retcode is not None 123 stdout_contents = stdout_result.getvalue() 124 stdout_result.close() 125 stderr_contents = stderr_result.getvalue() 126 stderr_result.close() 127 if retcode: 128 incremental_build_failure_msgs = [ 129 "panicked at compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs", 
130 "Found unstable fingerprints for", 131 "ld.lld: error: undefined symbol", 132 "signal: 11, SIGSEGV", 133 ] 134 combined = stdout_contents + stderr_contents 135 if any(msg in combined for msg in incremental_build_failure_msgs): 136 raise RustIncrementalBuildFailure() 137 138 raise subprocess.CalledProcessError( 139 retcode, p.args, output=stdout_contents, stderr=stderr_contents 140 ) 141 return subprocess.CompletedProcess( 142 p.args, retcode, stdout_contents, stderr_contents 143 )
This function is complex since it prints out each line immediately to stdout/stderr, but still records them at the same time so that we can scan for known incremental build failures.
class Fingerprint(bytes):
    """The raw SHA-1 digest of everything that feeds into an `Image`.

    Converting a fingerprint to `str` yields its base32 encoding. That keeps
    mzbuild fingerprints visually distinct from Git's hex-encoded SHA-1
    hashes, and the result remains safe to embed in URLs.
    """

    def __str__(self) -> str:
        encoded: bytes = base64.b32encode(self)
        return encoded.decode("utf-8")
A SHA-1 hash of the inputs to an Image.
The string representation uses base32 encoding to distinguish mzbuild fingerprints from Git's hex encoded SHA-1 hashes while still being URL safe.
class RepositoryDetails:
    """Immutable details about a `Repository`.

    Used internally by mzbuild.

    Attributes:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile the repository is being built with.
        coverage: Whether the repository has code coverage instrumentation
            enabled.
        sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
        cargo_workspace: The `cargo.Workspace` associated with the repository.
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.
    """

    def __init__(
        self,
        root: Path,
        arch: Arch,
        profile: Profile,
        coverage: bool,
        sanitizer: Sanitizer,
        image_registry: str,
        image_prefix: str,
    ):
        self.root = root
        self.arch = arch
        self.profile = profile
        self.coverage = coverage
        self.sanitizer = sanitizer
        self.cargo_workspace = cargo.Workspace(root)
        self.image_registry = image_registry
        self.image_prefix = image_prefix

    def build(
        self,
        subcommand: str,
        rustflags: list[str],
        channel: str | None = None,
        extra_env: dict[str, str] | None = None,
    ) -> list[str]:
        """Start a build invocation for the configured architecture.

        Args:
            subcommand: The Cargo subcommand to invoke.
            rustflags: The Rust compiler flags to pass along.
            channel: An optional toolchain channel, forwarded unchanged.
            extra_env: Extra environment variables for the invocation;
                omitted or `None` means no extra variables.

        Returns:
            The command line produced by `xcompile.cargo`.
        """
        return xcompile.cargo(
            arch=self.arch,
            channel=channel,
            subcommand=subcommand,
            rustflags=rustflags,
            # Hand over a fresh dict rather than a shared mutable default.
            extra_env=extra_env if extra_env is not None else {},
        )

    def tool(self, name: str) -> list[str]:
        """Start a binutils tool invocation for the configured architecture."""
        if platform.system() != "Linux":
            # We can't use the local tools from macOS to build a Linux executable
            return ["bin/ci-builder", "run", "stable", name]
        # If we're on Linux, trust that the tools are installed instead of
        # loading the slow ci-builder. If you don't have compilation tools
        # installed you can still run `bin/ci-builder run stable
        # bin/mzcompose ...`, and most likely the Cargo build will already
        # fail earlier if you don't have compilation tools installed and
        # run without the ci-builder.
        return [name]

    def cargo_target_dir(self) -> Path:
        """Determine the path to the target directory for Cargo."""
        return self.root / "target-xcompile" / xcompile.target(self.arch)

    def rewrite_builder_path_for_host(self, path: Path) -> Path:
        """Rewrite a path that is relative to the target directory inside the
        builder to a path that is relative to the target directory on the host.

        If path is not relative to the target directory inside the builder,
        it is returned unchanged.
        """
        builder_target_dir = Path("/mnt/build") / xcompile.target(self.arch)
        try:
            return self.cargo_target_dir() / path.relative_to(builder_target_dir)
        except ValueError:
            return path
Immutable details about a Repository.
Used internally by mzbuild.
Attributes:
root: The path to the root of the repository.
arch: The CPU architecture to build for.
profile: What profile the repository is being built with.
coverage: Whether the repository has code coverage instrumentation
enabled.
sanitizer: Whether to use a sanitizer (address, hwaddress, cfi, thread, leak, memory, none)
cargo_workspace: The cargo.Workspace associated with the repository.
image_registry: The Docker image registry to pull images from and push
images to.
image_prefix: A prefix to apply to all Docker image names.
def __init__(
    self,
    root: Path,
    arch: Arch,
    profile: Profile,
    coverage: bool,
    sanitizer: Sanitizer,
    image_registry: str,
    image_prefix: str,
):
    """Store the repository configuration on the instance.

    Every argument is kept under an attribute of the same name. In
    addition, a `cargo.Workspace` is constructed for `root` and stored as
    `cargo_workspace`.
    """
    # Location of the repository and how its images are named and hosted.
    self.root = root
    self.image_registry = image_registry
    self.image_prefix = image_prefix

    # Build configuration.
    self.arch, self.profile = arch, profile
    self.coverage, self.sanitizer = coverage, sanitizer

    self.cargo_workspace = cargo.Workspace(root)
def build(
    self,
    subcommand: str,
    rustflags: list[str],
    channel: str | None = None,
    extra_env: dict[str, str] | None = None,
) -> list[str]:
    """Start a build invocation for the configured architecture.

    Args:
        subcommand: The Cargo subcommand to invoke.
        rustflags: The Rust compiler flags to pass along.
        channel: An optional toolchain channel, forwarded unchanged.
        extra_env: Extra environment variables for the invocation; omitted
            or `None` means no extra variables.

    Returns:
        The command line produced by `xcompile.cargo`.
    """
    return xcompile.cargo(
        arch=self.arch,
        channel=channel,
        subcommand=subcommand,
        rustflags=rustflags,
        # Hand over a fresh dict rather than a shared mutable default.
        extra_env=extra_env if extra_env is not None else {},
    )
Start a build invocation for the configured architecture.
def tool(self, name: str) -> list[str]:
    """Build the command prefix for running the binutils tool `name`.

    On Linux the tool is invoked directly. On any other platform it is run
    through `bin/ci-builder run stable`.
    """
    on_linux = platform.system() == "Linux"
    if on_linux:
        # Trust that the tools are installed locally rather than paying for
        # the slow ci-builder. Without local compilation tools the Cargo
        # build will most likely have failed earlier anyway, and running
        # `bin/ci-builder run stable bin/mzcompose ...` remains an option.
        return [name]
    # Local tools on macOS cannot produce a Linux executable.
    return ["bin/ci-builder", "run", "stable", name]
Start a binutils tool invocation for the configured architecture.
def cargo_target_dir(self) -> Path:
    """Return Cargo's target directory for the configured architecture.

    The directory is `target-xcompile/<target>` beneath the repository
    root, where `<target>` is whatever `xcompile.target` returns for
    `self.arch`.
    """
    xcompile_root = self.root / "target-xcompile"
    target_name = xcompile.target(self.arch)
    return xcompile_root / target_name
Determine the path to the target directory for Cargo.
def rewrite_builder_path_for_host(self, path: Path) -> Path:
    """Translate a builder-side target path into its host-side equivalent.

    Inside the builder the target directory is `/mnt/build/<target>`; on
    the host it is `cargo_target_dir()`. A path beneath the former is
    re-rooted under the latter. A path that is not beneath the builder's
    target directory is returned unchanged.
    """
    builder_root = Path("/mnt/build", xcompile.target(self.arch))
    try:
        host_root = self.cargo_target_dir()
        suffix = path.relative_to(builder_root)
    except ValueError:
        # `path` does not live under the builder's target directory.
        return path
    return host_root / suffix
Rewrite a path that is relative to the target directory inside the builder to a path that is relative to the target directory on the host.
If path is not relative to the target directory inside the builder, it is returned unchanged.
@cache
def docker_images() -> frozenset[str]:
    """Return the `repository:tag` names reported by `docker images`.

    The result is memoized, so the `docker` command is run at most once per
    process.
    """
    listing = spawn.capture(
        ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"]
    )
    image_names = listing.strip().split("\n")
    return frozenset(image_names)
List the Docker images available on the local machine.
def is_docker_image_pushed(name: str) -> bool:
    """Check whether the named image is pushed to Docker Hub.

    Note that this operation requires a rather slow network request.
    Positive results are remembered in memory and appended to
    `KNOWN_DOCKER_IMAGES_FILE`, so a name that was found once is answered
    without a network request afterwards.

    Args:
        name: The image name, optionally with a `:tag` suffix. Without a
            tag, `latest` is assumed.

    Returns:
        True if the image is known or the registry reports that it exists.
        False if it does not exist or if the check failed with an error.
    """
    global _known_docker_images

    if _known_docker_images is None:
        with _known_docker_images_lock:
            # Re-check under the lock: another thread may have loaded the
            # cache (and already added entries to it) while we were waiting.
            # Reloading here would throw those entries away.
            if _known_docker_images is None:
                if not KNOWN_DOCKER_IMAGES_FILE.exists():
                    _known_docker_images = set()
                else:
                    with KNOWN_DOCKER_IMAGES_FILE.open() as f:
                        _known_docker_images = {line.strip() for line in f}

    if name in _known_docker_images:
        return True

    if ":" not in name:
        image, tag = name, "latest"
    else:
        image, tag = name.rsplit(":", 1)

    dockerhub_username = os.getenv("DOCKERHUB_USERNAME")
    dockerhub_token = os.getenv("DOCKERHUB_ACCESS_TOKEN")

    exists: bool = False

    try:
        if dockerhub_username and dockerhub_token:
            # Authenticate with the configured Docker Hub credentials.
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                },
                auth=HTTPBasicAuth(dockerhub_username, dockerhub_token),
                timeout=10,
            )
        else:
            # No credentials: request a pull token without authenticating.
            token = requests.get(
                "https://auth.docker.io/token",
                params={
                    "service": "registry.docker.io",
                    "scope": f"repository:{image}:pull",
                },
                timeout=10,
            ).json()["token"]
            response = requests.head(
                f"https://registry-1.docker.io/v2/{image}/manifests/{tag}",
                headers={
                    "Accept": "application/vnd.docker.distribution.manifest.v2+json",
                    "Authorization": f"Bearer {token}",
                },
                timeout=10,
            )

        if response.status_code in (401, 429, 500, 502, 503, 504):
            # Fall back to 5x slower method
            proc = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = proc.returncode == 0
        else:
            exists = response.status_code == 200

    except Exception as e:
        # Best effort: any failure is reported and treated as "not pushed".
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as f:
                print(name, file=f)

    return exists
Check whether the named image is pushed to Docker Hub.
Note that this operation requires a rather slow network request.
def is_ghcr_image_pushed(name: str) -> bool:
    """Report whether ``name`` has a manifest on ghcr.io.

    Uses the same module-level set of known images (and the same
    ``KNOWN_DOCKER_IMAGES_FILE``) as `is_docker_image_pushed`; entries are
    keyed by the full ``name`` as passed in.

    Args:
        name: Image reference, with or without a leading ``ghcr.io/`` and
            with or without a trailing ``:tag`` (``latest`` is assumed when
            no tag is present).

    Returns:
        True if the manifest exists. False if it does not, or if the lookup
        raised; such errors are printed rather than propagated.
    """
    global _known_docker_images

    # Seed the in-memory cache from disk the first time it is needed.
    if _known_docker_images is None:
        with _known_docker_images_lock:
            if KNOWN_DOCKER_IMAGES_FILE.exists():
                with KNOWN_DOCKER_IMAGES_FILE.open() as known_file:
                    _known_docker_images = set(
                        entry.strip() for entry in known_file
                    )
            else:
                _known_docker_images = set()

    if name in _known_docker_images:
        return True

    # The registry API paths take the repository without the host prefix.
    repository_ref = name.removeprefix("ghcr.io/")
    image, separator, tag = repository_ref.rpartition(":")
    if not separator:
        image, tag = repository_ref, "latest"

    exists: bool = False

    try:
        token_reply = requests.get(
            "https://ghcr.io/token",
            params={
                "scope": f"repository:{image}:pull",
            },
            timeout=10,
        )
        bearer = token_reply.json()["token"]
        response = requests.head(
            f"https://ghcr.io/v2/{image}/manifests/{tag}",
            headers={"Authorization": f"Bearer {bearer}"},
            timeout=10,
        )

        if response.status_code not in (401, 429, 500, 502, 503, 504):
            exists = response.status_code == 200
        else:
            # Fall back to 5x slower method
            inspect = subprocess.run(
                ["docker", "manifest", "inspect", name],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env=dict(os.environ, DOCKER_CLI_EXPERIMENTAL="enabled"),
            )
            exists = inspect.returncode == 0

    except Exception as e:
        print(f"Error checking Docker image: {e}")
        return False

    if exists:
        # Record the hit both in memory and on disk.
        with _known_docker_images_lock:
            _known_docker_images.add(name)
            with KNOWN_DOCKER_IMAGES_FILE.open("a") as known_file:
                print(name, file=known_file)

    return exists
def chmod_x(path: Path) -> None:
    """Make `path` executable for every class of user that can read it.

    Each read permission bit (user/group/other) that is set is mirrored onto
    the matching execute bit; all other mode bits are left untouched.
    """
    # https://stackoverflow.com/a/30463972/1122351
    current_mode = os.stat(path).st_mode
    read_bits = current_mode & 0o444
    # Shifting the read bits right by two lands them on the execute bits.
    os.chmod(path, current_mode | (read_bits >> 2))
Set the executable bit on a file or directory.
class PreImage:
    """Base class for an action that runs before a Docker image is built.

    Args:
        rd: The `RepositoryDetails` for the repository.
        path: The path to the `Image` associated with this action.
    """

    def __init__(self, rd: RepositoryDetails, path: Path):
        self.path = path
        self.rd = rd

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> Any:
        """Do any work that can be shared by a whole batch of actions.

        Subclasses override this when handling several images at once is
        cheaper than handling them one by one. Whatever is returned here is
        later handed to `PreImage.run`; the base implementation returns
        nothing.
        """
        return None

    def run(self, prep: Any) -> None:
        """Carry out the action.

        Args:
            prep: The value produced by `prepare_batch`.
        """
        return None

    def inputs(self) -> set[str]:
        """Return the files that count as inputs to the action.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def extra(self) -> str:
        """Return extra data to mix into the fingerprint (empty here)."""
        return ""
An action to run before building a Docker image.
Args:
rd: The RepositoryDetails for the repository.
path: The path to the Image associated with this action.
@classmethod
def prepare_batch(cls, instances: list["PreImage"]) -> Any:
    """Do any work that can be shared by a whole batch of actions.

    Subclasses override this when handling several images at once is
    cheaper than handling them one by one. Whatever is returned here is
    later handed to `PreImage.run`; this implementation returns nothing.
    """
    return None
Prepare a batch of actions.
This is useful for PreImage actions that are more efficient when
their actions are applied to several images in bulk.
Returns an arbitrary output that is passed to PreImage.run.
def run(self, prep: Any) -> None:
    """Carry out the action; this implementation does nothing.

    Args:
        prep: The value produced by `prepare_batch`.
    """
    return None
Perform the action.
Args:
prep: Any prep work returned by prepare_batch.
class Copy(PreImage):
    """A `PreImage` action that copies files into the image's context.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        """Consume the `source`, `destination` and `matching` config keys.

        Raises:
            ValueError: If `source` or `destination` is absent (or None).
        """
        super().__init__(rd, path)

        # Keys are popped from `config`, so the caller's dict is mutated.
        self.source = config.pop("source", None)
        if self.source is None:
            raise ValueError("mzbuild config is missing 'source' argument")

        self.destination = config.pop("destination", None)
        if self.destination is None:
            raise ValueError("mzbuild config is missing 'destination' argument")

        # Without an explicit pattern every file under `source` matches.
        self.matching = config.pop("matching", "*")

    def run(self, prep: Any) -> None:
        """Copy each input file to the same relative path under the destination.

        Args:
            prep: The value produced by `prepare_batch`.
        """
        super().run(prep)
        for relative in self.inputs():
            target_path = self.path / self.destination / relative
            target_path.parent.mkdir(parents=True, exist_ok=True)
            origin_path = self.rd.root / self.source / relative
            shutil.copy(origin_path, target_path)

    def inputs(self) -> set[str]:
        """Return the files under the source directory that match the glob."""
        source_dir = self.rd.root / self.source
        matched = git.expand_globs(source_dir, self.matching)
        return set(matched)
A PreImage action which copies files from a directory.
See doc/developer/mzbuild.md for an explanation of the user-facing parameters.
def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
    """Consume the `source`, `destination` and `matching` config keys.

    Raises:
        ValueError: If `source` or `destination` is absent (or None).
    """
    super().__init__(rd, path)

    # Keys are popped from `config`, so the caller's dict is mutated.
    self.source = config.pop("source", None)
    if self.source is None:
        raise ValueError("mzbuild config is missing 'source' argument")

    self.destination = config.pop("destination", None)
    if self.destination is None:
        raise ValueError("mzbuild config is missing 'destination' argument")

    # Without an explicit pattern every file under `source` matches.
    self.matching = config.pop("matching", "*")
def run(self, prep: Any) -> None:
    """Copy each input file to the same relative path under the destination.

    Args:
        prep: The value produced by `prepare_batch`.
    """
    super().run(prep)
    for relative in self.inputs():
        target_path = self.path / self.destination / relative
        target_path.parent.mkdir(parents=True, exist_ok=True)
        origin_path = self.rd.root / self.source / relative
        shutil.copy(origin_path, target_path)
Perform the action.
Args:
prep: Any prep work returned by prepare_batch.
def inputs(self) -> set[str]:
    """Return the files under the source directory that match the glob."""
    source_dir = self.rd.root / self.source
    matched = git.expand_globs(source_dir, self.matching)
    return set(matched)
Return the files which are considered inputs to the action.
Inherited Members
class CargoPreImage(PreImage):
    """A `PreImage` action that uses Cargo."""

    @staticmethod
    @cache
    def _cargo_shared_inputs() -> frozenset[str]:
        """Resolve shared Cargo inputs once and cache the result.

        This expands the 'ci/builder' directory glob and filters out
        non-existent files like '.cargo/config', avoiding repeated
        git subprocess calls in fingerprint().
        """
        inputs: set[str] = set()
        inputs |= git.expand_globs(Path("."), "ci/builder/**")
        inputs.add("Cargo.toml")
        inputs.add("Cargo.lock")
        # Paths here are relative to the current working directory.
        if Path(".cargo/config").exists():
            inputs.add(".cargo/config")
        return frozenset(inputs)

    def inputs(self) -> set[str]:
        """Return a fresh mutable copy of the shared Cargo inputs."""
        return set(CargoPreImage._cargo_shared_inputs())

    def extra(self) -> str:
        """Return the build-mode flags that feed into the fingerprint.

        Returns:
            The applicable flag names ("release", "optimized", "coverage",
            and the sanitizer's value), sorted and joined with commas; the
            empty string when none apply.
        """
        # Cargo images depend on the release mode and whether
        # coverage/sanitizer is enabled.
        flags: list[str] = []
        # NOTE: each flag is appended as a whole string. `flags += "release"`
        # would extend the list with the individual characters, producing
        # values such as "a,e,e,e,l,r,s" instead of "release".
        if self.rd.profile == Profile.RELEASE:
            flags.append("release")
        if self.rd.profile == Profile.OPTIMIZED:
            flags.append("optimized")
        if self.rd.coverage:
            flags.append("coverage")
        if self.rd.sanitizer != Sanitizer.none:
            flags.append(self.rd.sanitizer.value)
        flags.sort()
        return ",".join(flags)
A PreImage action that uses Cargo.
def extra(self) -> str:
    """Return the build-mode flags that feed into the fingerprint.

    Returns:
        The applicable flag names ("release", "optimized", "coverage",
        and the sanitizer's value), sorted and joined with commas; the
        empty string when none apply.
    """
    # Cargo images depend on the release mode and whether
    # coverage/sanitizer is enabled.
    flags: list[str] = []
    # NOTE: each flag is appended as a whole string. `flags += "release"`
    # would extend the list with the individual characters, producing
    # values such as "a,e,e,e,l,r,s" instead of "release".
    if self.rd.profile == Profile.RELEASE:
        flags.append("release")
    if self.rd.profile == Profile.OPTIMIZED:
        flags.append("optimized")
    if self.rd.coverage:
        flags.append("coverage")
    if self.rd.sanitizer != Sanitizer.none:
        flags.append(self.rd.sanitizer.value)
    flags.sort()
    return ",".join(flags)
Returns additional data for incorporation in the fingerprint.
class CargoBuild(CargoPreImage):
    """A `PreImage` action that builds a single binary with Cargo.

    See doc/developer/mzbuild.md for an explanation of the user-facing
    parameters.
    """

    def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
        """Consume the `bin`, `example`, `strip`, `extract` and `features` keys.

        `bin`, `example` and `features` may each be a single value or a list;
        they are always stored as lists. Keys are popped from `config`.

        Raises:
            ValueError: If neither a bin nor an example is configured.
        """
        super().__init__(rd, path)
        bin = config.pop("bin", [])
        self.bins = bin if isinstance(bin, list) else [bin]
        example = config.pop("example", [])
        self.examples = example if isinstance(example, list) else [example]
        self.strip = config.pop("strip", True)
        self.extract = config.pop("extract", {})
        features = config.pop("features", [])
        self.features = features if isinstance(features, list) else [features]

        if len(self.bins) == 0 and len(self.examples) == 0:
            raise ValueError("mzbuild config is missing pre-build target")

    @staticmethod
    def generate_cargo_build_command(
        rd: RepositoryDetails,
        bins: list[str],
        examples: list[str],
        features: list[str] | None = None,
    ) -> list[str]:
        """Assemble the `cargo build` argument list for the given targets.

        Args:
            rd: Repository details supplying profile, arch, coverage and
                sanitizer settings as well as the Cargo workspace.
            bins: Binary targets, each added as `--bin`.
            examples: Example targets, each added as `--example`.
            features: Optional features, joined into one `--features=` flag.

        Returns:
            The command as a list of arguments.
        """
        # Coverage flags take precedence over sanitizer flags; with neither
        # enabled only `--cfg=tokio_unstable` is passed.
        rustflags = (
            rustc_flags.coverage
            if rd.coverage
            else (
                rustc_flags.sanitizer[rd.sanitizer]
                if rd.sanitizer != Sanitizer.none
                else ["--cfg=tokio_unstable"]
            )
        )
        # The conditional applies to the whole `[...] + sanitizer_cflags`
        # sum, so no C flags at all are set without a sanitizer.
        cflags = (
            [
                f"--target={target(rd.arch)}",
                f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
                "-fuse-ld=lld",
                f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
                f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
            ]
            + rustc_flags.sanitizer_cflags[rd.sanitizer]
            if rd.sanitizer != Sanitizer.none
            else []
        )
        extra_env = (
            {
                "CFLAGS": " ".join(cflags),
                "CXXFLAGS": " ".join(cflags),
                "LDFLAGS": " ".join(cflags),
                "CXXSTDLIB": "stdc++",
                "CC": "cc",
                "CXX": "c++",
                "CPP": "clang-cpp-18",
                "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
                "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
            }
            if rd.sanitizer != Sanitizer.none
            else {}
        )

        # NOTE(review): `rd.build` presumably returns the base cargo
        # invocation as a mutable list of arguments -- confirm against
        # `RepositoryDetails.build`.
        cargo_build = rd.build(
            "build", channel=None, rustflags=rustflags, extra_env=extra_env
        )

        # Collect the owning crate of every target so each is passed once as
        # `--package`.
        packages = set()
        for bin in bins:
            cargo_build.extend(["--bin", bin])
            packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
        for example in examples:
            cargo_build.extend(["--example", example])
            packages.add(rd.cargo_workspace.crate_for_example(example).name)
        cargo_build.extend(f"--package={p}" for p in packages)

        if rd.profile == Profile.RELEASE:
            cargo_build.append("--release")
        if rd.profile == Profile.OPTIMIZED:
            cargo_build.extend(["--profile", "optimized"])
        if rd.sanitizer != Sanitizer.none:
            # ASan doesn't work with jemalloc
            cargo_build.append("--no-default-features")
            # Uses more memory, so reduce the number of jobs
            cargo_build.extend(
                ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
            )
        if features:
            cargo_build.append(f"--features={','.join(features)}")

        return cargo_build

    @classmethod
    def prepare_batch(cls, instances: list["PreImage"]) -> dict[str, Any]:
        """Build the targets of all `instances` with one Cargo invocation.

        Returns:
            An empty dict when `instances` is empty; otherwise a dict whose
            "cargo" entry holds the captured output of re-running the build
            with `--message-format=json`.
        """
        super().prepare_batch(instances)

        if not instances:
            return {}

        # Building all binaries and examples in the same `cargo build` command
        # allows Cargo to link in parallel with other work, which can
        # meaningfully speed up builds.

        rd: RepositoryDetails | None = None
        builds = cast(list[CargoBuild], instances)
        bins = set()
        examples = set()
        features = set()
        for build in builds:
            # The repository details of the first instance are used for the
            # whole batch.
            if not rd:
                rd = build.rd
            bins.update(build.bins)
            examples.update(build.examples)
            features.update(build.features)
        assert rd

        ui.section(f"Common build for: {', '.join(bins | examples)}")

        cargo_build = cls.generate_cargo_build_command(
            rd, list(bins), list(examples), list(features) if features else None
        )

        run_and_detect_rust_incremental_build_failure(cargo_build, cwd=rd.root)

        # Re-run with JSON-formatted messages and capture the output so we can
        # later analyze the build artifacts in `run`. This should be nearly
        # instantaneous since we just compiled above with the same crates and
        # features. (We don't want to do the compile above with JSON-formatted
        # messages because it wouldn't be human readable.)
        json_output = spawn.capture(
            cargo_build + ["--message-format=json"],
            cwd=rd.root,
        )
        prep = {"cargo": json_output}

        return prep

    def build(self, build_output: dict[str, Any]) -> None:
        """Copy the built artifacts into the image's mzbuild context.

        Every configured binary and example is copied from the Cargo target
        directory and post-processed with `strip` or `objcopy`. When
        `extract` is configured, build-script output directories named in it
        are copied as well.

        Args:
            build_output: The dict returned by `prepare_batch`; its "cargo"
                entry is read only when `extract` is non-empty.
        """
        # Name of the profile subdirectory under the Cargo target directory.
        cargo_profile = (
            "release"
            if self.rd.profile == Profile.RELEASE
            else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
        )

        def copy(src: Path, relative_dst: Path) -> None:
            """Copy `src` to `relative_dst` in the context and slim it down."""
            exe_path = self.path / relative_dst
            exe_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, exe_path)

            if self.strip:
                # The debug information is large enough that it slows down CI,
                # since we're packaging these binaries up into Docker images and
                # shipping them around.
                spawn.runv(
                    [*self.rd.tool("strip"), "--strip-debug", exe_path],
                    cwd=self.rd.root,
                )
            else:
                # Even if we've been asked not to strip the binary, remove the
                # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
                # indexes that speed up launching a debugger against the binary,
                # and we're happy to have slower debugger start up in exchange for
                # smaller binaries. Plus the sections have been obsoleted by a
                # `.debug_names` section in DWARF 5, and so debugger support for
                # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
                # See: https://github.com/rust-lang/rust/issues/46034
                spawn.runv(
                    [
                        *self.rd.tool("objcopy"),
                        "-R",
                        ".debug_pubnames",
                        "-R",
                        ".debug_pubtypes",
                        exe_path,
                    ],
                    cwd=self.rd.root,
                )

        for bin in self.bins:
            src_path = self.rd.cargo_target_dir() / cargo_profile / bin
            copy(src_path, bin)
        for example in self.examples:
            src_path = (
                self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
            )
            copy(src_path, Path("examples") / example)

        if self.extract:
            cargo_build_json_output = build_output["cargo"]

            target_dir = self.rd.cargo_target_dir()
            for line in cargo_build_json_output.split("\n"):
                # Only lines that look like JSON objects are parsed.
                if line.strip() == "" or not line.startswith("{"):
                    continue
                message = json.loads(line)
                if message["reason"] != "build-script-executed":
                    continue
                out_dir = self.rd.rewrite_builder_path_for_host(
                    Path(message["out_dir"])
                )
                if not out_dir.is_relative_to(target_dir):
                    # Some crates are built for both the host and the target.
                    # Ignore the built-for-host out dir.
                    continue
                # parse the package name from a package_id that looks like one of:
                # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
                # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
                # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
                # file:///path/to/my-package#0.1.0
                package_id = message["package_id"]
                if "@" in package_id:
                    package = package_id.split("@")[0].split("#")[-1]
                else:
                    package = message["package_id"].split("#")[0].split("/")[-1]
                # `extract` maps package name -> {path in out dir: path in context}.
                for src, dst in self.extract.get(package, {}).items():
                    spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

        self.acquired = True

    def run(self, prep: dict[str, Any]) -> None:
        """Perform the action by delegating to `build`.

        Args:
            prep: The dict returned by `prepare_batch`.
        """
        super().run(prep)
        self.build(prep)

    @cache
    def inputs(self) -> set[str]:
        """Return the shared Cargo inputs plus the inputs of every path dependency.

        Dependencies of examples are resolved with `dev=True`. The result is
        memoized by `functools.cache`, keyed on `self`.
        """
        deps = set()

        for bin in self.bins:
            crate = self.rd.cargo_workspace.crate_for_bin(bin)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(crate)
        for example in self.examples:
            crate = self.rd.cargo_workspace.crate_for_example(example)
            deps |= self.rd.cargo_workspace.transitive_path_dependencies(
                crate, dev=True
            )

        inputs = super().inputs() | set(inp for dep in deps for inp in dep.inputs())
        return inputs
A PreImage action that builds a single binary with Cargo.
See doc/developer/mzbuild.md for an explanation of the user-facing parameters.
def __init__(self, rd: RepositoryDetails, path: Path, config: dict[str, Any]):
    """Consume the `bin`, `example`, `strip`, `extract` and `features` keys.

    `bin`, `example` and `features` may each be a single value or a list;
    they are always stored as lists. Keys are popped from `config`.

    Raises:
        ValueError: If neither a bin nor an example is configured.
    """
    super().__init__(rd, path)

    def as_list(value: Any) -> list[Any]:
        # A lone value is shorthand for a one-element list.
        return value if isinstance(value, list) else [value]

    self.bins = as_list(config.pop("bin", []))
    self.examples = as_list(config.pop("example", []))
    self.strip = config.pop("strip", True)
    self.extract = config.pop("extract", {})
    self.features = as_list(config.pop("features", []))

    if not self.bins and not self.examples:
        raise ValueError("mzbuild config is missing pre-build target")
@staticmethod
def generate_cargo_build_command(
    rd: RepositoryDetails,
    bins: list[str],
    examples: list[str],
    features: list[str] | None = None,
) -> list[str]:
    """Assemble the `cargo build` argument list for the given targets.

    Args:
        rd: Repository details supplying profile, arch, coverage and
            sanitizer settings as well as the Cargo workspace.
        bins: Binary targets, each added as `--bin`.
        examples: Example targets, each added as `--example`.
        features: Optional features, joined into one `--features=` flag.

    Returns:
        The command as a list of arguments.
    """
    # Coverage flags take precedence over sanitizer flags; with neither
    # enabled only `--cfg=tokio_unstable` is passed.
    rustflags = (
        rustc_flags.coverage
        if rd.coverage
        else (
            rustc_flags.sanitizer[rd.sanitizer]
            if rd.sanitizer != Sanitizer.none
            else ["--cfg=tokio_unstable"]
        )
    )
    # The conditional applies to the whole `[...] + sanitizer_cflags` sum,
    # so no C flags at all are set without a sanitizer.
    cflags = (
        [
            f"--target={target(rd.arch)}",
            f"--gcc-toolchain=/opt/x-tools/{target(rd.arch)}/",
            "-fuse-ld=lld",
            f"--sysroot=/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/sysroot",
            f"-L/opt/x-tools/{target(rd.arch)}/{target(rd.arch)}/lib64",
        ]
        + rustc_flags.sanitizer_cflags[rd.sanitizer]
        if rd.sanitizer != Sanitizer.none
        else []
    )
    extra_env = (
        {
            "CFLAGS": " ".join(cflags),
            "CXXFLAGS": " ".join(cflags),
            "LDFLAGS": " ".join(cflags),
            "CXXSTDLIB": "stdc++",
            "CC": "cc",
            "CXX": "c++",
            "CPP": "clang-cpp-18",
            "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER": "cc",
            "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER": "cc",
            "PATH": f"/sanshim:/opt/x-tools/{target(rd.arch)}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "TSAN_OPTIONS": "report_bugs=0",  # build-scripts fail
        }
        if rd.sanitizer != Sanitizer.none
        else {}
    )

    # NOTE(review): `rd.build` presumably returns the base cargo invocation
    # as a mutable list of arguments -- confirm against
    # `RepositoryDetails.build`.
    cargo_build = rd.build(
        "build", channel=None, rustflags=rustflags, extra_env=extra_env
    )

    # Collect the owning crate of every target so each is passed once as
    # `--package`.
    packages = set()
    for bin in bins:
        cargo_build.extend(["--bin", bin])
        packages.add(rd.cargo_workspace.crate_for_bin(bin).name)
    for example in examples:
        cargo_build.extend(["--example", example])
        packages.add(rd.cargo_workspace.crate_for_example(example).name)
    cargo_build.extend(f"--package={p}" for p in packages)

    if rd.profile == Profile.RELEASE:
        cargo_build.append("--release")
    if rd.profile == Profile.OPTIMIZED:
        cargo_build.extend(["--profile", "optimized"])
    if rd.sanitizer != Sanitizer.none:
        # ASan doesn't work with jemalloc
        cargo_build.append("--no-default-features")
        # Uses more memory, so reduce the number of jobs
        cargo_build.extend(
            ["--jobs", str(round(multiprocessing.cpu_count() * 2 / 3))]
        )
    if features:
        cargo_build.append(f"--features={','.join(features)}")

    return cargo_build
@classmethod
def prepare_batch(cls, instances: list["PreImage"]) -> dict[str, Any]:
    """Build the targets of all `instances` with one Cargo invocation.

    Returns:
        An empty dict when `instances` is empty; otherwise a dict whose
        "cargo" entry holds the captured output of re-running the build
        with `--message-format=json`.
    """
    super().prepare_batch(instances)

    if not instances:
        return {}

    # Building all binaries and examples in the same `cargo build` command
    # allows Cargo to link in parallel with other work, which can
    # meaningfully speed up builds.

    rd: RepositoryDetails | None = None
    all_bins = set()
    all_examples = set()
    all_features = set()
    for cargo_action in cast(list[CargoBuild], instances):
        # The repository details of the first instance serve the whole batch.
        rd = rd or cargo_action.rd
        all_bins.update(cargo_action.bins)
        all_examples.update(cargo_action.examples)
        all_features.update(cargo_action.features)
    assert rd

    ui.section(f"Common build for: {', '.join(all_bins | all_examples)}")

    command = cls.generate_cargo_build_command(
        rd,
        list(all_bins),
        list(all_examples),
        list(all_features) or None,
    )

    run_and_detect_rust_incremental_build_failure(command, cwd=rd.root)

    # Re-run with JSON-formatted messages and capture the output so we can
    # later analyze the build artifacts in `run`. This should be nearly
    # instantaneous since we just compiled above with the same crates and
    # features. (We don't want to do the compile above with JSON-formatted
    # messages because it wouldn't be human readable.)
    return {
        "cargo": spawn.capture(
            command + ["--message-format=json"],
            cwd=rd.root,
        )
    }
Prepare a batch of actions.
This is useful for PreImage actions that are more efficient when
their actions are applied to several images in bulk.
Returns an arbitrary output that is passed to PreImage.run.
def build(self, build_output: dict[str, Any]) -> None:
    """Copy the built artifacts into the image's mzbuild context.

    Every configured binary and example is copied from the Cargo target
    directory and post-processed with `strip` or `objcopy`. When `extract`
    is configured, build-script output directories named in it are copied
    as well.

    Args:
        build_output: The dict returned by `prepare_batch`; its "cargo"
            entry is read only when `extract` is non-empty.
    """
    # Name of the profile subdirectory under the Cargo target directory.
    cargo_profile = (
        "release"
        if self.rd.profile == Profile.RELEASE
        else "optimized" if self.rd.profile == Profile.OPTIMIZED else "debug"
    )

    def copy(src: Path, relative_dst: Path) -> None:
        """Copy `src` to `relative_dst` in the context and slim it down."""
        exe_path = self.path / relative_dst
        exe_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src, exe_path)

        if self.strip:
            # The debug information is large enough that it slows down CI,
            # since we're packaging these binaries up into Docker images and
            # shipping them around.
            spawn.runv(
                [*self.rd.tool("strip"), "--strip-debug", exe_path],
                cwd=self.rd.root,
            )
        else:
            # Even if we've been asked not to strip the binary, remove the
            # `.debug_pubnames` and `.debug_pubtypes` sections. These are just
            # indexes that speed up launching a debugger against the binary,
            # and we're happy to have slower debugger start up in exchange for
            # smaller binaries. Plus the sections have been obsoleted by a
            # `.debug_names` section in DWARF 5, and so debugger support for
            # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway.
            # See: https://github.com/rust-lang/rust/issues/46034
            spawn.runv(
                [
                    *self.rd.tool("objcopy"),
                    "-R",
                    ".debug_pubnames",
                    "-R",
                    ".debug_pubtypes",
                    exe_path,
                ],
                cwd=self.rd.root,
            )

    for bin in self.bins:
        src_path = self.rd.cargo_target_dir() / cargo_profile / bin
        copy(src_path, bin)
    for example in self.examples:
        src_path = (
            self.rd.cargo_target_dir() / cargo_profile / Path("examples") / example
        )
        copy(src_path, Path("examples") / example)

    if self.extract:
        cargo_build_json_output = build_output["cargo"]

        target_dir = self.rd.cargo_target_dir()
        for line in cargo_build_json_output.split("\n"):
            # Only lines that look like JSON objects are parsed.
            if line.strip() == "" or not line.startswith("{"):
                continue
            message = json.loads(line)
            if message["reason"] != "build-script-executed":
                continue
            out_dir = self.rd.rewrite_builder_path_for_host(
                Path(message["out_dir"])
            )
            if not out_dir.is_relative_to(target_dir):
                # Some crates are built for both the host and the target.
                # Ignore the built-for-host out dir.
                continue
            # parse the package name from a package_id that looks like one of:
            # git+https://github.com/MaterializeInc/rust-server-sdk#launchdarkly-server-sdk@1.0.0
            # path+file:///Users/roshan/materialize/src/catalog#mz-catalog@0.0.0
            # registry+https://github.com/rust-lang/crates.io-index#num-rational@0.4.0
            # file:///path/to/my-package#0.1.0
            package_id = message["package_id"]
            if "@" in package_id:
                package = package_id.split("@")[0].split("#")[-1]
            else:
                package = message["package_id"].split("#")[0].split("/")[-1]
            # `extract` maps package name -> {path in out dir: path in context}.
            for src, dst in self.extract.get(package, {}).items():
                spawn.runv(["cp", "-R", out_dir / src, self.path / dst])

    self.acquired = True
Perform the action.
Args:
prep: Any prep work returned by prepare_batch.
@cache
def inputs(self) -> set[str]:
    """Return the shared Cargo inputs plus the inputs of every path dependency.

    Dependencies of examples are resolved with `dev=True`. The result is
    memoized by `functools.cache`, keyed on `self`.
    """
    crates = set()

    for bin_name in self.bins:
        owner = self.rd.cargo_workspace.crate_for_bin(bin_name)
        crates |= self.rd.cargo_workspace.transitive_path_dependencies(owner)
    for example_name in self.examples:
        owner = self.rd.cargo_workspace.crate_for_example(example_name)
        crates |= self.rd.cargo_workspace.transitive_path_dependencies(
            owner, dev=True
        )

    shared_inputs = super().inputs()
    crate_inputs = {path for crate in crates for path in crate.inputs()}
    return shared_inputs | crate_inputs
Return the files which are considered inputs to the action.
Inherited Members
class Image:
    """A Docker image whose build and dependencies are managed by mzbuild.

    An image corresponds to a directory in a repository that contains a
    `mzbuild.yml` file. This directory is called an "mzbuild context."

    Attributes:
        rd: The `RepositoryDetails` passed to the constructor.
        path: The path to the directory containing the `mzbuild.yml`
            configuration file.
        name: The name of the image.
        publish: Whether the image should be pushed to Docker Hub.
        description: Optional short description from the configuration.
        mainline: The `mainline` configuration value (defaults to True).
        pre_images: Optional actions to perform before running `docker build`.
        build_args: Mapping taken from the `build-args` configuration key.
        depends_on: The names of the images upon which this image depends.
    """

    _DOCKERFILE_MZFROM_RE = re.compile(rb"^MZFROM\s*(\S+)")

    _context_files_cache: set[str] | None

    def __init__(self, rd: RepositoryDetails, path: Path):
        """Load `mzbuild.yml` and scan the Dockerfile for `MZFROM` lines.

        Raises:
            ValueError: If a pre-image has an unknown type or the image name
                contains characters other than alphanumerics and hyphens.
        """
        self.rd = rd
        self.path = path
        self._context_files_cache = None
        self.pre_images: list[PreImage] = []

        with open(self.path / "mzbuild.yml") as config_file:
            config = yaml.safe_load(config_file)
            self.name: str = config.pop("name")
            self.publish: bool = config.pop("publish", True)
            self.description: str | None = config.pop("description", None)
            self.mainline: bool = config.pop("mainline", True)
            for action_config in config.pop("pre-image", []):
                kind = action_config.pop("type", None)
                if kind == "cargo-build":
                    action = CargoBuild(self.rd, self.path, action_config)
                elif kind == "copy":
                    action = Copy(self.rd, self.path, action_config)
                else:
                    raise ValueError(
                        f"mzbuild config in {self.path} has unknown pre-image type"
                    )
                self.pre_images.append(action)
            self.build_args = config.pop("build-args", {})

        if re.search(r"[^A-Za-z0-9\-]", self.name):
            raise ValueError(
                f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
            )

        # Every `MZFROM <image>` line names another mzbuild image this one
        # is based on.
        self.depends_on: list[str] = []
        with open(self.path / "Dockerfile", "rb") as dockerfile:
            for raw_line in dockerfile:
                found = self._DOCKERFILE_MZFROM_RE.match(raw_line)
                if found:
                    self.depends_on.append(found.group(1).decode())

    def sync_description(self) -> None:
        """Sync the description to Docker Hub if the image is publishable
        and a README.md file exists."""

        if not self.publish:
            ui.say(f"{self.name} is not publishable")
            return

        readme_path = self.path / "README.md"
        if not readme_path.exists():
            ui.say(f"{self.name} has no README.md or description")
            return

        docker_config = os.getenv("DOCKER_CONFIG")
        command = ["docker", "pushrm", f"--file={readme_path}"]
        if docker_config:
            command.append(f"--config={docker_config}/config.json")
        if self.description:
            command.append(f"--short={self.description}")
        command.append(self.docker_name())
        spawn.runv(command)

    def docker_name(self, tag: str | None = None) -> str:
        """Return the name of the image on Docker Hub at the given tag."""
        base = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}"
        return f"{base}:{tag}" if tag else base
A Docker image whose build and dependencies are managed by mzbuild.
An image corresponds to a directory in a repository that contains a
mzbuild.yml file. This directory is called an "mzbuild context."
Attributes:
name: The name of the image.
publish: Whether the image should be pushed to Docker Hub.
depends_on: The names of the images upon which this image depends.
root: The path to the root of the associated Repository.
path: The path to the directory containing the mzbuild.yml
configuration file.
pre_images: Optional actions to perform before running docker build.
build_args: An optional mapping of build argument names to values, each passed to the Docker build as a --build-arg
def __init__(self, rd: RepositoryDetails, path: Path):
    """Load `mzbuild.yml` and scan the Dockerfile for `MZFROM` lines.

    Raises:
        ValueError: If a pre-image has an unknown type or the image name
            contains characters other than alphanumerics and hyphens.
    """
    self.rd = rd
    self.path = path
    self._context_files_cache = None
    self.pre_images: list[PreImage] = []

    with open(self.path / "mzbuild.yml") as config_file:
        config = yaml.safe_load(config_file)
        self.name: str = config.pop("name")
        self.publish: bool = config.pop("publish", True)
        self.description: str | None = config.pop("description", None)
        self.mainline: bool = config.pop("mainline", True)
        for action_config in config.pop("pre-image", []):
            kind = action_config.pop("type", None)
            if kind == "cargo-build":
                action = CargoBuild(self.rd, self.path, action_config)
            elif kind == "copy":
                action = Copy(self.rd, self.path, action_config)
            else:
                raise ValueError(
                    f"mzbuild config in {self.path} has unknown pre-image type"
                )
            self.pre_images.append(action)
        self.build_args = config.pop("build-args", {})

    if re.search(r"[^A-Za-z0-9\-]", self.name):
        raise ValueError(
            f"mzbuild image name {self.name} contains invalid character; only alphanumerics and hyphens allowed"
        )

    # Every `MZFROM <image>` line names another mzbuild image this one is
    # based on.
    self.depends_on: list[str] = []
    with open(self.path / "Dockerfile", "rb") as dockerfile:
        for raw_line in dockerfile:
            found = self._DOCKERFILE_MZFROM_RE.match(raw_line)
            if found:
                self.depends_on.append(found.group(1).decode())
def sync_description(self) -> None:
    """Sync the description to Docker Hub if the image is publishable
    and a README.md file exists."""

    if not self.publish:
        ui.say(f"{self.name} is not publishable")
        return

    readme_path = self.path / "README.md"
    if not readme_path.exists():
        ui.say(f"{self.name} has no README.md or description")
        return

    # Optional flags are only added when they have a value to carry.
    docker_config = os.getenv("DOCKER_CONFIG")
    command = ["docker", "pushrm", f"--file={readme_path}"]
    if docker_config:
        command.append(f"--config={docker_config}/config.json")
    if self.description:
        command.append(f"--short={self.description}")
    command.append(self.docker_name())
    spawn.runv(command)
Sync the description to Docker Hub if the image is publishable and a README.md file exists.
849 def docker_name(self, tag: str | None = None) -> str: 850 """Return the name of the image on Docker Hub at the given tag.""" 851 name = f"{self.rd.image_registry}/{self.rd.image_prefix}{self.name}" 852 if tag: 853 name += f":{tag}" 854 return name
Return the name of the image on Docker Hub at the given tag.
class ResolvedImage:
    """An `Image` whose dependencies have been resolved.

    Attributes:
        image: The underlying `Image`.
        acquired: Whether the image is available locally.
        dependencies: A mapping from dependency name to `ResolvedImage` for
            each of the images that `image` depends upon.
    """

    def __init__(self, image: Image, dependencies: Iterable["ResolvedImage"]):
        self.image = image
        self.acquired = False
        self.dependencies = {}
        for d in dependencies:
            self.dependencies[d.name] = d

    def __repr__(self) -> str:
        return f"ResolvedImage<{self.spec()}>"

    @property
    def name(self) -> str:
        """The name of the underlying image."""
        return self.image.name

    @property
    def publish(self) -> bool:
        """Whether the underlying image is flagged for publishing."""
        return self.image.publish

    @cache
    def spec(self) -> str:
        """Return the "spec" for the image.

        A spec is the unique identifier for the image given its current
        fingerprint: the image's Docker name tagged `mzbuild-<fingerprint>`.
        """
        return self.image.docker_name(tag=f"mzbuild-{self.fingerprint()}")

    def write_dockerfile(self) -> IO[bytes]:
        """Render the Dockerfile without mzbuild directives.

        Every line matching `Image._DOCKERFILE_MZFROM_RE` is rewritten into a
        plain `FROM <spec>` line, where `<spec>` is the spec of the named
        dependency.

        Returns:
            file: A handle to a temporary file containing the adjusted
                Dockerfile, positioned at the start. The caller owns the
                handle and must close it.

        Raises:
            KeyError: If the Dockerfile names an image that is not among
                this image's resolved dependencies.
        """
        with open(self.image.path / "Dockerfile", "rb") as src:
            lines = src.readlines()
        f = TemporaryFile()
        try:
            for line in lines:
                match = Image._DOCKERFILE_MZFROM_RE.match(line)
                if match:
                    image = match.group(1).decode()
                    spec = self.dependencies[image].spec()
                    line = Image._DOCKERFILE_MZFROM_RE.sub(
                        b"FROM %b" % spec.encode(), line
                    )
                f.write(line)
            f.seek(0)
        except BaseException:
            # Do not leak the temporary file if rendering fails part-way.
            f.close()
            raise
        return f

    def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
        """Build the image from source.

        Requires that the caller has already acquired all dependencies and
        prepared all `PreImage` actions via `PreImage.prepare_batch`.

        Args:
            prep: The prepared state, keyed by `PreImage` type.
            push: Whether to push the image after building it.
        """
        # Use a file lock to prevent parallel mzcompose processes from
        # racing on git clean / copy / strip for the same image directory.
        lock_dir = self.image.rd.root / "target" / "mzbuild-locks"
        lock_dir.mkdir(parents=True, exist_ok=True)
        profile = self.image.rd.profile.name.lower()
        lock_path = lock_dir / f"{self.image.name}-{profile}.lock"
        with open(lock_path, "w") as lock_file:
            ui.say(f"Acquiring lock for {self.spec()}")
            fcntl.flock(lock_file, fcntl.LOCK_EX)
            try:
                self._build_locked(prep, push)
            finally:
                fcntl.flock(lock_file, fcntl.LOCK_UN)

    def _build_locked(
        self, prep: dict[type[PreImage], Any], push: bool = False
    ) -> None:
        """Run the pre-image actions and the Docker build, then optionally push.

        Called by `build` while it holds the per-image file lock.
        """
        ui.section(f"Building {self.spec()}")
        spawn.runv(["git", "clean", "-ffdX", self.image.path])

        for pre_image in self.image.pre_images:
            pre_image.run(prep[type(pre_image)])
        build_args = {
            **self.image.build_args,
            "BUILD_PROFILE": self.image.rd.profile.name,
            "ARCH_GCC": str(self.image.rd.arch),
            "ARCH_GO": self.image.rd.arch.go_str(),
            "CI_SANITIZER": str(self.image.rd.sanitizer),
        }
        # Tags applied on the buildx path; these are also the tags pushed.
        docker_tag = f"docker.io/{self.spec()}"
        ghcr_tag = f"{GHCR_PREFIX}{self.spec()}"

        # The rendered Dockerfile is fed to docker on stdin; the context
        # manager guarantees the temporary file is closed on every path.
        with self.write_dockerfile() as f:
            cmd: Sequence[str]
            try:
                spawn.capture(["docker", "buildx", "version"])
            except subprocess.CalledProcessError:
                if push:
                    print(
                        "docker buildx not found, required to push images. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
                    )
                    raise
                print(
                    "docker buildx not found, you can install it to build faster. Installation: https://github.com/docker/buildx?tab=readme-ov-file#installing"
                )
                print("Falling back to docker build")
                cmd = [
                    "docker",
                    "build",
                    "-f",
                    "-",
                    *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                    "-t",
                    self.spec(),
                    f"--platform=linux/{self.image.rd.arch.go_str()}",
                    str(self.image.path),
                ]
            else:
                cmd = [
                    "docker",
                    "buildx",
                    "build",
                    "--progress=plain",  # less noisy
                    "-f",
                    "-",
                    *(f"--build-arg={k}={v}" for k, v in build_args.items()),
                    "-t",
                    docker_tag,
                    "-t",
                    ghcr_tag,
                    f"--platform=linux/{self.image.rd.arch.go_str()}",
                    str(self.image.path),
                    "--load",
                ]

            if token := os.getenv("GITHUB_GHCR_TOKEN"):
                spawn.runv(
                    [
                        "docker",
                        "login",
                        "ghcr.io",
                        "-u",
                        "materialize-bot",
                        "--password-stdin",
                    ],
                    stdin=token.encode(),
                )

            spawn.runv(cmd, stdin=f, stdout=sys.stderr.buffer)

        if push:
            # Push to both registries in parallel. With the docker driver,
            # the image is already in the local daemon after --load, so
            # docker push is the same mechanism buildx --push uses internally.
            pending = [docker_tag, ghcr_tag]
            # The trailing None marks the final attempt: fail instead of sleeping.
            for sleep_time in [5, 10, 20, 40, 60, None]:
                procs = [
                    subprocess.Popen(
                        ["docker", "push", tag],
                        stdout=sys.stderr,
                        stderr=sys.stderr,
                    )
                    for tag in pending
                ]
                # Only the tags whose push failed are retried.
                pending = [tag for tag, proc in zip(pending, procs) if proc.wait() != 0]
                if not pending:
                    break
                if sleep_time is None:
                    raise subprocess.CalledProcessError(
                        1, ["docker", "push", pending[0]]
                    )
                print(f"docker push failed for {pending}, retrying in {sleep_time}s")
                time.sleep(sleep_time)

    def try_pull(self, max_retries: int) -> bool:
        """Download the image if it does not exist locally. Returns whether it was found.

        Args:
            max_retries: Maximum number of `docker pull` attempts.
        """
        command = ["docker", "pull"]
        # --quiet skips printing the progress bar, which does not display well in CI.
        if ui.env_is_truthy("CI"):
            command.append("--quiet")
        command.append(self.spec())
        if self.acquired:
            return self.acquired

        ui.header(f"Acquiring {self.spec()}")
        sleep_time = 1
        for attempt in range(1, max_retries + 1):
            try:
                spawn.runv(
                    command,
                    stdin=subprocess.DEVNULL,
                    stdout=sys.stderr.buffer,
                )
            except subprocess.CalledProcessError:
                if attempt >= max_retries:
                    break
                # There seems to be no good way to tell what error
                # happened based on error code
                # (https://github.com/docker/cli/issues/538) and we
                # want to print output directly to terminal.
                build = os.getenv("CI_WAITING_FOR_BUILD")
                if not build:
                    print(f"Retrying in {sleep_time}s ...")
                    time.sleep(sleep_time)
                    sleep_time = min(sleep_time * 2, 10)
                    continue
                # Poll the build we are waiting on before pulling again.
                for _ in range(max_retries):
                    try:
                        build_status = buildkite.get_build_status(build)
                    except subprocess.CalledProcessError:
                        time.sleep(sleep_time)
                        sleep_time = min(sleep_time * 2, 10)
                        break
                    print(f"Build {build} status: {build_status}")
                    if build_status == "failed":
                        print(
                            f"Build {build} has been marked as failed, exiting hard"
                        )
                        sys.exit(1)
                    elif build_status == "success":
                        break
                    assert (
                        build_status == "pending"
                    ), f"Unknown build status {build_status}"
                    time.sleep(1)
            else:
                self.acquired = True
                break
        return self.acquired

    def is_published_if_necessary(self) -> bool:
        """Report whether the image exists on DockerHub & GHCR if it is publishable.

        Returns False for images that are not publishable.
        """
        if not self.publish:
            return False
        # removeprefix is a no-op when the spec carries no GHCR prefix.
        spec = self.spec().removeprefix(GHCR_PREFIX)
        ghcr_spec = f"{GHCR_PREFIX}{spec}"
        if is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec):
            ui.say(f"{spec} already exists")
            return True
        return False

    def run(
        self,
        args: list[str] | None = None,
        docker_args: list[str] | None = None,
        env: dict[str, str] | None = None,
    ) -> None:
        """Run a command in the image.

        Creates a container from the image and runs the command described by
        `args` in the image.

        Args:
            args: Arguments passed to the container after the image spec.
            docker_args: Extra arguments for `docker run`, placed before the
                image spec.
            env: Environment variables to set in the container.
        """
        envs: list[str] = []
        for key, val in (env or {}).items():
            envs.extend(["--env", f"{key}={val}"])
        spawn.runv(
            [
                "docker",
                "run",
                "--tty",
                "--rm",
                *envs,
                "--init",
                *(docker_args or []),
                self.spec(),
                *(args or []),
            ],
        )

    def list_dependencies(self, transitive: bool = False) -> set[str]:
        """Return the names of the images this image depends on.

        Args:
            transitive: Whether to include dependencies of dependencies.
        """
        out = set()
        for dep in self.dependencies.values():
            out.add(dep.name)
            if transitive:
                out |= dep.list_dependencies(transitive)
        return out

    @cache
    def inputs(self, transitive: bool = False) -> set[str]:
        """List the files tracked as inputs to the image.

        These files are used to compute the fingerprint for the image. See
        `ResolvedImage.fingerprint` for details.

        Args:
            transitive: Whether to include the inputs of all dependencies.

        Returns:
            inputs: A set of input files, relative to the root of the
                repository.
        """
        if self.image._context_files_cache is not None:
            paths = set(self.image._context_files_cache)
        else:
            paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
        if not paths:
            # While we could find an `mzbuild.yml` file for this service, expand_globs didn't
            # return any files that matched this service. At the very least, the `mzbuild.yml`
            # file itself should have been returned. We have a bug if paths is empty.
            raise AssertionError(
                f"{self.image.name} mzbuild exists but its files are unknown to git"
            )
        for pre_image in self.image.pre_images:
            paths |= pre_image.inputs()
        if transitive:
            for dep in self.dependencies.values():
                paths |= dep.inputs(transitive)
        return paths

    @cache
    def fingerprint(self) -> Fingerprint:
        """Fingerprint the inputs to the image.

        Compute the fingerprint of the image. Changing the contents of any of
        the files or adding or removing files to the image will change the
        fingerprint, as will modifying the inputs to any of its dependencies.

        The image considers all non-gitignored files in its mzbuild context to
        be inputs. If it has a pre-image action, that action may add additional
        inputs via `PreImage.inputs`.
        """
        self_hash = hashlib.sha1()
        # When inputs come from precomputed sources (crate and image context
        # batching + resolved CargoPreImage paths), they are already individual
        # file paths from git. Skip the expensive expand_globs subprocess calls.
        inputs = self.inputs()
        if self.image._context_files_cache is not None:
            resolved_inputs = sorted(inputs)
        else:
            resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
        for rel_path in resolved_inputs:
            abs_path = self.image.rd.root / rel_path
            file_hash = hashlib.sha1()
            raw_file_mode = os.lstat(abs_path).st_mode
            # Compute a simplified file mode using the same rules as Git.
            # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
            if stat.S_ISLNK(raw_file_mode):
                file_mode = 0o120000
            elif raw_file_mode & stat.S_IXUSR:
                file_mode = 0o100755
            else:
                file_mode = 0o100644
            with open(abs_path, "rb") as f:
                file_hash.update(f.read())
            self_hash.update(file_mode.to_bytes(2, byteorder="big"))
            self_hash.update(rel_path.encode())
            self_hash.update(file_hash.digest())
            self_hash.update(b"\0")

        for pre_image in self.image.pre_images:
            self_hash.update(pre_image.extra().encode())
            self_hash.update(b"\0")

        self_hash.update(f"profile={self.image.rd.profile}".encode())
        self_hash.update(f"arch={self.image.rd.arch}".encode())
        self_hash.update(f"coverage={self.image.rd.coverage}".encode())
        self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
        # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
        self_hash.update(b"mirror=ghcr")

        full_hash = hashlib.sha1()
        full_hash.update(self_hash.digest())
        for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
            full_hash.update(dep.name.encode())
            full_hash.update(dep.fingerprint())
            full_hash.update(b"\0")

        return Fingerprint(full_hash.digest())
An Image whose dependencies have been resolved.
Attributes:
image: The underlying Image.
acquired: Whether the image is available locally.
dependencies: A mapping from dependency name to ResolvedImage for
each of the images that image depends upon.
@property
def name(self) -> str:
    """Name of the wrapped `Image`, forwarded unchanged."""
    underlying = self.image
    return underlying.name
The name of the underlying image.
@property
def publish(self) -> bool:
    """Publish flag of the wrapped `Image`, forwarded unchanged."""
    underlying = self.image
    return underlying.publish
Whether the underlying image should be pushed to Docker Hub.
@cache
def spec(self) -> str:
    """Return the "spec" for the image.

    The spec identifies the image at its current fingerprint: it is the
    image's Docker name carrying the tag `mzbuild-<fingerprint>`.
    """
    tag = f"mzbuild-{self.fingerprint()}"
    return self.image.docker_name(tag=tag)
Return the "spec" for the image.
A spec is the unique identifier for the image given its current fingerprint. It is a valid Docker Hub name.
def write_dockerfile(self) -> IO[bytes]:
    """Render the Dockerfile without mzbuild directives.

    Every line matching `Image._DOCKERFILE_MZFROM_RE` is rewritten into a
    plain `FROM <spec>` line, where `<spec>` is the spec of the named
    dependency. All other lines are copied through unchanged.

    Returns:
        file: A handle to a temporary file containing the adjusted
            Dockerfile, positioned at the start. The caller owns the
            handle and must close it.

    Raises:
        KeyError: If the Dockerfile names an image that is not among this
            image's resolved dependencies.
    """
    with open(self.image.path / "Dockerfile", "rb") as src:
        lines = src.readlines()
    f = TemporaryFile()
    try:
        for line in lines:
            match = Image._DOCKERFILE_MZFROM_RE.match(line)
            if match:
                image = match.group(1).decode()
                spec = self.dependencies[image].spec()
                line = Image._DOCKERFILE_MZFROM_RE.sub(b"FROM %b" % spec.encode(), line)
            f.write(line)
        f.seek(0)
    except BaseException:
        # Do not leak the temporary file if rendering fails part-way
        # (for example on an unknown dependency name).
        f.close()
        raise
    return f
Render the Dockerfile without mzbuild directives.
Returns: file: A handle to a temporary file containing the adjusted Dockerfile.
def build(self, prep: dict[type[PreImage], Any], push: bool = False) -> None:
    """Build the image from source while holding a per-image file lock.

    Requires that the caller has already acquired all dependencies and
    prepared all `PreImage` actions via `PreImage.prepare_batch`.

    The lock file lives under `<root>/target/mzbuild-locks` and is named
    after the image and the lower-cased build profile.
    """
    rd = self.image.rd
    # Use a file lock to prevent parallel mzcompose processes from
    # racing on git clean / copy / strip for the same image directory.
    locks = rd.root / "target" / "mzbuild-locks"
    locks.mkdir(parents=True, exist_ok=True)
    lock_name = f"{self.image.name}-{rd.profile.name.lower()}.lock"
    with open(locks / lock_name, "w") as handle:
        ui.say(f"Acquiring lock for {self.spec()}")
        fcntl.flock(handle, fcntl.LOCK_EX)
        try:
            self._build_locked(prep, push)
        finally:
            fcntl.flock(handle, fcntl.LOCK_UN)
Build the image from source.
Requires that the caller has already acquired all dependencies and
prepared all PreImage actions via PreImage.prepare_batch.
def try_pull(self, max_retries: int) -> bool:
    """Download the image if it does not exist locally. Returns whether it was found.

    At most `max_retries` pull attempts are made. Between failed attempts
    the method either backs off (doubling, capped at 10 seconds) or, when
    `CI_WAITING_FOR_BUILD` is set, polls the status of that build first.
    """
    pull_cmd = ["docker", "pull"]
    # --quiet skips printing the progress bar, which does not display well in CI.
    if ui.env_is_truthy("CI"):
        pull_cmd.append("--quiet")
    pull_cmd.append(self.spec())
    if self.acquired:
        return self.acquired

    ui.header(f"Acquiring {self.spec()}")
    sleep_time = 1
    for attempt in range(1, max_retries + 1):
        try:
            spawn.runv(
                pull_cmd,
                stdin=subprocess.DEVNULL,
                stdout=sys.stderr.buffer,
            )
        except subprocess.CalledProcessError:
            if attempt >= max_retries:
                break
            # There seems to be no good way to tell what error
            # happened based on error code
            # (https://github.com/docker/cli/issues/538) and we
            # want to print output directly to terminal.
            build = os.getenv("CI_WAITING_FOR_BUILD")
            if not build:
                print(f"Retrying in {sleep_time}s ...")
                time.sleep(sleep_time)
                sleep_time = min(sleep_time * 2, 10)
                continue
            # Poll the build we are waiting on before pulling again.
            for _ in range(max_retries):
                try:
                    build_status = buildkite.get_build_status(build)
                except subprocess.CalledProcessError:
                    time.sleep(sleep_time)
                    sleep_time = min(sleep_time * 2, 10)
                    break
                print(f"Build {build} status: {build_status}")
                if build_status == "failed":
                    print(
                        f"Build {build} has been marked as failed, exiting hard"
                    )
                    sys.exit(1)
                elif build_status == "success":
                    break
                assert (
                    build_status == "pending"
                ), f"Unknown build status {build_status}"
                time.sleep(1)
        else:
            self.acquired = True
            break
    return self.acquired
Download the image if it does not exist locally. Returns whether it was found.
def is_published_if_necessary(self) -> bool:
    """Report whether the image exists on DockerHub & GHCR if it is publishable.

    Returns False for images that are not publishable, and for publishable
    images that are missing from either registry.
    """
    if not self.publish:
        return False
    # removeprefix leaves the spec untouched when it has no GHCR prefix.
    spec = self.spec().removeprefix(GHCR_PREFIX)
    ghcr_spec = f"{GHCR_PREFIX}{spec}"
    if not (is_docker_image_pushed(spec) and is_ghcr_image_pushed(ghcr_spec)):
        return False
    ui.say(f"{spec} already exists")
    return True
Report whether the image exists on DockerHub & GHCR if it is publishable.
1101 def run( 1102 self, 1103 args: list[str] = [], 1104 docker_args: list[str] = [], 1105 env: dict[str, str] = {}, 1106 ) -> None: 1107 """Run a command in the image. 1108 1109 Creates a container from the image and runs the command described by 1110 `args` in the image. 1111 """ 1112 envs = [] 1113 for key, val in env.items(): 1114 envs.extend(["--env", f"{key}={val}"]) 1115 spawn.runv( 1116 [ 1117 "docker", 1118 "run", 1119 "--tty", 1120 "--rm", 1121 *envs, 1122 "--init", 1123 *docker_args, 1124 self.spec(), 1125 *args, 1126 ], 1127 )
Run a command in the image.
Creates a container from the image and runs the command described by
args in the image.
@cache
def inputs(self, transitive: bool = False) -> set[str]:
    """List the files tracked as inputs to the image.

    These files are used to compute the fingerprint for the image. See
    `ResolvedImage.fingerprint` for details.

    Args:
        transitive: Whether to also include the inputs of every dependency.

    Returns:
        inputs: A set of input files, relative to the root of the
            repository.
    """
    precomputed = self.image._context_files_cache
    if precomputed is None:
        paths = set(git.expand_globs(self.image.rd.root, f"{self.image.path}/**"))
    else:
        paths = set(precomputed)
    if not paths:
        # While we could find an `mzbuild.yml` file for this service, expand_globs didn't
        # return any files that matched this service. At the very least, the `mzbuild.yml`
        # file itself should have been returned. We have a bug if paths is empty.
        raise AssertionError(
            f"{self.image.name} mzbuild exists but its files are unknown to git"
        )
    for action in self.image.pre_images:
        paths |= action.inputs()
    if not transitive:
        return paths
    for resolved_dep in self.dependencies.values():
        paths |= resolved_dep.inputs(transitive)
    return paths
List the files tracked as inputs to the image.
These files are used to compute the fingerprint for the image. See
ResolvedImage.fingerprint for details.
Returns: inputs: A set of input files, relative to the root of the repository.
@cache
def fingerprint(self) -> Fingerprint:
    """Fingerprint the inputs to the image.

    Compute the fingerprint of the image. Changing the contents of any of
    the files or adding or removing files to the image will change the
    fingerprint, as will modifying the inputs to any of its dependencies.

    The image considers all non-gitignored files in its mzbuild context to
    be inputs. If it has a pre-image action, that action may add additional
    inputs via `PreImage.inputs`.

    The result is a SHA-1 digest wrapped in `Fingerprint`. It covers, in
    order: each input file (mode, relative path, content hash), the
    `extra()` string of each pre-image action, the repository's profile,
    arch, coverage and sanitizer settings, and finally the name and
    fingerprint of each dependency sorted by name.
    """
    self_hash = hashlib.sha1()
    # When inputs come from precomputed sources (crate and image context
    # batching + resolved CargoPreImage paths), they are already individual
    # file paths from git. Skip the expensive expand_globs subprocess calls.
    inputs = self.inputs()
    if self.image._context_files_cache is not None:
        resolved_inputs = sorted(inputs)
    else:
        resolved_inputs = sorted(set(git.expand_globs(self.image.rd.root, *inputs)))
    # Files are hashed in sorted path order so the digest is deterministic.
    for rel_path in resolved_inputs:
        abs_path = self.image.rd.root / rel_path
        file_hash = hashlib.sha1()
        # lstat does not follow symlinks, so a link is classified as a link.
        raw_file_mode = os.lstat(abs_path).st_mode
        # Compute a simplified file mode using the same rules as Git.
        # https://github.com/git/git/blob/3bab5d562/Documentation/git-fast-import.txt#L610-L616
        if stat.S_ISLNK(raw_file_mode):
            file_mode = 0o120000
        elif raw_file_mode & stat.S_IXUSR:
            file_mode = 0o100755
        else:
            file_mode = 0o100644
        # NOTE(review): open() follows symlinks, so for a link the bytes
        # hashed here are those of the link's target.
        with open(abs_path, "rb") as f:
            file_hash.update(f.read())
        self_hash.update(file_mode.to_bytes(2, byteorder="big"))
        self_hash.update(rel_path.encode())
        self_hash.update(file_hash.digest())
        self_hash.update(b"\0")

    for pre_image in self.image.pre_images:
        self_hash.update(pre_image.extra().encode())
        self_hash.update(b"\0")

    # Build configuration is part of the identity of the image.
    self_hash.update(f"profile={self.image.rd.profile}".encode())
    self_hash.update(f"arch={self.image.rd.arch}".encode())
    self_hash.update(f"coverage={self.image.rd.coverage}".encode())
    self_hash.update(f"sanitizer={self.image.rd.sanitizer}".encode())
    # This exists to make sure all hashes from before we had a GHCR mirror are invalidated, so that we rebuild when an image doesn't exist on GHCR yet
    self_hash.update(b"mirror=ghcr")

    # Fold in the dependencies' fingerprints, sorted by name for stability.
    full_hash = hashlib.sha1()
    full_hash.update(self_hash.digest())
    for dep in sorted(self.dependencies.values(), key=lambda d: d.name):
        full_hash.update(dep.name.encode())
        full_hash.update(dep.fingerprint())
        full_hash.update(b"\0")

    return Fingerprint(full_hash.digest())
Fingerprint the inputs to the image.
Compute the fingerprint of the image. Changing the contents of any of the files or adding or removing files to the image will change the fingerprint, as will modifying the inputs to any of its dependencies.
The image considers all non-gitignored files in its mzbuild context to
be inputs. If it has a pre-image action, that action may add additional
inputs via PreImage.inputs.
class DependencySet:
    """A set of `ResolvedImage`s.

    Iterating over a dependency set yields the contained images in the order
    in which they were supplied to the constructor. Indexing a dependency
    set yields the image with the specified name.
    """

    def __init__(self, dependencies: Iterable[Image]):
        """Construct a new `DependencySet`.

        The provided `dependencies` must be topologically sorted: every image
        must appear after all of the images it depends on.
        """
        self._dependencies: dict[str, ResolvedImage] = {}
        dependencies = list(dependencies)
        # Skip the query for local images entirely when there is nothing to resolve.
        known_images = docker_images() if dependencies else set()
        for d in dependencies:
            image = ResolvedImage(
                image=d,
                dependencies=(self._dependencies[d0] for d0 in d.depends_on),
            )
            image.acquired = image.spec() in known_images
            self._dependencies[d.name] = image

    def _prepare_batch(self, images: list[ResolvedImage]) -> dict[type[PreImage], Any]:
        """Group the pre-image actions of `images` by type and prepare each group.

        Returns a mapping from `PreImage` type to whatever that type's
        `prepare_batch` returned for all of its instances.
        """
        pre_images = collections.defaultdict(list)
        for image in images:
            for pre_image in image.image.pre_images:
                pre_images[type(pre_image)].append(pre_image)
        pre_image_prep = {}
        for cls, instances in pre_images.items():
            pre_image = cast(PreImage, cls)
            pre_image_prep[cls] = pre_image.prepare_batch(instances)
        return pre_image_prep

    def acquire(self, max_retries: int | None = None) -> None:
        """Download or build all of the images in the dependency set that do not
        already exist locally.

        Publishable images are pulled in parallel; images that are not
        publishable, or whose pull failed, are built locally in the set's
        (topological) iteration order.

        Args:
            max_retries: Number of retries on failure. When omitted, a
                default is chosen from the `CI_WAITING_FOR_BUILD`, `CI` and
                `CI_ALLOW_LOCAL_BUILD` environment variables.
        """

        # Only retry in CI runs since we struggle with flaky docker pulls there
        if not max_retries:
            if os.getenv("CI_WAITING_FOR_BUILD"):
                max_retries = 90
            elif ui.env_is_truthy("CI") and not ui.env_is_truthy(
                "CI_ALLOW_LOCAL_BUILD"
            ):
                max_retries = 5
            else:
                max_retries = 1
        assert max_retries > 0

        deps_to_check = [dep for dep in self if dep.publish]
        pull_failed: set[str] = set()
        if deps_to_check:
            with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
                futures = [
                    executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
                ]
                for dep, future in zip(deps_to_check, futures):
                    try:
                        pulled = future.result()
                    except Exception as e:
                        # Best effort: a failed pull falls back to a local build.
                        print(f"Pulling {dep.name} failed: {e!r}")
                        pulled = False
                    if not pulled:
                        pull_failed.add(dep.name)

        # Rebuild the list from the set's own iteration order so that an
        # image is never built before an image it depends on, even when the
        # dependency is a publishable image whose pull failed.
        deps_to_build = [
            dep for dep in self if not dep.publish or dep.name in pull_failed
        ]

        # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
        if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            expected_deps = [dep for dep in deps_to_build if dep.publish]
            if expected_deps:
                print(
                    f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
                )
                sys.exit(128)

        prep = self._prepare_batch(deps_to_build)
        for dep in deps_to_build:
            dep.build(prep)

    def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
        """Ensure all publishable images in this dependency set exist on Docker
        Hub.

        Images are pushed using their spec as their tag.

        Args:
            pre_build: A callback to invoke with all dependencies that are
                going to be built locally, invoked after their cargo build
                is done, but before the Docker images are built and
                uploaded to DockerHub.
        """
        deps = list(self)
        if not deps:
            deps_to_build = []
        else:
            with ThreadPoolExecutor(max_workers=len(deps)) as executor:
                published = list(
                    executor.map(lambda dep: dep.is_published_if_necessary(), deps)
                )
            deps_to_build = [dep for dep, ok in zip(deps, published) if not ok]

        prep = self._prepare_batch(deps_to_build)
        if pre_build:
            pre_build(deps_to_build)
        lock = Lock()
        # Everything that does not need building counts as already built.
        built_deps: set[str] = {dep.name for dep in deps} - {
            dep.name for dep in deps_to_build
        }

        def build_dep(dep):
            # Wait (up to ten minutes) until every dependency has been built.
            end_time = time.time() + 600
            while True:
                if time.time() > end_time:
                    raise TimeoutError(
                        f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}"
                    )
                with lock:
                    if all(dep2 in built_deps for dep2 in dep.dependencies):
                        break
                time.sleep(0.01)
            # Publishable images get up to three attempts; others fail at once.
            for attempts_remaining in reversed(range(3)):
                try:
                    dep.build(prep, push=dep.publish)
                    with lock:
                        built_deps.add(dep.name)
                    break
                except Exception:
                    if not dep.publish or attempts_remaining == 0:
                        raise

        if deps_to_build:
            with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
                futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
                for future in as_completed(futures):
                    future.result()

    def check(self) -> bool:
        """Check all publishable images in this dependency set exist on Docker
        Hub. Don't try to download or build them.

        Returns True for an empty set; otherwise True only if
        `is_published_if_necessary` holds for every image in the set.
        """
        deps = list(self)
        if not deps:
            return True
        with ThreadPoolExecutor(max_workers=len(deps)) as executor:
            results = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), deps)
            )
        return all(results)

    def __iter__(self) -> Iterator[ResolvedImage]:
        return iter(self._dependencies.values())

    def __getitem__(self, key: str) -> ResolvedImage:
        return self._dependencies[key]
A set of ResolvedImages.
Iterating over a dependency set yields the contained images in an arbitrary order. Indexing a dependency set yields the image with the specified name.
1234 def __init__(self, dependencies: Iterable[Image]): 1235 """Construct a new `DependencySet`. 1236 1237 The provided `dependencies` must be topologically sorted. 1238 """ 1239 self._dependencies: dict[str, ResolvedImage] = {} 1240 dependencies = list(dependencies) 1241 known_images = docker_images() if dependencies else set() 1242 for d in dependencies: 1243 image = ResolvedImage( 1244 image=d, 1245 dependencies=(self._dependencies[d0] for d0 in d.depends_on), 1246 ) 1247 image.acquired = image.spec() in known_images 1248 self._dependencies[d.name] = image
Construct a new DependencySet.
The provided dependencies must be topologically sorted.
def acquire(self, max_retries: int | None = None) -> None:
    """Download or build all of the images in the dependency set that do not
    already exist locally.

    Publishable images are pulled in parallel; images that are not
    publishable, or whose pull failed, are built locally in the set's
    iteration order, so that an image is never built before an image it
    depends on.

    Args:
        max_retries: Number of retries on failure. When omitted, a default
            is chosen from the `CI_WAITING_FOR_BUILD`, `CI` and
            `CI_ALLOW_LOCAL_BUILD` environment variables.
    """

    # Only retry in CI runs since we struggle with flaky docker pulls there
    if not max_retries:
        if os.getenv("CI_WAITING_FOR_BUILD"):
            max_retries = 90
        elif ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
            max_retries = 5
        else:
            max_retries = 1
    assert max_retries > 0

    deps_to_check = [dep for dep in self if dep.publish]
    pull_failed: set[str] = set()
    if deps_to_check:
        with ThreadPoolExecutor(max_workers=len(deps_to_check)) as executor:
            futures = [
                executor.submit(dep.try_pull, max_retries) for dep in deps_to_check
            ]
            for dep, future in zip(deps_to_check, futures):
                try:
                    pulled = future.result()
                except Exception as e:
                    # Best effort: a failed pull falls back to a local build.
                    print(f"Pulling {dep.name} failed: {e!r}")
                    pulled = False
                if not pulled:
                    pull_failed.add(dep.name)

    # Rebuild the list from the set's own iteration order rather than
    # appending failed pulls at the end, which could schedule an image
    # before the publishable image it depends on.
    deps_to_build = [
        dep for dep in self if not dep.publish or dep.name in pull_failed
    ]

    # Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
    if ui.env_is_truthy("CI") and not ui.env_is_truthy("CI_ALLOW_LOCAL_BUILD"):
        expected_deps = [dep for dep in deps_to_build if dep.publish]
        if expected_deps:
            print(
                f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
            )
            sys.exit(128)

    prep = self._prepare_batch(deps_to_build)
    for dep in deps_to_build:
        dep.build(prep)
Download or build all of the images in the dependency set that do not already exist locally.
Args: max_retries: Number of retries on failure.
def ensure(self, pre_build: Callable[[list[ResolvedImage]], None] | None = None):
    """Ensure all publishable images in this dependency set exist on Docker
    Hub.

    Images are pushed using their spec as their tag.

    Args:
        pre_build: A callback to invoke with all dependencies that are going
            to be built locally, invoked after their cargo build is done,
            but before the Docker images are built and uploaded to
            DockerHub.
    """
    all_deps = list(self)
    if all_deps:
        # Query the registries for every image concurrently.
        with ThreadPoolExecutor(max_workers=len(all_deps)) as executor:
            published = list(
                executor.map(lambda dep: dep.is_published_if_necessary(), all_deps)
            )
        deps_to_build = [dep for dep, ok in zip(all_deps, published) if not ok]
    else:
        deps_to_build = []

    prep = self._prepare_batch(deps_to_build)
    if pre_build:
        pre_build(deps_to_build)
    lock = Lock()
    # Everything that does not need building counts as already built.
    built_deps: set[str] = {dep.name for dep in all_deps} - {
        dep.name for dep in deps_to_build
    }

    def build_dep(dep):
        # Wait (up to ten minutes) until every dependency has been built.
        deadline = time.time() + 600
        while True:
            if time.time() > deadline:
                raise TimeoutError(
                    f"Timed out in {dep.name} waiting for {[dep2 for dep2 in dep.dependencies if dep2 not in built_deps]}"
                )
            with lock:
                if all(dep2 in built_deps for dep2 in dep.dependencies):
                    break
            time.sleep(0.01)
        # Publishable images get up to three attempts; others fail at once.
        for attempts_remaining in (2, 1, 0):
            try:
                dep.build(prep, push=dep.publish)
                with lock:
                    built_deps.add(dep.name)
                break
            except Exception:
                if not dep.publish or attempts_remaining == 0:
                    raise

    if not deps_to_build:
        return
    with ThreadPoolExecutor(max_workers=len(deps_to_build)) as executor:
        futures = [executor.submit(build_dep, dep) for dep in deps_to_build]
        for future in as_completed(futures):
            future.result()
Ensure all publishable images in this dependency set exist on Docker Hub.
Images are pushed using their spec as their tag.
Args: pre_build: A callback to invoke with all dependencies that are going to be built locally, invoked after their cargo build is done, but before the Docker images are built and uploaded to DockerHub.
def check(self) -> bool:
    """Check all publishable images in this dependency set exist on Docker
    Hub. Don't try to download or build them.

    Returns True for an empty set; otherwise True only if
    `is_published_if_necessary` holds for every image in the set.
    """
    deps = list(self)
    if not deps:
        return True
    # One worker per image so all registry lookups run concurrently.
    with ThreadPoolExecutor(max_workers=len(deps)) as executor:
        verdicts = list(
            executor.map(lambda dep: dep.is_published_if_necessary(), deps)
        )
    return all(verdicts)
Check all publishable images in this dependency set exist on Docker Hub. Don't try to download or build them.
class Repository:
    """A collection of mzbuild `Image`s.

    Creating a repository will walk the filesystem beneath `root` to
    automatically discover all contained `Image`s.

    Iterating over a repository yields the contained images in an arbitrary
    order.

    Args:
        root: The path to the root of the repository.
        arch: The CPU architecture to build for.
        profile: What profile to build the repository in.
        coverage: Whether to enable code coverage instrumentation.
        sanitizer: Which sanitizer to enable (address, thread, leak, memory,
            none).
        image_registry: The Docker image registry to pull images from and push
            images to.
        image_prefix: A prefix to apply to all Docker image names.

    Attributes:
        rd: The `RepositoryDetails` built from the constructor arguments and
            handed to every discovered `Image`.
        images: A mapping from image name to `Image` for all contained images.
        compositions: A mapping from composition name (the name of the
            directory containing a `mzcompose.py` file) to that directory.
    """

    # NOTE: the `arch`, `profile`, and `image_registry` defaults are evaluated
    # once, when the class body is executed, not on every call.
    def __init__(
        self,
        root: Path,
        arch: Arch = Arch.host(),
        profile: Profile = (
            Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
        ),
        coverage: bool = False,
        sanitizer: Sanitizer = Sanitizer.none,
        image_registry: str = image_registry(),
        image_prefix: str = "",
    ):
        self.rd = RepositoryDetails(
            root,
            arch,
            profile,
            coverage,
            sanitizer,
            image_registry,
            image_prefix,
        )
        self.images: dict[str, Image] = {}
        self.compositions: dict[str, Path] = {}
        for rel_path_s in sorted(
            git.expand_globs(self.root, "**/mzbuild.yml", "**/mzcompose.py")
        ):
            rel_path = Path(rel_path_s)
            # Matches found under misc/python are not treated as images or
            # compositions.
            if rel_path.parts[:2] == ("misc", "python"):
                continue

            parent = self.root / rel_path.parent
            if rel_path.name == "mzbuild.yml":
                image = Image(self.rd, parent)
                if not image.name:
                    raise ValueError(f"config at {parent} missing name")
                if image.name in self.images:
                    raise ValueError(f"image {image.name} exists twice")
                self.images[image.name] = image
            elif rel_path.name == "mzcompose.py":
                # Compositions are keyed by the name of their directory.
                name = parent.name
                if name in self.compositions:
                    raise ValueError(f"composition {name} exists twice")
                self.compositions[name] = parent

        # Validate dependencies.
        for image in self.images.values():
            for d in image.depends_on:
                if d not in self.images:
                    raise ValueError(
                        f"image {image.name} depends on non-existent image {d}"
                    )

    @staticmethod
    def install_arguments(parser: argparse.ArgumentParser) -> None:
        """Install options to configure a repository into an argparse parser.

        This function installs the following options:

          * The mutually-exclusive `--dev`/`--optimized`/`--release` options to
            control the `profile` repository attribute.
          * The `--coverage` boolean option to control the `coverage` repository
            attribute. It defaults to the truthiness of the
            `CI_COVERAGE_ENABLED` environment variable.
          * The `--sanitizer` option to control the `sanitizer` repository
            attribute. It defaults to the `Sanitizer` member named by the
            `CI_SANITIZER` environment variable, or `none` if unset.
          * The `--arch` option to control the `arch` repository attribute.
          * The `--image-registry` and `--image-prefix` options to control the
            attributes of the same names.

        Use `Repository.from_arguments` to construct a repository from the
        parsed command-line arguments.
        """
        build_mode = parser.add_mutually_exclusive_group()
        build_mode.add_argument(
            "--dev",
            action="store_true",
            help="build Rust binaries with the dev profile",
        )
        # When no profile flag is given, `from_arguments` picks release only
        # if CI_LTO is truthy and optimized otherwise; the help texts say so.
        build_mode.add_argument(
            "--release",
            action="store_true",
            help="build Rust binaries with the release profile (default if CI_LTO is set)",
        )
        build_mode.add_argument(
            "--optimized",
            action="store_true",
            help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols; default unless CI_LTO is set)",
        )
        parser.add_argument(
            "--coverage",
            help="whether to enable code coverage compilation flags",
            default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
            action="store_true",
        )
        parser.add_argument(
            "--sanitizer",
            help="whether to enable a sanitizer",
            default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
            type=Sanitizer,
            choices=Sanitizer,
        )

        arch_values = [member.value for member in Arch]

        def _parse_arch(s: str) -> Arch:
            # Convert the raw string here so that argparse reports an
            # unknown architecture as a regular usage error.
            try:
                return Arch(s)
            except ValueError:
                valid = ", ".join(arch_values)
                raise argparse.ArgumentTypeError(
                    f"invalid arch: {s!r} (choose from {valid})"
                ) from None

        parser.add_argument(
            "--arch",
            default=Arch.host(),
            help="the CPU architecture to build for",
            type=_parse_arch,
            metavar="{" + ",".join(arch_values) + "}",
        )
        parser.add_argument(
            "--image-registry",
            default=image_registry(),
            help="the Docker image registry to pull images from and push images to",
        )
        parser.add_argument(
            "--image-prefix",
            default="",
            help="a prefix to apply to all Docker image names",
        )

    @classmethod
    def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
        """Construct a repository from command-line arguments.

        The provided namespace must contain the options installed by
        `Repository.install_arguments`.

        If none of `--release`, `--optimized`, or `--dev` was given, the
        profile is release when the `CI_LTO` environment variable is truthy
        and optimized otherwise.
        """
        if args.release:
            profile = Profile.RELEASE
        elif args.optimized:
            profile = Profile.OPTIMIZED
        elif args.dev:
            profile = Profile.DEV
        else:
            profile = (
                Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
            )

        return cls(
            root,
            profile=profile,
            coverage=args.coverage,
            sanitizer=args.sanitizer,
            image_registry=args.image_registry,
            image_prefix=args.image_prefix,
            arch=args.arch,
        )

    @property
    def root(self) -> Path:
        """The path to the root directory for the repository."""
        return self.rd.root

    def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
        """Compute the dependency set necessary to build target images.

        The dependencies of `targets` will be crawled recursively until the
        complete set of transitive dependencies is determined or a circular
        dependency is discovered. The returned dependency set will be sorted
        in topological order.

        Raises:
            ValueError: A circular dependency was discovered in the images
                in the repository.
        """
        # Pre-fetch all crate input files in a single batched git call,
        # replacing ~118 individual subprocess pairs with one pair.
        self.rd.cargo_workspace.precompute_crate_inputs()
        # Pre-fetch all image context files in a single batched git call,
        # replacing ~41 individual subprocess pairs with one pair.
        self._precompute_image_context_files()

        resolved: OrderedDict = OrderedDict()
        visiting: set[str] = set()

        def visit(image: Image, path: tuple[str, ...] = ()) -> None:
            if image.name in resolved:
                return
            chain = (*path, image.name)
            if image.name in visiting:
                # Reaching an image that is still being expanded means the
                # current chain loops back on itself.
                diagram = " -> ".join(chain)
                raise ValueError(f"circular dependency in mzbuild: {diagram}")

            visiting.add(image.name)
            for d in sorted(image.depends_on):
                visit(self.images[d], chain)
            # Record the image only after all of its dependencies, which is
            # what makes the result topologically ordered.
            resolved[image.name] = image

        for target_image in sorted(targets, key=lambda image: image.name):
            visit(target_image)

        return DependencySet(resolved.values())

    def _precompute_image_context_files(self) -> None:
        """Pre-fetch all image context files in a single batched git call.

        This replaces ~41 individual pairs of git subprocess calls (one per
        image) with a single pair, then partitions the results by image path
        and stores each partition on the image's `_context_files_cache`.
        """
        root = self.rd.root
        # Use paths relative to root for git specs and partitioning, since
        # git --relative outputs paths relative to cwd (root). Image paths
        # may be absolute when MZ_ROOT is an absolute path.
        image_rel_paths = sorted(
            {str(img.path.relative_to(root)) for img in self.images.values()}
        )
        specs = [f"{p}/**" for p in image_rel_paths]

        # Well-known hash of git's empty tree: diffing against it lists every
        # tracked path matching the specs.
        empty_tree = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        diff_files = spawn.capture(
            ["git", "diff", "--name-only", "-z", "--relative", empty_tree, "--"]
            + specs,
            cwd=root,
        )
        # Untracked files that are not ignored also count as context files.
        ls_files = spawn.capture(
            ["git", "ls-files", "--others", "--exclude-standard", "-z", "--"] + specs,
            cwd=root,
        )
        # Both outputs are NUL-separated (-z); drop blank entries.
        all_files = {f for f in (diff_files + ls_files).split("\0") if f.strip()}

        # Partition files by image path (longest match first for nested paths)
        image_file_map: dict[str, set[str]] = {p: set() for p in image_rel_paths}
        sorted_paths = sorted(image_rel_paths, key=len, reverse=True)
        for f in all_files:
            for ip in sorted_paths:
                if f.startswith(ip + "/"):
                    image_file_map[ip].add(f)
                    break

        for img in self.images.values():
            rel = str(img.path.relative_to(root))
            img._context_files_cache = image_file_map.get(rel, set())

    def __iter__(self) -> Iterator[Image]:
        """Iterate over the contained `Image`s."""
        return iter(self.images.values())
A collection of mzbuild Images.
Creating a repository will walk the filesystem beneath root to
automatically discover all contained Images.
Iterating over a repository yields the contained images in an arbitrary order.
Args: root: The path to the root of the repository. arch: The CPU architecture to build for. profile: What profile to build the repository in. coverage: Whether to enable code coverage instrumentation. sanitizer: Which sanitizer to enable (address, thread, leak, memory, none). image_registry: The Docker image registry to pull images from and push images to. image_prefix: A prefix to apply to all Docker image names.
Attributes:
images: A mapping from image name to Image for all contained images.
compositions: A mapping from composition name (the name of the directory containing a mzcompose.py file) to that directory.
def __init__(
    self,
    root: Path,
    arch: Arch = Arch.host(),
    profile: Profile = (
        Profile.RELEASE if ui.env_is_truthy("CI_LTO") else Profile.OPTIMIZED
    ),
    coverage: bool = False,
    sanitizer: Sanitizer = Sanitizer.none,
    image_registry: str = image_registry(),
    image_prefix: str = "",
):
    """Discover every image and composition beneath `root`.

    The build settings are bundled into a `RepositoryDetails`, which is
    handed to each `Image` created from a discovered `mzbuild.yml`.
    Directories containing a `mzcompose.py` are recorded as compositions,
    keyed by directory name.

    Raises:
        ValueError: An image config has no name, an image or composition
            name occurs twice, or an image depends on an unknown image.
    """
    self.rd = RepositoryDetails(
        root, arch, profile, coverage, sanitizer, image_registry, image_prefix
    )
    self.images: dict[str, Image] = {}
    self.compositions: dict[str, Path] = {}

    discovered = git.expand_globs(self.root, "**/mzbuild.yml", "**/mzcompose.py")
    for found in sorted(discovered):
        rel_path = Path(found)
        # Matches found under misc/python are skipped.
        if rel_path.parts[:2] == ("misc", "python"):
            continue

        parent = self.root / rel_path.parent
        filename = rel_path.name
        if filename == "mzbuild.yml":
            image = Image(self.rd, parent)
            if not image.name:
                raise ValueError(f"config at {parent} missing name")
            if image.name in self.images:
                raise ValueError(f"image {image.name} exists twice")
            self.images[image.name] = image
        elif filename == "mzcompose.py":
            name = parent.name
            if name in self.compositions:
                raise ValueError(f"composition {name} exists twice")
            self.compositions[name] = parent

    # Every declared dependency must name an image that was discovered.
    known_images = self.images
    for image in known_images.values():
        for d in image.depends_on:
            if d in known_images:
                continue
            raise ValueError(
                f"image {image.name} depends on non-existent image {d}"
            )
@staticmethod
def install_arguments(parser: argparse.ArgumentParser) -> None:
    """Install options to configure a repository into an argparse parser.

    This function installs the following options:

      * The mutually-exclusive `--dev`/`--optimized`/`--release` options to
        control the `profile` repository attribute.
      * The `--coverage` boolean option to control the `coverage` repository
        attribute. It defaults to the truthiness of the
        `CI_COVERAGE_ENABLED` environment variable.
      * The `--sanitizer` option to control the `sanitizer` repository
        attribute. It defaults to the `Sanitizer` member named by the
        `CI_SANITIZER` environment variable, or `none` if unset.
      * The `--arch` option to control the `arch` repository attribute.
      * The `--image-registry` and `--image-prefix` options to control the
        attributes of the same names.

    Use `Repository.from_arguments` to construct a repository from the
    parsed command-line arguments.
    """
    build_mode = parser.add_mutually_exclusive_group()
    build_mode.add_argument(
        "--dev",
        action="store_true",
        help="build Rust binaries with the dev profile",
    )
    # When no profile flag is given, `from_arguments` picks release only if
    # CI_LTO is truthy and optimized otherwise; the help texts say so.
    build_mode.add_argument(
        "--release",
        action="store_true",
        help="build Rust binaries with the release profile (default if CI_LTO is set)",
    )
    build_mode.add_argument(
        "--optimized",
        action="store_true",
        help="build Rust binaries with the optimized profile (optimizations, no LTO, no debug symbols; default unless CI_LTO is set)",
    )
    parser.add_argument(
        "--coverage",
        help="whether to enable code coverage compilation flags",
        default=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
        action="store_true",
    )
    parser.add_argument(
        "--sanitizer",
        help="whether to enable a sanitizer",
        default=Sanitizer[os.getenv("CI_SANITIZER", "none")],
        type=Sanitizer,
        choices=Sanitizer,
    )

    arch_values = [member.value for member in Arch]

    def _parse_arch(s: str) -> Arch:
        # Convert the raw string here so that argparse reports an unknown
        # architecture as a regular usage error.
        try:
            return Arch(s)
        except ValueError:
            valid = ", ".join(arch_values)
            raise argparse.ArgumentTypeError(
                f"invalid arch: {s!r} (choose from {valid})"
            ) from None

    parser.add_argument(
        "--arch",
        default=Arch.host(),
        help="the CPU architecture to build for",
        type=_parse_arch,
        metavar="{" + ",".join(arch_values) + "}",
    )
    parser.add_argument(
        "--image-registry",
        default=image_registry(),
        help="the Docker image registry to pull images from and push images to",
    )
    parser.add_argument(
        "--image-prefix",
        default="",
        help="a prefix to apply to all Docker image names",
    )
Install options to configure a repository into an argparse parser.
This function installs the following options:
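- The mutually-exclusive --dev/--optimized/--release options to control the profile repository attribute.
- The --coverage boolean option to control the coverage repository attribute.
- The --sanitizer, --arch, --image-registry, and --image-prefix options to control the repository attributes of the same names.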
Use Repository.from_arguments to construct a repository from the
parsed command-line arguments.
@classmethod
def from_arguments(cls, root: Path, args: argparse.Namespace) -> "Repository":
    """Build a repository from an already-parsed argument namespace.

    `args` is expected to carry the options registered by
    `Repository.install_arguments`. An explicit profile flag wins, checked
    in the order release, optimized, dev. With no flag, the profile is
    release when the `CI_LTO` environment variable is truthy and optimized
    otherwise.
    """
    if args.release:
        chosen = Profile.RELEASE
    elif args.optimized:
        chosen = Profile.OPTIMIZED
    elif args.dev:
        chosen = Profile.DEV
    elif ui.env_is_truthy("CI_LTO"):
        chosen = Profile.RELEASE
    else:
        chosen = Profile.OPTIMIZED

    settings = {
        "profile": chosen,
        "coverage": args.coverage,
        "sanitizer": args.sanitizer,
        "image_registry": args.image_registry,
        "image_prefix": args.image_prefix,
        "arch": args.arch,
    }
    return cls(root, **settings)
Construct a repository from command-line arguments.
The provided namespace must contain the options installed by
Repository.install_arguments.
@property
def root(self) -> Path:
    """The repository's root directory, as stored in its details."""
    details = self.rd
    return details.root
The path to the root directory for the repository.
def resolve_dependencies(self, targets: Iterable[Image]) -> DependencySet:
    """Work out everything that must be built for the given target images.

    Starting from `targets`, dependencies are followed depth-first until
    every transitive dependency has been collected or a cycle is found.
    Each image is recorded only after all of its dependencies, so the
    returned dependency set is in topological order.

    Raises:
        ValueError: A circular dependency was discovered in the images
            in the repository.
    """
    # Warm the crate-input cache with one batched git call pair, replacing
    # ~118 individual subprocess pairs.
    self.rd.cargo_workspace.precompute_crate_inputs()
    # Likewise for image context files, replacing ~41 individual
    # subprocess pairs.
    self._precompute_image_context_files()

    ordered: OrderedDict = OrderedDict()
    in_progress: set[str] = set()

    def walk(image: Image, trail: tuple[str, ...] = ()) -> None:
        name = image.name
        if name in ordered:
            return
        chain = (*trail, name)
        if name in in_progress:
            # Seeing an image that is still being expanded means the chain
            # loops back on itself.
            diagram = " -> ".join(chain)
            raise ValueError(f"circular dependency in mzbuild: {diagram}")

        in_progress.add(name)
        for dep_name in sorted(image.depends_on):
            walk(self.images[dep_name], chain)
        ordered[name] = image

    for root_image in sorted(targets, key=lambda image: image.name):
        walk(root_image)

    return DependencySet(ordered.values())
Compute the dependency set necessary to build target images.
The dependencies of targets will be crawled recursively until the
complete set of transitive dependencies is determined or a circular
dependency is discovered. The returned dependency set will be sorted
in topological order.
Raises: ValueError: A circular dependency was discovered in the images in the repository.
def _push_manifest_list(
    manifest: str, images: Sequence[ResolvedImage], spec_prefix: str = ""
) -> None:
    """Create the manifest list `manifest` from `images` and push it, retrying on failure."""

    def create_and_push() -> None:
        # Both steps run inside the retried callable, so a failed push also
        # re-runs the `create --amend` step.
        spawn.runv(
            [
                "docker",
                "manifest",
                "create",
                "--amend",
                manifest,
                *(f"{spec_prefix}{image.spec()}" for image in images),
            ]
        )
        spawn.runv(["docker", "manifest", "push", manifest])

    spawn.run_with_retries(create_and_push)


def publish_multiarch_images(
    tag: str, dependency_sets: Iterable[Iterable[ResolvedImage]]
) -> None:
    """Publishes a set of docker images under a given tag.

    The dependency sets are walked in lockstep, so each step sees the
    per-set variants of one image, and a manifest list named after the
    image and `tag` is created from their specs and pushed. If the
    `GITHUB_GHCR_TOKEN` environment variable is set, the function first logs
    in to ghcr.io and pushes a second, `GHCR_PREFIX`-prefixed manifest list
    for each image.

    A manifest list is skipped when it is already pushed, unless `tag` is
    `latest` or `unstable`, which are always pushed. Afterwards a Buildkite
    annotation naming the tag is emitted.

    Args:
        tag: The Docker tag to publish under.
        dependency_sets: One iterable of resolved images per variant; all of
            them must contain the same images in the same order.

    Raises:
        AssertionError: The dependency sets disagree on the image at some
            position.
    """
    always_push_tags = ("latest", "unstable")
    force_push = tag in always_push_tags

    if ghcr_token := os.getenv("GITHUB_GHCR_TOKEN"):
        # The token is passed on stdin rather than on the command line.
        spawn.runv(
            [
                "docker",
                "login",
                "ghcr.io",
                "-u",
                "materialize-bot",
                "--password-stdin",
            ],
            stdin=ghcr_token.encode(),
        )

    for images in zip(*dependency_sets):
        names = {image.image.name for image in images}
        assert len(names) == 1, "dependency sets did not contain identical images"
        name = images[0].image.docker_name(tag)
        if force_push or not is_docker_image_pushed(name):
            _push_manifest_list(name, images)

        ghcr_name = f"{GHCR_PREFIX}{name}"
        if ghcr_token and (force_push or not is_ghcr_image_pushed(ghcr_name)):
            _push_manifest_list(ghcr_name, images, spec_prefix=GHCR_PREFIX)

    print(f"--- Notifying for tag {tag}")
    markdown = f"""Pushed images with Docker tag `{tag}`"""
    spawn.runv(
        [
            "buildkite-agent",
            "annotate",
            "--style=info",
            f"--context=build-tags-{tag}",
        ],
        stdin=markdown.encode(),
    )
Publishes a set of docker images under a given tag.