misc.python.materialize.bazel

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
import base64
import hashlib
import os
import pathlib
import subprocess
from enum import Enum

import requests

from materialize import MZ_ROOT, ui
from materialize.build_config import BuildConfig
from materialize.teleport import TeleportProxy
 21
"""Utilities for interacting with Bazel from python scripts"""

# Path where we put the current revision of the repo that we can side channel
# into Bazel. NOTE(review): lives in world-writable /tmp — assumed single-user
# build machines; confirm before relying on it for anything security-relevant.
MZ_GIT_HASH_FILE = "/tmp/mz_git_hash.txt"
 27
 28
 29def output_paths(target, options=[]) -> list[pathlib.Path]:
 30    """Returns the absolute path of outputs from the built Bazel target."""
 31
 32    cmd_args = ["bazel", "cquery", f"{target}", *options, "--output=files"]
 33    paths = subprocess.check_output(
 34        cmd_args, text=True, stderr=subprocess.DEVNULL
 35    ).splitlines()
 36    return [pathlib.Path(path) for path in paths]
 37
 38
 39def write_git_hash():
 40    """
 41    Temporary file where we write the current git hash, so we can side channel
 42    it into Bazel.
 43
 44    For production releases we stamp builds with the `workspace_status_command`
 45    but this workflow is not friendly to remote caching. Specifically, the
 46    "volatile status" of a workspace is not supposed to cause builds to get
 47    invalidated, and it doesn't when the result is cached locally, but it does
 48    when it's cached remotely.
 49
 50    See: <https://bazel.build/docs/user-manual#workspace-status>
 51         <https://github.com/bazelbuild/bazel/issues/10075>
 52    """
 53
 54    repo = MZ_ROOT / ".git"
 55    cmd_args = ["git", f"--git-dir={repo}", "rev-parse", "HEAD"]
 56    result = subprocess.run(
 57        cmd_args, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
 58    )
 59
 60    if result.returncode == 0:
 61        with open(MZ_GIT_HASH_FILE, "w") as f:
 62            f.write(result.stdout.strip())
 63    else:
 64        ui.warn(f"Failed to get current revision of {MZ_ROOT}, falling back to all 0s")
 65
 66
 67def calc_ingerity(path) -> str:
 68    """
 69    Calculate the 'integrity' for a given file.
 70
 71    'integrity' is a hash of the file used in rules like 'http_archive'.
 72
 73    See: <https://bazel.build/rules/lib/repo/http#http_archive-integrity>
 74    """
 75
 76    digest = subprocess.run(
 77        ["openssl", "dgst", "-sha256", "-binary", str(path)], stdout=subprocess.PIPE
 78    )
 79    base64 = subprocess.run(
 80        ["openssl", "base64", "-A"], input=digest.stdout, stdout=subprocess.PIPE
 81    )
 82    formatted = subprocess.run(
 83        ["sed", "s/^/sha256-/"], input=base64.stdout, stdout=subprocess.PIPE
 84    )
 85
 86    return formatted.stdout.decode("utf-8")
 87
 88
 89def toolchain_hashes(stable, nightly) -> dict[str, dict[str, str]]:
 90    """
 91    Generates the hashes for our Bazel toolchains.
 92
 93    Fetches the specified Stable and Nightly version of the Rust compiler from our toolchains repo,
 94    hashes the downloaded files, and returns a properly formatted dictionary for Bazel.
 95    """
 96
 97    ARCHS = [
 98        "aarch64-apple-darwin",
 99        "aarch64-unknown-linux-gnu",
100        "x86_64-apple-darwin",
101        "x86_64-unknown-linux-gnu",
102    ]
103    TOOLS = [
104        "cargo",
105        "clippy",
106        "llvm-tools",
107        "rust-std",
108        "rustc",
109    ]
110    VERSIONS = {"stable": stable, "nightly": nightly}
111    URL_TEMPLATE = "https://github.com/MaterializeInc/toolchains/releases/download/rust-{version}/{tool}-{channel}-{arch}.tar.zst"
112
113    hashes = {}
114
115    for arch in ARCHS:
116        hashes[arch] = {}
117        for channel, version in VERSIONS.items():
118            hashes[arch][channel] = {}
119            for tool in TOOLS:
120                if channel == "stable":
121                    url_channel = version
122                else:
123                    url_channel = channel
124
125                print(f"Processing {tool} {version} {arch}")
126
127                # Download the file.
128                url = URL_TEMPLATE.format(
129                    version=version, tool=tool, channel=url_channel, arch=arch
130                )
131                response = requests.get(url, stream=True)
132                response.raise_for_status()
133
134                # Hash the response.
135                sha256_hash = hashlib.sha256()
136                for chunk in response.iter_content(chunk_size=8192):
137                    if chunk:
138                        sha256_hash.update(chunk)
139                hashes[arch][channel][tool] = sha256_hash.hexdigest()
140
141    return hashes
142
143
def remote_cache_arg(config: BuildConfig) -> list[str]:
    """List of arguments that could possibly enable use of a remote cache.

    Precedence: the CI environment variable wins over the build config; if
    neither is set, no flags are returned.
    """

    # CI takes precedence over any locally configured cache.
    remote_cache = os.getenv("CI_BAZEL_REMOTE_CACHE")
    if not remote_cache:
        configured = config.bazel.remote_cache
        if configured:
            remote_cache = RemoteCache(configured).address()

    return [f"--remote_cache={remote_cache}"] if remote_cache else []
162
163
class RemoteCache:
    """The remote cache we're connecting to."""

    def __init__(self, value: str):
        """Parse a remote cache specification.

        Args:
            value: either "teleport:<app_name>" to connect through a teleport
                proxy, or a plain cache address (e.g. an HTTP URL).
        """
        # Require the full "teleport:" prefix. The previous check used
        # startswith("teleport"), which misparsed any plain address that
        # merely begins with the word "teleport" (e.g. a host named
        # "teleport.example.com:8080") and raised IndexError on a bare
        # "teleport" with no colon.
        if value.startswith("teleport:"):
            self.kind = RemoteCacheKind.teleport
            self.data = value.split(":", 1)[1]
        else:
            self.kind = RemoteCacheKind.normal
            self.data = value

    def address(self) -> str:
        """Address for connecting to this remote cache.

        For the teleport kind this spawns a local proxy on port 6889 and
        returns its localhost address; otherwise the configured address is
        returned as-is.
        """
        if self.kind == RemoteCacheKind.normal:
            return self.data
        else:
            TeleportProxy.spawn(self.data, "6889")
            return "http://localhost:6889"
183
184
class RemoteCacheKind(Enum):
    """Kind of remote cache we're connecting to."""

    # Connecting to a remote cache through a teleport proxy.
    teleport = "teleport"

    # An HTTP address for the cache.
    normal = "normal"

    def __str__(self) -> str:
        return str(self.value)
MZ_GIT_HASH_FILE = '/tmp/mz_git_hash.txt'
def output_paths(target, options=[]) -> list[pathlib.Path]:
30def output_paths(target, options=[]) -> list[pathlib.Path]:
31    """Returns the absolute path of outputs from the built Bazel target."""
32
33    cmd_args = ["bazel", "cquery", f"{target}", *options, "--output=files"]
34    paths = subprocess.check_output(
35        cmd_args, text=True, stderr=subprocess.DEVNULL
36    ).splitlines()
37    return [pathlib.Path(path) for path in paths]

Returns the absolute path of outputs from the built Bazel target.

def write_git_hash():
40def write_git_hash():
41    """
42    Temporary file where we write the current git hash, so we can side channel
43    it into Bazel.
44
45    For production releases we stamp builds with the `workspace_status_command`
46    but this workflow is not friendly to remote caching. Specifically, the
47    "volatile status" of a workspace is not supposed to cause builds to get
48    invalidated, and it doesn't when the result is cached locally, but it does
49    when it's cached remotely.
50
51    See: <https://bazel.build/docs/user-manual#workspace-status>
52         <https://github.com/bazelbuild/bazel/issues/10075>
53    """
54
55    repo = MZ_ROOT / ".git"
56    cmd_args = ["git", f"--git-dir={repo}", "rev-parse", "HEAD"]
57    result = subprocess.run(
58        cmd_args, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
59    )
60
61    if result.returncode == 0:
62        with open(MZ_GIT_HASH_FILE, "w") as f:
63            f.write(result.stdout.strip())
64    else:
65        ui.warn(f"Failed to get current revision of {MZ_ROOT}, falling back to all 0s")

Temporary file where we write the current git hash, so we can side channel it into Bazel.

For production releases we stamp builds with the workspace_status_command but this workflow is not friendly to remote caching. Specifically, the "volatile status" of a workspace is not supposed to cause builds to get invalidated, and it doesn't when the result is cached locally, but it does when it's cached remotely.

See: https://bazel.build/docs/user-manual#workspace-status https://github.com/bazelbuild/bazel/issues/10075

def calc_ingerity(path) -> str:
68def calc_ingerity(path) -> str:
69    """
70    Calculate the 'integrity' for a given file.
71
72    'integrity' is a hash of the file used in rules like 'http_archive'.
73
74    See: <https://bazel.build/rules/lib/repo/http#http_archive-integrity>
75    """
76
77    digest = subprocess.run(
78        ["openssl", "dgst", "-sha256", "-binary", str(path)], stdout=subprocess.PIPE
79    )
80    base64 = subprocess.run(
81        ["openssl", "base64", "-A"], input=digest.stdout, stdout=subprocess.PIPE
82    )
83    formatted = subprocess.run(
84        ["sed", "s/^/sha256-/"], input=base64.stdout, stdout=subprocess.PIPE
85    )
86
87    return formatted.stdout.decode("utf-8")

Calculate the 'integrity' for a given file.

'integrity' is a hash of the file used in rules like 'http_archive'.

See: https://bazel.build/rules/lib/repo/http#http_archive-integrity

def toolchain_hashes(stable, nightly) -> dict[str, dict[str, str]]:
 90def toolchain_hashes(stable, nightly) -> dict[str, dict[str, str]]:
 91    """
 92    Generates the hashes for our Bazel toolchains.
 93
 94    Fetches the specified Stable and Nightly version of the Rust compiler from our toolchains repo,
 95    hashes the downloaded files, and returns a properly formatted dictionary for Bazel.
 96    """
 97
 98    ARCHS = [
 99        "aarch64-apple-darwin",
100        "aarch64-unknown-linux-gnu",
101        "x86_64-apple-darwin",
102        "x86_64-unknown-linux-gnu",
103    ]
104    TOOLS = [
105        "cargo",
106        "clippy",
107        "llvm-tools",
108        "rust-std",
109        "rustc",
110    ]
111    VERSIONS = {"stable": stable, "nightly": nightly}
112    URL_TEMPLATE = "https://github.com/MaterializeInc/toolchains/releases/download/rust-{version}/{tool}-{channel}-{arch}.tar.zst"
113
114    hashes = {}
115
116    for arch in ARCHS:
117        hashes[arch] = {}
118        for channel, version in VERSIONS.items():
119            hashes[arch][channel] = {}
120            for tool in TOOLS:
121                if channel == "stable":
122                    url_channel = version
123                else:
124                    url_channel = channel
125
126                print(f"Processing {tool} {version} {arch}")
127
128                # Download the file.
129                url = URL_TEMPLATE.format(
130                    version=version, tool=tool, channel=url_channel, arch=arch
131                )
132                response = requests.get(url, stream=True)
133                response.raise_for_status()
134
135                # Hash the response.
136                sha256_hash = hashlib.sha256()
137                for chunk in response.iter_content(chunk_size=8192):
138                    if chunk:
139                        sha256_hash.update(chunk)
140                hashes[arch][channel][tool] = sha256_hash.hexdigest()
141
142    return hashes

Generates the hashes for our Bazel toolchains.

Fetches the specified Stable and Nightly version of the Rust compiler from our toolchains repo, hashes the downloaded files, and returns a properly formatted dictionary for Bazel.

def remote_cache_arg(config: materialize.build_config.BuildConfig) -> list[str]:
145def remote_cache_arg(config: BuildConfig) -> list[str]:
146    """List of arguments that could possibly enable use of a remote cache."""
147
148    ci_remote = os.getenv("CI_BAZEL_REMOTE_CACHE")
149    config_remote = config.bazel.remote_cache
150
151    if ci_remote:
152        remote_cache = ci_remote
153    elif config_remote:
154        bazel_remote = RemoteCache(config_remote)
155        remote_cache = bazel_remote.address()
156    else:
157        remote_cache = None
158
159    if remote_cache:
160        return [f"--remote_cache={remote_cache}"]
161    else:
162        return []

List of arguments that could possibly enable use of a remote cache.

class RemoteCache:
165class RemoteCache:
166    """The remote cache we're conecting to."""
167
168    def __init__(self, value: str):
169        if value.startswith("teleport"):
170            app_name = value.split(":")[1]
171            self.kind = RemoteCacheKind.teleport
172            self.data = app_name
173        else:
174            self.kind = RemoteCacheKind.normal
175            self.data = value
176
177    def address(self) -> str:
178        """Address for connecting to this remote cache."""
179        if self.kind == RemoteCacheKind.normal:
180            return self.data
181        else:
182            TeleportProxy.spawn(self.data, "6889")
183            return "http://localhost:6889"

The remote cache we're connecting to.

RemoteCache(value: str)
168    def __init__(self, value: str):
169        if value.startswith("teleport"):
170            app_name = value.split(":")[1]
171            self.kind = RemoteCacheKind.teleport
172            self.data = app_name
173        else:
174            self.kind = RemoteCacheKind.normal
175            self.data = value
def address(self) -> str:
177    def address(self) -> str:
178        """Address for connecting to this remote cache."""
179        if self.kind == RemoteCacheKind.normal:
180            return self.data
181        else:
182            TeleportProxy.spawn(self.data, "6889")
183            return "http://localhost:6889"

Address for connecting to this remote cache.

class RemoteCacheKind(enum.Enum):
186class RemoteCacheKind(Enum):
187    """Kind of remote cache we're connecting to."""
188
189    teleport = "teleport"
190    """Connecting to a remote cache through a teleport proxy."""
191
192    normal = "normal"
193    """An HTTP address for the cache."""
194
195    def __str__(self):
196        return self.value

Kind of remote cache we're connecting to.

teleport = <RemoteCacheKind.teleport: 'teleport'>

Connecting to a remote cache through a teleport proxy.

normal = <RemoteCacheKind.normal: 'normal'>

An HTTP address for the cache.