misc.python.materialize.bazel
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

"""Utilities for interacting with Bazel from python scripts"""
# Fix: the module docstring used to sit after the imports, where it is just a
# stray string statement and never becomes `__doc__`; moved to the top.

import base64
import hashlib
import os
import pathlib
import subprocess
from enum import Enum

import requests

from materialize import MZ_ROOT, ui
from materialize.build_config import BuildConfig
from materialize.teleport import TeleportProxy

# Path where we put the current revision of the repo that we can side channel
# into Bazel.
MZ_GIT_HASH_FILE = "/tmp/mz_git_hash.txt"


def output_paths(target, options=None) -> list[pathlib.Path]:
    """Returns the absolute paths of outputs from the built Bazel target.

    Args:
        target: Bazel label to query.
        options: Extra command line flags passed to `bazel cquery`.
    """
    # Fix: the default used to be a mutable `options=[]`, shared across calls.
    cmd_args = ["bazel", "cquery", f"{target}", *(options or []), "--output=files"]
    paths = subprocess.check_output(
        cmd_args, text=True, stderr=subprocess.DEVNULL
    ).splitlines()
    return [pathlib.Path(path) for path in paths]


def write_git_hash():
    """
    Temporary file where we write the current git hash, so we can side channel
    it into Bazel.

    For production releases we stamp builds with the `workspace_status_command`
    but this workflow is not friendly to remote caching. Specifically, the
    "volatile status" of a workspace is not supposed to cause builds to get
    invalidated, and it doesn't when the result is cached locally, but it does
    when it's cached remotely.

    See: <https://bazel.build/docs/user-manual#workspace-status>
         <https://github.com/bazelbuild/bazel/issues/10075>
    """

    repo = MZ_ROOT / ".git"
    cmd_args = ["git", f"--git-dir={repo}", "rev-parse", "HEAD"]
    result = subprocess.run(
        cmd_args, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
    )

    if result.returncode == 0:
        with open(MZ_GIT_HASH_FILE, "w") as f:
            f.write(result.stdout.strip())
    else:
        # NOTE(review): nothing is written on failure; presumably the Bazel
        # side substitutes all 0s when the file is missing — confirm.
        ui.warn(f"Failed to get current revision of {MZ_ROOT}, falling back to all 0s")


def calc_ingerity(path) -> str:
    """
    Calculate the 'integrity' for a given file.

    'integrity' is a hash of the file used in rules like 'http_archive'.

    See: <https://bazel.build/rules/lib/repo/http#http_archive-integrity>
    """
    # NOTE(review): the name is misspelled ("ingerity") but kept so existing
    # callers keep working.
    #
    # Fix: hash in-process instead of piping through `openssl` and `sed`; the
    # old pipeline never checked return codes, so a failing `openssl` would
    # silently yield a bogus integrity string.
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            sha256.update(chunk)
    encoded = base64.b64encode(sha256.digest()).decode("utf-8")
    # Trailing newline preserved: the previous `sed` stage appended one.
    return f"sha256-{encoded}\n"


def toolchain_hashes(stable, nightly) -> dict[str, dict[str, dict[str, str]]]:
    """
    Generates the hashes for our Bazel toolchains.

    Fetches the specified Stable and Nightly versions of the Rust compiler from
    our toolchains repo, hashes the downloaded files, and returns a properly
    formatted dictionary for Bazel, keyed arch -> channel -> tool -> sha256.

    Args:
        stable: version string of the stable Rust toolchain to fetch.
        nightly: version string of the nightly Rust toolchain to fetch.
    """
    # Fix: the return annotation used to claim a two-level dict, but the code
    # builds arch -> channel -> tool -> hash (three levels).

    ARCHS = [
        "aarch64-apple-darwin",
        "aarch64-unknown-linux-gnu",
        "x86_64-apple-darwin",
        "x86_64-unknown-linux-gnu",
    ]
    TOOLS = [
        "cargo",
        "clippy",
        "llvm-tools",
        "rust-std",
        "rustc",
    ]
    VERSIONS = {"stable": stable, "nightly": nightly}
    URL_TEMPLATE = "https://github.com/MaterializeInc/toolchains/releases/download/rust-{version}/{tool}-{channel}-{arch}.tar.zst"

    hashes: dict[str, dict[str, dict[str, str]]] = {}

    for arch in ARCHS:
        hashes[arch] = {}
        for channel, version in VERSIONS.items():
            hashes[arch][channel] = {}
            for tool in TOOLS:
                # Stable artifacts embed the version number in the file name
                # where nightly artifacts use the literal channel name.
                if channel == "stable":
                    url_channel = version
                else:
                    url_channel = channel

                print(f"Processing {tool} {version} {arch}")

                # Download the file, streaming so we never hold a whole
                # tarball in memory. Fix: a timeout so a stalled connection
                # can't hang the build forever.
                url = URL_TEMPLATE.format(
                    version=version, tool=tool, channel=url_channel, arch=arch
                )
                response = requests.get(url, stream=True, timeout=60)
                response.raise_for_status()

                # Hash the response.
                sha256_hash = hashlib.sha256()
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        sha256_hash.update(chunk)
                hashes[arch][channel][tool] = sha256_hash.hexdigest()

    return hashes


def remote_cache_arg(config: BuildConfig) -> list[str]:
    """List of arguments that could possibly enable use of a remote cache."""

    # CI takes precedence over any locally configured cache.
    ci_remote = os.getenv("CI_BAZEL_REMOTE_CACHE")
    config_remote = config.bazel.remote_cache

    if ci_remote:
        remote_cache = ci_remote
    elif config_remote:
        bazel_remote = RemoteCache(config_remote)
        remote_cache = bazel_remote.address()
    else:
        remote_cache = None

    if remote_cache:
        return [f"--remote_cache={remote_cache}"]
    else:
        return []


class RemoteCache:
    """The remote cache we're connecting to."""

    def __init__(self, value: str):
        # A value like "teleport:<app>" selects the teleport-proxied cache;
        # anything else is treated as a plain cache address.
        if value.startswith("teleport"):
            app_name = value.split(":")[1]
            self.kind = RemoteCacheKind.teleport
            self.data = app_name
        else:
            self.kind = RemoteCacheKind.normal
            self.data = value

    def address(self) -> str:
        """Address for connecting to this remote cache."""
        if self.kind == RemoteCacheKind.normal:
            return self.data
        else:
            # Spawn the local teleport proxy and point Bazel at it.
            TeleportProxy.spawn(self.data, "6889")
            return "http://localhost:6889"


class RemoteCacheKind(Enum):
    """Kind of remote cache we're connecting to."""

    teleport = "teleport"
    """Connecting to a remote cache through a teleport proxy."""

    normal = "normal"
    """An HTTP address for the cache."""

    def __str__(self):
        return self.value
def output_paths(target, options=None) -> list[pathlib.Path]:
    """Returns the absolute paths of outputs from the built Bazel target.

    Args:
        target: Bazel label to query.
        options: Extra command line flags passed to `bazel cquery`.
    """
    # Fix: the default used to be a mutable `options=[]`; `None` avoids a
    # single list instance being shared across all calls.
    cmd_args = ["bazel", "cquery", f"{target}", *(options or []), "--output=files"]
    paths = subprocess.check_output(
        cmd_args, text=True, stderr=subprocess.DEVNULL
    ).splitlines()
    return [pathlib.Path(path) for path in paths]
Returns the absolute path of outputs from the built Bazel target.
def write_git_hash():
    """
    Temporary file where we write the current git hash, so we can side channel
    it into Bazel.

    For production releases we stamp builds with the `workspace_status_command`
    but this workflow is not friendly to remote caching. Specifically, the
    "volatile status" of a workspace is not supposed to cause builds to get
    invalidated, and it doesn't when the result is cached locally, but it does
    when it's cached remotely.

    See: <https://bazel.build/docs/user-manual#workspace-status>
         <https://github.com/bazelbuild/bazel/issues/10075>
    """

    git_dir = MZ_ROOT / ".git"
    proc = subprocess.run(
        ["git", f"--git-dir={git_dir}", "rev-parse", "HEAD"],
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )

    if proc.returncode != 0:
        # NOTE(review): no file is written on failure; presumably the Bazel
        # side falls back to zeros when the side-channel file is absent.
        ui.warn(f"Failed to get current revision of {MZ_ROOT}, falling back to all 0s")
        return

    with open(MZ_GIT_HASH_FILE, "w") as hash_file:
        hash_file.write(proc.stdout.strip())
Temporary file where we write the current git hash, so we can side channel it into Bazel.
For production releases we stamp builds with the workspace_status_command
but this workflow is not friendly to remote caching. Specifically, the
"volatile status" of a workspace is not supposed to cause builds to get
invalidated, and it doesn't when the result is cached locally, but it does
when it's cached remotely.
See: https://bazel.build/docs/user-manual#workspace-status https://github.com/bazelbuild/bazel/issues/10075
def calc_ingerity(path) -> str:
    """
    Calculate the 'integrity' for a given file.

    'integrity' is a hash of the file used in rules like 'http_archive'.

    See: <https://bazel.build/rules/lib/repo/http#http_archive-integrity>
    """
    # NOTE(review): the name is misspelled ("ingerity") but kept so existing
    # callers keep working.
    import base64  # local import: keeps this fix self-contained

    # Fix: hash in-process instead of piping through `openssl` and `sed`; the
    # old subprocess pipeline never checked return codes, so a failing
    # `openssl` would silently yield a bogus integrity string.
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            sha256.update(chunk)
    encoded = base64.b64encode(sha256.digest()).decode("utf-8")
    # Trailing newline preserved: the previous `sed` stage appended one.
    return f"sha256-{encoded}\n"
Calculate the 'integrity' for a given file.
'integrity' is a hash of the file used in rules like 'http_archive'.
See: https://bazel.build/rules/lib/repo/http#http_archive-integrity
def toolchain_hashes(stable, nightly) -> dict[str, dict[str, dict[str, str]]]:
    """
    Generates the hashes for our Bazel toolchains.

    Fetches the specified Stable and Nightly versions of the Rust compiler from
    our toolchains repo, hashes the downloaded files, and returns a properly
    formatted dictionary for Bazel, keyed arch -> channel -> tool -> sha256.

    Args:
        stable: version string of the stable Rust toolchain to fetch.
        nightly: version string of the nightly Rust toolchain to fetch.
    """
    # Fix: the return annotation used to claim a two-level dict, but the code
    # builds arch -> channel -> tool -> hash (three levels).

    ARCHS = [
        "aarch64-apple-darwin",
        "aarch64-unknown-linux-gnu",
        "x86_64-apple-darwin",
        "x86_64-unknown-linux-gnu",
    ]
    TOOLS = [
        "cargo",
        "clippy",
        "llvm-tools",
        "rust-std",
        "rustc",
    ]
    VERSIONS = {"stable": stable, "nightly": nightly}
    URL_TEMPLATE = "https://github.com/MaterializeInc/toolchains/releases/download/rust-{version}/{tool}-{channel}-{arch}.tar.zst"

    hashes: dict[str, dict[str, dict[str, str]]] = {}

    for arch in ARCHS:
        hashes[arch] = {}
        for channel, version in VERSIONS.items():
            hashes[arch][channel] = {}
            for tool in TOOLS:
                # Stable artifacts embed the version number in the file name
                # where nightly artifacts use the literal channel name.
                if channel == "stable":
                    url_channel = version
                else:
                    url_channel = channel

                print(f"Processing {tool} {version} {arch}")

                # Download the file, streaming so a whole tarball is never
                # held in memory. Fix: add a timeout so a stalled connection
                # can't hang forever (requests has no default timeout).
                url = URL_TEMPLATE.format(
                    version=version, tool=tool, channel=url_channel, arch=arch
                )
                response = requests.get(url, stream=True, timeout=60)
                response.raise_for_status()

                # Hash the response.
                sha256_hash = hashlib.sha256()
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        sha256_hash.update(chunk)
                hashes[arch][channel][tool] = sha256_hash.hexdigest()

    return hashes
Generates the hashes for our Bazel toolchains.
Fetches the specified Stable and Nightly versions of the Rust compiler from our toolchains repo, hashes the downloaded files, and returns a properly formatted dictionary for Bazel.
def remote_cache_arg(config: BuildConfig) -> list[str]:
    """List of arguments that could possibly enable use of a remote cache."""

    # The CI environment variable takes precedence over the build config.
    remote_cache = os.getenv("CI_BAZEL_REMOTE_CACHE")
    if not remote_cache:
        configured = config.bazel.remote_cache
        if configured:
            remote_cache = RemoteCache(configured).address()
        else:
            remote_cache = None

    return [f"--remote_cache={remote_cache}"] if remote_cache else []
List of arguments that could possibly enable use of a remote cache.
class RemoteCache:
    """The remote cache we're connecting to."""

    def __init__(self, value: str):
        # Values of the form "teleport:<app>" select a teleport-proxied
        # cache; anything else is treated as a plain cache address.
        if value.startswith("teleport"):
            self.kind = RemoteCacheKind.teleport
            self.data = value.split(":")[1]
        else:
            self.kind = RemoteCacheKind.normal
            self.data = value

    def address(self) -> str:
        """Address for connecting to this remote cache."""
        if self.kind != RemoteCacheKind.normal:
            # Start the local teleport proxy and point Bazel at it.
            TeleportProxy.spawn(self.data, "6889")
            return "http://localhost:6889"
        return self.data
The remote cache we're connecting to.
177 def address(self) -> str: 178 """Address for connecting to this remote cache.""" 179 if self.kind == RemoteCacheKind.normal: 180 return self.data 181 else: 182 TeleportProxy.spawn(self.data, "6889") 183 return "http://localhost:6889"
Address for connecting to this remote cache.
class RemoteCacheKind(Enum):
    """Kind of remote cache we're connecting to."""

    teleport = "teleport"
    """Connecting to a remote cache through a teleport proxy."""

    normal = "normal"
    """An HTTP address for the cache."""

    def __str__(self) -> str:
        # Render as the raw enum value, e.g. "teleport" or "normal".
        return self.value
Kind of remote cache we're connecting to.
Connecting to a remote cache through a teleport proxy.