misc.python.materialize.ci_util.upload_debug_symbols_to_polarsignals
1# Copyright Materialize, Inc. and contributors. All rights reserved. 2# 3# Use of this software is governed by the Business Source License 4# included in the LICENSE file at the root of this repository. 5# 6# As of the Change Date specified in that file, in accordance with 7# the Business Source License, use of this software will be governed 8# by the Apache License, Version 2.0. 9 10import argparse 11import os 12import subprocess 13import tempfile 14import urllib.request 15from pathlib import Path 16from tempfile import _TemporaryFileWrapper 17 18import boto3 19 20from materialize import mzbuild, spawn, ui 21from materialize.ci_util.upload_debug_symbols_to_s3 import ( 22 DEBUGINFO_BINS, 23 DEBUGINFO_S3_BUCKET, 24) 25from materialize.mzbuild import Repository, ResolvedImage 26from materialize.rustc_flags import Sanitizer 27from materialize.xcompile import Arch 28 29# Upload debuginfo and sources to Polar Signals (our continuous 30# profiling provider). 31# This script is only invoked for build tags. Polar Signals is 32# expensive, so we don't want to upload development or unstable builds 33# that won't ever be profiled by Polar Signals. 
DEBUGINFO_URL = "https://debuginfo.dev.materialize.com"

# API tokens taken from the environment; variables that are unset are dropped.
DEFAULT_TOKENS = [
    t
    for t in (
        os.getenv("POLAR_SIGNALS_API_TOKEN"),
        os.getenv("POLAR_SIGNALS_SELF_MANAGED_1_API_TOKEN"),
    )
    if t is not None
]


def main() -> None:
    """Parse the command line and upload debug symbols to Polar Signals.

    With ``--build-id`` the symbols for that single build ID are fetched and
    uploaded; otherwise the images named in ``DEBUGINFO_BINS`` are processed.
    """
    parser = argparse.ArgumentParser(
        prog="upload_debug_symbols_to_polarsignals",
        description="""Upload debug symbols to Polar Signals.""",
    )
    parser.add_argument(
        "--arch",
        help="the architecture of the binaries to upload",
        choices=[str(Arch.X86_64), str(Arch.AARCH64)],
        default=str(Arch.host()),
    )
    parser.add_argument(
        "--protocol",
        help="the source for downloading debug symbols",
        choices=["http", "s3"],
        default="s3",
    )
    parser.add_argument(
        "--token",
        action="append",
        help="the Polar Signals API token",
        default=DEFAULT_TOKENS,
    )
    parser.add_argument(
        "--build-id",
        help="directly fetch and upload debug symbols for a specific build ID, skipping docker image resolution",
    )

    parser.add_argument(
        "--release",
        action="store_true",
        help="Use release build",
        default=os.getenv("CI_LTO"),
    )
    args = parser.parse_intermixed_args()

    assert args.token, "Need at least one Polar Signals API token"

    coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]

    repo = mzbuild.Repository(
        Path("."),
        coverage=coverage,
        sanitizer=sanitizer,
        arch=Arch(args.arch),
        profile=mzbuild.Profile.RELEASE if args.release else mzbuild.Profile.OPTIMIZED,
        image_registry="materialize",
    )

    if args.build_id:
        upload_debug_data_by_build_id(repo, args.build_id, args.protocol, args.token)
    else:
        collect_and_upload_debug_data_to_polarsignals(
            repo, DEBUGINFO_BINS, args.protocol, args.token
        )


def _fetch_debug_symbols(build_id: str, protocol: str) -> tuple[str, str]:
    """Download the executable and debuginfo for `build_id` via `protocol`.

    Returns the local paths as ``(executable, debuginfo)``. Raises
    `ValueError` for a protocol other than ``"s3"`` or ``"http"``.
    """
    if protocol == "s3":
        return fetch_debug_symbols_from_s3(build_id)
    if protocol == "http":
        return fetch_debug_symbols_from_http(build_id)
    raise ValueError(f"Unknown protocol: {protocol}")


def upload_debug_data_by_build_id(
    repo: mzbuild.Repository,
    build_id: str,
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Fetch debug symbols by build ID and upload to Polar Signals."""
    ui.section(f"Uploading debug data for build ID {build_id} to PolarSignals...")

    bin_path, dbg_path = _fetch_debug_symbols(build_id, protocol)
    print(f"Fetched debug symbols for build ID {build_id} from {protocol}")

    upload_completed = upload_debug_data_to_polarsignals(
        repo, build_id, bin_path, dbg_path, polar_signals_api_tokens
    )
    if upload_completed:
        print(f"Uploaded debug symbols for build ID {build_id} to PolarSignals")
    else:
        print(f"Did not upload debug symbols for build ID {build_id} to PolarSignals")


def collect_and_upload_debug_data_to_polarsignals(
    repo: mzbuild.Repository,
    debuginfo_bins: set[str],
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Upload debug data for the binary of every image in `debuginfo_bins`.

    For each image: create a docker container from it, copy the binary out,
    read its build ID, fetch the matching debug symbols via `protocol`, and
    upload them to Polar Signals.
    """
    ui.section("Collecting and uploading debug data to PolarSignals...")

    relevant_images_by_name = get_build_images(repo, debuginfo_bins)
    print(f"Considered images are: {relevant_images_by_name.keys()}")

    for image_name, image in relevant_images_by_name.items():
        remove_docker_container_if_exists(image_name)
        container_name = create_docker_container(image_name, image)
        print(
            f"Created docker container from image {image_name} (spec: {image.spec()})"
        )

        path_to_binary = copy_binary_from_image(image_name, container_name)
        print(f"Copied binary from image {image_name}")

        build_id = get_build_id(repo, path_to_binary)
        print(f"{image_name} has build_id {build_id}")

        bin_path, dbg_path = _fetch_debug_symbols(build_id, protocol)
        print(f"Fetched debug symbols of {image_name} from {protocol}")

        upload_completed = upload_debug_data_to_polarsignals(
            repo, build_id, bin_path, dbg_path, polar_signals_api_tokens
        )
        if upload_completed:
            print(f"Uploaded debug symbols of {image_name} to PolarSignals")
        else:
            print(f"Did not upload debug symbols of {image_name} to PolarSignals")


def get_build_images(
    repo: mzbuild.Repository, image_names: set[str]
) -> dict[str, ResolvedImage]:
    """Return the resolved image for every name in `image_names`.

    Raises whatever the dependency set raises on lookup when a requested name
    was not resolved (for example because `repo.images` has no such image).
    """
    relevant_images = [
        image for image_name, image in repo.images.items() if image_name in image_names
    ]

    dependency_set = repo.resolve_dependencies(relevant_images)

    return {image_name: dependency_set[image_name] for image_name in image_names}


def remove_docker_container_if_exists(image_name: str) -> None:
    """Run ``docker rm`` on the container named `image_name`, best effort.

    A failing ``docker rm`` is logged and otherwise ignored.
    """
    try:
        subprocess.run(["docker", "rm", image_name], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Removing container failed, ignoring: {e}")


def create_docker_container(image_name: str, image: ResolvedImage) -> str:
    """Create a docker container named `image_name` from `image`.

    Returns the container name. Raises `RuntimeError` when docker reports
    "manifest unknown" for the image, and re-raises the
    `subprocess.CalledProcessError` for any other failure.
    """
    import sys  # local import: only needed to relay docker's stderr

    image_spec = image.spec()
    docker_container_name = image_name
    command = ["docker", "create", "--name", docker_container_name, image_spec]
    try:
        # stderr has to be captured: str(CalledProcessError) only contains the
        # command and the exit status, never the output of the process.
        completed = subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        docker_stderr = e.stderr or ""
        # Relay what docker wrote so the log stays as informative as before.
        sys.stderr.write(docker_stderr)
        if "manifest unknown" in docker_stderr:
            raise RuntimeError(f"Docker image not found: {image_spec}") from e
        print(f"Error creating docker container: {e}")
        raise
    sys.stderr.write(completed.stderr or "")
    return docker_container_name


def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
    """Copy ``/usr/local/bin/<image_name>`` out of the container.

    Returns the local target path ``./<image_name>``. A failing ``docker cp``
    is logged and re-raised.
    """
    source_path = f"/usr/local/bin/{image_name}"
    target_path = f"./{image_name}"
    command = [
        "docker",
        "cp",
        f"{docker_container_name}:{source_path}",
        target_path,
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error copying file: {e}")
        raise
    return target_path


def get_build_id(repo: mzbuild.Repository, path_to_binary: str) -> str:
    """Return the stripped output of ``parca-debuginfo buildid`` for the binary.

    The command runs with the repository root as working directory and is
    wrapped in `spawn.run_with_retries`.
    """
    return spawn.run_with_retries(
        lambda: spawn.capture(
            ["parca-debuginfo", "buildid", path_to_binary],
            cwd=repo.rd.root,
        ).strip()
    )


def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
    """Download executable and debuginfo for `build_id` from `DEBUGINFO_URL`.

    Files are written to the current directory; returns their paths as
    ``(executable, debuginfo)``.
    """
    file_names = [
        "executable",
        "debuginfo",
    ]

    downloaded_file_paths = dict()

    for file_name in file_names:
        key = f"buildid/{build_id}/{file_name}"
        # Flatten the key into a single file name in the working directory.
        target_file_name = key.replace("/", "_")
        print(
            f"Downloading {file_name} from {DEBUGINFO_URL}/{key} to {target_file_name}"
        )

        urllib.request.urlretrieve(f"{DEBUGINFO_URL}/{key}", target_file_name)

        downloaded_file_paths[file_name] = target_file_name

    return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]


def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
    """Download executable and debuginfo for `build_id` from the S3 bucket.

    Files are written to the current directory; returns their paths as
    ``(executable, debuginfo)``.
    """
    s3 = boto3.client("s3")

    file_names = [
        "executable",
        "debuginfo",
    ]

    downloaded_file_paths = dict()

    for file_name in file_names:
        key = f"buildid/{build_id}/{file_name}"
        # Flatten the key into a single file name in the working directory.
        target_file_name = key.replace("/", "_")
        print(
            f"Downloading {file_name} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {target_file_name}"
        )

        with open(target_file_name, "wb") as data:
            s3.download_fileobj(DEBUGINFO_S3_BUCKET, key, data)

        downloaded_file_paths[file_name] = target_file_name

    return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]


def upload_debug_data_to_polarsignals(
    repo: Repository,
    build_id: str,
    bin_path: Path | str,
    dbg_path: Path | str,
    polar_signals_api_tokens: list[str],
) -> bool:
    """Upload debuginfo and a source tarball once per API token.

    Returns True when the source tarball upload was reported as completed for
    at least one token, and False otherwise (including for an empty token
    list).
    """
    uploaded_any = False
    for token in polar_signals_api_tokens:
        _upload_debug_info_to_polarsignals(repo, dbg_path, token)

        with tempfile.NamedTemporaryFile() as tarball:
            _create_source_tarball(repo, bin_path, tarball)
            # The upload must be its own statement: as the right operand of
            # `or` it would be skipped as soon as the left operand is true.
            uploaded = _upload_source_tarball_to_polarsignals(
                repo, bin_path, tarball, build_id, token
            )
        uploaded_any = uploaded_any or uploaded
    return uploaded_any


def _upload_debug_info_to_polarsignals(
    repo: mzbuild.Repository,
    dbg_path: Path | str,
    polar_signals_api_token: str,
) -> None:
    """Upload the debuginfo file with ``parca-debuginfo upload``, with retries.

    The API token is handed to the tool through the
    ``PARCA_DEBUGINFO_BEARER_TOKEN`` environment variable.
    """
    print(f"Uploading debuginfo for {dbg_path} to Polar Signals...")
    spawn.run_with_retries(
        lambda: spawn.runv(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--no-extract",
                dbg_path,
            ],
            cwd=repo.rd.root,
            env=dict(os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token),
        )
    )


def _create_source_tarball(
    repo: mzbuild.Repository, bin_path: Path | str, tarball: _TemporaryFileWrapper
) -> None:
    """Write a zstd tarball of the sources listed by ``llvm-dwarfdump``.

    The output of ``llvm-dwarfdump --show-sources`` is piped into ``tar`` as
    its file list. Raises `subprocess.CalledProcessError` when either process
    exits with a non-zero status.
    """
    print(f"Constructing source tarball for {bin_path}...")
    p1 = subprocess.Popen(
        ["llvm-dwarfdump", "--show-sources", bin_path],
        stdout=subprocess.PIPE,
    )
    p2 = subprocess.Popen(
        [
            "tar",
            "-cf",
            tarball.name,
            "--zstd",
            "-T",
            "-",
            "--ignore-failed-read",
        ],
        stdin=p1.stdout,
        # Suppress noisy warnings about missing files.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # This causes p1 to receive SIGPIPE if p2 exits early,
    # like in the shell.
    assert p1.stdout
    p1.stdout.close()

    for p in [p1, p2]:
        if p.wait():
            raise subprocess.CalledProcessError(p.returncode, p.args)


def _upload_source_tarball_to_polarsignals(
    repo: mzbuild.Repository,
    bin_path: Path | str,
    tarball: _TemporaryFileWrapper,
    build_id: str,
    polar_signals_api_token: str,
) -> bool:
    """Upload the source tarball with ``parca-debuginfo upload --type=sources``.

    Returns False when the tool's output contains "Skipping upload of", and
    True otherwise.
    """
    print(f"Uploading source tarball for {bin_path} to Polar Signals...")
    output = spawn.run_with_retries(
        lambda: spawn.capture(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--type=sources",
                f"--build-id={build_id}",
                tarball.name,
            ],
            cwd=repo.rd.root,
            env=dict(
                os.environ,
                PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token,
            ),
        ).strip()
    )

    return "Skipping upload of" not in output


if __name__ == "__main__":
    main()
DEBUGINFO_URL =
'https://debuginfo.dev.materialize.com'
DEFAULT_TOKENS =
[]
def
main() -> None:
def main() -> None:
    """Command-line entry point for uploading debug symbols to Polar Signals.

    Builds the repository description from the parsed arguments and the
    ``CI_COVERAGE_ENABLED`` / ``CI_SANITIZER`` environment variables, then
    either uploads the symbols of one explicit build ID (``--build-id``) or
    those of every binary in ``DEBUGINFO_BINS``.
    """
    supported_archs = [str(Arch.X86_64), str(Arch.AARCH64)]

    cli = argparse.ArgumentParser(
        prog="upload_debug_symbols_to_polarsignals",
        description="""Upload debug symbols to Polar Signals.""",
    )
    cli.add_argument(
        "--arch",
        choices=supported_archs,
        default=str(Arch.host()),
        help="the architecture of the binaries to upload",
    )
    cli.add_argument(
        "--protocol",
        choices=["http", "s3"],
        default="s3",
        help="the source for downloading debug symbols",
    )
    # Tokens given on the command line are appended to the environment ones.
    cli.add_argument(
        "--token",
        action="append",
        default=DEFAULT_TOKENS,
        help="the Polar Signals API token",
    )
    cli.add_argument(
        "--build-id",
        help="directly fetch and upload debug symbols for a specific build ID, skipping docker image resolution",
    )
    # Defaults to the raw value of CI_LTO, so any non-empty value enables it.
    cli.add_argument(
        "--release",
        action="store_true",
        default=os.getenv("CI_LTO"),
        help="Use release build",
    )
    options = cli.parse_intermixed_args()

    assert options.token, "Need at least one Polar Signals API token"

    if options.release:
        build_profile = mzbuild.Profile.RELEASE
    else:
        build_profile = mzbuild.Profile.OPTIMIZED

    repository = mzbuild.Repository(
        Path("."),
        coverage=ui.env_is_truthy("CI_COVERAGE_ENABLED"),
        sanitizer=Sanitizer[os.getenv("CI_SANITIZER", "none")],
        arch=Arch(options.arch),
        profile=build_profile,
        image_registry="materialize",
    )

    if options.build_id:
        upload_debug_data_by_build_id(
            repository, options.build_id, options.protocol, options.token
        )
        return

    collect_and_upload_debug_data_to_polarsignals(
        repository, DEBUGINFO_BINS, options.protocol, options.token
    )
def
upload_debug_data_by_build_id( repo: materialize.mzbuild.Repository, build_id: str, protocol: str, polar_signals_api_tokens: list[str]) -> None:
def upload_debug_data_by_build_id(
    repo: mzbuild.Repository,
    build_id: str,
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Download the debug symbols of one build ID and push them to Polar Signals.

    `protocol` selects the download source (``"s3"`` or ``"http"``); any other
    value raises `ValueError`. The outcome of the upload is printed.
    """
    ui.section(f"Uploading debug data for build ID {build_id} to PolarSignals...")

    fetchers = {
        "s3": fetch_debug_symbols_from_s3,
        "http": fetch_debug_symbols_from_http,
    }
    fetch = fetchers.get(protocol)
    if fetch is None:
        raise ValueError(f"Unknown protocol: {protocol}")

    executable, debuginfo = fetch(build_id)
    print(f"Fetched debug symbols for build ID {build_id} from {protocol}")

    uploaded = upload_debug_data_to_polarsignals(
        repo, build_id, executable, debuginfo, polar_signals_api_tokens
    )
    if not uploaded:
        print(f"Did not upload debug symbols for build ID {build_id} to PolarSignals")
        return
    print(f"Uploaded debug symbols for build ID {build_id} to PolarSignals")
Fetch debug symbols by build ID and upload to Polar Signals.
def
collect_and_upload_debug_data_to_polarsignals( repo: materialize.mzbuild.Repository, debuginfo_bins: set[str], protocol: str, polar_signals_api_tokens: list[str]) -> None:
def collect_and_upload_debug_data_to_polarsignals(
    repo: mzbuild.Repository,
    debuginfo_bins: set[str],
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Upload debug data for the binary shipped in each requested image.

    Per image: recreate a docker container, copy the binary out of it, read
    its build ID, download the matching debug symbols via `protocol`
    (``"s3"`` or ``"http"``, anything else raises `ValueError`) and upload
    them to Polar Signals. Progress is printed along the way.
    """
    ui.section("Collecting and uploading debug data to PolarSignals...")

    images = get_build_images(repo, debuginfo_bins)
    print(f"Considered images are: {images.keys()}")

    fetchers = {
        "s3": fetch_debug_symbols_from_s3,
        "http": fetch_debug_symbols_from_http,
    }

    for name, resolved in images.items():
        # Drop a leftover container of the same name before creating a new one.
        remove_docker_container_if_exists(name)
        container = create_docker_container(name, resolved)
        print(f"Created docker container from image {name} (spec: {resolved.spec()})")

        local_binary = copy_binary_from_image(name, container)
        print(f"Copied binary from image {name}")

        build_id = get_build_id(repo, local_binary)
        print(f"{name} has build_id {build_id}")

        # The protocol is only validated here, after the docker steps.
        fetch = fetchers.get(protocol)
        if fetch is None:
            raise ValueError(f"Unknown protocol: {protocol}")
        executable, debuginfo = fetch(build_id)
        print(f"Fetched debug symbols of {name} from {protocol}")

        uploaded = upload_debug_data_to_polarsignals(
            repo, build_id, executable, debuginfo, polar_signals_api_tokens
        )
        if not uploaded:
            print(f"Did not upload debug symbols of {name} to PolarSignals")
            continue
        print(f"Uploaded debug symbols of {name} to PolarSignals")
def
get_build_images( repo: materialize.mzbuild.Repository, image_names: set[str]) -> dict[str, materialize.mzbuild.ResolvedImage]:
def get_build_images(
    repo: mzbuild.Repository, image_names: set[str]
) -> dict[str, ResolvedImage]:
    """Map each name in `image_names` to its resolved image.

    The repository images whose name is requested are passed to
    `repo.resolve_dependencies`; the result is then indexed by every requested
    name, so a name that was not resolved surfaces as a lookup error.
    """
    wanted = [image for name, image in repo.images.items() if name in image_names]
    resolved = repo.resolve_dependencies(wanted)
    return {name: resolved[name] for name in image_names}
def
remove_docker_container_if_exists(image_name: str) -> None:
def
create_docker_container(image_name: str, image: materialize.mzbuild.ResolvedImage) -> str:
def create_docker_container(image_name: str, image: ResolvedImage) -> str:
    """Create a docker container named `image_name` from `image`.

    Returns the name of the created container. Raises `RuntimeError` when
    docker reports "manifest unknown" for the image, and re-raises the
    `subprocess.CalledProcessError` for any other failure of ``docker create``.
    """
    import sys  # local import: only needed to relay docker's stderr

    image_spec = image.spec()
    docker_container_name = image_name
    command = ["docker", "create", "--name", docker_container_name, image_spec]
    try:
        # stderr has to be captured: str(CalledProcessError) only contains the
        # command and the exit status, never the output of the process, so a
        # "manifest unknown" check against str(e) can not match docker's error.
        completed = subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        docker_stderr = e.stderr or ""
        # Relay what docker wrote so the log stays as informative as before.
        sys.stderr.write(docker_stderr)
        if "manifest unknown" in docker_stderr:
            raise RuntimeError(f"Docker image not found: {image_spec}") from e
        print(f"Error creating docker container: {e}")
        raise
    sys.stderr.write(completed.stderr or "")
    return docker_container_name
def
copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
    """Copy the image's binary out of its container with ``docker cp``.

    The binary is read from ``/usr/local/bin/<image_name>`` inside the
    container and written to ``./<image_name>``, which is returned. A failing
    ``docker cp`` is reported on stdout and the error is re-raised.
    """
    container_binary = f"{docker_container_name}:/usr/local/bin/{image_name}"
    local_binary = f"./{image_name}"
    try:
        subprocess.run(["docker", "cp", container_binary, local_binary], check=True)
    except subprocess.CalledProcessError as err:
        print(f"Error copying file: {err}")
        raise
    return local_binary
def
get_build_id(repo: materialize.mzbuild.Repository, path_to_binary: str) -> str:
def
fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
    """Download the executable and debuginfo of `build_id` over HTTP.

    Both files are fetched from ``DEBUGINFO_URL`` into the current directory.
    Returns their local paths as ``(executable, debuginfo)``.
    """
    local_paths: dict[str, str] = {}

    for artifact in ("executable", "debuginfo"):
        key = f"buildid/{build_id}/{artifact}"
        # The local file name is the remote key with "/" flattened to "_".
        local_path = key.replace("/", "_")
        print(f"Downloading {artifact} from {DEBUGINFO_URL}/{key} to {local_path}")

        source_url = f"{DEBUGINFO_URL}/{key}"
        urllib.request.urlretrieve(source_url, local_path)
        local_paths[artifact] = local_path

    return local_paths["executable"], local_paths["debuginfo"]
def
fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
    """Download the executable and debuginfo of `build_id` from S3.

    Both objects are read from ``DEBUGINFO_S3_BUCKET`` and written into the
    current directory. Returns their local paths as
    ``(executable, debuginfo)``.
    """
    client = boto3.client("s3")
    local_paths: dict[str, str] = {}

    for artifact in ("executable", "debuginfo"):
        key = f"buildid/{build_id}/{artifact}"
        # The local file name is the object key with "/" flattened to "_".
        local_path = key.replace("/", "_")
        print(
            f"Downloading {artifact} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {local_path}"
        )

        with open(local_path, "wb") as sink:
            client.download_fileobj(DEBUGINFO_S3_BUCKET, key, sink)
        local_paths[artifact] = local_path

    return local_paths["executable"], local_paths["debuginfo"]
def
upload_debug_data_to_polarsignals( repo: materialize.mzbuild.Repository, build_id: str, bin_path: pathlib._local.Path | str, dbg_path: pathlib._local.Path | str, polar_signals_api_tokens: list[str]) -> bool:
def upload_debug_data_to_polarsignals(
    repo: Repository,
    build_id: str,
    bin_path: Path | str,
    dbg_path: Path | str,
    polar_signals_api_tokens: list[str],
) -> bool:
    """Upload debuginfo and a source tarball to Polar Signals, once per token.

    For every token the debuginfo at `dbg_path` is uploaded, then a source
    tarball is built for `bin_path` in a temporary file and uploaded for
    `build_id`.

    Returns True when the source tarball upload was reported as completed for
    at least one token, and False otherwise (including for an empty token
    list).
    """
    uploaded_any = False
    for token in polar_signals_api_tokens:
        _upload_debug_info_to_polarsignals(repo, dbg_path, token)

        with tempfile.NamedTemporaryFile() as tarball:
            _create_source_tarball(repo, bin_path, tarball)
            # The upload must be its own statement: as the right operand of
            # `or` it would be skipped as soon as the left operand is true,
            # so the tarball would be built but never uploaded.
            uploaded = _upload_source_tarball_to_polarsignals(
                repo, bin_path, tarball, build_id, token
            )
        uploaded_any = uploaded_any or uploaded
    return uploaded_any