misc.python.materialize.ci_util.upload_debug_symbols_to_polarsignals
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

import argparse
import os
import subprocess
import tempfile
import urllib.request
from pathlib import Path
from tempfile import _TemporaryFileWrapper

import boto3

from materialize import mzbuild, spawn, ui
from materialize.ci_util.upload_debug_symbols_to_s3 import (
    DEBUGINFO_BINS,
    DEBUGINFO_S3_BUCKET,
)
from materialize.mzbuild import Repository, ResolvedImage
from materialize.rustc_flags import Sanitizer
from materialize.xcompile import Arch

# Upload debuginfo and sources to Polar Signals (our continuous
# profiling provider).
# This script is only invoked for build tags. Polar Signals is
# expensive, so we don't want to upload development or unstable builds
# that won't ever be profiled by Polar Signals.

DEBUGINFO_URL = "https://debuginfo.dev.materialize.com"


def main() -> None:
    parser = argparse.ArgumentParser(
        prog="upload_debug_symbols_to_polarsignals",
        description="""Upload debug symbols to Polar Signals.""",
    )
    parser.add_argument(
        "--arch",
        help="the architecture of the binaries to upload",
        choices=[str(Arch.X86_64), str(Arch.AARCH64)],
        default=str(Arch.host()),
    )
    parser.add_argument(
        "--protocol",
        help="the source for downloading debug symbols",
        choices=["http", "s3"],
        default="s3",
    )
    parser.add_argument(
        "--token",
        help="the Polar Signals API token",
        default=os.getenv("POLAR_SIGNALS_API_TOKEN"),
    )
    parser.add_argument(
        "--build-id",
        help="directly fetch and upload debug symbols for a specific build ID, skipping docker image resolution",
    )

    parser.add_argument(
        "--release",
        action="store_true",
        help="Use release build",
        default=os.getenv("CI_LTO"),
    )
    args = parser.parse_intermixed_args()

    coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]

    repo = mzbuild.Repository(
        Path("."),
        coverage=coverage,
        sanitizer=sanitizer,
        arch=Arch(args.arch),
        profile=mzbuild.Profile.RELEASE if args.release else mzbuild.Profile.OPTIMIZED,
        image_registry="materialize",
    )

    if args.build_id:
        upload_debug_data_by_build_id(repo, args.build_id, args.protocol, args.token)
    else:
        collect_and_upload_debug_data_to_polarsignals(
            repo, DEBUGINFO_BINS, args.protocol, args.token
        )


def upload_debug_data_by_build_id(
    repo: mzbuild.Repository,
    build_id: str,
    protocol: str,
    polar_signals_api_token: str,
) -> None:
    """Fetch debug symbols by build ID and upload to Polar Signals."""
    ui.section(f"Uploading debug data for build ID {build_id} to PolarSignals...")

    if protocol == "s3":
        bin_path, dbg_path = fetch_debug_symbols_from_s3(build_id)
    elif protocol == "http":
        bin_path, dbg_path = fetch_debug_symbols_from_http(build_id)
    else:
        raise ValueError(f"Unknown protocol: {protocol}")
    print(f"Fetched debug symbols for build ID {build_id} from {protocol}")

    upload_completed = upload_debug_data_to_polarsignals(
        repo, build_id, bin_path, dbg_path, polar_signals_api_token
    )
    if upload_completed:
        print(f"Uploaded debug symbols for build ID {build_id} to PolarSignals")
    else:
        print(f"Did not upload debug symbols for build ID {build_id} to PolarSignals")


def collect_and_upload_debug_data_to_polarsignals(
    repo: mzbuild.Repository,
    debuginfo_bins: set[str],
    protocol: str,
    polar_signals_api_token: str,
) -> None:
    ui.section("Collecting and uploading debug data to PolarSignals...")

    relevant_images_by_name = get_build_images(repo, debuginfo_bins)
    print(f"Considered images are: {relevant_images_by_name.keys()}")

    for image_name, image in relevant_images_by_name.items():
        remove_docker_container_if_exists(image_name)
        container_name = create_docker_container(image_name, image)
        print(
            f"Created docker container from image {image_name} (spec: {image.spec()})"
        )

        path_to_binary = copy_binary_from_image(image_name, container_name)
        print(f"Copied binary from image {image_name}")

        build_id = get_build_id(repo, path_to_binary)
        print(f"{image_name} has build_id {build_id}")

        if protocol == "s3":
            bin_path, dbg_path = fetch_debug_symbols_from_s3(build_id)
        elif protocol == "http":
            bin_path, dbg_path = fetch_debug_symbols_from_http(build_id)
        else:
            raise ValueError(f"Unknown protocol: {protocol}")
        print(f"Fetched debug symbols of {image_name} from {protocol}")

        upload_completed = upload_debug_data_to_polarsignals(
            repo, build_id, bin_path, dbg_path, polar_signals_api_token
        )
        if upload_completed:
            print(f"Uploaded debug symbols of {image_name} to PolarSignals")
        else:
            print(f"Did not upload debug symbols of {image_name} to PolarSignals")


def get_build_images(
    repo: mzbuild.Repository, image_names: set[str]
) -> dict[str, ResolvedImage]:
    relevant_images = []
    for image_name, image in repo.images.items():
        if image_name in image_names:
            relevant_images.append(image)

    dependency_set = repo.resolve_dependencies(relevant_images)

    resolved_images = dict()
    for image_name in image_names:
        resolved_images[image_name] = dependency_set[image_name]

    return resolved_images


def remove_docker_container_if_exists(image_name: str) -> None:
    try:
        subprocess.run(["docker", "rm", image_name], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Removing container failed, ignoring: {e}")


def create_docker_container(image_name: str, image: ResolvedImage) -> str:
    try:
        image_spec = image.spec()
        docker_container_name = image_name
        command = ["docker", "create", "--name", docker_container_name, image_spec]
        subprocess.run(command, check=True)
        return docker_container_name
    except subprocess.CalledProcessError as e:
        if "manifest unknown" in str(e):
            raise RuntimeError(f"Docker image not found: {image.spec()}")
        print(f"Error creating docker container: {e}")
        raise e


def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
    try:
        source_path = f"/usr/local/bin/{image_name}"
        target_path = f"./{image_name}"
        command = [
            "docker",
            "cp",
            f"{docker_container_name}:{source_path}",
            target_path,
        ]
        subprocess.run(command, check=True)

        return target_path
    except subprocess.CalledProcessError as e:
        print(f"Error copying file: {e}")
        raise e


def get_build_id(repo: mzbuild.Repository, path_to_binary: str) -> str:
    return spawn.run_with_retries(
        lambda: spawn.capture(
            ["parca-debuginfo", "buildid", path_to_binary],
            cwd=repo.rd.root,
        ).strip()
    )


def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
    file_names = [
        "executable",
        "debuginfo",
    ]

    downloaded_file_paths = dict()

    for file_name in file_names:
        key = f"buildid/{build_id}/{file_name}"
        target_file_name = key.replace("/", "_")
        print(
            f"Downloading {file_name} from {DEBUGINFO_URL}/{key} to {target_file_name}"
        )

        urllib.request.urlretrieve(f"{DEBUGINFO_URL}/{key}", target_file_name)

        downloaded_file_paths[file_name] = target_file_name

    return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]


def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
    s3 = boto3.client("s3")

    file_names = [
        "executable",
        "debuginfo",
    ]

    downloaded_file_paths = dict()

    for file_name in file_names:
        key = f"buildid/{build_id}/{file_name}"
        target_file_name = key.replace("/", "_")
        print(
            f"Downloading {file_name} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {target_file_name}"
        )

        with open(target_file_name, "wb") as data:
            s3.download_fileobj(DEBUGINFO_S3_BUCKET, key, data)

        downloaded_file_paths[file_name] = target_file_name

    return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]


def upload_debug_data_to_polarsignals(
    repo: Repository,
    build_id: str,
    bin_path: Path | str,
    dbg_path: Path | str,
    polar_signals_api_token: str,
) -> bool:
    _upload_debug_info_to_polarsignals(repo, dbg_path, polar_signals_api_token)

    with tempfile.NamedTemporaryFile() as tarball:
        _create_source_tarball(repo, bin_path, tarball)
        return _upload_source_tarball_to_polarsignals(
            repo, bin_path, tarball, build_id, polar_signals_api_token
        )


def _upload_debug_info_to_polarsignals(
    repo: mzbuild.Repository, dbg_path: Path | str, polar_signals_api_token: str
) -> None:
    print(f"Uploading debuginfo for {dbg_path} to Polar Signals...")
    spawn.run_with_retries(
        lambda: spawn.runv(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--no-extract",
                dbg_path,
            ],
            cwd=repo.rd.root,
            env=dict(os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token),
        )
    )


def _create_source_tarball(
    repo: mzbuild.Repository, bin_path: Path | str, tarball: _TemporaryFileWrapper
) -> None:
    print(f"Constructing source tarball for {bin_path}...")
    p1 = subprocess.Popen(
        ["llvm-dwarfdump", "--show-sources", bin_path],
        stdout=subprocess.PIPE,
    )
    p2 = subprocess.Popen(
        [
            "tar",
            "-cf",
            tarball.name,
            "--zstd",
            "-T",
            "-",
            "--ignore-failed-read",
        ],
        stdin=p1.stdout,
        # Suppress noisy warnings about missing files.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # This causes p1 to receive SIGPIPE if p2 exits early,
    # like in the shell.
    assert p1.stdout
    p1.stdout.close()

    for p in [p1, p2]:
        if p.wait():
            raise subprocess.CalledProcessError(p.returncode, p.args)


def _upload_source_tarball_to_polarsignals(
    repo: mzbuild.Repository,
    bin_path: Path | str,
    tarball: _TemporaryFileWrapper,
    build_id: str,
    polar_signals_api_token: str,
) -> bool:
    print(f"Uploading source tarball for {bin_path} to Polar Signals...")
    output = spawn.run_with_retries(
        lambda: spawn.capture(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--type=sources",
                f"--build-id={build_id}",
                tarball.name,
            ],
            cwd=repo.rd.root,
            env=dict(
                os.environ,
                PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token,
            ),
        ).strip()
    )

    if "Skipping upload of" in output:
        return False

    return True


if __name__ == "__main__":
    main()