misc.python.materialize.ci_util.upload_debug_symbols_to_polarsignals

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
 10import argparse
 11import os
 12import subprocess
 13import tempfile
 14import urllib.request
 15from pathlib import Path
 16from tempfile import _TemporaryFileWrapper
 17
 18import boto3
 19
 20from materialize import mzbuild, spawn, ui
 21from materialize.ci_util.upload_debug_symbols_to_s3 import (
 22    DEBUGINFO_BINS,
 23    DEBUGINFO_S3_BUCKET,
 24)
 25from materialize.mzbuild import Repository, ResolvedImage
 26from materialize.rustc_flags import Sanitizer
 27from materialize.xcompile import Arch
 28
 29# Upload debuginfo and sources to Polar Signals (our continuous
 30# profiling provider).
 31# This script is only invoked for build tags. Polar Signals is
 32# expensive, so we don't want to upload development or unstable builds
 33# that won't ever be profiled by Polar Signals.
 34
 35DEBUGINFO_URL = "https://debuginfo.dev.materialize.com"
 36
 37DEFAULT_TOKENS = [
 38    t
 39    for t in (
 40        os.getenv("POLAR_SIGNALS_API_TOKEN"),
 41        os.getenv("POLAR_SIGNALS_SELF_MANAGED_1_API_TOKEN"),
 42    )
 43    if t is not None
 44]
 45
 46
 47def main() -> None:
 48    parser = argparse.ArgumentParser(
 49        prog="upload_debug_symbols_to_polarsignals",
 50        description="""Upload debug symbols to Polar Signals.""",
 51    )
 52    parser.add_argument(
 53        "--arch",
 54        help="the architecture of the binaries to upload",
 55        choices=[str(Arch.X86_64), str(Arch.AARCH64)],
 56        default=str(Arch.host()),
 57    )
 58    parser.add_argument(
 59        "--protocol",
 60        help="the source for downloading debug symbols",
 61        choices=["http", "s3"],
 62        default="s3",
 63    )
 64    parser.add_argument(
 65        "--token",
 66        action="append",
 67        help="the Polar Signals API token",
 68        default=DEFAULT_TOKENS,
 69    )
 70    parser.add_argument(
 71        "--build-id",
 72        help="directly fetch and upload debug symbols for a specific build ID, skipping docker image resolution",
 73    )
 74
 75    parser.add_argument(
 76        "--release",
 77        action="store_true",
 78        help="Use release build",
 79        default=os.getenv("CI_LTO"),
 80    )
 81    args = parser.parse_intermixed_args()
 82
 83    assert args.token, "Need at least one Polar Signals API token"
 84
 85    coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
 86    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]
 87
 88    repo = mzbuild.Repository(
 89        Path("."),
 90        coverage=coverage,
 91        sanitizer=sanitizer,
 92        arch=Arch(args.arch),
 93        profile=mzbuild.Profile.RELEASE if args.release else mzbuild.Profile.OPTIMIZED,
 94        image_registry="materialize",
 95    )
 96
 97    if args.build_id:
 98        upload_debug_data_by_build_id(repo, args.build_id, args.protocol, args.token)
 99    else:
100        collect_and_upload_debug_data_to_polarsignals(
101            repo, DEBUGINFO_BINS, args.protocol, args.token
102        )
103
104
def upload_debug_data_by_build_id(
    repo: mzbuild.Repository,
    build_id: str,
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Fetch debug symbols by build ID and upload to Polar Signals.

    Raises:
        ValueError: if ``protocol`` is neither ``"s3"`` nor ``"http"``.
    """
    ui.section(f"Uploading debug data for build ID {build_id} to PolarSignals...")

    fetchers = {
        "s3": fetch_debug_symbols_from_s3,
        "http": fetch_debug_symbols_from_http,
    }
    fetch = fetchers.get(protocol)
    if fetch is None:
        raise ValueError(f"Unknown protocol: {protocol}")

    executable_path, debuginfo_path = fetch(build_id)
    print(f"Fetched debug symbols for build ID {build_id} from {protocol}")

    uploaded = upload_debug_data_to_polarsignals(
        repo, build_id, executable_path, debuginfo_path, polar_signals_api_tokens
    )
    if not uploaded:
        print(f"Did not upload debug symbols for build ID {build_id} to PolarSignals")
    else:
        print(f"Uploaded debug symbols for build ID {build_id} to PolarSignals")
129
130
def collect_and_upload_debug_data_to_polarsignals(
    repo: mzbuild.Repository,
    debuginfo_bins: set[str],
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Upload debug symbols for every binary named in ``debuginfo_bins``.

    For each image, a container is created, the binary is copied out of it,
    its build ID is read, the matching debug symbols are fetched via
    ``protocol`` and the result is uploaded to Polar Signals.

    Raises:
        ValueError: if ``protocol`` is neither ``"s3"`` nor ``"http"``.
    """
    ui.section("Collecting and uploading debug data to PolarSignals...")

    images = get_build_images(repo, debuginfo_bins)
    print(f"Considered images are: {images.keys()}")

    fetchers = {
        "s3": fetch_debug_symbols_from_s3,
        "http": fetch_debug_symbols_from_http,
    }

    for image_name, image in images.items():
        # Drop any container with the same name before creating a new one.
        remove_docker_container_if_exists(image_name)
        container = create_docker_container(image_name, image)
        print(
            f"Created docker container from image {image_name} (spec: {image.spec()})"
        )

        local_binary = copy_binary_from_image(image_name, container)
        print(f"Copied binary from image {image_name}")

        build_id = get_build_id(repo, local_binary)
        print(f"{image_name} has build_id {build_id}")

        fetch = fetchers.get(protocol)
        if fetch is None:
            raise ValueError(f"Unknown protocol: {protocol}")
        executable_path, debuginfo_path = fetch(build_id)
        print(f"Fetched debug symbols of {image_name} from {protocol}")

        uploaded = upload_debug_data_to_polarsignals(
            repo, build_id, executable_path, debuginfo_path, polar_signals_api_tokens
        )
        if not uploaded:
            print(f"Did not upload debug symbols of {image_name} to PolarSignals")
        else:
            print(f"Uploaded debug symbols of {image_name} to PolarSignals")
170
171
def get_build_images(
    repo: mzbuild.Repository, image_names: set[str]
) -> dict[str, ResolvedImage]:
    """Resolve the images of ``repo`` whose names are in ``image_names``.

    Returns:
        A mapping from each name in ``image_names`` to the entry that
        ``repo.resolve_dependencies`` produced for it.
    """
    requested = [
        image for name, image in repo.images.items() if name in image_names
    ]
    resolved = repo.resolve_dependencies(requested)
    return {name: resolved[name] for name in image_names}
187
188
def remove_docker_container_if_exists(image_name: str) -> None:
    """Run ``docker rm`` on the container named ``image_name``.

    A non-zero exit status from docker is reported and otherwise ignored.
    """
    removal_command = ["docker", "rm", image_name]
    try:
        subprocess.run(removal_command, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Removing container failed, ignoring: {err}")
194
195
def create_docker_container(image_name: str, image: ResolvedImage) -> str:
    """Create a docker container named ``image_name`` from ``image``.

    Returns:
        The name of the created container.

    Raises:
        RuntimeError: if docker's error output contains "manifest unknown",
            which is reported as the image not being found.
        subprocess.CalledProcessError: if ``docker create`` fails for any
            other reason.
    """
    import sys

    image_spec = image.spec()
    docker_container_name = image_name
    command = ["docker", "create", "--name", docker_container_name, image_spec]
    try:
        # Capture stderr: the message of CalledProcessError only names the
        # command and its exit status, so docker's diagnostics must be read
        # from the captured stream to recognize a missing image.
        completed = subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        docker_stderr = e.stderr or ""
        # Replay docker's output so it still shows up in the log.
        sys.stderr.write(docker_stderr)
        if "manifest unknown" in docker_stderr:
            raise RuntimeError(f"Docker image not found: {image_spec}") from e
        print(f"Error creating docker container: {e}")
        raise

    sys.stderr.write(completed.stderr or "")
    return docker_container_name
208
209
def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
    """Copy the binary named ``image_name`` out of a docker container.

    The file is read from ``/usr/local/bin`` inside the container and
    written to the current directory.

    Returns:
        The local path of the copied binary.

    Raises:
        subprocess.CalledProcessError: if ``docker cp`` fails.
    """
    source_path = f"/usr/local/bin/{image_name}"
    target_path = f"./{image_name}"
    container_source = f"{docker_container_name}:{source_path}"

    try:
        subprocess.run(["docker", "cp", container_source, target_path], check=True)
    except subprocess.CalledProcessError as err:
        print(f"Error copying file: {err}")
        raise

    return target_path
226
227
def get_build_id(repo: mzbuild.Repository, path_to_binary: str) -> str:
    """Return the output of ``parca-debuginfo buildid`` for a binary.

    The command runs in the repository root through
    ``spawn.run_with_retries``; surrounding whitespace is stripped.
    """

    def read_build_id() -> str:
        output = spawn.capture(
            ["parca-debuginfo", "buildid", path_to_binary],
            cwd=repo.rd.root,
        )
        return output.strip()

    return spawn.run_with_retries(read_build_id)
235
236
def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
    """Download the executable and debuginfo of ``build_id`` over HTTP.

    Both files are fetched from ``DEBUGINFO_URL`` and stored in the current
    directory, named after their key with ``/`` replaced by ``_``.

    Returns:
        The local paths as ``(executable, debuginfo)``.
    """
    local_paths = {}

    for artifact in ("executable", "debuginfo"):
        key = f"buildid/{build_id}/{artifact}"
        local_name = key.replace("/", "_")
        print(
            f"Downloading {artifact} from {DEBUGINFO_URL}/{key} to {local_name}"
        )

        urllib.request.urlretrieve(f"{DEBUGINFO_URL}/{key}", local_name)
        local_paths[artifact] = local_name

    executable_path = local_paths["executable"]
    debuginfo_path = local_paths["debuginfo"]
    return executable_path, debuginfo_path
257
258
def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
    """Download the executable and debuginfo of ``build_id`` from S3.

    Both objects are read from ``DEBUGINFO_S3_BUCKET`` and stored in the
    current directory, named after their key with ``/`` replaced by ``_``.

    Returns:
        The local paths as ``(executable, debuginfo)``.
    """
    client = boto3.client("s3")
    local_paths = {}

    for artifact in ("executable", "debuginfo"):
        key = f"buildid/{build_id}/{artifact}"
        local_name = key.replace("/", "_")
        print(
            f"Downloading {artifact} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {local_name}"
        )

        with open(local_name, "wb") as sink:
            client.download_fileobj(DEBUGINFO_S3_BUCKET, key, sink)

        local_paths[artifact] = local_name

    executable_path = local_paths["executable"]
    debuginfo_path = local_paths["debuginfo"]
    return executable_path, debuginfo_path
282
283
def upload_debug_data_to_polarsignals(
    repo: Repository,
    build_id: str,
    bin_path: Path | str,
    dbg_path: Path | str,
    polar_signals_api_tokens: list[str],
) -> bool:
    """Upload debuginfo and a source tarball once per API token.

    For every token, the debuginfo at ``dbg_path`` is uploaded, then a
    source tarball is built from ``bin_path`` and uploaded for ``build_id``.

    Returns:
        True if the source tarball upload went through for every token
        (also when the token list is empty); False if at least one of the
        uploads was skipped.
    """
    result = True
    for token in polar_signals_api_tokens:
        _upload_debug_info_to_polarsignals(repo, dbg_path, token)

        with tempfile.NamedTemporaryFile() as tarball:
            _create_source_tarball(repo, bin_path, tarball)
            # Run the upload unconditionally and fold it into the result
            # afterwards: `result or upload(...)` short-circuits while
            # result is True, so the upload would never be executed.
            uploaded = _upload_source_tarball_to_polarsignals(
                repo, bin_path, tarball, build_id, token
            )
        result = result and uploaded
    return result
301
302
def _upload_debug_info_to_polarsignals(
    repo: mzbuild.Repository,
    dbg_path: Path | str,
    polar_signals_api_token: str,
) -> None:
    """Upload the debuginfo file at ``dbg_path`` with ``parca-debuginfo``.

    The command runs in the repository root through
    ``spawn.run_with_retries``; the token is handed over in the
    ``PARCA_DEBUGINFO_BEARER_TOKEN`` environment variable.
    """
    print(f"Uploading debuginfo for {dbg_path} to Polar Signals...")

    upload_command = [
        "parca-debuginfo",
        "upload",
        "--store-address=grpc.polarsignals.com:443",
        "--no-extract",
        dbg_path,
    ]

    def run_upload():
        # The environment is rebuilt on every attempt.
        child_env = dict(
            os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token
        )
        return spawn.runv(upload_command, cwd=repo.rd.root, env=child_env)

    spawn.run_with_retries(run_upload)
322
323
def _create_source_tarball(
    repo: mzbuild.Repository, bin_path: Path | str, tarball: _TemporaryFileWrapper
) -> None:
    """Write a zstd-compressed tarball of the sources of ``bin_path``.

    The output of ``llvm-dwarfdump --show-sources`` is piped into ``tar``,
    which reads it as the list of files to archive and writes the archive
    to ``tarball.name``. ``repo`` is not used by this function.

    Raises:
        subprocess.CalledProcessError: if either process exits with a
            non-zero status.
    """
    print(f"Constructing source tarball for {bin_path}...")
    # Producer: its stdout feeds tar's stdin below.
    p1 = subprocess.Popen(
        ["llvm-dwarfdump", "--show-sources", bin_path],
        stdout=subprocess.PIPE,
    )
    # Consumer: "-T -" makes tar take the file list from stdin.
    p2 = subprocess.Popen(
        [
            "tar",
            "-cf",
            tarball.name,
            "--zstd",
            "-T",
            "-",
            "--ignore-failed-read",
        ],
        stdin=p1.stdout,
        # Suppress noisy warnings about missing files.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # This causes p1 to receive SIGPIPE if p2 exits early,
    # like in the shell.
    assert p1.stdout
    p1.stdout.close()

    # Wait for both processes in pipeline order; the first non-zero exit
    # status is raised.
    for p in [p1, p2]:
        if p.wait():
            raise subprocess.CalledProcessError(p.returncode, p.args)
356
357
def _upload_source_tarball_to_polarsignals(
    repo: mzbuild.Repository,
    bin_path: Path | str,
    tarball: _TemporaryFileWrapper,
    build_id: str,
    polar_signals_api_token: str,
) -> bool:
    """Upload a source tarball for ``build_id`` with ``parca-debuginfo``.

    The command runs in the repository root through
    ``spawn.run_with_retries``; the token is handed over in the
    ``PARCA_DEBUGINFO_BEARER_TOKEN`` environment variable.

    Returns:
        False if the command output contains "Skipping upload of",
        True otherwise.
    """
    print(f"Uploading source tarball for {bin_path} to Polar Signals...")

    def run_upload() -> str:
        # Command and environment are rebuilt on every attempt.
        captured = spawn.capture(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--type=sources",
                f"--build-id={build_id}",
                tarball.name,
            ],
            cwd=repo.rd.root,
            env=dict(
                os.environ,
                PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token,
            ),
        )
        return captured.strip()

    output = spawn.run_with_retries(run_upload)
    return "Skipping upload of" not in output
388
389
# Run the uploader only when this module is executed as a script.
if __name__ == "__main__":
    main()
DEBUGINFO_URL = 'https://debuginfo.dev.materialize.com'
DEFAULT_TOKENS = []
def main() -> None:
 48def main() -> None:
 49    parser = argparse.ArgumentParser(
 50        prog="upload_debug_symbols_to_polarsignals",
 51        description="""Upload debug symbols to Polar Signals.""",
 52    )
 53    parser.add_argument(
 54        "--arch",
 55        help="the architecture of the binaries to upload",
 56        choices=[str(Arch.X86_64), str(Arch.AARCH64)],
 57        default=str(Arch.host()),
 58    )
 59    parser.add_argument(
 60        "--protocol",
 61        help="the source for downloading debug symbols",
 62        choices=["http", "s3"],
 63        default="s3",
 64    )
 65    parser.add_argument(
 66        "--token",
 67        action="append",
 68        help="the Polar Signals API token",
 69        default=DEFAULT_TOKENS,
 70    )
 71    parser.add_argument(
 72        "--build-id",
 73        help="directly fetch and upload debug symbols for a specific build ID, skipping docker image resolution",
 74    )
 75
 76    parser.add_argument(
 77        "--release",
 78        action="store_true",
 79        help="Use release build",
 80        default=os.getenv("CI_LTO"),
 81    )
 82    args = parser.parse_intermixed_args()
 83
 84    assert args.token, "Need at least one Polar Signals API token"
 85
 86    coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
 87    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]
 88
 89    repo = mzbuild.Repository(
 90        Path("."),
 91        coverage=coverage,
 92        sanitizer=sanitizer,
 93        arch=Arch(args.arch),
 94        profile=mzbuild.Profile.RELEASE if args.release else mzbuild.Profile.OPTIMIZED,
 95        image_registry="materialize",
 96    )
 97
 98    if args.build_id:
 99        upload_debug_data_by_build_id(repo, args.build_id, args.protocol, args.token)
100    else:
101        collect_and_upload_debug_data_to_polarsignals(
102            repo, DEBUGINFO_BINS, args.protocol, args.token
103        )
def upload_debug_data_by_build_id( repo: materialize.mzbuild.Repository, build_id: str, protocol: str, polar_signals_api_tokens: list[str]) -> None:
def upload_debug_data_by_build_id(
    repo: mzbuild.Repository,
    build_id: str,
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Fetch debug symbols by build ID and upload to Polar Signals.

    Raises:
        ValueError: if ``protocol`` is neither ``"s3"`` nor ``"http"``.
    """
    ui.section(f"Uploading debug data for build ID {build_id} to PolarSignals...")

    fetchers = {
        "s3": fetch_debug_symbols_from_s3,
        "http": fetch_debug_symbols_from_http,
    }
    fetch = fetchers.get(protocol)
    if fetch is None:
        raise ValueError(f"Unknown protocol: {protocol}")

    executable_path, debuginfo_path = fetch(build_id)
    print(f"Fetched debug symbols for build ID {build_id} from {protocol}")

    uploaded = upload_debug_data_to_polarsignals(
        repo, build_id, executable_path, debuginfo_path, polar_signals_api_tokens
    )
    if not uploaded:
        print(f"Did not upload debug symbols for build ID {build_id} to PolarSignals")
    else:
        print(f"Uploaded debug symbols for build ID {build_id} to PolarSignals")

Fetch debug symbols by build ID and upload to Polar Signals.

def collect_and_upload_debug_data_to_polarsignals( repo: materialize.mzbuild.Repository, debuginfo_bins: set[str], protocol: str, polar_signals_api_tokens: list[str]) -> None:
def collect_and_upload_debug_data_to_polarsignals(
    repo: mzbuild.Repository,
    debuginfo_bins: set[str],
    protocol: str,
    polar_signals_api_tokens: list[str],
) -> None:
    """Upload debug symbols for every binary named in ``debuginfo_bins``.

    For each image, a container is created, the binary is copied out of it,
    its build ID is read, the matching debug symbols are fetched via
    ``protocol`` and the result is uploaded to Polar Signals.

    Raises:
        ValueError: if ``protocol`` is neither ``"s3"`` nor ``"http"``.
    """
    ui.section("Collecting and uploading debug data to PolarSignals...")

    images = get_build_images(repo, debuginfo_bins)
    print(f"Considered images are: {images.keys()}")

    fetchers = {
        "s3": fetch_debug_symbols_from_s3,
        "http": fetch_debug_symbols_from_http,
    }

    for image_name, image in images.items():
        # Drop any container with the same name before creating a new one.
        remove_docker_container_if_exists(image_name)
        container = create_docker_container(image_name, image)
        print(
            f"Created docker container from image {image_name} (spec: {image.spec()})"
        )

        local_binary = copy_binary_from_image(image_name, container)
        print(f"Copied binary from image {image_name}")

        build_id = get_build_id(repo, local_binary)
        print(f"{image_name} has build_id {build_id}")

        fetch = fetchers.get(protocol)
        if fetch is None:
            raise ValueError(f"Unknown protocol: {protocol}")
        executable_path, debuginfo_path = fetch(build_id)
        print(f"Fetched debug symbols of {image_name} from {protocol}")

        uploaded = upload_debug_data_to_polarsignals(
            repo, build_id, executable_path, debuginfo_path, polar_signals_api_tokens
        )
        if not uploaded:
            print(f"Did not upload debug symbols of {image_name} to PolarSignals")
        else:
            print(f"Uploaded debug symbols of {image_name} to PolarSignals")
def get_build_images( repo: materialize.mzbuild.Repository, image_names: set[str]) -> dict[str, materialize.mzbuild.ResolvedImage]:
def get_build_images(
    repo: mzbuild.Repository, image_names: set[str]
) -> dict[str, ResolvedImage]:
    """Resolve the images of ``repo`` whose names are in ``image_names``.

    Returns:
        A mapping from each name in ``image_names`` to the entry that
        ``repo.resolve_dependencies`` produced for it.
    """
    requested = [
        image for name, image in repo.images.items() if name in image_names
    ]
    resolved = repo.resolve_dependencies(requested)
    return {name: resolved[name] for name in image_names}
def remove_docker_container_if_exists(image_name: str) -> None:
def remove_docker_container_if_exists(image_name: str) -> None:
    """Run ``docker rm`` on the container named ``image_name``.

    A non-zero exit status from docker is reported and otherwise ignored.
    """
    removal_command = ["docker", "rm", image_name]
    try:
        subprocess.run(removal_command, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Removing container failed, ignoring: {err}")
def create_docker_container(image_name: str, image: materialize.mzbuild.ResolvedImage) -> str:
def create_docker_container(image_name: str, image: ResolvedImage) -> str:
    """Create a docker container named ``image_name`` from ``image``.

    Returns:
        The name of the created container.

    Raises:
        RuntimeError: if docker's error output contains "manifest unknown",
            which is reported as the image not being found.
        subprocess.CalledProcessError: if ``docker create`` fails for any
            other reason.
    """
    import sys

    image_spec = image.spec()
    docker_container_name = image_name
    command = ["docker", "create", "--name", docker_container_name, image_spec]
    try:
        # Capture stderr: the message of CalledProcessError only names the
        # command and its exit status, so docker's diagnostics must be read
        # from the captured stream to recognize a missing image.
        completed = subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        docker_stderr = e.stderr or ""
        # Replay docker's output so it still shows up in the log.
        sys.stderr.write(docker_stderr)
        if "manifest unknown" in docker_stderr:
            raise RuntimeError(f"Docker image not found: {image_spec}") from e
        print(f"Error creating docker container: {e}")
        raise

    sys.stderr.write(completed.stderr or "")
    return docker_container_name
def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
    """Copy the binary named ``image_name`` out of a docker container.

    The file is read from ``/usr/local/bin`` inside the container and
    written to the current directory.

    Returns:
        The local path of the copied binary.

    Raises:
        subprocess.CalledProcessError: if ``docker cp`` fails.
    """
    source_path = f"/usr/local/bin/{image_name}"
    target_path = f"./{image_name}"
    container_source = f"{docker_container_name}:{source_path}"

    try:
        subprocess.run(["docker", "cp", container_source, target_path], check=True)
    except subprocess.CalledProcessError as err:
        print(f"Error copying file: {err}")
        raise

    return target_path
def get_build_id(repo: materialize.mzbuild.Repository, path_to_binary: str) -> str:
def get_build_id(repo: mzbuild.Repository, path_to_binary: str) -> str:
    """Return the output of ``parca-debuginfo buildid`` for a binary.

    The command runs in the repository root through
    ``spawn.run_with_retries``; surrounding whitespace is stripped.
    """

    def read_build_id() -> str:
        output = spawn.capture(
            ["parca-debuginfo", "buildid", path_to_binary],
            cwd=repo.rd.root,
        )
        return output.strip()

    return spawn.run_with_retries(read_build_id)
def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
    """Download the executable and debuginfo of ``build_id`` over HTTP.

    Both files are fetched from ``DEBUGINFO_URL`` and stored in the current
    directory, named after their key with ``/`` replaced by ``_``.

    Returns:
        The local paths as ``(executable, debuginfo)``.
    """
    local_paths = {}

    for artifact in ("executable", "debuginfo"):
        key = f"buildid/{build_id}/{artifact}"
        local_name = key.replace("/", "_")
        print(
            f"Downloading {artifact} from {DEBUGINFO_URL}/{key} to {local_name}"
        )

        urllib.request.urlretrieve(f"{DEBUGINFO_URL}/{key}", local_name)
        local_paths[artifact] = local_name

    executable_path = local_paths["executable"]
    debuginfo_path = local_paths["debuginfo"]
    return executable_path, debuginfo_path
def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
    """Download the executable and debuginfo of ``build_id`` from S3.

    Both objects are read from ``DEBUGINFO_S3_BUCKET`` and stored in the
    current directory, named after their key with ``/`` replaced by ``_``.

    Returns:
        The local paths as ``(executable, debuginfo)``.
    """
    client = boto3.client("s3")
    local_paths = {}

    for artifact in ("executable", "debuginfo"):
        key = f"buildid/{build_id}/{artifact}"
        local_name = key.replace("/", "_")
        print(
            f"Downloading {artifact} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {local_name}"
        )

        with open(local_name, "wb") as sink:
            client.download_fileobj(DEBUGINFO_S3_BUCKET, key, sink)

        local_paths[artifact] = local_name

    executable_path = local_paths["executable"]
    debuginfo_path = local_paths["debuginfo"]
    return executable_path, debuginfo_path
def upload_debug_data_to_polarsignals( repo: materialize.mzbuild.Repository, build_id: str, bin_path: pathlib._local.Path | str, dbg_path: pathlib._local.Path | str, polar_signals_api_tokens: list[str]) -> bool:
def upload_debug_data_to_polarsignals(
    repo: Repository,
    build_id: str,
    bin_path: Path | str,
    dbg_path: Path | str,
    polar_signals_api_tokens: list[str],
) -> bool:
    """Upload debuginfo and a source tarball once per API token.

    For every token, the debuginfo at ``dbg_path`` is uploaded, then a
    source tarball is built from ``bin_path`` and uploaded for ``build_id``.

    Returns:
        True if the source tarball upload went through for every token
        (also when the token list is empty); False if at least one of the
        uploads was skipped.
    """
    result = True
    for token in polar_signals_api_tokens:
        _upload_debug_info_to_polarsignals(repo, dbg_path, token)

        with tempfile.NamedTemporaryFile() as tarball:
            _create_source_tarball(repo, bin_path, tarball)
            # Run the upload unconditionally and fold it into the result
            # afterwards: `result or upload(...)` short-circuits while
            # result is True, so the upload would never be executed.
            uploaded = _upload_source_tarball_to_polarsignals(
                repo, bin_path, tarball, build_id, token
            )
        result = result and uploaded
    return result