misc.python.materialize.ci_util.upload_debug_symbols_to_polarsignals

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

import argparse
import os
import subprocess
import tempfile
import urllib.request
from pathlib import Path
from tempfile import _TemporaryFileWrapper

import boto3

from materialize import mzbuild, spawn, ui
from materialize.ci_util.upload_debug_symbols_to_s3 import (
    DEBUGINFO_BINS,
    DEBUGINFO_S3_BUCKET,
)
from materialize.mzbuild import Repository, ResolvedImage
from materialize.rustc_flags import Sanitizer
from materialize.xcompile import Arch

# Upload debuginfo and sources to Polar Signals (our continuous
# profiling provider).
# This script is only invoked for build tags. Polar Signals is
# expensive, so we don't want to upload development or unstable builds
# that won't ever be profiled by Polar Signals.
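#
# For illustration only, a sketch of how this script might be invoked
# (the flags come from the argument parser in main() below; the module
# path and wrapper used here are assumptions, and --token falls back to
# $POLAR_SIGNALS_API_TOKEN when omitted):
#
#   python -m materialize.ci_util.upload_debug_symbols_to_polarsignals \
#       --arch=x86_64 --protocol=s3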

DEBUGINFO_URL = "https://debuginfo.dev.materialize.com"


def main() -> None:
    parser = argparse.ArgumentParser(
        prog="upload_debug_symbols_to_polarsignals",
        description="""Upload debug symbols to Polar Signals.""",
    )
    parser.add_argument(
        "--arch",
        help="the architecture of the binaries to upload",
        choices=[str(Arch.X86_64), str(Arch.AARCH64)],
        default=str(Arch.host()),
    )
    parser.add_argument(
        "--protocol",
        help="the source for downloading debug symbols",
        choices=["http", "s3"],
        default="s3",
    )
    parser.add_argument(
        "--token",
        help="the Polar Signals API token",
        default=os.getenv("POLAR_SIGNALS_API_TOKEN"),
    )
    parser.add_argument(
        "--build-id",
        help="directly fetch and upload debug symbols for a specific build ID, skipping docker image resolution",
    )

    parser.add_argument(
        "--release",
        action="store_true",
        help="Use release build",
        default=os.getenv("CI_LTO"),
    )
    args = parser.parse_intermixed_args()

    coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]

    repo = mzbuild.Repository(
        Path("."),
        coverage=coverage,
        sanitizer=sanitizer,
        arch=Arch(args.arch),
        profile=mzbuild.Profile.RELEASE if args.release else mzbuild.Profile.OPTIMIZED,
        image_registry="materialize",
    )

    if args.build_id:
        upload_debug_data_by_build_id(repo, args.build_id, args.protocol, args.token)
    else:
        collect_and_upload_debug_data_to_polarsignals(
            repo, DEBUGINFO_BINS, args.protocol, args.token
        )


def upload_debug_data_by_build_id(
    repo: mzbuild.Repository,
    build_id: str,
    protocol: str,
    polar_signals_api_token: str,
) -> None:
    """Fetch debug symbols by build ID and upload to Polar Signals."""
    ui.section(f"Uploading debug data for build ID {build_id} to PolarSignals...")

    if protocol == "s3":
        bin_path, dbg_path = fetch_debug_symbols_from_s3(build_id)
    elif protocol == "http":
        bin_path, dbg_path = fetch_debug_symbols_from_http(build_id)
    else:
        raise ValueError(f"Unknown protocol: {protocol}")
    print(f"Fetched debug symbols for build ID {build_id} from {protocol}")

    upload_completed = upload_debug_data_to_polarsignals(
        repo, build_id, bin_path, dbg_path, polar_signals_api_token
    )
    if upload_completed:
        print(f"Uploaded debug symbols for build ID {build_id} to PolarSignals")
    else:
        print(f"Did not upload debug symbols for build ID {build_id} to PolarSignals")


def collect_and_upload_debug_data_to_polarsignals(
    repo: mzbuild.Repository,
    debuginfo_bins: set[str],
    protocol: str,
    polar_signals_api_token: str,
) -> None:
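    """Resolve the images that ship debuginfo binaries, extract each binary's
    build ID, and upload its debug symbols and sources to Polar Signals."""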
125    ui.section("Collecting and uploading debug data to PolarSignals...")
126
127    relevant_images_by_name = get_build_images(repo, debuginfo_bins)
128    print(f"Considered images are: {relevant_images_by_name.keys()}")
129
130    for image_name, image in relevant_images_by_name.items():
131        remove_docker_container_if_exists(image_name)
132        container_name = create_docker_container(image_name, image)
133        print(
134            f"Created docker container from image {image_name} (spec: {image.spec()})"
135        )
136
137        path_to_binary = copy_binary_from_image(image_name, container_name)
138        print(f"Copied binary from image {image_name}")
139
140        build_id = get_build_id(repo, path_to_binary)
141        print(f"{image_name} has build_id {build_id}")
142
143        if protocol == "s3":
144            bin_path, dbg_path = fetch_debug_symbols_from_s3(build_id)
145        elif protocol == "http":
146            bin_path, dbg_path = fetch_debug_symbols_from_http(build_id)
147        else:
148            raise ValueError(f"Unknown protocol: {protocol}")
149        print(f"Fetched debug symbols of {image_name} from {protocol}")
150
151        upload_completed = upload_debug_data_to_polarsignals(
152            repo, build_id, bin_path, dbg_path, polar_signals_api_token
153        )
154        if upload_completed:
155            print(f"Uploaded debug symbols of {image_name} to PolarSignals")
156        else:
157            print(f"Did not upload debug symbols of {image_name} to PolarSignals")
158
159
160def get_build_images(
161    repo: mzbuild.Repository, image_names: set[str]
162) -> dict[str, ResolvedImage]:
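    """Resolve the named images against the repository and return them keyed
    by image name."""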
    relevant_images = []
    for image_name, image in repo.images.items():
        if image_name in image_names:
            relevant_images.append(image)

    dependency_set = repo.resolve_dependencies(relevant_images)

    resolved_images = dict()
    for image_name in image_names:
        resolved_images[image_name] = dependency_set[image_name]

    return resolved_images


def remove_docker_container_if_exists(image_name: str) -> None:
    try:
        subprocess.run(["docker", "rm", image_name], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Removing container failed, ignoring: {e}")


def create_docker_container(image_name: str, image: ResolvedImage) -> str:
    try:
        image_spec = image.spec()
        docker_container_name = image_name
        command = ["docker", "create", "--name", docker_container_name, image_spec]
        subprocess.run(command, check=True)
        return docker_container_name
    except subprocess.CalledProcessError as e:
        if "manifest unknown" in str(e):
            raise RuntimeError(f"Docker image not found: {image.spec()}")
        print(f"Error creating docker container: {e}")
        raise e


def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
    try:
        source_path = f"/usr/local/bin/{image_name}"
        target_path = f"./{image_name}"
        command = [
            "docker",
            "cp",
            f"{docker_container_name}:{source_path}",
            target_path,
        ]
        subprocess.run(command, check=True)

        return target_path
    except subprocess.CalledProcessError as e:
        print(f"Error copying file: {e}")
        raise e


def get_build_id(repo: mzbuild.Repository, path_to_binary: str) -> str:
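    """Read the binary's build ID via `parca-debuginfo buildid`, retrying on failure."""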
    return spawn.run_with_retries(
        lambda: spawn.capture(
            ["parca-debuginfo", "buildid", path_to_binary],
            cwd=repo.rd.root,
        ).strip()
    )


def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
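    """Download the executable and its debuginfo for a build ID over HTTP and
    return the local paths of the two downloaded files."""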
    file_names = [
        "executable",
        "debuginfo",
    ]

    downloaded_file_paths = dict()

    for file_name in file_names:
        key = f"buildid/{build_id}/{file_name}"
        target_file_name = key.replace("/", "_")
        print(
            f"Downloading {file_name} from {DEBUGINFO_URL}/{key} to {target_file_name}"
        )

        urllib.request.urlretrieve(f"{DEBUGINFO_URL}/{key}", target_file_name)

        downloaded_file_paths[file_name] = target_file_name

    return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]


def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
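    """Download the executable and its debuginfo for a build ID from the
    debuginfo S3 bucket and return the local paths of the two downloaded files."""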
    s3 = boto3.client("s3")

    file_names = [
        "executable",
        "debuginfo",
    ]

    downloaded_file_paths = dict()

    for file_name in file_names:
        key = f"buildid/{build_id}/{file_name}"
        target_file_name = key.replace("/", "_")
        print(
            f"Downloading {file_name} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {target_file_name}"
        )

        with open(target_file_name, "wb") as data:
            s3.download_fileobj(DEBUGINFO_S3_BUCKET, key, data)

        downloaded_file_paths[file_name] = target_file_name

    return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]


def upload_debug_data_to_polarsignals(
    repo: Repository,
    build_id: str,
    bin_path: Path | str,
    dbg_path: Path | str,
    polar_signals_api_token: str,
) -> bool:
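    """Upload the debuginfo and a tarball of the referenced sources to Polar
    Signals; return False if the sources upload was skipped."""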
    _upload_debug_info_to_polarsignals(repo, dbg_path, polar_signals_api_token)

    with tempfile.NamedTemporaryFile() as tarball:
        _create_source_tarball(repo, bin_path, tarball)
        return _upload_source_tarball_to_polarsignals(
            repo, bin_path, tarball, build_id, polar_signals_api_token
        )


def _upload_debug_info_to_polarsignals(
    repo: mzbuild.Repository, dbg_path: Path | str, polar_signals_api_token: str
) -> None:
    print(f"Uploading debuginfo for {dbg_path} to Polar Signals...")
    spawn.run_with_retries(
        lambda: spawn.runv(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--no-extract",
                dbg_path,
            ],
            cwd=repo.rd.root,
            env=dict(os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token),
        )
    )


def _create_source_tarball(
    repo: mzbuild.Repository, bin_path: Path | str, tarball: _TemporaryFileWrapper
) -> None:
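    """Pipe `llvm-dwarfdump --show-sources` into `tar` to bundle the source
    files referenced by the binary's debug info into a zstd-compressed tarball."""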
    print(f"Constructing source tarball for {bin_path}...")
    p1 = subprocess.Popen(
        ["llvm-dwarfdump", "--show-sources", bin_path],
        stdout=subprocess.PIPE,
    )
    p2 = subprocess.Popen(
        [
            "tar",
            "-cf",
            tarball.name,
            "--zstd",
            "-T",
            "-",
            "--ignore-failed-read",
        ],
        stdin=p1.stdout,
        # Suppress noisy warnings about missing files.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # This causes p1 to receive SIGPIPE if p2 exits early,
    # like in the shell.
    assert p1.stdout
    p1.stdout.close()

    for p in [p1, p2]:
        if p.wait():
            raise subprocess.CalledProcessError(p.returncode, p.args)


def _upload_source_tarball_to_polarsignals(
    repo: mzbuild.Repository,
    bin_path: Path | str,
    tarball: _TemporaryFileWrapper,
    build_id: str,
    polar_signals_api_token: str,
) -> bool:
    print(f"Uploading source tarball for {bin_path} to Polar Signals...")
    output = spawn.run_with_retries(
        lambda: spawn.capture(
            [
                "parca-debuginfo",
                "upload",
                "--store-address=grpc.polarsignals.com:443",
                "--type=sources",
                f"--build-id={build_id}",
                tarball.name,
            ],
            cwd=repo.rd.root,
            env=dict(
                os.environ,
                PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token,
            ),
        ).strip()
    )

    if "Skipping upload of" in output:
        return False

    return True


if __name__ == "__main__":
    main()