From 66cb3e29aed1bfbf173002a6f836a1fc1ec113af Mon Sep 17 00:00:00 2001 From: romintomasetti Date: Tue, 21 Oct 2025 14:08:47 +0000 Subject: [PATCH] cicd: don't use emulation anymore Signed-off-by: romintomasetti --- .github/workflows/build.yml | 10 ++---- .github/workflows/strategy.py | 65 ++++++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 54c7f320..863c2851 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -174,7 +174,7 @@ jobs: build-images: needs: [setup-job-matrix, build-images-image, setup-skips] - runs-on: [self-hosted, linux, docker, amd64] + runs-on: ${{ matrix.build-images.runs-on }} container: image: ghcr.io/${{ github.repository }}/build-images:latest services: @@ -191,8 +191,6 @@ jobs: if: ${{ ! failure() && ! cancelled() && needs.setup-skips.outputs.build-images == 'true' }} steps: - uses: actions/checkout@v5 - with: - set-safe-directory: true - uses: docker/login-action@v3 with: @@ -200,14 +198,12 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - uses: docker/setup-qemu-action@v3 - - uses: docker/setup-buildx-action@v3 - uses: docker/build-push-action@v6.18.0 with: context: . - platforms: ${{ matrix.build_platforms }} + platforms: ${{ matrix.platform }} push: ${{ github.ref == 'refs/heads/main' || (github.event_name == 'workflow_dispatch' && github.event.inputs['build-images']) }} file: docker/dockerfile tags: ${{ matrix.image }} @@ -225,7 +221,7 @@ jobs: - uses: docker/build-push-action@v6.18.0 with: context: . - platforms: ${{ matrix.build_platforms }} + platforms: ${{ matrix.platform }} push: ${{ github.ref == 'refs/heads/main' || (github.event_name == 'workflow_dispatch' && github.event.inputs['build-images']) }} file: docker/dockerfile.kokkos tags: ${{ matrix.kokkos }} diff --git a/.github/workflows/strategy.py b/.github/workflows/strategy.py index 33a13c27..15711f04 100644 --- a/.github/workflows/strategy.py +++ b/.github/workflows/strategy.py @@ -38,14 +38,23 @@ class Compiler: path : typing.Optional[str] = None @typeguard.typechecked -def full_image(*, name : str, tag : str, args : argparse.Namespace) -> str: +def full_image(*, name : str, tag : str, platform : str, args : argparse.Namespace) -> str: """ Full image from its `name` and `tag`, with remote. + + For now, `linux/arm64` are suffixed with `-arm64`, and we don't build a manifest for multi-arch images. """ - return f'{args.registry}/{args.repository}/{name}:{tag}' + value = f'{args.registry}/{args.repository}/{name}:{tag}' + match platform: + case 'linux/amd64': + return value + case 'linux/arm64': + return value + '-arm64' + case _: + raise ValueError(f'unsupported platform {platform!r}') @typeguard.typechecked -def complete_job(partial : dict, args : argparse.Namespace) -> dict: +def complete_job_impl(*, partial : dict, args : argparse.Namespace) -> dict: """ Add fields to a job. """ @@ -93,11 +102,8 @@ def complete_job(partial : dict, args : argparse.Namespace) -> dict: partial['nvidia_arch'] = str(arch) partial['base_image'] = f'{base_name}:{base_tag}@{base_digest}' - partial[ 'image'] = full_image(name = name, tag = base_tag, args = args) - partial[ 'kokkos'] = full_image(name = f'{name}-kokkos', tag = f'{base_tag}-{arch}'.lower(), args = args) - - partial['build_platforms'] = ','.join(['linux/amd64'] + partial['additional_build_platforms'] if 'additional_build_platforms' in partial else []) - + partial[ 'image'] = full_image(name = name, tag = base_tag, platform = partial['platform'], args = args) + partial[ 'kokkos'] = full_image(name = f'{name}-kokkos', tag = f'{base_tag}-{arch}'.lower(), platform = partial['platform'], args = args) # Write compilers as dictionaries. for lang in partial['compilers']: @@ -108,7 +114,8 @@ def complete_job(partial : dict, args : argparse.Namespace) -> dict: # Specifics to the 'tests', 'examples' and 'install-as-package-and-test' jobs. # Testing is opt-out. - if 'tests' not in partial or partial['tests']: + # We only test for 'linux/amd64'. + if ('tests' not in partial or partial['tests']) and partial['platform'] == 'linux/amd64': partial['tests' ] = {'container' : {'image' : partial['image']}} partial['examples' ] = {'container' : {'image' : partial['kokkos']}} partial['install-as-package-and-test'] = {'container' : {'image' : partial['kokkos']}} @@ -147,6 +154,26 @@ def complete_job(partial : dict, args : argparse.Namespace) -> dict: return partial +@typeguard.typechecked +def complete_job(partial : dict, args : argparse.Namespace) -> list[dict]: + """ + Each platform is a separate job, because multi-arch builds are too slow due to emulation. + """ + jobs = [] + + for platform in partial.pop('platforms'): + job = copy.deepcopy(partial) + job['platform'] = platform + + job = complete_job_impl(partial = job, args = args) + job['build-images'] = { + 'runs-on' : ['self-hosted', 'linux', 'docker', platform.split('/')[1]], + } + + jobs.append(job) + + return jobs + @typeguard.typechecked def main(*, args : argparse.Namespace) -> None: """ @@ -154,43 +181,47 @@ def main(*, args : argparse.Namespace) -> None: """ matrix = [] - matrix.append(complete_job({ + matrix.extend(complete_job({ 'cuda_version' : '12.8.1', 'compilers' : {'CXX' : Compiler(ID = 'gnu', version = '13'), 'CUDA' : Compiler(ID = 'nvidia')}, 'nvidia_compute_capability' : 70, - 'additional_build_platforms' : ['linux/arm64'], + 'platforms' : ['linux/amd64', 'linux/arm64'], }, args = args)) - matrix.append(complete_job({ + matrix.extend(complete_job({ 'cuda_version' : '13.0.0', 'compilers' : {'CXX' : Compiler(ID = 'gnu', version = '14'), 'CUDA' : Compiler(ID = 'nvidia')}, 'nvidia_compute_capability' : 120, - 'additional_build_platforms' : ['linux/arm64'], + 'platforms' : ['linux/amd64', 'linux/arm64'], }, args = args)) - matrix.append(complete_job({ + matrix.extend(complete_job({ 'cuda_version' : '13.0.0', 'compilers' : {'CXX' : Compiler(ID = 'gnu', version = '14'), 'CUDA' : Compiler(ID = 'nvidia')}, 'nvidia_compute_capability' : 86, + 'platforms' : ['linux/amd64'], 'tests' : True, }, args = args)) - matrix.append(complete_job({ + matrix.extend(complete_job({ 'cuda_version' : '12.8.1', 'compilers' : {'CXX' : Compiler(ID = 'clang', version = '19'), 'CUDA' : Compiler(ID = 'nvidia')}, 'nvidia_compute_capability' : 70, + 'platforms' : ['linux/amd64'], }, args = args)) - matrix.append(complete_job({ + matrix.extend(complete_job({ 'cuda_version' : '13.0.0', 'compilers' : {'CXX' : Compiler(ID = 'clang', version = '20'), 'CUDA' : Compiler(ID = 'nvidia')}, 'nvidia_compute_capability' : 120, + 'platforms' : ['linux/amd64'], }, args = args)) - matrix.append(complete_job({ + matrix.extend(complete_job({ 'cuda_version' : '12.8.1', 'compilers' : {'CXX' : Compiler(ID = 'clang', version = '21')}, 'nvidia_compute_capability' : 120, + 'platforms' : ['linux/amd64'], }, args = args)) logging.info(f'Strategy matrix:\n{pprint.pformat(matrix)}')