Skip to content

Commit bf96991

Browse files
authored
[CI] Move CUDA AWS job to nightly (#14845)
There have been no issues with the self-hosted runners in the more than a week since we enabled them (July 19), and the only test failures on them have been actual product issues. Removing this job from pre-commit saves resources and the time developers spend waiting for CI (the self-hosted runners are faster). --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 0382be0 commit bf96991

File tree

3 files changed

+41
-17
lines changed

3 files changed

+41
-17
lines changed

.github/workflows/sycl-aws.yml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,18 @@ on:
1414
# See devops/actions/aws-ec2/action.yml for more details.
1515
description: "JSON string with array of objects with aws-type, runs-on, aws-ami, aws-spot, aws-disk, aws-timebomb, one-job properties"
1616
type: string
17-
required: true
17+
default: '[{"runs-on":"aws_cuda-${{ github.run_id }}-${{ github.run_attempt }}","aws-ami":"ami-01cb0573cb039ab24","aws-type":["g5.2xlarge","g5.4xlarge"],"aws-disk":"/dev/sda1:64","aws-spot":"false"}]'
1818

1919
jobs:
2020
aws:
2121
runs-on: ubuntu-20.04
2222
environment: aws
2323
steps:
24-
- name: Setup script
25-
run: |
26-
mkdir -p ./aws-ec2
27-
wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/action.yml -P ./aws-ec2
28-
wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/aws-ec2.js -P ./aws-ec2
29-
wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/package.json -P ./aws-ec2
30-
npm install ./aws-ec2
31-
- name: Start AWS EC2 runners
32-
uses: ./aws-ec2
24+
- uses: actions/checkout@v4
25+
with:
26+
sparse-checkout: devops/actions/aws-ec2
27+
- run: npm install ./devops/actions/aws-ec2
28+
- uses: ./devops/actions/aws-ec2
3329
with:
3430
mode: ${{ inputs.mode }}
3531
runs-on-list: ${{ inputs.runs-on-list }}

.github/workflows/sycl-linux-precommit-aws.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
name: E2E on AWS CUDA
22
run-name: E2E on AWS CUDA - ${{ github.event.workflow_run.display_title }}
3+
# Note: This workflow is currently disabled in the GitHub Actions UI because
4+
# we run CUDA testing on the self-hosted runners.
35
# We have to keep pre-commit AWS CUDA testing in a separate workflow because we
46
# need access to the AWS secret, which isn't available on pull_request jobs for
57
# PRs from forks. And GitHub's "require approval for all outside collaborators"

.github/workflows/sycl-nightly.yml

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,6 @@ jobs:
3737
target_devices: ext_oneapi_hip:gpu
3838
tests_selector: e2e
3939

40-
- name: NVIDIA/CUDA
41-
runner: '["Linux", "cuda"]'
42-
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
43-
image_options: -u 1001 --gpus all --cap-add SYS_ADMIN
44-
target_devices: ext_oneapi_cuda:gpu
45-
tests_selector: e2e
46-
4740
- name: Intel L0 GPU
4841
runner: '["Linux", "gen12"]'
4942
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
@@ -135,6 +128,39 @@ jobs:
135128
sycl_toolchain_archive: ${{ needs.build-win.outputs.artifact_archive_name }}
136129
extra_lit_opts: --param gpu-intel-gen12=True
137130

131+
cuda-aws-start:
132+
needs: [ubuntu2204_build]
133+
if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }}
134+
uses: ./.github/workflows/sycl-aws.yml
135+
secrets: inherit
136+
with:
137+
mode: start
138+
139+
cuda-run-tests:
140+
needs: [ubuntu2204_build, cuda-aws-start]
141+
if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }}
142+
uses: ./.github/workflows/sycl-linux-run-tests.yml
143+
with:
144+
name: CUDA E2E
145+
runner: '["aws_cuda-${{ github.run_id }}-${{ github.run_attempt }}"]'
146+
image: ghcr.io/intel/llvm/ubuntu2204_build:latest-0300ac924620a51f76c4929794637b82790f12ab
147+
image_options: -u 1001 --gpus all --cap-add SYS_ADMIN --env NVIDIA_DISABLE_REQUIRE=1
148+
target_devices: ext_oneapi_cuda:gpu
149+
ref: ${{ github.sha }}
150+
merge_ref: ''
151+
152+
sycl_toolchain_artifact: sycl_linux_default
153+
sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }}
154+
sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }}
155+
156+
cuda-aws-stop:
157+
needs: [cuda-aws-start, cuda-run-tests]
158+
if: always()
159+
uses: ./.github/workflows/sycl-aws.yml
160+
secrets: inherit
161+
with:
162+
mode: stop
163+
138164
nightly_build_upload:
139165
name: Nightly Build Upload
140166
if: ${{ github.ref_name == 'sycl' }}

0 commit comments

Comments
 (0)