Skip to content

Commit 6ee5a60

Browse files
author
Pavel Chupin
authored
[CI] Add CUDA on AWS run in pre-commit (#7846)
Reintroduce the change from #7790: add CUDA on AWS in addition to CUDA on the self-hosted runner. The self-hosted runner will be turned off for an OS upgrade. Note that pre-commit testing on this PR will test nothing because of the pull_request_target trigger. This change has to be merged first; its effect will then be visible on other pre-commit PRs, where we expect both the self-hosted and the AWS runs to execute in parallel. Tested on #7806.
1 parent f23c3d6 commit 6ee5a60

File tree

5 files changed

+32
-6
lines changed

5 files changed

+32
-6
lines changed

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,9 @@ jobs:
195195

196196
llvm_test_suite:
197197
needs: [build, aws-start]
198-
if: ${{ !failure() && inputs.lts_matrix != '[]' }}
198+
# Continue if build was successful. If aws-start is not successful all
199+
# AWS tasks will fail, but all non-AWS tasks should continue.
200+
if: ${{ always() && needs.build.result == 'success' && inputs.lts_matrix != '[]' }}
199201
strategy:
200202
fail-fast: false
201203
max-parallel: ${{ inputs.max_parallel }}
@@ -242,7 +244,7 @@ jobs:
242244
check_sycl_all: ${{ matrix.check_sycl_all }}
243245
results_name_suffix: ${{ matrix.config }}_${{ inputs.build_artifact_suffix }}
244246
cmake_args: '${{ matrix.cmake_args }} ${{ inputs.lts_cmake_extra_args }}'
245-
247+
246248
khronos_sycl_cts:
247249
needs: build
248250
if: ${{ inputs.cts_matrix != '' }}
@@ -293,6 +295,8 @@ jobs:
293295
aws-stop:
294296
name: Stop AWS
295297
needs: [ aws-start, llvm_test_suite ]
298+
# Always attempt to shut down the AWS instance, even if AWS start was not
299+
# successful.
296300
if: ${{ always() && inputs.lts_aws_matrix != '[]' }}
297301
runs-on: ubuntu-latest
298302
environment: aws

.github/workflows/sycl_precommit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ jobs:
4848
uses: ./.github/workflows/sycl_gen_test_matrix.yml
4949
with:
5050
ref: ${{ github.event.pull_request.head.sha }}
51-
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu;cuda"
51+
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu;cuda;cuda_aws"
5252

5353
linux_default:
5454
name: Linux

devops/actions/aws-ec2/action.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ inputs:
3434
# groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner
3535
# sync; shutdown -h now
3636

37+
# us-east-1 region:
38+
39+
# ami-01cb0573cb039ab24 (for g5 instances): NVIDIA GPU-Optimized AMI 22.06.0-676eed8d-dcf5-4784-87d7-0de463205c17 (ami-003f25e6e2d2db8f1 with /dev/sda1 disk) with docker and gh_runner (1001)
40+
# sudo -s
41+
# groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner
42+
# sync; shutdown -h now
43+
44+
# ami-058347ad2ce9aef73: ami-02ec0f344128253f9 copy in us-east-1 region
45+
3746
# aws-spot: Enable usage of spot instances to save money (less reliable). Makes sense only for start mode. Default true.
3847
# aws-disk: AWS EC2 instance AMI specific disk device path and size in GB (8 by default). Makes sense only for start mode. Default "/dev/sda1:16".
3948
# aws-timebomb: AWS EC2 instance maximum live time. Makes sense only for start mode. Default "1h".
@@ -59,7 +68,7 @@ inputs:
5968
aws-region:
6069
description: "AWS EC2 region"
6170
required: false
62-
default: "us-east-2" # Ohio
71+
default: "us-east-1" # North Virginia
6372

6473
runs:
6574
using: node16

devops/actions/aws-ec2/aws-ec2.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ async function start(param_type, param_label, param_ami, param_spot, param_disk,
4141
const ec2types = typeof param_type === 'string' ? [ param_type ] : param_type;
4242
const label = typeof param_label === 'string' ? param_label : param_label[0];
4343
const ec2ami = typeof param_ami !== 'undefined' ? param_ami : "ami-0966bccbb521ccb24";
44-
const ec2spot = typeof param_spot !== 'undefined' ? param_spot : true;
44+
const ec2spot = typeof param_spot !== 'undefined' ? (param_spot === "false" ? false : true) : true;
4545
const ec2disk = typeof param_disk !== 'undefined' ? param_disk : "/dev/sda1:16";
4646
const timebomb = typeof param_timebomb !== 'undefined' ? param_timebomb : "1h";
47-
const onejob = typeof param_onejob !== 'undefined' ? param_onejob : true;
47+
const onejob = typeof param_onejob !== 'undefined' ? (param_onejob === "false" ? false : true) : true;
4848
// ephemeral runner will exit after one job so we will terminate instance sooner
4949
const ephemeral_str = onejob ? "--ephemeral" : "";
5050

devops/test_configs.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,19 @@
7171
"container_options": "--gpus all",
7272
"check_sycl_all": "cuda:gpu",
7373
"cmake_args": ""
74+
},
75+
{
76+
"config": "cuda_aws",
77+
"name": "[AWS] CUDA LLVM Test Suite",
78+
"runs-on": "aws-cuda_${{ inputs.uniq }}",
79+
"aws-ami": "ami-01cb0573cb039ab24",
80+
"aws-type": [ "g5.2xlarge", "g5.4xlarge" ],
81+
"aws-disk": "/dev/sda1:64",
82+
"aws-spot": "false",
83+
"image": "${{ inputs.cuda_image }}",
84+
"container_options": "--gpus all",
85+
"check_sycl_all": "cuda:gpu",
86+
"cmake_args": ""
7487
}
7588
],
7689
"cts": [

0 commit comments

Comments
 (0)