Skip to content

Commit 2ddb42c

Browse files
[CI] Fix GPU reset (#10085)
#9546 and similar changes accidentally broke the code responsible for resetting the Intel GPU before running End-to-End tests. This commit restores that functionality and adds a GPU reset to the pre-commit task that runs End-to-End tests with a nightly build.
1 parent 2f36ea8 commit 2ddb42c

File tree

3 files changed

+13
-1
lines changed

3 files changed

+13
-1
lines changed

.github/workflows/linux_matrix_e2e_on_nightly.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,23 @@ jobs:
2525
extra_cmake_args: -DHIP_PLATFORM="AMD" -DAMD_ARCH="gfx1031"
2626
extra_image_opts: --device=/dev/kfd
2727
target_devices: all
28+
reset_gpu: false
2829

2930
- name: Intel
3031
runner: '["Linux", "gen9"]'
3132
image: ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:latest
3233
extra_cmake_args:
3334
extra_image_opts: -u 1001
3435
target_devices: all
36+
reset_gpu: true
3537

3638
- name: ESIMD Emu
3739
runner: '["Linux", "x86-cpu"]'
3840
image: ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:latest
3941
extra_cmake_args:
4042
extra_image_opts: -u 1001
4143
target_devices: ext_intel_esimd_emulator:gpu
44+
reset_gpu: false
4245
uses: ./.github/workflows/linux_single_e2e_on_nightly.yml
4346
with:
4447
name: ${{ matrix.name }}
@@ -48,6 +51,7 @@ jobs:
4851
extra_image_opts: ${{ matrix.extra_image_opts }}
4952
target_devices: ${{ matrix.target_devices }}
5053
ref: ${{ inputs.ref }}
54+
reset_gpu: ${{ matrix.reset_gpu }}
5155

5256
aws_start:
5357
name: AWS Start
@@ -69,6 +73,7 @@ jobs:
6973
extra_image_opts: --gpus all
7074
target_devices: all
7175
ref: ${{ inputs.ref }}
76+
reset_gpu: false
7277

7378
aws_stop:
7479
name: AWS Stop

.github/workflows/linux_single_e2e_on_nightly.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ on:
1717
type: string
1818
ref:
1919
type: string
20+
reset_gpu:
21+
type: string
2022

2123
jobs:
2224
lin_e2e_only:
@@ -26,6 +28,11 @@ jobs:
2628
image: ${{ inputs.image }}
2729
options: --device=/dev/dri --privileged --cap-add SYS_ADMIN ${{ inputs.extra_image_opts }}
2830
steps:
31+
- name: Reset GPU
32+
if: inputs.reset_gpu == 'true'
33+
run: |
34+
sudo mount -t debugfs none /sys/kernel/debug
35+
sudo bash -c 'echo 1 > /sys/kernel/debug/dri/0/i915_wedged'
2936
- uses: actions/checkout@v3
3037
with:
3138
path: llvm

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ jobs:
211211
options: ${{ matrix.container_options }}
212212
steps:
213213
- name: Reset GPU
214-
if: ${{ contains(matrix.config, 'gen9') }}
214+
if: ${{ contains(matrix.runs-on, 'gen9') && contains(matrix.runs-on, 'Linux') }}
215215
run: |
216216
sudo mount -t debugfs none /sys/kernel/debug
217217
sudo bash -c 'echo 1 > /sys/kernel/debug/dri/0/i915_wedged'

0 commit comments

Comments
 (0)