-
Notifications
You must be signed in to change notification settings - Fork 171
CI: Add Windows GPU runner for tests #444
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
29 commits
Select commit
Hold shift + click to select a range
7134ed9
add simple windows runner
leofang a7d188e
try this
leofang f878192
shrik build matrix for now
leofang 79d0651
duplicate workflow for windows
leofang 3b9b631
fix conflicts
leofang b96c246
try to use default (power) shell on windows runner
leofang f5066ad
Merge branch 'main' into win_ci
ksimpson-work 6e01fec
Merge branch 'main' into win_ci
leofang 1d3aaf7
use bash shell too
leofang 5f0db6d
add driver update step + start rewriting to ps1
leofang 5dff197
custom driver installation + debug
leofang 30bfedb
update driver version for using on different VM
leofang d1e6e57
avoid Resolve-Path; add gh install step
leofang f9f0cb3
restore job dependency
leofang 76e1025
port the remaining steps to PS too
leofang f82c893
try to fix extras
leofang f9807bc
avoid using our own fetch_ctk since it's using bash
leofang a8d3c4f
fix typo
leofang 9fcb968
fix typo again
leofang c1e8564
fixes
leofang ec9be19
force evaluation
leofang 87c68e3
resume driver install
leofang 367050b
clean up and add MINI_CTK_DEPS
leofang ee0be11
fix nvvm tests with local CTK
leofang b677dbd
debug
leofang eb224ec
it does not seem we need to escape
leofang ba01e17
Merge branch 'main' into win_ci
leofang 792745b
fix again...
leofang 176c94d
clean up for review
leofang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#Requires -RunAsAdministrator | ||
|
||
# Install the driver | ||
function Install-Driver { | ||
|
||
# Set the correct URL, filename, and arguments to the installer | ||
# This driver is picked to support Windows 11 & CUDA 12.8 | ||
$url = 'https://us.download.nvidia.com/tesla/572.13/572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; | ||
$file_dir = 'C:\NVIDIA-Driver\572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; | ||
$install_args = '/s /noeula /noreboot'; | ||
|
||
# Create the folder for the driver download | ||
if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { | ||
New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null | ||
} | ||
|
||
# Download the file to a specified directory | ||
# Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29 | ||
$ProgressPreference_tmp = $ProgressPreference | ||
$ProgressPreference = 'SilentlyContinue' | ||
Write-Output 'Downloading the driver installer...' | ||
Invoke-WebRequest $url -OutFile $file_dir | ||
$ProgressPreference = $ProgressPreference_tmp | ||
Write-Output 'Download complete!' | ||
|
||
# Install the file with the specified path from earlier as well as the RunAs admin option | ||
Write-Output 'Running the driver installer...' | ||
Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait | ||
Write-Output 'Done!' | ||
} | ||
|
||
# Run the functions | ||
Install-Driver |
File renamed without changes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
name: "CI: Test wheels" | ||
|
||
on: | ||
workflow_call: | ||
inputs: | ||
host-platform: | ||
type: string | ||
required: true | ||
python-version: | ||
type: string | ||
required: true | ||
build-ctk-ver: | ||
type: string | ||
required: true | ||
cuda-version: | ||
type: string | ||
required: true | ||
local-ctk: | ||
type: string | ||
required: true | ||
runner: | ||
type: string | ||
required: true | ||
|
||
jobs: | ||
test: | ||
# The build stage could fail but we want the CI to keep moving. | ||
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} | ||
runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }} | ||
steps: | ||
- name: Checkout ${{ github.event.repository.name }} | ||
uses: actions/checkout@v4 | ||
with: | ||
fetch-depth: 0 | ||
|
||
- name: Update driver | ||
run: | | ||
.github/workflows/install_gpu_driver.ps1 | ||
|
||
- name: Ensure GPU is working | ||
run: nvidia-smi | ||
|
||
- name: Set environment variables | ||
run: | | ||
$PYTHON_VERSION_FORMATTED = '${{ inputs.python-version }}' -replace '\.' | ||
$REPO_DIR = $PWD.Path | ||
|
||
$BUILD_CUDA_MAJOR = '${{ inputs.build-ctk-ver }}' -split '\.' | Select-Object -First 1 | ||
$TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1 | ||
if ($BUILD_CUDA_MAJOR -ne $TEST_CUDA_MAJOR) { | ||
$SKIP_CUDA_BINDINGS_TEST = 1 | ||
} else { | ||
$SKIP_CUDA_BINDINGS_TEST = 0 | ||
} | ||
|
||
if ('${{ inputs.local-ctk }}' -eq '1') { | ||
if ($TEST_CUDA_MAJOR -eq '12') { | ||
$MINI_CTK_DEPS = '["nvcc", "nvrtc", "nvjitlink"]' | ||
} else { | ||
$MINI_CTK_DEPS = '["nvcc", "nvrtc"]' | ||
} | ||
} | ||
|
||
# Make outputs from the previous job as env vars | ||
$CUDA_CORE_ARTIFACT_BASENAME = "cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" | ||
"PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $env:GITHUB_ENV | ||
"CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV | ||
"CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV | ||
"CUDA_CORE_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_core\dist"))" >> $env:GITHUB_ENV | ||
$CUDA_BINDINGS_ARTIFACT_BASENAME = "cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}" | ||
"CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV | ||
"CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV | ||
"CUDA_BINDINGS_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_bindings\dist"))" >> $env:GITHUB_ENV | ||
"SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $env:GITHUB_ENV | ||
"MINI_CTK_DEPS=${MINI_CTK_DEPS}" >> $env:GITHUB_ENV | ||
|
||
- name: Download cuda-python build artifacts | ||
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: cuda-python-wheel | ||
path: . | ||
|
||
- name: Download cuda.bindings build artifacts | ||
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} | ||
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} | ||
|
||
- name: Install gh cli | ||
# the GPU runner image does not have gh pre-installed... | ||
env: | ||
# doesn't seem there's an easy way to avoid hard-coding it? | ||
GH_MSI_URL: https://github.com/cli/cli/releases/download/v2.67.0/gh_2.67.0_windows_amd64.msi | ||
run: | | ||
Invoke-WebRequest -Uri "$env:GH_MSI_URL" -OutFile "gh_installer.msi" | ||
Start-Process msiexec.exe -Wait -Verbose -ArgumentList '/i "gh_installer.msi" /qn' | ||
$GH_POSSIBLE_PATHS = "C:\\Program Files\\GitHub CLI", "C:\\Program Files (x86)\\GitHub CLI" | ||
foreach ($p in $GH_POSSIBLE_PATHS) { | ||
echo "$p" >> $env:GITHUB_PATH | ||
$env:Path += ";$p" | ||
} | ||
gh --version | ||
|
||
- name: Download cuda-python & cuda.bindings build artifacts from the prior branch | ||
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}} | ||
env: | ||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
run: | | ||
$OLD_BRANCH = Get-Content .github/BACKPORT_BRANCH | ||
$OLD_BASENAME = "cuda-bindings-python${env:PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*" | ||
$runData = gh run list -b $OLD_BRANCH -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json | ||
if (-not $runData -or $runData.Length -eq 0 -or -not $runData[0].databaseId -or [string]::IsNullOrEmpty($runData[0].databaseId)) { | ||
Write-Host "LATEST_PRIOR_RUN_ID not found!" | ||
exit 1 | ||
} | ||
$LATEST_PRIOR_RUN_ID = $runData[0].databaseId | ||
|
||
gh run download $LATEST_PRIOR_RUN_ID -p $OLD_BASENAME -R NVIDIA/cuda-python | ||
Get-ChildItem -Path $OLD_BASENAME | ||
New-Item -Path "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" -ItemType Directory -Force | ||
Move-Item -Path "$OLD_BASENAME/*.whl" -Destination "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" | ||
Remove-Item -Path $OLD_BASENAME -Force | ||
|
||
gh run download $LATEST_PRIOR_RUN_ID -p cuda-python-wheel -R NVIDIA/cuda-python | ||
Get-ChildItem -Path cuda-python-wheel | ||
Move-Item -Path "cuda-python-wheel/*.whl" -Destination . | ||
Remove-Item -Path cuda-python-wheel -Force | ||
|
||
- name: Display structure of downloaded cuda-python artifacts | ||
run: | | ||
Get-Location | ||
Get-ChildItem -Recurse -Force | Select-Object Mode, LastWriteTime, Length, FullName | ||
|
||
- name: Display structure of downloaded cuda.bindings artifacts | ||
run: | | ||
Get-Location | ||
Get-ChildItem -Recurse -Force $env:CUDA_BINDINGS_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName | ||
|
||
- name: Download cuda.core build artifacts | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} | ||
path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} | ||
|
||
- name: Display structure of downloaded cuda.core build artifacts | ||
run: | | ||
Get-Location | ||
Get-ChildItem -Recurse -Force $env:CUDA_CORE_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName | ||
|
||
- name: Set up Python ${{ inputs.python-version }} | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: ${{ inputs.python-version }} | ||
|
||
- name: Set up mini CTK | ||
if: ${{ inputs.local-ctk == '1' }} | ||
# Note: The GH-hosted Windows GPU runner does not have Git for Windows pre-installed, | ||
# so we cannot use our own fetch_ctk action unfortunately... | ||
uses: Jimver/[email protected] | ||
with: | ||
cuda: ${{ inputs.cuda-version }} | ||
method: 'network' | ||
sub-packages: ${{ env.MINI_CTK_DEPS }} | ||
|
||
- name: Update PATH | ||
if: ${{ inputs.local-ctk == '1' }} | ||
run: | | ||
# mimics actual CTK installation | ||
echo $PATH | ||
echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH | ||
|
||
- name: Run cuda.bindings tests | ||
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} | ||
run: | | ||
Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR | ||
if ('${{ inputs.local-ctk }}' -eq '1') { | ||
Get-ChildItem $env:CUDA_PATH | ||
echo $PATH | ||
pip install (Get-ChildItem -Filter *.whl).FullName | ||
} else { | ||
pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]" | ||
} | ||
Pop-Location | ||
|
||
Push-Location ./cuda_bindings | ||
pip install -r requirements.txt | ||
pytest -rxXs tests/ | ||
# skip Cython tests for now | ||
Pop-Location | ||
|
||
- name: Run cuda.core tests | ||
run: | | ||
# If build/test majors match: cuda.bindings is installed in the previous step. | ||
# If mismatch: cuda.bindings is installed from the backport branch. | ||
if ($env:SKIP_CUDA_BINDINGS_TEST -eq '1') { | ||
Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR | ||
if ('${{ inputs.local-ctk }}' -eq '1') { | ||
pip install (Get-ChildItem -Filter *.whl).FullName | ||
} else { | ||
pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]" | ||
} | ||
Pop-Location | ||
} | ||
$TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1 | ||
Push-Location $env:CUDA_CORE_ARTIFACTS_DIR | ||
pip install "$((Get-ChildItem -Filter *.whl).FullName)[cu${TEST_CUDA_MAJOR}]" | ||
Pop-Location | ||
|
||
Push-Location ./cuda_core | ||
pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" | ||
pytest -rxXs tests/ | ||
Pop-Location | ||
|
||
- name: Ensure cuda-python installable | ||
run: | | ||
if ('${{ inputs.local-ctk }}' -eq '1') { | ||
pip install (Get-ChildItem -Filter cuda_python*.whl).FullName | ||
} else { | ||
pip install "$((Get-ChildItem -Filter cuda_python*.whl).FullName)[all]" | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.