Skip to content

Commit 2622b1e

Browse files
committed
upload artifacts to GHA Cache when merged to main
1 parent aba2cd7 commit 2622b1e

File tree

1 file changed

+157
-19
lines changed

1 file changed

+157
-19
lines changed

.github/workflows/build-and-test.yml

Lines changed: 157 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ jobs:
4242
# (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
4343
outputs:
4444
BUILD_CTK_VER: ${{ steps.pass_env.outputs.CUDA_VERSION }}
45+
defaults:
46+
run:
47+
shell: bash --noprofile --norc -xeuo pipefail {0}
4548
steps:
4649
- name: Checkout ${{ github.event.repository.name }}
4750
uses: actions/checkout@v4
@@ -62,7 +65,6 @@ jobs:
6265
uses: ilammy/msvc-dev-cmd@v1
6366

6467
- name: Set environment variables
65-
shell: bash --noprofile --norc -xeuo pipefail {0}
6668
run: |
6769
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
6870
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -75,14 +77,60 @@ jobs:
7577
fi
7678
7779
echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV
78-
echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
80+
CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
81+
echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
82+
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
7983
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
80-
echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
84+
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
85+
echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
86+
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
8187
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
8288
echo "CIBW_BUILD=${CIBW_BUILD}" >> $GITHUB_ENV
83-
89+
90+
# When the CI is run due to merging to main, we want it to populate GHA Cache not Artifacts,
91+
# so that CI workflows running on every branch have a fallback to use.
92+
if [[ "${{ github.ref_name}}" == main ]]; then
93+
echo "USE_CACHE=1" >> $GITHUB_ENV
94+
else
95+
echo "USE_CACHE=0" >> $GITHUB_ENV
96+
fi
97+
98+
# TODO: revert me before merging; this is to test the cache restore in the PR
99+
echo "USE_CACHE=1" >> $GITHUB_ENV
100+
101+
- name: Install dependencies
  if: ${{ env.USE_CACHE == '1' }}
  run: |
    # For GHA Cache
    # `dependencies` are the packages to install; `dependent_exes` are the
    # executables whose presence means nothing has to be installed.
    dependencies=(zstd)
    dependent_exes=(zstd)

    # Probe each required executable by name and stop at the first miss.
    not_found=0
    for dep in "${dependent_exes[@]}"; do
      if ! command -v "$dep" >/dev/null 2>&1; then
        not_found=1
        break
      fi
    done
    if [[ $not_found == 0 ]]; then
      echo "All dependencies are found. Do nothing."
      exit 0
    fi

    # Installation below goes through apt; on hosts without it (for example
    # a non-Debian runner) there is nothing this step can do, so warn and
    # carry on instead of failing the build.
    if ! command -v apt >/dev/null 2>&1; then
      echo "WARNING: apt is not available; cannot install: ${dependencies[*]}"
      exit 0
    fi

    # Use sudo when it exists; without it, only continue when already root.
    if command -v sudo >/dev/null 2>&1; then
      SUDO="sudo"
    elif [[ $EUID == 0 ]]; then
      SUDO=""
    else
      echo "The following oprations require root access."
      exit 1
    fi
    # $SUDO is intentionally unquoted so that an empty value expands to nothing.
    $SUDO apt update
    $SUDO apt install -y "${dependencies[@]}"
132+
84133
- name: Dump environment
85-
shell: bash --noprofile --norc -xeuo pipefail {0}
86134
run: |
87135
env
88136
@@ -97,7 +145,6 @@ jobs:
97145
output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
98146

99147
- name: List the cuda.core artifacts directory
100-
shell: bash --noprofile --norc -xeuo pipefail {0}
101148
run: |
102149
if [[ "${{ matrix.host-platform }}" == win* ]]; then
103150
export CHOWN=chown
@@ -108,19 +155,40 @@ jobs:
108155
ls -lahR ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
109156
110157
- name: Check cuda.core wheel
  # Install twine, then run `twine check` over every wheel in the cuda.core
  # artifacts directory.
  run: |
    pip install twine
    twine check ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
115161
116162
- name: Upload cuda.core build artifacts
  # Runs only when the wheels are not being routed to GHA Cache.
  if: ${{ env.USE_CACHE == '0' }}
  uses: actions/upload-artifact@v4
  with:
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
    # Replace an artifact of the same name, and fail when no wheel matches.
    overwrite: 'true'
    if-no-files-found: error
123170

171+
- name: Prepare cuda.core cache
  if: ${{ env.USE_CACHE == '1' }}
  env:
    # The gh CLI reads its credentials from GH_TOKEN; set it on this step so
    # the cache lookup below does not depend on job-level configuration.
    GH_TOKEN: ${{ github.token }}
  run: |
    # this file is uploaded to GHA Cache
    # NOTE: tar is given no compression flag, so despite the .tar.gz suffix
    # the archive is a plain tar file.
    tar -c -f "${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz" -C "${{ env.CUDA_CORE_ARTIFACTS_DIR }}" .
    du -h "${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    # check if the previous runs from the same PR have populated the cache, if so need to clean it up
    CACHE_KEY="${{ env.CUDA_CORE_ARTIFACT_NAME }}"
    # grep -c exits non-zero when nothing matches; `|| true` keeps errexit and
    # pipefail from aborting the step in that (normal) case.
    match_count=$(gh cache list | grep -cF "$CACHE_KEY" || true)
    if [[ "$match_count" == "1" ]]; then
      # Best effort: a failed deletion is reported but does not fail the build.
      gh cache delete "$CACHE_KEY" || echo "WARNING: could not delete existing cache entry $CACHE_KEY"
    fi
184+
185+
- name: Cache cuda.core build artifacts
  # Saves the archive produced for cuda.core under the per-commit key.
  if: ${{ env.USE_CACHE == '1' }}
  uses: actions/cache/save@v4
  with:
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
191+
124192
- name: Set up mini CTK
125193
uses: ./.github/actions/fetch_ctk
126194
continue-on-error: false
@@ -146,7 +214,6 @@ jobs:
146214
output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
147215

148216
- name: List the cuda.bindings artifacts directory
149-
shell: bash --noprofile --norc -xeuo pipefail {0}
150217
run: |
151218
if [[ "${{ matrix.host-platform }}" == win* ]]; then
152219
export CHOWN=chown
@@ -158,18 +225,39 @@ jobs:
158225
159226
# TODO: enable this after NVIDIA/cuda-python#297 is resolved
160227
# - name: Check cuda.bindings wheel
161-
# shell: bash --noprofile --norc -xeuo pipefail {0}
162228
# run: |
163229
# twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
164230

231+
- name: Prepare cuda.bindings cache
  if: ${{ env.USE_CACHE == '1' }}
  env:
    # The gh CLI reads its credentials from GH_TOKEN; set it on this step so
    # the cache lookup below does not depend on job-level configuration.
    GH_TOKEN: ${{ github.token }}
  run: |
    # this file is uploaded to GHA Cache
    # NOTE: tar is given no compression flag, so despite the .tar.gz suffix
    # the archive is a plain tar file.
    tar -c -f "${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz" -C "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" .
    du -h "${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    # check if the previous runs from the same PR have populated the cache, if so need to clean it up
    CACHE_KEY="${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}"
    # grep -c exits non-zero when nothing matches; `|| true` keeps errexit and
    # pipefail from aborting the step in that (normal) case. -F matches the
    # key literally (it contains dots from the CUDA version).
    match_count=$(gh cache list | grep -cF "$CACHE_KEY" || true)
    if [[ "$match_count" == "1" ]]; then
      # Best effort: a failed deletion is reported but does not fail the build.
      gh cache delete "$CACHE_KEY" || echo "WARNING: could not delete existing cache entry $CACHE_KEY"
    fi
244+
165245
- name: Upload cuda.bindings build artifacts
  # Runs only when the wheels are not being routed to GHA Cache.
  if: ${{ env.USE_CACHE == '0' }}
  uses: actions/upload-artifact@v4
  with:
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
    # Replace an artifact of the same name, and fail when no wheel matches.
    overwrite: 'true'
    if-no-files-found: error
172253

254+
- name: Cache cuda.bindings build artifacts
  # Saves the archive produced for cuda.bindings under the per-commit key.
  if: ${{ env.USE_CACHE == '1' }}
  uses: actions/cache/save@v4
  with:
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
260+
173261
- name: Pass environment variables to the next runner
174262
id: pass_env
175263
run: |
@@ -205,7 +293,7 @@ jobs:
205293
runner: H100
206294
name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
207295
# The build stage could fail but we want the CI to keep moving.
208-
if: ${{ github.repository_owner == 'nvidia' && always() }}
296+
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
209297
permissions:
210298
id-token: write # This is required for configure-aws-credentials
211299
contents: read # This is required for actions/checkout
@@ -221,9 +309,11 @@ jobs:
221309
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
222310
needs:
223311
- build
312+
defaults:
313+
run:
314+
shell: bash --noprofile --norc -xeuo pipefail {0}
224315
steps:
225316
- name: Ensure GPU is working
  # Runs nvidia-smi first; the step (and so the job) fails if it exits non-zero.
  run: |
    nvidia-smi
228318

229319
- name: Checkout ${{ github.event.repository.name }}
@@ -232,7 +322,6 @@ jobs:
232322
fetch-depth: 0
233323

234324
- name: Set environment variables
235-
shell: bash --noprofile --norc -xeuo pipefail {0}
236325
run: |
237326
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
238327
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -251,32 +340,83 @@ jobs:
251340
fi
252341
253342
# make outputs from the previous job as env vars
254-
echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
343+
CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
344+
echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
345+
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
255346
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
256-
echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
347+
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
348+
echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
349+
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
257350
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
258351
echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV
259352
353+
# We'll try GHA Artifacts first, and then fall back to GHA Cache
260354
- name: Download cuda.bindings build artifacts
  id: cuda-bindings-download
  uses: actions/download-artifact@v4
  # A missing artifact must not fail the job, because the GHA Cache fallback
  # below takes over. Without this, every later step that has no status
  # check in its `if` would be skipped once the download fails.
  continue-on-error: true
  with:
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

- name: Restore cuda.bindings cache
  # `outcome` is the step result before continue-on-error is applied.
  if: ${{ steps.cuda-bindings-download.outcome == 'failure' }}
  id: cuda-bindings-cache
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    # Prefix fallback: accept an archive cached for another commit.
    restore-keys: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}
    fail-on-cache-miss: true

- name: Report cache restore status (hit)
  # `cache-hit` is 'true' only for an exact match on `key`. An archive that
  # was restored through `restore-keys` has to be unpacked as well, so test
  # `cache-matched-key`, which is set for either kind of match and is empty
  # when the restore step was skipped.
  if: ${{ steps.cuda-bindings-cache.outputs.cache-matched-key != '' }}
  run: |
    echo "cache is found"
    CACHE_DIR="${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack into the artifacts directory and remove the archive; a failure
    # anywhere in the chain is reported as a warning only.
    du -h "$CACHE_ARCHIVE" &&
    tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
    rm -f "$CACHE_ARCHIVE" || echo "WARNING: cache could not be retrieved."
383+
266384
- name: Display structure of downloaded cuda.bindings artifacts
  # Prints the working directory, then a recursive listing of the
  # cuda.bindings artifacts directory.
  run: |
    pwd
    ls -lahR ${CUDA_BINDINGS_ARTIFACTS_DIR}
271388
272389
- name: Download cuda.core build artifacts
  id: cuda-core-download
  uses: actions/download-artifact@v4
  # A missing artifact must not fail the job, because the GHA Cache fallback
  # below takes over. Without this, every later step that has no status
  # check in its `if` would be skipped once the download fails.
  continue-on-error: true
  with:
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

- name: Restore cuda.core cache
  # `outcome` is the step result before continue-on-error is applied.
  if: ${{ steps.cuda-core-download.outcome == 'failure' }}
  id: cuda-core-cache
  uses: actions/cache/restore@v4
  with:
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    # Prefix fallback: accept an archive cached for another commit.
    restore-keys: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}
    fail-on-cache-miss: true

- name: Report cache restore status (hit)
  # `cache-hit` is 'true' only for an exact match on `key`. An archive that
  # was restored through `restore-keys` has to be unpacked as well, so test
  # `cache-matched-key`, which is set for either kind of match and is empty
  # when the restore step was skipped.
  if: ${{ steps.cuda-core-cache.outputs.cache-matched-key != '' }}
  run: |
    echo "cache is found"
    CACHE_DIR="${{ env.CUDA_CORE_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack into the artifacts directory and remove the archive; a failure
    # anywhere in the chain is reported as a warning only.
    du -h "$CACHE_ARCHIVE" &&
    tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
    rm -f "$CACHE_ARCHIVE" || echo "WARNING: cache could not be retrieved."
418+
278419
- name: Display structure of downloaded cuda.core build artifacts
279-
shell: bash --noprofile --norc -xeuo pipefail {0}
280420
run: |
281421
pwd
282422
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
@@ -295,7 +435,6 @@ jobs:
295435

296436
- name: Run cuda.bindings tests
297437
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
298-
shell: bash --noprofile --norc -xeuo pipefail {0}
299438
run: |
300439
ls $CUDA_PATH
301440
@@ -311,7 +450,6 @@ jobs:
311450
popd
312451
313452
- name: Run cuda.core tests
314-
shell: bash --noprofile --norc -xeuo pipefail {0}
315453
run: |
316454
if [[ ${{ matrix.python-version }} == "3.13" ]]; then
317455
# TODO: remove this hack once cuda-python has a cp313 build
@@ -336,7 +474,7 @@ jobs:
336474
doc:
337475
name: Docs
338476
# The build stage could fail but we want the CI to keep moving.
339-
if: ${{ github.repository_owner == 'nvidia' && always() }}
477+
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
340478
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
341479
permissions:
342480
id-token: write

0 commit comments

Comments
 (0)