Skip to content

Commit 3ba670f

Browse files
committed
upload artifacts to GHA Cache when merged to main
1 parent 61ef224 commit 3ba670f

File tree

1 file changed

+171
-19
lines changed

1 file changed

+171
-19
lines changed

.github/workflows/build-and-test.yml

Lines changed: 171 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ jobs:
4242
# (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
4343
outputs:
4444
BUILD_CTK_VER: ${{ steps.pass_env.outputs.CUDA_VERSION }}
45+
defaults:
46+
run:
47+
shell: bash --noprofile --norc -xeuo pipefail {0}
4548
steps:
4649
- name: Checkout ${{ github.event.repository.name }}
4750
uses: actions/checkout@v4
@@ -62,7 +65,6 @@ jobs:
6265
uses: ilammy/msvc-dev-cmd@v1
6366

6467
- name: Set environment variables
65-
shell: bash --noprofile --norc -xeuo pipefail {0}
6668
run: |
6769
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
6870
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -75,14 +77,60 @@ jobs:
7577
fi
7678
7779
echo "PARALLEL_LEVEL=$(nproc)" >> $GITHUB_ENV
78-
echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
80+
CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
81+
echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
82+
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
7983
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
80-
echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
84+
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ matrix.cuda-version }}-${{ matrix.host-platform }}"
85+
echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
86+
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
8187
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
8288
echo "CIBW_BUILD=${CIBW_BUILD}" >> $GITHUB_ENV
83-
89+
90+
# When the CI is run due to merging to main, we want it to populate GHA Cache not Artifacts,
91+
# so that CI workflows running on every branch have a fallback to use.
92+
if [[ "${{ github.ref_name}}" == main ]]; then
93+
echo "USE_CACHE=1" >> $GITHUB_ENV
94+
else
95+
echo "USE_CACHE=0" >> $GITHUB_ENV
96+
fi
97+
98+
# TODO: revert me before merging; this is to test the cache restore in the PR
99+
echo "USE_CACHE=1" >> $GITHUB_ENV
100+
101+
# Install the command-line tools needed by the GHA Cache steps (currently only
# zstd). Runs only when this job publishes its wheels to GHA Cache.
# NOTE(review): installation goes through apt, so a host without zstd and
# without apt will fail here — confirm every host-platform in the matrix
# either ships zstd or is Debian/Ubuntu based.
- name: Install dependencies
  if: ${{ env.USE_CACHE == '1' }}
  run: |
    # For GHA Cache
    dependencies=(zstd)
    dependent_exes=(zstd)

    # Probe each required executable by its own name; stop at the first one
    # that is missing. Both stdout and stderr of the probe are discarded.
    not_found=0
    for dep in "${dependent_exes[@]}"; do
      if ! command -v "$dep" >/dev/null 2>&1; then
        not_found=1
        break
      fi
    done
    if [[ $not_found == 0 ]]; then
      echo "All dependencies are found. Do nothing."
      exit 0
    fi

    # Pick how to run privileged commands: through sudo when it exists,
    # directly when already root, otherwise give up. A shell function is used
    # instead of an alias so no `shopt -s expand_aliases` is required.
    if command -v sudo >/dev/null 2>&1; then
      as_root() { sudo "$@"; }
    elif [[ $EUID == 0 ]]; then
      as_root() { "$@"; }
    else
      echo "The following oprations require root access."
      exit 1
    fi
    as_root apt update
    as_root apt install -y "${dependencies[@]}"
132+
84133
- name: Dump environment
85-
shell: bash --noprofile --norc -xeuo pipefail {0}
86134
run: |
87135
env
88136
@@ -97,7 +145,6 @@ jobs:
97145
output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
98146

99147
- name: List the cuda.core artifacts directory
100-
shell: bash --noprofile --norc -xeuo pipefail {0}
101148
run: |
102149
if [[ "${{ matrix.host-platform }}" == win* ]]; then
103150
export CHOWN=chown
@@ -108,19 +155,42 @@ jobs:
108155
ls -lahR ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
109156
110157
- name: Check cuda.core wheel
111-
shell: bash --noprofile --norc -xeuo pipefail {0}
112158
run: |
113159
pip install twine
114160
twine check ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
115161
116162
# Publish the cuda.core wheels as a GHA Artifact; skipped when this run is
# writing to GHA Cache instead (USE_CACHE == '1').
- name: Upload cuda.core build artifacts
  uses: actions/upload-artifact@v4
  if: env.USE_CACHE == '0'
  with:
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}/*.whl
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    overwrite: 'true'
    if-no-files-found: error
123170

171+
# Pack the cuda.core wheels into one archive for the GHA Cache save step and
# delete any existing cache entry that already uses this run's key.
- name: Prepare cuda.core cache
  if: ${{ env.USE_CACHE == '1' }}
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # The step-level `if` already gates on USE_CACHE, so the script does not
    # repeat that test.
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    CACHE_KEY="${{ env.CUDA_CORE_ARTIFACT_NAME }}"

    # this file is uploaded to GHA Cache
    # NOTE(review): no -z is passed, so the archive is a plain tar despite the
    # .tar.gz suffix.
    tar -c -f "$CACHE_ARCHIVE" -C "${{ env.CUDA_CORE_ARTIFACTS_DIR }}" .
    du -h "$CACHE_ARCHIVE"

    # check if the previous runs from the same PR have populated the cache, if so need to clean it up
    # The key is matched as a fixed string (-F) and any number of hits (>= 1)
    # triggers the cleanup. `grep -c` exits non-zero when it counts zero
    # lines, hence the `|| true`.
    # NOTE(review): `gh cache list` only returns a limited number of entries
    # by default — confirm the key cannot fall outside that window.
    existing=$(gh cache list | grep -c -F -- "$CACHE_KEY" || true)
    if [[ "$existing" -ge 1 ]]; then
      gh cache delete "$CACHE_KEY"
    fi
186+
187+
# Save the cuda.core archive to GHA Cache under the per-commit artifact name.
- name: Cache cuda.core build artifacts
  uses: actions/cache/save@v4
  if: env.USE_CACHE == '1'
  with:
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
193+
124194
- name: Set up mini CTK
125195
uses: ./.github/actions/fetch_ctk
126196
continue-on-error: false
@@ -146,7 +216,6 @@ jobs:
146216
output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
147217

148218
- name: List the cuda.bindings artifacts directory
149-
shell: bash --noprofile --norc -xeuo pipefail {0}
150219
run: |
151220
if [[ "${{ matrix.host-platform }}" == win* ]]; then
152221
export CHOWN=chown
@@ -158,18 +227,41 @@ jobs:
158227
159228
# TODO: enable this after NVIDIA/cuda-python#297 is resolved
160229
# - name: Check cuda.bindings wheel
161-
# shell: bash --noprofile --norc -xeuo pipefail {0}
162230
# run: |
163231
# twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
164232

233+
# Pack the cuda.bindings wheels into one archive for the GHA Cache save step
# and delete any existing cache entry that already uses this run's key.
- name: Prepare cuda.bindings cache
  if: ${{ env.USE_CACHE == '1' }}
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # The step-level `if` already gates on USE_CACHE, so the script does not
    # repeat that test.
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    CACHE_KEY="${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}"

    # this file is uploaded to GHA Cache
    # NOTE(review): no -z is passed, so the archive is a plain tar despite the
    # .tar.gz suffix.
    tar -c -f "$CACHE_ARCHIVE" -C "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" .
    du -h "$CACHE_ARCHIVE"

    # check if the previous runs from the same PR have populated the cache, if so need to clean it up
    # The key is matched as a fixed string (-F) and any number of hits (>= 1)
    # triggers the cleanup. `grep -c` exits non-zero when it counts zero
    # lines, hence the `|| true`.
    # NOTE(review): `gh cache list` only returns a limited number of entries
    # by default — confirm the key cannot fall outside that window.
    existing=$(gh cache list | grep -c -F -- "$CACHE_KEY" || true)
    if [[ "$existing" -ge 1 ]]; then
      gh cache delete "$CACHE_KEY"
    fi
248+
165249
# Publish the cuda.bindings wheels as a GHA Artifact; skipped when this run is
# writing to GHA Cache instead (USE_CACHE == '1').
- name: Upload cuda.bindings build artifacts
  uses: actions/upload-artifact@v4
  if: env.USE_CACHE == '0'
  with:
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    overwrite: 'true'
    if-no-files-found: error
172257

258+
# Save the cuda.bindings archive to GHA Cache under the per-commit artifact name.
- name: Cache cuda.bindings build artifacts
  uses: actions/cache/save@v4
  if: env.USE_CACHE == '1'
  with:
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
264+
173265
- name: Pass environment variables to the next runner
174266
id: pass_env
175267
run: |
@@ -205,7 +297,7 @@ jobs:
205297
runner: H100
206298
name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
207299
# The build stage could fail but we want the CI to keep moving.
208-
if: ${{ github.repository_owner == 'nvidia' && always() }}
300+
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
209301
permissions:
210302
id-token: write # This is required for configure-aws-credentials
211303
contents: read # This is required for actions/checkout
@@ -221,9 +313,11 @@ jobs:
221313
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
222314
needs:
223315
- build
316+
defaults:
317+
run:
318+
shell: bash --noprofile --norc -xeuo pipefail {0}
224319
steps:
225320
- name: Ensure GPU is working
226-
shell: bash --noprofile --norc -xeuo pipefail {0}
227321
run: nvidia-smi
228322

229323
- name: Checkout ${{ github.event.repository.name }}
@@ -232,7 +326,6 @@ jobs:
232326
fetch-depth: 0
233327

234328
- name: Set environment variables
235-
shell: bash --noprofile --norc -xeuo pipefail {0}
236329
run: |
237330
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.')
238331
if [[ "${{ matrix.host-platform }}" == linux* ]]; then
@@ -251,32 +344,93 @@ jobs:
251344
fi
252345
253346
# make outputs from the previous job as env vars
254-
echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
347+
CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}"
348+
echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
349+
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
255350
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
256-
echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
351+
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}"
352+
echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
353+
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
257354
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
258355
echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV
259356
357+
# We'll try GHA Artifacts first, and then fall back to GHA Cache
260358
# First attempt: fetch the cuda.bindings wheels from GHA Artifacts. A failure
# here does not fail the job (continue-on-error), so later steps can inspect
# this step's outcome.
- name: Download cuda.bindings build artifacts
  id: cuda-bindings-download
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
265365

366+
# Fallback: runs only when the artifact download step failed. Restores the
# cuda.bindings archive from GHA Cache, trying the per-commit key first and
# then the basename via restore-keys; a miss fails the step.
- name: Restore cuda.bindings cache
  id: cuda-bindings-cache
  if: steps.cuda-bindings-download.outcome == 'failure'
  uses: actions/cache/restore@v4
  with:
    path: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    restore-keys: ${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}
    fail-on-cache-miss: true
375+
376+
# Report whether the cuda.bindings cache restore produced an archive and, if
# so, unpack it into the artifacts directory. Skipped when the restore step
# itself was skipped.
- name: Report cache restore status
  if: ${{ steps.cuda-bindings-cache.outcome != 'skipped' }}
  run: |
    # `cache-hit` is "true" only for an exact match on the primary key; a
    # restore that went through `restore-keys` leaves it "false". The
    # `cache-matched-key` output is set in both cases, so it is the reliable
    # signal that something was restored.
    MATCHED_KEY="${{ steps.cuda-bindings-cache.outputs.cache-matched-key }}"
    if [[ -n "$MATCHED_KEY" ]]; then
      echo "cache is found"
    else
      echo "cache is not found"
      exit 1
    fi
    CACHE_DIR="${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_BINDINGS_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack, then remove the archive; any failure in the chain aborts the step.
    du -h "$CACHE_ARCHIVE" &&
      tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
      rm -f "$CACHE_ARCHIVE" || exit 1
392+
266393
- name: Display structure of downloaded cuda.bindings artifacts
267-
shell: bash --noprofile --norc -xeuo pipefail {0}
268394
run: |
269395
pwd
270396
ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
271397
272398
# First attempt: fetch the cuda.core wheels from GHA Artifacts. A failure here
# does not fail the job (continue-on-error), so later steps can inspect this
# step's outcome.
- name: Download cuda.core build artifacts
  id: cuda-core-download
  continue-on-error: true
  uses: actions/download-artifact@v4
  with:
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
277405

406+
# Fallback: runs only when the artifact download step failed. Restores the
# cuda.core archive from GHA Cache, trying the per-commit key first and then
# the basename via restore-keys; a miss fails the step.
- name: Restore cuda.core cache
  id: cuda-core-cache
  if: steps.cuda-core-download.outcome == 'failure'
  uses: actions/cache/restore@v4
  with:
    path: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz
    key: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    restore-keys: ${{ env.CUDA_CORE_ARTIFACT_BASENAME }}
    fail-on-cache-miss: true
415+
416+
# Report whether the cuda.core cache restore produced an archive and, if so,
# unpack it into the artifacts directory. Skipped when the restore step itself
# was skipped.
- name: Report cache restore status
  if: ${{ steps.cuda-core-cache.outcome != 'skipped' }}
  run: |
    # `cache-hit` is "true" only for an exact match on the primary key; a
    # restore that went through `restore-keys` leaves it "false". The
    # `cache-matched-key` output is set in both cases, so it is the reliable
    # signal that something was restored.
    MATCHED_KEY="${{ steps.cuda-core-cache.outputs.cache-matched-key }}"
    if [[ -n "$MATCHED_KEY" ]]; then
      echo "cache is found"
    else
      echo "cache is not found"
      exit 1
    fi
    CACHE_DIR="${{ env.CUDA_CORE_ARTIFACTS_DIR }}"
    CACHE_ARCHIVE="${{ env.CUDA_CORE_ARTIFACT_BASENAME }}.tar.gz"
    ls -l "$CACHE_ARCHIVE"
    mkdir -p "$CACHE_DIR"
    # Unpack, then remove the archive; any failure in the chain aborts the step.
    du -h "$CACHE_ARCHIVE" &&
      tar -x -f "$CACHE_ARCHIVE" -C "$CACHE_DIR" &&
      rm -f "$CACHE_ARCHIVE" || exit 1
432+
278433
- name: Display structure of downloaded cuda.core build artifacts
279-
shell: bash --noprofile --norc -xeuo pipefail {0}
280434
run: |
281435
pwd
282436
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
@@ -298,7 +452,6 @@ jobs:
298452

299453
- name: Run cuda.bindings tests
300454
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
301-
shell: bash --noprofile --norc -xeuo pipefail {0}
302455
run: |
303456
ls $CUDA_PATH
304457
@@ -321,7 +474,6 @@ jobs:
321474
popd
322475
323476
- name: Run cuda.core tests
324-
shell: bash --noprofile --norc -xeuo pipefail {0}
325477
run: |
326478
if [[ ${{ matrix.python-version }} == "3.13" ]]; then
327479
# TODO: remove this hack once cuda-python has a cp313 build
@@ -346,7 +498,7 @@ jobs:
346498
doc:
347499
name: Docs
348500
# The build stage could fail but we want the CI to keep moving.
349-
if: ${{ github.repository_owner == 'nvidia' && always() }}
501+
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
350502
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
351503
permissions:
352504
id-token: write

0 commit comments

Comments
 (0)