Skip to content

Commit 034ffbf

Browse files
authored
Merge pull request #566 from carterbox/dching/add-compute-sanitizer-to-ci
CI: Run some tests with compute-sanitizer
2 parents bc2e426 + 53a01cb commit 034ffbf

File tree

9 files changed

+64
-6
lines changed

9 files changed

+64
-6
lines changed

.github/workflows/test-wheel-linux.yml

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,24 @@ jobs:
184184
host-platform: ${{ inputs.host-platform }}
185185
cuda-version: ${{ inputs.cuda-version }}
186186

187+
- name: Set up compute-sanitizer
188+
run: |
189+
# We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort
190+
# We run compute-sanitizer only on Python 3.12 (an arbitrary choice); we don't need to run the sanitizer on the entire matrix
191+
# Only local CTK installs have compute-sanitizer; there is no wheel for it
192+
if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.cuda-version }}" != "11.8.0" && "${{ inputs.local-ctk }}" == 1 ]]; then
193+
COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer"
194+
COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g')
195+
SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1"
196+
if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then
197+
SANITIZER_CMD="${SANITIZER_CMD} --padding=32"
198+
fi
199+
echo "CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER=1" >> $GITHUB_ENV
200+
else
201+
SANITIZER_CMD=""
202+
fi
203+
echo "SANITIZER_CMD=${SANITIZER_CMD}" >> $GITHUB_ENV
204+
187205
- name: Run cuda.bindings tests
188206
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
189207
run: |
@@ -198,18 +216,18 @@ jobs:
198216
199217
pushd ./cuda_bindings
200218
pip install -r requirements.txt
201-
pytest -rxXs -v tests/
219+
${SANITIZER_CMD} pytest -rxXs -v tests/
202220
203221
# It is a bit convoluted to run the Cython tests against CTK wheels,
204222
# so let's just skip them.
205223
if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
206224
if [[ "${{ inputs.host-platform }}" == linux* ]]; then
207225
bash tests/cython/build_tests.sh
208226
elif [[ "${{ inputs.host-platform }}" == win* ]]; then
209-
# TODO: enable this once win-64 runners are up
227+
# TODO: enable this once win-64 runners are up
210228
exit 1
211-
fi
212-
pytest -rxXs -v tests/cython
229+
fi
230+
${SANITIZER_CMD} pytest -rxXs -v tests/cython
213231
fi
214232
popd
215233
@@ -233,7 +251,7 @@ jobs:
233251
234252
pushd ./cuda_core
235253
pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt"
236-
pytest -rxXs -v tests/
254+
${SANITIZER_CMD} pytest -rxXs -v tests/
237255
238256
# It is a bit convoluted to run the Cython tests against CTK wheels,
239257
# so let's just skip them. Also, currently our CI always installs the
@@ -247,7 +265,7 @@ jobs:
247265
# TODO: enable this once win-64 runners are up
248266
exit 1
249267
fi
250-
pytest -rxXs -v tests/cython
268+
${SANITIZER_CMD} pytest -rxXs -v tests/cython
251269
fi
252270
popd
253271

cuda_bindings/docs/source/environment_variables.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,8 @@
1111
## Runtime Environment Variables
1212

1313
- `CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM` : When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See [Stream Synchronization Behavior](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) for an explanation of the legacy and per-thread default streams.
14+
15+
16+
## Test-Time Environment Variables
17+
18+
- `CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER` : When set to 1, tests that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error are skipped.

cuda_bindings/tests/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import os
2+
3+
import pytest
4+
5+
skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
6+
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
7+
reason="The compute-sanitizer is running, and this test causes an API error.",
8+
)

cuda_bindings/tests/test_cuda.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import numpy as np
1313
import pytest
14+
from conftest import skipif_testing_with_compute_sanitizer
1415

1516
import cuda.cuda as cuda
1617
import cuda.cudart as cudart
@@ -83,6 +84,7 @@ def test_cuda_memcpy():
8384
assert err == cuda.CUresult.CUDA_SUCCESS
8485

8586

87+
@skipif_testing_with_compute_sanitizer
8688
def test_cuda_array():
8789
(err,) = cuda.cuInit(0)
8890
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -236,6 +238,7 @@ def test_cuda_uuid_list_access():
236238
assert err == cuda.CUresult.CUDA_SUCCESS
237239

238240

241+
@skipif_testing_with_compute_sanitizer
239242
def test_cuda_cuModuleLoadDataEx():
240243
(err,) = cuda.cuInit(0)
241244
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -251,6 +254,7 @@ def test_cuda_cuModuleLoadDataEx():
251254
cuda.CUjit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
252255
cuda.CUjit_option.CU_JIT_LOG_VERBOSE,
253256
]
257+
# FIXME: This function call raises CUDA_ERROR_INVALID_VALUE
254258
err, mod = cuda.cuModuleLoadDataEx(0, 0, option_keys, [])
255259

256260
(err,) = cuda.cuCtxDestroy(ctx)
@@ -622,6 +626,7 @@ def test_cuda_coredump_attr():
622626
assert err == cuda.CUresult.CUDA_SUCCESS
623627

624628

629+
@skipif_testing_with_compute_sanitizer
625630
def test_get_error_name_and_string():
626631
(err,) = cuda.cuInit(0)
627632
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -951,6 +956,7 @@ def test_CUmemDecompressParams_st():
951956
assert int(desc.dstActBytes) == 0
952957

953958

959+
@skipif_testing_with_compute_sanitizer
954960
def test_all_CUresult_codes():
955961
max_code = int(max(cuda.CUresult))
956962
# Smoke test. CUDA_ERROR_UNKNOWN = 999, but intentionally using literal value.
@@ -983,18 +989,21 @@ def test_all_CUresult_codes():
983989
assert num_good >= 76 # CTK 11.0.3_450.51.06
984990

985991

992+
@skipif_testing_with_compute_sanitizer
986993
def test_cuKernelGetName_failure():
987994
err, name = cuda.cuKernelGetName(0)
988995
assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
989996
assert name is None
990997

991998

999+
@skipif_testing_with_compute_sanitizer
9921000
def test_cuFuncGetName_failure():
9931001
err, name = cuda.cuFuncGetName(0)
9941002
assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
9951003
assert name is None
9961004

9971005

1006+
@skipif_testing_with_compute_sanitizer
9981007
@pytest.mark.skipif(
9991008
driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"),
10001009
reason="When API was introduced",

cuda_bindings/tests/test_cudart.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import numpy as np
1212
import pytest
13+
from conftest import skipif_testing_with_compute_sanitizer
1314

1415
import cuda.cuda as cuda
1516
import cuda.cudart as cudart
@@ -70,6 +71,7 @@ def test_cudart_memcpy():
7071
assertSuccess(err)
7172

7273

74+
@skipif_testing_with_compute_sanitizer
7375
def test_cudart_hostRegister():
7476
# Use hostRegister API to check for correct enum return values
7577
page_size = 80

cuda_core/tests/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,9 @@ def clean_up_cffi_files():
6464
os.remove(f)
6565
except FileNotFoundError:
6666
pass # noqa: SIM105
67+
68+
69+
skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
70+
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
71+
reason="The compute-sanitizer is running, and this test causes an API error.",
72+
)

cuda_core/tests/test_cuda_utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

55
import pytest
6+
from conftest import skipif_testing_with_compute_sanitizer
67

78
from cuda.bindings import driver, runtime
89
from cuda.core.experimental._utils import cuda_utils
@@ -40,6 +41,8 @@ def test_runtime_cuda_error_explanations_health():
4041
assert not extra_expl
4142

4243

44+
# this test causes an API error when the driver is too old to know about all of the error codes
45+
@skipif_testing_with_compute_sanitizer
4346
def test_check_driver_error():
4447
num_unexpected = 0
4548
for error in driver.CUresult:

cuda_core/tests/test_event.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import numpy as np
1414
import pytest
15+
from conftest import skipif_testing_with_compute_sanitizer
1516

1617
import cuda.core.experimental
1718
from cuda.core.experimental import Device, EventOptions, LaunchConfig, Program, ProgramOptions, launch
@@ -75,6 +76,7 @@ def test_is_done(init_cuda):
7576
assert event.is_done in (True, False)
7677

7778

79+
@skipif_testing_with_compute_sanitizer
7880
def test_error_timing_disabled():
7981
device = Device()
8082
device.set_current()
@@ -97,6 +99,7 @@ def test_error_timing_disabled():
9799
event2 - event1
98100

99101

102+
@skipif_testing_with_compute_sanitizer
100103
def test_error_timing_recorded():
101104
device = Device()
102105
device.set_current()
@@ -117,6 +120,7 @@ def test_error_timing_recorded():
117120

118121

119122
# TODO: improve this once path finder can find headers
123+
@skipif_testing_with_compute_sanitizer
120124
@pytest.mark.skipif(os.environ.get("CUDA_PATH") is None, reason="need libcu++ header")
121125
@pytest.mark.skipif(tuple(int(i) for i in np.__version__.split(".")[:2]) < (2, 1), reason="need numpy 2.1.0+")
122126
def test_error_timing_incomplete():

cuda_core/tests/test_linker.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

55
import pytest
6+
from conftest import skipif_testing_with_compute_sanitizer
67

78
from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker
89
from cuda.core.experimental._module import ObjectCode
@@ -140,6 +141,8 @@ def test_linker_link_invalid_target_type(compile_ptx_functions):
140141
linker.link("invalid_target")
141142

142143

144+
# this test causes an API error when using the culink API
145+
@skipif_testing_with_compute_sanitizer
143146
def test_linker_get_error_log(compile_ptx_functions):
144147
options = LinkerOptions(arch=ARCH)
145148

0 commit comments

Comments
 (0)