|
1 | 1 | # Copyright 2024 NVIDIA Corporation. All rights reserved.
|
2 | 2 | # SPDX-License-Identifier: Apache-2.0
|
3 | 3 |
|
| 4 | +import ctypes |
4 | 5 | import warnings
|
5 | 6 |
|
6 | 7 | import pytest
|
| 8 | +from conftest import skipif_testing_with_compute_sanitizer |
7 | 9 |
|
8 | 10 | import cuda.core.experimental
|
9 | 11 | from cuda.core.experimental import ObjectCode, Program, ProgramOptions, system
|
| 12 | +from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return |
10 | 13 |
|
11 |
| -SAXPY_KERNEL = """ |
| 14 | +SAXPY_KERNEL = r""" |
12 | 15 | template<typename T>
|
13 | 16 | __global__ void saxpy(const T a,
|
14 | 17 | const T* x,
|
|
23 | 26 | """
|
24 | 27 |
|
25 | 28 |
|
@pytest.fixture(scope="module")
def cuda12_prerequisite_check():
    """Report whether the CUDA 12 prerequisites of these tests are met.

    Binding availability depends on both the cuda-python version and the
    version of the underlying CUDA toolkit, so both are queried.

    Returns:
        True when the binding major version is at least 12 and
        ``cuDriverGetVersion`` reports at least 12000; False otherwise.
    """
    binding_major, _binding_minor = get_binding_version()
    driver_version = handle_return(driver.cuDriverGetVersion())
    if binding_major < 12:
        return False
    # 12000 is presumably the driver's integer encoding of version 12.0.
    return driver_version >= 12000
| 36 | + |
| 37 | + |
def test_kernel_attributes_init_disabled():
    """Constructing KernelAttributes directly must raise RuntimeError."""
    expected_message = r"^KernelAttributes cannot be instantiated directly\."
    with pytest.raises(RuntimeError, match=expected_message):
        cuda.core.experimental._module.KernelAttributes()  # Ensure back door is locked.
|
@@ -156,3 +168,80 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path):
|
def test_object_code_handle(get_saxpy_object_code):
    """The object code supplied by the fixture must expose a non-None handle."""
    assert get_saxpy_object_code.handle is not None
|
| 171 | + |
| 172 | + |
@skipif_testing_with_compute_sanitizer
def test_saxpy_arguments(get_saxpy_kernel, cuda12_prerequisite_check):
    """Compare the saxpy kernel's reported parameter layout to a ctypes mirror.

    The kernel must report five arguments, and the offset and size of each
    entry in ``arguments_info`` must match the corresponding field of an
    equivalent ``ctypes.Structure``. Skipped when the CUDA 12 prerequisites
    are not met.
    """
    if not cuda12_prerequisite_check:
        pytest.skip("Test requires CUDA 12")
    kernel, _ = get_saxpy_kernel

    assert kernel.num_arguments == 5

    # The property getter's annotations are expected to mention ParamInfo.
    getter_annotations = type(kernel).arguments_info.fget.__annotations__
    assert "ParamInfo" in str(getter_annotations)
    param_infos = kernel.arguments_info
    assert len(param_infos) == kernel.num_arguments

    class ExpectedStruct(ctypes.Structure):
        # Mirrors the kernel signature with float-typed fields (presumably the
        # instantiation the fixture compiles -- confirm against the fixture).
        _fields_ = [
            ("a", ctypes.c_float),
            ("x", ctypes.POINTER(ctypes.c_float)),
            ("y", ctypes.POINTER(ctypes.c_float)),
            ("out", ctypes.POINTER(ctypes.c_float)),
            ("N", ctypes.c_size_t),
        ]

    actual_offsets = [info.offset for info in param_infos]
    actual_sizes = [info.size for info in param_infos]
    # ctypes exposes each field as a class-level descriptor carrying
    # ``offset`` and ``size``.
    descriptors = [getattr(ExpectedStruct, field_name) for field_name, _ in ExpectedStruct._fields_]
    # Lengths are already pinned to five above, so whole-list comparison is
    # equivalent to an element-wise check.
    assert actual_offsets == [descriptor.offset for descriptor in descriptors]
    assert actual_sizes == [descriptor.size for descriptor in descriptors]
| 202 | + |
| 203 | + |
@skipif_testing_with_compute_sanitizer
@pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16])
@pytest.mark.parametrize(
    "c_type_name,c_type",
    [("int", ctypes.c_int), ("short", ctypes.c_short)],
    ids=["int", "short"],
)
def test_num_arguments(init_cuda, nargs, c_type_name, c_type, cuda12_prerequisite_check):
    """Check argument count and layout for kernels with ``nargs`` parameters.

    A trivial kernel taking ``nargs`` parameters of type ``c_type_name`` is
    compiled to a cubin. Its ``num_arguments`` must equal ``nargs``, and each
    entry of ``arguments_info`` must match the offset and size of the
    corresponding field in an equivalent ``ctypes.Structure``. Skipped when
    the CUDA 12 prerequisites are not met.
    """
    if not cuda12_prerequisite_check:
        pytest.skip("Test requires CUDA 12")
    kernel_name = f"foo{nargs}"
    args_str = ", ".join(f"{c_type_name} p_{i}" for i in range(nargs))
    src = f"__global__ void {kernel_name}({args_str}) {{ }}"
    prog = Program(src, code_type="c++")
    mod = prog.compile(
        "cubin",
        name_expressions=(kernel_name,),
    )
    krn = mod.get_kernel(kernel_name)
    assert krn.num_arguments == nargs

    class ExpectedStruct(ctypes.Structure):
        _fields_ = [(f"arg_{i}", c_type) for i in range(nargs)]

    members = [getattr(ExpectedStruct, f"arg_{i}") for i in range(nargs)]

    arg_info = krn.arguments_info
    # zip() stops at the shorter input, so without this length check a short
    # or empty arguments_info would pass the layout assertions vacuously.
    assert len(arg_info) == nargs
    # Whole-list comparison lets pytest show exactly which entry differs.
    assert [actual.offset for actual in arg_info] == [expected.offset for expected in members]
    assert [actual.size for actual in arg_info] == [expected.size for expected in members]
| 228 | + |
| 229 | + |
@skipif_testing_with_compute_sanitizer
def test_num_args_error_handling(deinit_all_contexts_function, cuda12_prerequisite_check):
    """Reading ``num_arguments`` with no current context must raise CUDAError.

    Skipped when the CUDA 12 prerequisites are not met.
    """
    if not cuda12_prerequisite_check:
        pytest.skip("Test requires CUDA 12")
    kernel_name = "foo"
    program = Program("__global__ void foo(int a) { }", code_type="c++")
    object_code = program.compile("cubin", name_expressions=(kernel_name,))
    kernel = object_code.get_kernel(kernel_name)
    # Empty the driver's context stack using the helper from conftest.
    deinit_all_contexts_function()
    # With no current context, cuKernelGetParamInfo is expected to fail, and
    # that failure should surface as a CUDAError.
    with pytest.raises(CUDAError):
        # The assignment avoids linter error "B018: useless expression".
        _ = kernel.num_arguments
0 commit comments