
Commit da6310d

Merge remote-tracking branch 'origin/documentation-remove-gpu-dependency' into documentation-remove-gpu-dependency

committed · 2 parents 661dc70 + 6ca8eda

File tree: 12 files changed (+119 / -124 lines)


cuda_core/cuda/core/experimental/_context.py

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@

 from dataclasses import dataclass

-from cuda import cuda
+from cuda.core.experimental._utils import driver


 @dataclass
@@ -20,7 +20,7 @@ def __init__(self):

     @staticmethod
     def _from_ctx(obj, dev_id):
-        assert isinstance(obj, cuda.CUcontext)
+        assert isinstance(obj, driver.CUcontext)
         ctx = Context.__new__(Context)
         ctx._handle = obj
         ctx._id = dev_id
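The `_utils` change that makes the new `driver` (and `runtime`) names resolve is among the 12 files in this commit but is not shown in this excerpt. A minimal sketch of the kind of re-export the new import style assumes:

# Hypothetical sketch of a driver/runtime re-export in _utils (the actual
# _utils.py diff is not shown above); callers then import `driver`/`runtime`
# instead of `cuda`/`cudart` directly.
try:
    from cuda.bindings import driver, runtime  # newer cuda-python layout
except ImportError:
    from cuda import cuda as driver  # older layout: alias the legacy modules
    from cuda import cudart as runtime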

cuda_core/cuda/core/experimental/_device.py

Lines changed: 23 additions & 24 deletions
@@ -5,11 +5,10 @@
 import threading
 from typing import Union

-from cuda import cuda, cudart
 from cuda.core.experimental._context import Context, ContextOptions
 from cuda.core.experimental._memory import Buffer, MemoryResource, _DefaultAsyncMempool, _SynchronousMemoryResource
 from cuda.core.experimental._stream import Stream, StreamOptions, default_stream
-from cuda.core.experimental._utils import ComputeCapability, CUDAError, handle_return, precondition
+from cuda.core.experimental._utils import ComputeCapability, CUDAError, driver, handle_return, precondition, runtime

 _tls = threading.local()
 _tls_lock = threading.Lock()
@@ -47,17 +46,17 @@ class Device:
     def __new__(cls, device_id=None):
         # important: creating a Device instance does not initialize the GPU!
         if device_id is None:
-            device_id = handle_return(cudart.cudaGetDevice())
+            device_id = handle_return(runtime.cudaGetDevice())
             assert isinstance(device_id, int), f"{device_id=}"
         else:
-            total = handle_return(cudart.cudaGetDeviceCount())
+            total = handle_return(runtime.cudaGetDeviceCount())
             if not isinstance(device_id, int) or not (0 <= device_id < total):
                 raise ValueError(f"device_id must be within [0, {total}), got {device_id}")

         # ensure Device is singleton
         with _tls_lock:
             if not hasattr(_tls, "devices"):
-                total = handle_return(cudart.cudaGetDeviceCount())
+                total = handle_return(runtime.cudaGetDeviceCount())
                 _tls.devices = []
                 for dev_id in range(total):
                     dev = super().__new__(cls)
@@ -66,7 +65,7 @@ def __new__(cls, device_id=None):
                     # use the SynchronousMemoryResource which does not use memory pools.
                     if (
                         handle_return(
-                            cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported, 0)
+                            runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported, 0)
                         )
                     ) == 1:
                         dev._mr = _DefaultAsyncMempool(dev_id)
@@ -90,7 +89,7 @@ def device_id(self) -> int:
     @property
     def pci_bus_id(self) -> str:
         """Return a PCI Bus Id string for this device."""
-        bus_id = handle_return(cudart.cudaDeviceGetPCIBusId(13, self._id))
+        bus_id = handle_return(runtime.cudaDeviceGetPCIBusId(13, self._id))
         return bus_id[:12].decode()

     @property
@@ -107,11 +106,11 @@ def uuid(self) -> str:
         driver is older than CUDA 11.4.

         """
-        driver_ver = handle_return(cuda.cuDriverGetVersion())
+        driver_ver = handle_return(driver.cuDriverGetVersion())
         if driver_ver >= 11040:
-            uuid = handle_return(cuda.cuDeviceGetUuid_v2(self._id))
+            uuid = handle_return(driver.cuDeviceGetUuid_v2(self._id))
         else:
-            uuid = handle_return(cuda.cuDeviceGetUuid(self._id))
+            uuid = handle_return(driver.cuDeviceGetUuid(self._id))
         uuid = uuid.bytes.hex()
         # 8-4-4-4-12
         return f"{uuid[:8]}-{uuid[8:12]}-{uuid[12:16]}-{uuid[16:20]}-{uuid[20:]}"
@@ -120,24 +119,24 @@ def uuid(self) -> str:
     def name(self) -> str:
         """Return the device name."""
         # Use 256 characters to be consistent with CUDA Runtime
-        name = handle_return(cuda.cuDeviceGetName(256, self._id))
+        name = handle_return(driver.cuDeviceGetName(256, self._id))
         name = name.split(b"\0")[0]
         return name.decode()

     @property
     def properties(self) -> dict:
         """Return information about the compute-device."""
         # TODO: pythonize the key names
-        return handle_return(cudart.cudaGetDeviceProperties(self._id))
+        return handle_return(runtime.cudaGetDeviceProperties(self._id))

     @property
     def compute_capability(self) -> ComputeCapability:
         """Return a named tuple with 2 fields: major and minor."""
         major = handle_return(
-            cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id)
+            runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id)
         )
         minor = handle_return(
-            cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, self._id)
+            runtime.cudaDeviceGetAttribute(runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, self._id)
         )
         return ComputeCapability(major, minor)

@@ -151,7 +150,7 @@ def context(self) -> Context:
         Device must be initialized.

         """
-        ctx = handle_return(cuda.cuCtxGetCurrent())
+        ctx = handle_return(driver.cuCtxGetCurrent())
         assert int(ctx) != 0
         return Context._from_ctx(ctx, self._id)

@@ -224,23 +223,23 @@ def set_current(self, ctx: Context = None) -> Union[Context, None]:
                     "the provided context was created on a different "
                     f"device {ctx._id} other than the target {self._id}"
                 )
-            prev_ctx = handle_return(cuda.cuCtxPopCurrent())
-            handle_return(cuda.cuCtxPushCurrent(ctx._handle))
+            prev_ctx = handle_return(driver.cuCtxPopCurrent())
+            handle_return(driver.cuCtxPushCurrent(ctx._handle))
             self._has_inited = True
             if int(prev_ctx) != 0:
                 return Context._from_ctx(prev_ctx, self._id)
         else:
-            ctx = handle_return(cuda.cuCtxGetCurrent())
+            ctx = handle_return(driver.cuCtxGetCurrent())
             if int(ctx) == 0:
                 # use primary ctx
-                ctx = handle_return(cuda.cuDevicePrimaryCtxRetain(self._id))
-                handle_return(cuda.cuCtxPushCurrent(ctx))
+                ctx = handle_return(driver.cuDevicePrimaryCtxRetain(self._id))
+                handle_return(driver.cuCtxPushCurrent(ctx))
             else:
-                ctx_id = handle_return(cuda.cuCtxGetDevice())
+                ctx_id = handle_return(driver.cuCtxGetDevice())
                 if ctx_id != self._id:
                     # use primary ctx
-                    ctx = handle_return(cuda.cuDevicePrimaryCtxRetain(self._id))
-                    handle_return(cuda.cuCtxPushCurrent(ctx))
+                    ctx = handle_return(driver.cuDevicePrimaryCtxRetain(self._id))
+                    handle_return(driver.cuCtxPushCurrent(ctx))
                 else:
                     # no-op, a valid context already exists and is set current
                     pass
@@ -337,4 +336,4 @@ def sync(self):
         Device must be initialized.

         """
-        handle_return(cudart.cudaDeviceSynchronize())
+        handle_return(runtime.cudaDeviceSynchronize())
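For orientation, a short usage sketch of the Device members touched above, assuming a CUDA-capable GPU and the public cuda.core.experimental namespace; this is not part of the diff itself:

# Usage sketch (assumes cuda.core is installed and a GPU is present).
from cuda.core.experimental import Device

dev = Device()          # creating the instance does not initialize the GPU
dev.set_current()       # pushes or retains the primary context on this device
print(dev.name, dev.compute_capability, dev.pci_bus_id)
dev.sync()              # cudaDeviceSynchronize under the hood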

cuda_core/cuda/core/experimental/_event.py

Lines changed: 9 additions & 10 deletions
@@ -6,8 +6,7 @@
 from dataclasses import dataclass
 from typing import Optional

-from cuda import cuda
-from cuda.core.experimental._utils import CUDAError, check_or_create_options, handle_return
+from cuda.core.experimental._utils import CUDAError, check_or_create_options, driver, handle_return


 @dataclass
@@ -60,7 +59,7 @@ def __init__(self, event_obj, handle):

         def close(self):
             if self.handle is not None:
-                handle_return(cuda.cuEventDestroy(self.handle))
+                handle_return(driver.cuEventDestroy(self.handle))
                 self.handle = None

     __slots__ = ("__weakref__", "_mnff", "_timing_disabled", "_busy_waited")
@@ -80,14 +79,14 @@ def _init(options: Optional[EventOptions] = None):
         self._timing_disabled = False
         self._busy_waited = False
         if not options.enable_timing:
-            flags |= cuda.CUevent_flags.CU_EVENT_DISABLE_TIMING
+            flags |= driver.CUevent_flags.CU_EVENT_DISABLE_TIMING
             self._timing_disabled = True
         if options.busy_waited_sync:
-            flags |= cuda.CUevent_flags.CU_EVENT_BLOCKING_SYNC
+            flags |= driver.CUevent_flags.CU_EVENT_BLOCKING_SYNC
             self._busy_waited = True
         if options.support_ipc:
             raise NotImplementedError("TODO")
-        self._mnff.handle = handle_return(cuda.cuEventCreate(flags))
+        self._mnff.handle = handle_return(driver.cuEventCreate(flags))
         return self

     def close(self):
@@ -119,15 +118,15 @@ def sync(self):
         has been completed.

         """
-        handle_return(cuda.cuEventSynchronize(self._mnff.handle))
+        handle_return(driver.cuEventSynchronize(self._mnff.handle))

     @property
     def is_done(self) -> bool:
         """Return True if all captured works have been completed, otherwise False."""
-        (result,) = cuda.cuEventQuery(self._mnff.handle)
-        if result == cuda.CUresult.CUDA_SUCCESS:
+        (result,) = driver.cuEventQuery(self._mnff.handle)
+        if result == driver.CUresult.CUDA_SUCCESS:
             return True
-        elif result == cuda.CUresult.CUDA_ERROR_NOT_READY:
+        elif result == driver.CUresult.CUDA_ERROR_NOT_READY:
             return False
         else:
             raise CUDAError(f"unexpected error: {result}")
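A hypothetical sketch of how EventOptions maps onto the flags handled in _init above; obtaining the event through Stream.record and the create_stream call are assumptions about the surrounding API, not part of this diff:

# Hypothetical usage sketch: EventOptions fields map onto CUevent_flags.
from cuda.core.experimental import Device, EventOptions

dev = Device()
dev.set_current()
stream = dev.create_stream()
options = EventOptions(enable_timing=False, busy_waited_sync=True)
event = stream.record(options=options)  # CU_EVENT_DISABLE_TIMING | CU_EVENT_BLOCKING_SYNC
event.sync()
assert event.is_done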

cuda_core/cuda/core/experimental/_launcher.py

Lines changed: 7 additions & 8 deletions
@@ -5,12 +5,11 @@
 from dataclasses import dataclass
 from typing import Optional, Union

-from cuda import cuda
 from cuda.core.experimental._device import Device
 from cuda.core.experimental._kernel_arg_handler import ParamHolder
 from cuda.core.experimental._module import Kernel
 from cuda.core.experimental._stream import Stream
-from cuda.core.experimental._utils import CUDAError, check_or_create_options, get_binding_version, handle_return
+from cuda.core.experimental._utils import CUDAError, check_or_create_options, driver, get_binding_version, handle_return

 # TODO: revisit this treatment for py313t builds
 _inited = False
@@ -25,7 +24,7 @@ def _lazy_init():
     global _use_ex
     # binding availability depends on cuda-python version
     _py_major_minor = get_binding_version()
-    _driver_ver = handle_return(cuda.cuDriverGetVersion())
+    _driver_ver = handle_return(driver.cuDriverGetVersion())
     _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8))
     _inited = True

@@ -139,25 +138,25 @@ def launch(kernel, config, *kernel_args):
     # mainly to see if the "Ex" API is available and if so we use it, as it's more feature
     # rich.
     if _use_ex:
-        drv_cfg = cuda.CUlaunchConfig()
+        drv_cfg = driver.CUlaunchConfig()
         drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
         drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
         drv_cfg.hStream = config.stream.handle
         drv_cfg.sharedMemBytes = config.shmem_size
         attrs = []  # TODO: support more attributes
         if config.cluster:
-            attr = cuda.CUlaunchAttribute()
-            attr.id = cuda.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
+            attr = driver.CUlaunchAttribute()
+            attr.id = driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
             dim = attr.value.clusterDim
             dim.x, dim.y, dim.z = config.cluster
             attrs.append(attr)
         drv_cfg.numAttrs = len(attrs)
         drv_cfg.attrs = attrs
-        handle_return(cuda.cuLaunchKernelEx(drv_cfg, int(kernel._handle), args_ptr, 0))
+        handle_return(driver.cuLaunchKernelEx(drv_cfg, int(kernel._handle), args_ptr, 0))
     else:
         # TODO: check if config has any unsupported attrs
         handle_return(
-            cuda.cuLaunchKernel(
+            driver.cuLaunchKernel(
                 int(kernel._handle), *config.grid, *config.block, config.shmem_size, config.stream._handle, args_ptr, 0
             )
         )
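A hypothetical end-to-end sketch of calling launch() with a LaunchConfig; the kernel source, names, and the Program compile step are illustrative assumptions, not part of this diff:

# Hypothetical sketch of user code reaching the launch() path shown above.
from cuda.core.experimental import Device, LaunchConfig, Program, launch

dev = Device()
dev.set_current()
stream = dev.create_stream()

# Illustrative no-op kernel; compile options may be required by the toolchain.
code = 'extern "C" __global__ void noop() {}'
prog = Program(code, code_type="c++")
kernel = prog.compile("cubin").get_kernel("noop")

config = LaunchConfig(grid=1, block=1, stream=stream, shmem_size=0)
launch(kernel, config)  # any kernel arguments would follow config
stream.sync()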

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 3 additions & 4 deletions
@@ -9,9 +9,8 @@
 from dataclasses import dataclass
 from typing import List, Optional

-from cuda import cuda
 from cuda.core.experimental._module import ObjectCode
-from cuda.core.experimental._utils import check_or_create_options, handle_return
+from cuda.core.experimental._utils import check_or_create_options, driver, handle_return

 # TODO: revisit this treatment for py313t builds
 _driver = None  # populated if nvJitLink cannot be used
@@ -29,7 +28,7 @@ def _decide_nvjitlink_or_driver():
     if _driver or _nvjitlink:
         return

-    _driver_ver = handle_return(cuda.cuDriverGetVersion())
+    _driver_ver = handle_return(driver.cuDriverGetVersion())
     _driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10)
     try:
         from cuda.bindings import nvjitlink as _nvjitlink
@@ -49,7 +48,7 @@ def _decide_nvjitlink_or_driver():
             stacklevel=3,
             category=RuntimeWarning,
         )
-        _driver = cuda
+        _driver = driver
         return True
     else:
         return False
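The version decoding in _decide_nvjitlink_or_driver is easy to check by hand: cuDriverGetVersion returns an integer encoding 1000 * major + 10 * minor, which the tuple conversion above unpacks. For example:

# Worked example of the (major, minor) decoding shown above.
raw = 12040                                     # e.g. a CUDA 12.4 driver
major, minor = raw // 1000, (raw % 1000) // 10
assert (major, minor) == (12, 4)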

cuda_core/cuda/core/experimental/_memory.py

Lines changed: 10 additions & 11 deletions
@@ -8,10 +8,9 @@
 import weakref
 from typing import Optional, Tuple, TypeVar

-from cuda import cuda
 from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
 from cuda.core.experimental._stream import default_stream
-from cuda.core.experimental._utils import handle_return
+from cuda.core.experimental._utils import driver, handle_return

 PyCapsule = TypeVar("PyCapsule")

@@ -141,7 +140,7 @@ def copy_to(self, dst: Buffer = None, *, stream) -> Buffer:
             dst = self._mnff.mr.allocate(self._mnff.size, stream)
         if dst._mnff.size != self._mnff.size:
             raise ValueError("buffer sizes mismatch between src and dst")
-        handle_return(cuda.cuMemcpyAsync(dst._mnff.ptr, self._mnff.ptr, self._mnff.size, stream.handle))
+        handle_return(driver.cuMemcpyAsync(dst._mnff.ptr, self._mnff.ptr, self._mnff.size, stream.handle))
         return dst

     def copy_from(self, src: Buffer, *, stream):
@@ -160,7 +159,7 @@ def copy_from(self, src: Buffer, *, stream):
             raise ValueError("stream must be provided")
         if src._mnff.size != self._mnff.size:
             raise ValueError("buffer sizes mismatch between src and dst")
-        handle_return(cuda.cuMemcpyAsync(self._mnff.ptr, src._mnff.ptr, self._mnff.size, stream.handle))
+        handle_return(driver.cuMemcpyAsync(self._mnff.ptr, src._mnff.ptr, self._mnff.size, stream.handle))

     def __dlpack__(
         self,
@@ -243,19 +242,19 @@ class _DefaultAsyncMempool(MemoryResource):
     __slots__ = ("_dev_id",)

     def __init__(self, dev_id):
-        self._handle = handle_return(cuda.cuDeviceGetMemPool(dev_id))
+        self._handle = handle_return(driver.cuDeviceGetMemPool(dev_id))
         self._dev_id = dev_id

     def allocate(self, size, stream=None) -> Buffer:
         if stream is None:
             stream = default_stream()
-        ptr = handle_return(cuda.cuMemAllocFromPoolAsync(size, self._handle, stream.handle))
+        ptr = handle_return(driver.cuMemAllocFromPoolAsync(size, self._handle, stream.handle))
         return Buffer(ptr, size, self)

     def deallocate(self, ptr, size, stream=None):
         if stream is None:
             stream = default_stream()
-        handle_return(cuda.cuMemFreeAsync(ptr, stream.handle))
+        handle_return(driver.cuMemFreeAsync(ptr, stream.handle))

     @property
     def is_device_accessible(self) -> bool:
@@ -276,11 +275,11 @@ def __init__(self):
         self._handle = None

     def allocate(self, size, stream=None) -> Buffer:
-        ptr = handle_return(cuda.cuMemAllocHost(size))
+        ptr = handle_return(driver.cuMemAllocHost(size))
         return Buffer(ptr, size, self)

     def deallocate(self, ptr, size, stream=None):
-        handle_return(cuda.cuMemFreeHost(ptr))
+        handle_return(driver.cuMemFreeHost(ptr))

     @property
     def is_device_accessible(self) -> bool:
@@ -303,14 +302,14 @@ def __init__(self, dev_id):
         self._dev_id = dev_id

     def allocate(self, size, stream=None) -> Buffer:
-        ptr = handle_return(cuda.cuMemAlloc(size))
+        ptr = handle_return(driver.cuMemAlloc(size))
         return Buffer(ptr, size, self)

     def deallocate(self, ptr, size, stream=None):
         if stream is None:
             stream = default_stream()
         stream.sync()
-        handle_return(cuda.cuMemFree(ptr))
+        handle_return(driver.cuMemFree(ptr))

     @property
     def is_device_accessible(self) -> bool:
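A hypothetical sketch of the Buffer copy path these memcpy changes sit on; Device.allocate as the entry point and the buffer size are assumptions about the surrounding API, not part of this diff:

# Hypothetical sketch: allocate from a device's memory resource and copy
# between buffers on a stream (cuMemcpyAsync under the hood).
from cuda.core.experimental import Device

dev = Device()
dev.set_current()
stream = dev.create_stream()

src = dev.allocate(1024, stream=stream)   # Buffer backed by dev's memory resource
dst = src.copy_to(stream=stream)          # async copy into a freshly allocated buffer
src.copy_from(dst, stream=stream)         # async copy back
stream.sync()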
