Skip to content

Commit 219e2b7

Browse files
committed
naive implementation first draft
1 parent 6820f30 commit 219e2b7

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

cuda_core/cuda/core/experimental/_device.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from cuda.core.experimental._utils import handle_return, ComputeCapability, CUDAError, \
1111
precondition
1212
from cuda.core.experimental._context import Context, ContextOptions
13-
from cuda.core.experimental._memory import _DefaultAsyncMempool, Buffer, MemoryResource
13+
from cuda.core.experimental._memory import _DefaultAsyncMempool, _AsyncMemoryResource, Buffer, MemoryResource
1414
from cuda.core.experimental._stream import default_stream, Stream, StreamOptions
1515

1616

@@ -65,7 +65,13 @@ def __new__(cls, device_id=None):
6565
for dev_id in range(total):
6666
dev = super().__new__(cls)
6767
dev._id = dev_id
68-
dev._mr = _DefaultAsyncMempool(dev_id)
68+
# If the device is in TCC mode, or does not support memory pools for some other reason, use the AsyncMemoryResource which does not use memory pools.
69+
# The DefaultAsyncMempool uses memory pools, which are not always supported.
70+
if handle_return(cudart.cudaGetDeviceProperties(dev_id))['CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED'] == 0:
71+
dev._mr = _AsyncMemoryResource(dev_id)
72+
else:
73+
dev._mr = _DefaultAsyncMempool(dev_id)
74+
6975
dev._has_inited = False
7076
_tls.devices.append(dev)
7177

cuda_core/cuda/core/experimental/_memory.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,34 @@ def is_host_accessible(self) -> bool:
296296
    @property
    def device_id(self) -> int:
        # Pinned memory lives in host memory and is not tied to any single
        # GPU, so asking this resource for a device id is always an error.
        raise RuntimeError("the pinned memory resource is not bound to any GPU")
299+
300+
class _AsyncMemoryResource(MemoryResource):
    """Stream-ordered device memory resource that bypasses memory pools.

    Allocates with ``cuMemAllocAsync``/``cuMemFreeAsync`` directly instead of
    going through a mempool handle, intended for devices where pools are
    unavailable (presumably e.g. TCC mode — confirm against _device.py usage).
    """

    __slots__ = ("_dev_id",)

    def __init__(self, dev_id):
        # No pool object to manage, so there is no underlying handle to keep.
        self._handle = None
        self._dev_id = dev_id

    def allocate(self, size, stream=None) -> Buffer:
        """Asynchronously allocate ``size`` bytes, on the default stream if none is given."""
        target = default_stream() if stream is None else stream
        ptr = handle_return(cuda.cuMemAllocAsync(size, target._handle))
        return Buffer(ptr, size, self)

    def deallocate(self, ptr, size, stream=None):
        """Asynchronously release ``ptr``, on the default stream if none is given."""
        target = default_stream() if stream is None else stream
        handle_return(cuda.cuMemFreeAsync(ptr, target._handle))

    @property
    def is_device_accessible(self) -> bool:
        # Device allocation: reachable from the GPU.
        return True

    @property
    def is_host_accessible(self) -> bool:
        # Not mapped into host address space.
        return False

    @property
    def device_id(self) -> int:
        # The device this resource was constructed for.
        return self._dev_id

0 commit comments

Comments
 (0)