Skip to content

Commit 6044e4e

Browse files
committed
Rebase to CTK 12.5
1 parent 2be0aac commit 6044e4e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+4624
-482
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ There're differences in each of these options that are described further in [Ins
1717
CUDA Python is supported on all platforms on which CUDA is supported. Specific dependencies are as follows:
1818

1919
* Driver: Linux (450.80.02 or later), Windows (456.38 or later)
20-
* CUDA Toolkit 12.0 to 12.4
20+
* CUDA Toolkit 12.0 to 12.5
2121

2222
Only the NVRTC redistributable component is required from the CUDA Toolkit. The Installation Guides in the [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PyPI, Conda, or the Local Installer.
2323

cuda/_cuda/ccuda.pxd.in

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,11 @@ cdef CUresult _cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev)
147147
cdef CUresult _cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
148148
{{endif}}
149149

150+
{{if 'cuCtxCreate_v4' in found_functions}}
151+
152+
cdef CUresult _cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
153+
{{endif}}
154+
150155
{{if 'cuCtxDestroy_v2' in found_functions}}
151156

152157
cdef CUresult _cuCtxDestroy_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -237,6 +242,16 @@ cdef CUresult _cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil
237242
cdef CUresult _cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil
238243
{{endif}}
239244

245+
{{if 'cuCtxRecordEvent' in found_functions}}
246+
247+
cdef CUresult _cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
248+
{{endif}}
249+
250+
{{if 'cuCtxWaitEvent' in found_functions}}
251+
252+
cdef CUresult _cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
253+
{{endif}}
254+
240255
{{if 'cuCtxAttach' in found_functions}}
241256

242257
cdef CUresult _cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -382,6 +397,11 @@ cdef CUresult _cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUD
382397
cdef CUresult _cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
383398
{{endif}}
384399

400+
{{if 'cuKernelGetLibrary' in found_functions}}
401+
402+
cdef CUresult _cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
403+
{{endif}}
404+
385405
{{if 'cuLibraryGetGlobal' in found_functions}}
386406

387407
cdef CUresult _cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -1027,6 +1047,11 @@ cdef CUresult _cuStreamGetId(CUstream hStream, unsigned long long* streamId) exc
10271047
cdef CUresult _cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
10281048
{{endif}}
10291049

1050+
{{if 'cuStreamGetCtx_v2' in found_functions}}
1051+
1052+
cdef CUresult _cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil
1053+
{{endif}}
1054+
10301055
{{if 'cuStreamWaitEvent' in found_functions}}
10311056

10321057
cdef CUresult _cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -2152,6 +2177,11 @@ cdef CUresult _cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA
21522177
cdef CUresult _cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil
21532178
{{endif}}
21542179

2180+
{{if 'cuGreenCtxStreamCreate' in found_functions}}
2181+
2182+
cdef CUresult _cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
2183+
{{endif}}
2184+
21552185
{{if 'cuProfilerStart' in found_functions}}
21562186

21572187
cdef CUresult _cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil

cuda/_cuda/ccuda.pyx.in

Lines changed: 183 additions & 0 deletions
Large diffs are not rendered by default.

cuda/_lib/ccudart/ccudart.pxd.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,3 +308,4 @@ from libcpp cimport bool
308308
{{if True}}cdef cudaError_t _getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
309309
{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
310310
{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
311+
{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}

cuda/_lib/ccudart/ccudart.pyx.in

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4916,4 +4916,15 @@ cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCal
49164916
if err != cudaSuccess:
49174917
_setLastError(err)
49184918
return err
4919+
4920+
{{endif}}
4921+
{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}
4922+
4923+
cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
4924+
cdef cudaError_t err = cudaSuccess
4925+
err = <cudaError_t>ccuda._cuGetProcAddress_v2(symbol, funcPtr, cudaVersion, flags, <ccuda.CUdriverProcAddressQueryResult*>driverStatus)
4926+
if err != cudaSuccess:
4927+
_setLastError(err)
4928+
return err
4929+
49194930
{{endif}}

cuda/_lib/utils.pyx.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ cdef class HelperInputVoidPtr:
142142
elif isinstance(ptr, (int)):
143143
# Easy run, user gave us an already configured void** address
144144
self._cptr = <void*><void_ptr>ptr
145+
elif isinstance(ptr, (cuda.CUdeviceptr)):
146+
self._cptr = <void*><void_ptr>int(ptr)
145147
elif PyObject_CheckBuffer(ptr):
146148
# Easy run, get address from Python Buffer Protocol
147149
err_buffer = PyObject_GetBuffer(ptr, &self._pybuffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)

cuda/ccuda.pxd.in

Lines changed: 93 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ cdef extern from "cuda.h":
116116
pass
117117
ctypedef CUasyncCallbackEntry_st* CUasyncCallbackHandle
118118

119+
cdef struct CUgreenCtx_st:
120+
pass
121+
ctypedef CUgreenCtx_st* CUgreenCtx
122+
119123
cdef struct CUuuid_st:
120124
char bytes[16]
121125

@@ -359,7 +363,20 @@ cdef extern from "cuda.h":
359363
CU_AD_FORMAT_BC6H_SF16 = 156
360364
CU_AD_FORMAT_BC7_UNORM = 157
361365
CU_AD_FORMAT_BC7_UNORM_SRGB = 158
366+
CU_AD_FORMAT_P010 = 159
367+
CU_AD_FORMAT_P016 = 161
368+
CU_AD_FORMAT_NV16 = 162
369+
CU_AD_FORMAT_P210 = 163
370+
CU_AD_FORMAT_P216 = 164
371+
CU_AD_FORMAT_YUY2 = 165
372+
CU_AD_FORMAT_Y210 = 166
373+
CU_AD_FORMAT_Y216 = 167
374+
CU_AD_FORMAT_AYUV = 168
375+
CU_AD_FORMAT_Y410 = 169
362376
CU_AD_FORMAT_NV12 = 176
377+
CU_AD_FORMAT_Y416 = 177
378+
CU_AD_FORMAT_Y444_PLANAR8 = 178
379+
CU_AD_FORMAT_Y444_PLANAR10 = 179
363380
CU_AD_FORMAT_UNORM_INT8X1 = 192
364381
CU_AD_FORMAT_UNORM_INT8X2 = 193
365382
CU_AD_FORMAT_UNORM_INT8X4 = 194
@@ -372,6 +389,7 @@ cdef extern from "cuda.h":
372389
CU_AD_FORMAT_SNORM_INT16X1 = 201
373390
CU_AD_FORMAT_SNORM_INT16X2 = 202
374391
CU_AD_FORMAT_SNORM_INT16X4 = 203
392+
CU_AD_FORMAT_MAX = 2147483647
375393

376394
ctypedef CUarray_format_enum CUarray_format
377395

@@ -530,7 +548,8 @@ cdef extern from "cuda.h":
530548
CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132
531549
CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133
532550
CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134
533-
CU_DEVICE_ATTRIBUTE_MAX = 135
551+
CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135
552+
CU_DEVICE_ATTRIBUTE_MAX = 136
534553

535554
ctypedef CUdevice_attribute_enum CUdevice_attribute
536555

@@ -778,7 +797,10 @@ cdef extern from "cuda.h":
778797
CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4
779798
CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5
780799
CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6
781-
CU_LIMIT_MAX = 7
800+
CU_LIMIT_SHMEM_SIZE = 7
801+
CU_LIMIT_CIG_ENABLED = 8
802+
CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 9
803+
CU_LIMIT_MAX = 10
782804

783805
ctypedef CUlimit_enum CUlimit
784806

@@ -998,6 +1020,7 @@ cdef extern from "cuda.h":
9981020
CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
9991021
CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12
10001022
CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13
1023+
CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14
10011024

10021025
ctypedef CUlaunchAttributeID_enum CUlaunchAttributeID
10031026

@@ -1033,6 +1056,7 @@ cdef extern from "cuda.h":
10331056
CUlaunchMemSyncDomainMap memSyncDomainMap
10341057
CUlaunchMemSyncDomain memSyncDomain
10351058
anon_struct4 deviceUpdatableKernelNode
1059+
unsigned int sharedMemCarveout
10361060

10371061
ctypedef CUlaunchAttributeValue_union CUlaunchAttributeValue
10381062

@@ -1120,6 +1144,24 @@ cdef extern from "cuda.h":
11201144

11211145
ctypedef CUexecAffinityParam_v1 CUexecAffinityParam
11221146

1147+
cdef enum CUcigDataType_enum:
1148+
CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = 1
1149+
1150+
ctypedef CUcigDataType_enum CUcigDataType
1151+
1152+
cdef struct CUctxCigParam_st:
1153+
CUcigDataType sharedDataType
1154+
void* sharedData
1155+
1156+
ctypedef CUctxCigParam_st CUctxCigParam
1157+
1158+
cdef struct CUctxCreateParams_st:
1159+
CUexecAffinityParam* execAffinityParams
1160+
int numExecAffinityParams
1161+
CUctxCigParam* cigParams
1162+
1163+
ctypedef CUctxCreateParams_st CUctxCreateParams
1164+
11231165
cdef enum CUlibraryOption_enum:
11241166
CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = 0
11251167
CU_LIBRARY_BINARY_IS_PRESERVED = 1
@@ -2123,13 +2165,19 @@ cdef extern from "cuda.h":
21232165
CU_COREDUMP_ENABLE_USER_TRIGGER = 4
21242166
CU_COREDUMP_FILE = 5
21252167
CU_COREDUMP_PIPE = 6
2126-
CU_COREDUMP_MAX = 7
2168+
CU_COREDUMP_GENERATION_FLAGS = 7
2169+
CU_COREDUMP_MAX = 8
21272170

21282171
ctypedef CUcoredumpSettings_enum CUcoredumpSettings
21292172

2130-
cdef struct CUgreenCtx_st:
2131-
pass
2132-
ctypedef CUgreenCtx_st* CUgreenCtx
2173+
cdef enum CUCoredumpGenerationFlags:
2174+
CU_COREDUMP_DEFAULT_FLAGS = 0
2175+
CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = 1
2176+
CU_COREDUMP_SKIP_GLOBAL_MEMORY = 2
2177+
CU_COREDUMP_SKIP_SHARED_MEMORY = 4
2178+
CU_COREDUMP_SKIP_LOCAL_MEMORY = 8
2179+
CU_COREDUMP_LIGHTWEIGHT_FLAGS = 15
2180+
CU_COREDUMP_SKIP_ABORT = 16
21332181

21342182
cdef struct CUdevResourceDesc_st:
21352183
pass
@@ -2138,6 +2186,10 @@ cdef extern from "cuda.h":
21382186
ctypedef enum CUgreenCtxCreate_flags:
21392187
CU_GREEN_CTX_DEFAULT_STREAM = 1
21402188

2189+
ctypedef enum CUdevSmResourceSplit_flags:
2190+
CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = 1
2191+
CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = 2
2192+
21412193
ctypedef enum CUdevResourceType:
21422194
CU_DEV_RESOURCE_TYPE_INVALID = 0
21432195
CU_DEV_RESOURCE_TYPE_SM = 1
@@ -2504,6 +2556,11 @@ cdef CUresult cuCtxCreate(CUcontext* pctx, unsigned int flags, CUdevice dev) exc
25042556
cdef CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
25052557
{{endif}}
25062558

2559+
{{if 'cuCtxCreate_v4' in found_functions}}
2560+
2561+
cdef CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
2562+
{{endif}}
2563+
25072564
{{if 'cuCtxDestroy_v2' in found_functions}}
25082565

25092566
cdef CUresult cuCtxDestroy(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -2594,6 +2651,16 @@ cdef CUresult cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil
25942651
cdef CUresult cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil
25952652
{{endif}}
25962653

2654+
{{if 'cuCtxRecordEvent' in found_functions}}
2655+
2656+
cdef CUresult cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
2657+
{{endif}}
2658+
2659+
{{if 'cuCtxWaitEvent' in found_functions}}
2660+
2661+
cdef CUresult cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
2662+
{{endif}}
2663+
25972664
{{if 'cuCtxAttach' in found_functions}}
25982665

25992666
cdef CUresult cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -2739,6 +2806,11 @@ cdef CUresult cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA
27392806
cdef CUresult cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
27402807
{{endif}}
27412808

2809+
{{if 'cuKernelGetLibrary' in found_functions}}
2810+
2811+
cdef CUresult cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
2812+
{{endif}}
2813+
27422814
{{if 'cuLibraryGetGlobal' in found_functions}}
27432815

27442816
cdef CUresult cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -3384,6 +3456,11 @@ cdef CUresult cuStreamGetId(CUstream hStream, unsigned long long* streamId) exce
33843456
cdef CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
33853457
{{endif}}
33863458

3459+
{{if 'cuStreamGetCtx_v2' in found_functions}}
3460+
3461+
cdef CUresult cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil
3462+
{{endif}}
3463+
33873464
{{if 'cuStreamWaitEvent' in found_functions}}
33883465

33893466
cdef CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
@@ -4509,6 +4586,11 @@ cdef CUresult cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_
45094586
cdef CUresult cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil
45104587
{{endif}}
45114588

4589+
{{if 'cuGreenCtxStreamCreate' in found_functions}}
4590+
4591+
cdef CUresult cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
4592+
{{endif}}
4593+
45124594
{{if 'cuProfilerStart' in found_functions}}
45134595

45144596
cdef CUresult cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil
@@ -4614,7 +4696,7 @@ cdef CUresult cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResou
46144696
cdef CUresult cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
46154697
{{endif}}
46164698

4617-
cdef enum: CUDA_VERSION = 12040
4699+
cdef enum: CUDA_VERSION = 12050
46184700

46194701
cdef enum: CU_IPC_HANDLE_SIZE = 64
46204702

@@ -4648,6 +4730,8 @@ cdef enum: CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
46484730

46494731
cdef enum: CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13
46504732

4733+
cdef enum: CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14
4734+
46514735
cdef enum: CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1
46524736

46534737
cdef enum: CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
@@ -4710,6 +4794,8 @@ cdef enum: CUDA_ARRAY3D_SPARSE = 64
47104794

47114795
cdef enum: CUDA_ARRAY3D_DEFERRED_MAPPING = 128
47124796

4797+
cdef enum: CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = 256
4798+
47134799
cdef enum: CU_TRSA_OVERRIDE_FORMAT = 1
47144800

47154801
cdef enum: CU_TRSF_READ_AS_INTEGER = 1

0 commit comments

Comments
 (0)