@@ -203,10 +203,17 @@ cdef enum CUstreamBatchMemOpType_enum:
203
203
CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2
204
204
CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4
205
205
CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5
206
+ CU_STREAM_MEM_OP_BARRIER = 6
206
207
CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3
207
208
208
209
ctypedef CUstreamBatchMemOpType_enum CUstreamBatchMemOpType
209
210
211
+ cdef enum CUstreamMemoryBarrier_flags_enum:
212
+ CU_STREAM_MEMORY_BARRIER_TYPE_SYS = 0
213
+ CU_STREAM_MEMORY_BARRIER_TYPE_GPU = 1
214
+
215
+ ctypedef CUstreamMemoryBarrier_flags_enum CUstreamMemoryBarrier_flags
216
+
210
217
cdef struct CUstreamMemOpWaitValueParams_st:
211
218
CUstreamBatchMemOpType operation
212
219
CUdeviceptr address
@@ -225,17 +232,30 @@ cdef struct CUstreamMemOpFlushRemoteWritesParams_st:
225
232
CUstreamBatchMemOpType operation
226
233
unsigned int flags
227
234
235
+ cdef struct CUstreamMemOpMemoryBarrierParams_st:
236
+ CUstreamBatchMemOpType operation
237
+ unsigned int flags
238
+
228
239
cdef union CUstreamBatchMemOpParams_union:
229
240
CUstreamBatchMemOpType operation
230
241
CUstreamMemOpWaitValueParams_st waitValue
231
242
CUstreamMemOpWriteValueParams_st writeValue
232
243
CUstreamMemOpFlushRemoteWritesParams_st flushRemoteWrites
244
+ CUstreamMemOpMemoryBarrierParams_st memoryBarrier
233
245
cuuint64_t pad[6 ]
234
246
235
247
ctypedef CUstreamBatchMemOpParams_union CUstreamBatchMemOpParams_v1
236
248
237
249
ctypedef CUstreamBatchMemOpParams_v1 CUstreamBatchMemOpParams
238
250
251
+ cdef struct CUDA_BATCH_MEM_OP_NODE_PARAMS_st:
252
+ CUcontext ctx
253
+ unsigned int count
254
+ CUstreamBatchMemOpParams* paramArray
255
+ unsigned int flags
256
+
257
+ ctypedef CUDA_BATCH_MEM_OP_NODE_PARAMS_st CUDA_BATCH_MEM_OP_NODE_PARAMS
258
+
239
259
cdef enum CUoccupancy_flags_enum:
240
260
CU_OCCUPANCY_DEFAULT = 0
241
261
CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 1
@@ -428,7 +448,10 @@ cdef enum CUdevice_attribute_enum:
428
448
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118
429
449
CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119
430
450
CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121
431
- CU_DEVICE_ATTRIBUTE_MAX = 122
451
+ CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2 = 122
452
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2 = 123
453
+ CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124
454
+ CU_DEVICE_ATTRIBUTE_MAX = 125
432
455
433
456
ctypedef CUdevice_attribute_enum CUdevice_attribute
434
457
@@ -466,6 +489,9 @@ cdef enum CUpointer_attribute_enum:
466
489
CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15
467
490
CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16
468
491
CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
492
+ CU_POINTER_ATTRIBUTE_MAPPING_SIZE = 18
493
+ CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = 19
494
+ CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = 20
469
495
470
496
ctypedef CUpointer_attribute_enum CUpointer_attribute
471
497
@@ -565,7 +591,12 @@ cdef enum CUjit_option_enum:
565
591
CU_JIT_PREC_DIV = 22
566
592
CU_JIT_PREC_SQRT = 23
567
593
CU_JIT_FMA = 24
568
- CU_JIT_NUM_OPTIONS = 25
594
+ CU_JIT_REFERENCED_KERNEL_NAMES = 25
595
+ CU_JIT_REFERENCED_KERNEL_COUNT = 26
596
+ CU_JIT_REFERENCED_VARIABLE_NAMES = 27
597
+ CU_JIT_REFERENCED_VARIABLE_COUNT = 28
598
+ CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES = 29
599
+ CU_JIT_NUM_OPTIONS = 30
569
600
570
601
ctypedef CUjit_option_enum CUjit_option
571
602
@@ -587,6 +618,7 @@ cdef enum CUjit_target_enum:
587
618
CU_TARGET_COMPUTE_75 = 75
588
619
CU_TARGET_COMPUTE_80 = 80
589
620
CU_TARGET_COMPUTE_86 = 86
621
+ CU_TARGET_COMPUTE_87 = 87
590
622
591
623
ctypedef CUjit_target_enum CUjit_target
592
624
@@ -734,6 +766,7 @@ cdef enum CUgraphNodeType_enum:
734
766
CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9
735
767
CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10
736
768
CU_GRAPH_NODE_TYPE_MEM_FREE = 11
769
+ CU_GRAPH_NODE_TYPE_BATCH_MEM_OP = 12
737
770
738
771
ctypedef CUgraphNodeType_enum CUgraphNodeType
739
772
@@ -748,12 +781,14 @@ ctypedef CUsynchronizationPolicy_enum CUsynchronizationPolicy
748
781
cdef enum CUkernelNodeAttrID_enum:
749
782
CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1
750
783
CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2
784
+ CU_KERNEL_NODE_ATTRIBUTE_PRIORITY = 8
751
785
752
786
ctypedef CUkernelNodeAttrID_enum CUkernelNodeAttrID
753
787
754
788
cdef union CUkernelNodeAttrValue_union:
755
789
CUaccessPolicyWindow accessPolicyWindow
756
790
int cooperative
791
+ int priority
757
792
758
793
ctypedef CUkernelNodeAttrValue_union CUkernelNodeAttrValue_v1
759
794
@@ -829,6 +864,7 @@ cdef enum cudaError_enum:
829
864
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7
830
865
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8
831
866
CUDA_ERROR_STUB_LIBRARY = 34
867
+ CUDA_ERROR_DEVICE_UNAVAILABLE = 46
832
868
CUDA_ERROR_NO_DEVICE = 100
833
869
CUDA_ERROR_INVALID_DEVICE = 101
834
870
CUDA_ERROR_DEVICE_NOT_LICENSED = 102
@@ -1388,6 +1424,12 @@ cdef enum CUmemAllocationGranularity_flags_enum:
1388
1424
1389
1425
ctypedef CUmemAllocationGranularity_flags_enum CUmemAllocationGranularity_flags
1390
1426
1427
+ cdef enum CUmemRangeHandleType_enum:
1428
+ CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD = 1
1429
+ CU_MEM_RANGE_HANDLE_TYPE_MAX = 2147483647
1430
+
1431
+ ctypedef CUmemRangeHandleType_enum CUmemRangeHandleType
1432
+
1391
1433
cdef enum CUarraySparseSubresourceType_enum:
1392
1434
CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0
1393
1435
CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1
@@ -1585,6 +1627,7 @@ cdef enum CUgraphDebugDot_flags_enum:
1585
1627
CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1024
1586
1628
CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 2048
1587
1629
CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 4096
1630
+ CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS = 8192
1588
1631
1589
1632
ctypedef CUgraphDebugDot_flags_enum CUgraphDebugDot_flags
1590
1633
@@ -1600,6 +1643,7 @@ ctypedef CUuserObjectRetain_flags_enum CUuserObjectRetain_flags
1600
1643
1601
1644
cdef enum CUgraphInstantiate_flags_enum:
1602
1645
CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1
1646
+ CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY = 8
1603
1647
1604
1648
ctypedef CUgraphInstantiate_flags_enum CUgraphInstantiate_flags
1605
1649
@@ -2017,6 +2061,16 @@ cdef CUresult cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t
2017
2061
2018
2062
cdef CUresult cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags) nogil except ?CUDA_ERROR_NOT_FOUND
2019
2063
2064
+ cdef CUresult cuStreamWaitValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) nogil except ?CUDA_ERROR_NOT_FOUND
2065
+
2066
+ cdef CUresult cuStreamWaitValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) nogil except ?CUDA_ERROR_NOT_FOUND
2067
+
2068
+ cdef CUresult cuStreamWriteValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) nogil except ?CUDA_ERROR_NOT_FOUND
2069
+
2070
+ cdef CUresult cuStreamWriteValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) nogil except ?CUDA_ERROR_NOT_FOUND
2071
+
2072
+ cdef CUresult cuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags) nogil except ?CUDA_ERROR_NOT_FOUND
2073
+
2020
2074
cdef CUresult cuFuncGetAttribute(int * pi, CUfunction_attribute attrib, CUfunction hfunc) nogil except ?CUDA_ERROR_NOT_FOUND
2021
2075
2022
2076
cdef CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) nogil except ?CUDA_ERROR_NOT_FOUND
@@ -2111,6 +2165,14 @@ cdef CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA
2111
2165
2112
2166
cdef CUresult cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) nogil except ?CUDA_ERROR_NOT_FOUND
2113
2167
2168
+ cdef CUresult cuGraphAddBatchMemOpNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) nogil except ?CUDA_ERROR_NOT_FOUND
2169
+
2170
+ cdef CUresult cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams_out) nogil except ?CUDA_ERROR_NOT_FOUND
2171
+
2172
+ cdef CUresult cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) nogil except ?CUDA_ERROR_NOT_FOUND
2173
+
2174
+ cdef CUresult cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) nogil except ?CUDA_ERROR_NOT_FOUND
2175
+
2114
2176
cdef CUresult cuGraphAddMemAllocNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS* nodeParams) nogil except ?CUDA_ERROR_NOT_FOUND
2115
2177
2116
2178
cdef CUresult cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS* params_out) nogil except ?CUDA_ERROR_NOT_FOUND
@@ -2309,6 +2371,16 @@ cdef CUresult cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* r
2309
2371
2310
2372
cdef CUresult cuGetProcAddress(const char * symbol, void ** pfn, int cudaVersion, cuuint64_t flags) nogil except ?CUDA_ERROR_NOT_FOUND
2311
2373
2374
+ cdef enum CUmoduleLoadingMode_enum:
2375
+ CU_MODULE_EAGER_LOADING = 1
2376
+ CU_MODULE_LAZY_LOADING = 2
2377
+
2378
+ ctypedef CUmoduleLoadingMode_enum CUmoduleLoadingMode
2379
+
2380
+ cdef CUresult cuModuleGetLoadingMode(CUmoduleLoadingMode* mode) nogil except ?CUDA_ERROR_NOT_FOUND
2381
+
2382
+ cdef CUresult cuMemGetHandleForAddressRange(void * handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags) nogil except ?CUDA_ERROR_NOT_FOUND
2383
+
2312
2384
cdef CUresult cuGetExportTable(const void ** ppExportTable, const CUuuid* pExportTableId) nogil except ?CUDA_ERROR_NOT_FOUND
2313
2385
2314
2386
cdef enum CUoutput_mode_enum:
@@ -2562,7 +2634,7 @@ cdef enum CUGLmap_flags_enum:
2562
2634
2563
2635
ctypedef CUGLmap_flags_enum CUGLmap_flags
2564
2636
2565
- cdef enum : CUDA_VERSION = 11060
2637
+ cdef enum : CUDA_VERSION = 11070
2566
2638
2567
2639
cdef enum : CU_IPC_HANDLE_SIZE = 64
2568
2640
@@ -2632,10 +2704,16 @@ cdef enum: CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = 32
2632
2704
2633
2705
cdef enum : CU_TRSF_SEAMLESS_CUBEMAP = 64
2634
2706
2707
+ cdef enum : CU_LAUNCH_PARAM_END_AS_INT = 0
2708
+
2635
2709
cdef enum : CU_LAUNCH_PARAM_END = 0
2636
2710
2711
+ cdef enum : CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT = 1
2712
+
2637
2713
cdef enum : CU_LAUNCH_PARAM_BUFFER_POINTER = 1
2638
2714
2715
+ cdef enum : CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT = 2
2716
+
2639
2717
cdef enum : CU_LAUNCH_PARAM_BUFFER_SIZE = 2
2640
2718
2641
2719
cdef enum : CU_PARAM_TR_DEFAULT = - 1
0 commit comments