8
8
IF UNAME_SYSNAME == " Windows" :
9
9
import win32api
10
10
import struct
11
+ from pywintypes import error
11
12
ELSE :
12
13
cimport cuda._lib.dlfcn as dlfcn
14
+ import sys
15
+ cimport cuda._cuda.loader as loader
13
16
cdef bint __cuPythonInit = False
14
17
cdef void * __cuGetErrorString = NULL
15
18
cdef void * __cuGetErrorName = NULL
@@ -28,6 +31,7 @@ cdef void *__cuDeviceGetNvSciSyncAttributes = NULL
28
31
cdef void * __cuDeviceSetMemPool = NULL
29
32
cdef void * __cuDeviceGetMemPool = NULL
30
33
cdef void * __cuDeviceGetDefaultMemPool = NULL
34
+ cdef void * __cuFlushGPUDirectRDMAWrites = NULL
31
35
cdef void * __cuDeviceGetProperties = NULL
32
36
cdef void * __cuDeviceComputeCapability = NULL
33
37
cdef void * __cuDevicePrimaryCtxRetain = NULL
@@ -360,29 +364,34 @@ cdef void *__cuGraphicsMapResources = NULL
360
364
cdef void * __cuGraphicsUnmapResources = NULL
361
365
cdef void * __cuGetProcAddress = NULL
362
366
cdef void * __cuGetExportTable = NULL
363
- cdef void * __cuFlushGPUDirectRDMAWrites = NULL
364
367
365
368
cdef int cuPythonInit() nogil except - 1 :
366
369
global __cuPythonInit
367
370
if __cuPythonInit:
368
371
return 0
369
372
__cuPythonInit = True
370
- IF UNAME_SYSNAME == " Windows" :
371
- LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
372
- with gil:
373
- if 8 * struct .calcsize(" P" ) == 32 :
374
- try :
375
- handle = win32api.LoadLibraryEx(' nvcuda32.dll' , 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
376
- except :
377
- handle = win32api.LoadLibraryEx(' nvcuda.dll' , 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
378
- else :
379
- handle = win32api.LoadLibraryEx(' nvcuda.dll' , 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
380
- ELSE :
381
- handle = dlfcn.dlopen(' libcuda.so' , dlfcn.RTLD_NOW)
382
- if (handle == NULL ):
383
- with gil:
373
+ cdef char libPath[260 ]
374
+ libPath[0 ] = 0
375
+ with gil:
376
+ status = loader.getCUDALibraryPath(libPath, sys.maxsize > 2 ** 32 )
377
+ if status == 0 and len (libPath) != 0 :
378
+ path = libPath.decode(' utf-8' )
379
+ else :
380
+ IF UNAME_SYSNAME == " Windows" :
381
+ path = ' nvcuda.dll'
382
+ ELSE :
383
+ path = ' libcuda.so'
384
+
385
+ IF UNAME_SYSNAME == " Windows" :
386
+ LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
387
+ try :
388
+ handle = win32api.LoadLibraryEx(path, 0 , LOAD_LIBRARY_SEARCH_SYSTEM32)
389
+ except error as e:
390
+ raise RuntimeError (' Failed to LoadLibraryEx ' + path)
391
+ ELSE :
392
+ handle = dlfcn.dlopen(bytes(path, encoding = ' utf-8' ), dlfcn.RTLD_NOW)
393
+ if (handle == NULL ):
384
394
raise RuntimeError (' Failed to dlopen libcuda.so' )
385
-
386
395
# All Globals
387
396
global __cuGetErrorString
388
397
global __cuGetErrorName
@@ -401,6 +410,7 @@ cdef int cuPythonInit() nogil except -1:
401
410
global __cuDeviceSetMemPool
402
411
global __cuDeviceGetMemPool
403
412
global __cuDeviceGetDefaultMemPool
413
+ global __cuFlushGPUDirectRDMAWrites
404
414
global __cuDeviceGetProperties
405
415
global __cuDeviceComputeCapability
406
416
global __cuDevicePrimaryCtxRetain
@@ -733,7 +743,6 @@ cdef int cuPythonInit() nogil except -1:
733
743
global __cuGraphicsUnmapResources
734
744
global __cuGetProcAddress
735
745
global __cuGetExportTable
736
- global __cuFlushGPUDirectRDMAWrites
737
746
# Get latest __cuGetProcAddress
738
747
IF UNAME_SYSNAME == " Windows" :
739
748
with gil:
@@ -763,6 +772,7 @@ cdef int cuPythonInit() nogil except -1:
763
772
_cuGetProcAddress(' cuDeviceSetMemPool' , & __cuDeviceSetMemPool, 11020 , 0 )
764
773
_cuGetProcAddress(' cuDeviceGetMemPool' , & __cuDeviceGetMemPool, 11020 , 0 )
765
774
_cuGetProcAddress(' cuDeviceGetDefaultMemPool' , & __cuDeviceGetDefaultMemPool, 11020 , 0 )
775
+ _cuGetProcAddress(' cuFlushGPUDirectRDMAWrites' , & __cuFlushGPUDirectRDMAWrites, 11030 , 0 )
766
776
_cuGetProcAddress(' cuDeviceGetProperties' , & __cuDeviceGetProperties, 2000 , 0 )
767
777
_cuGetProcAddress(' cuDeviceComputeCapability' , & __cuDeviceComputeCapability, 2000 , 0 )
768
778
_cuGetProcAddress(' cuDevicePrimaryCtxRetain' , & __cuDevicePrimaryCtxRetain, 7000 , 0 )
@@ -1095,7 +1105,6 @@ cdef int cuPythonInit() nogil except -1:
1095
1105
_cuGetProcAddress(' cuGraphicsUnmapResources' , & __cuGraphicsUnmapResources, 3000 , 0 )
1096
1106
_cuGetProcAddress(' cuGetProcAddress' , & __cuGetProcAddress, 11030 , 0 )
1097
1107
_cuGetProcAddress(' cuGetExportTable' , & __cuGetExportTable, 3000 , 0 )
1098
- _cuGetProcAddress(' cuFlushGPUDirectRDMAWrites' , & __cuFlushGPUDirectRDMAWrites, 11030 , 0 )
1099
1108
return 0
1100
1109
# dlsym calls
1101
1110
IF UNAME_SYSNAME == " Windows" :
@@ -1168,6 +1177,10 @@ cdef int cuPythonInit() nogil except -1:
1168
1177
__cuDeviceGetDefaultMemPool = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuDeviceGetDefaultMemPool' )
1169
1178
except :
1170
1179
pass
1180
+ try :
1181
+ __cuFlushGPUDirectRDMAWrites = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuFlushGPUDirectRDMAWrites' )
1182
+ except :
1183
+ pass
1171
1184
try :
1172
1185
__cuDeviceGetProperties = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuDeviceGetProperties' )
1173
1186
except :
@@ -2496,10 +2509,6 @@ cdef int cuPythonInit() nogil except -1:
2496
2509
__cuGetExportTable = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuGetExportTable' )
2497
2510
except :
2498
2511
pass
2499
- try :
2500
- __cuFlushGPUDirectRDMAWrites = < void * >< unsigned long long > win32api.GetProcAddress(handle, ' cuFlushGPUDirectRDMAWrites' )
2501
- except :
2502
- pass
2503
2512
ELSE :
2504
2513
__cuGetErrorString = dlfcn.dlsym(handle, ' cuGetErrorString' )
2505
2514
__cuGetErrorName = dlfcn.dlsym(handle, ' cuGetErrorName' )
@@ -2518,6 +2527,7 @@ cdef int cuPythonInit() nogil except -1:
2518
2527
__cuDeviceSetMemPool = dlfcn.dlsym(handle, ' cuDeviceSetMemPool' )
2519
2528
__cuDeviceGetMemPool = dlfcn.dlsym(handle, ' cuDeviceGetMemPool' )
2520
2529
__cuDeviceGetDefaultMemPool = dlfcn.dlsym(handle, ' cuDeviceGetDefaultMemPool' )
2530
+ __cuFlushGPUDirectRDMAWrites = dlfcn.dlsym(handle, ' cuFlushGPUDirectRDMAWrites' )
2521
2531
__cuDeviceGetProperties = dlfcn.dlsym(handle, ' cuDeviceGetProperties' )
2522
2532
__cuDeviceComputeCapability = dlfcn.dlsym(handle, ' cuDeviceComputeCapability' )
2523
2533
__cuDevicePrimaryCtxRetain = dlfcn.dlsym(handle, ' cuDevicePrimaryCtxRetain' )
@@ -2850,7 +2860,6 @@ cdef int cuPythonInit() nogil except -1:
2850
2860
__cuGraphicsUnmapResources = dlfcn.dlsym(handle, ' cuGraphicsUnmapResources' )
2851
2861
__cuGetProcAddress = dlfcn.dlsym(handle, ' cuGetProcAddress' )
2852
2862
__cuGetExportTable = dlfcn.dlsym(handle, ' cuGetExportTable' )
2853
- __cuFlushGPUDirectRDMAWrites = dlfcn.dlsym(handle, ' cuFlushGPUDirectRDMAWrites' )
2854
2863
2855
2864
cdef CUresult _cuGetErrorString(CUresult error, const char ** pStr) nogil except ?CUDA_ERROR_NOT_FOUND:
2856
2865
global __cuGetErrorString
@@ -3005,6 +3014,15 @@ cdef CUresult _cuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev) n
3005
3014
err = (< CUresult (* )(CUmemoryPool* , CUdevice) nogil> __cuDeviceGetDefaultMemPool)(pool_out, dev)
3006
3015
return err
3007
3016
3017
cdef CUresult _cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) nogil except ?CUDA_ERROR_NOT_FOUND:
    # Forwards (target, scope) to the driver entry point whose address is held
    # in the module-level pointer __cuFlushGPUDirectRDMAWrites and returns the
    # driver's CUresult unchanged.
    #
    # Raises RuntimeError when the pointer is still NULL after initialization,
    # i.e. the symbol was not resolved from the loaded driver library.
    global __cuFlushGPUDirectRDMAWrites

    # Runs the one-time symbol loading; cuPythonInit() returns immediately when
    # it has already been executed.
    cuPythonInit()

    if __cuFlushGPUDirectRDMAWrites == NULL:
        # This function is declared nogil, so the GIL has to be re-acquired
        # before a Python exception can be raised.
        with gil:
            raise RuntimeError('Function "cuFlushGPUDirectRDMAWrites" not found')

    # Cast the untyped pointer to the matching C signature and invoke it
    # without holding the GIL.
    err = (<CUresult (*)(CUflushGPUDirectRDMAWritesTarget, CUflushGPUDirectRDMAWritesScope) nogil> __cuFlushGPUDirectRDMAWrites)(target, scope)
    return err
3025
+
3008
3026
cdef CUresult _cuDeviceGetProperties(CUdevprop* prop, CUdevice dev) nogil except ?CUDA_ERROR_NOT_FOUND:
3009
3027
global __cuDeviceGetProperties
3010
3028
cuPythonInit()
@@ -5992,12 +6010,3 @@ cdef CUresult _cuGetExportTable(const void** ppExportTable, const CUuuid* pExpor
5992
6010
raise RuntimeError (' Function "cuGetExportTable" not found' )
5993
6011
err = (< CUresult (* )(const void ** , const CUuuid* ) nogil> __cuGetExportTable)(ppExportTable, pExportTableId)
5994
6012
return err
5995
-
5996
- cdef CUresult _cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) nogil except ?CUDA_ERROR_NOT_FOUND:
5997
- global __cuFlushGPUDirectRDMAWrites
5998
- cuPythonInit()
5999
- if __cuFlushGPUDirectRDMAWrites == NULL :
6000
- with gil:
6001
- raise RuntimeError (' Function "cuFlushGPUDirectRDMAWrites" not found' )
6002
- err = (< CUresult (* )(CUflushGPUDirectRDMAWritesTarget, CUflushGPUDirectRDMAWritesScope) nogil> __cuFlushGPUDirectRDMAWrites)(target, scope)
6003
- return err
0 commit comments