@@ -29,6 +29,7 @@ ctypedef cudaStreamCallbackData_st cudaStreamCallbackData
 cdef class cudaPythonGlobal:
     def __cinit__(self):
         self._cudaPythonInit = False
+        self._cudaPythonGlobalInit = False
         self._numDevices = 0
         self._driverDevice = NULL
         self._driverContext = NULL
@@ -54,25 +55,10 @@ cdef class cudaPythonGlobal:
         if self._numDevices > 0:
             ccuda._cuCtxSetCurrent(self._driverContext[0])
             return cudaSuccess
-        err = ccuda._cuInit(0)
-        if err != ccuda.cudaError_enum.CUDA_SUCCESS:
-            return <cudaError_t>err
-        err = ccuda._cuDeviceGetCount(&self._numDevices)
-        if err != ccuda.cudaError_enum.CUDA_SUCCESS:
-            return cudaErrorInitializationError
 
-        self._driverDevice = <ccuda.CUdevice *>calloc(self._numDevices, sizeof(ccuda.CUdevice))
-        if self._driverDevice == NULL:
-            return cudaErrorMemoryAllocation
-        self._driverContext = <ccuda.CUcontext *>calloc(self._numDevices, sizeof(ccuda.CUcontext))
-        if self._driverContext == NULL:
-            return cudaErrorMemoryAllocation
-        self._deviceProperties = <cudaDeviceProp *>calloc(self._numDevices, sizeof(cudaDeviceProp))
-        if self._deviceProperties == NULL:
-            return cudaErrorMemoryAllocation
-        self._deviceInit = <bool *>calloc(self._numDevices, sizeof(bool))
-        if self._deviceInit == NULL:
-            return cudaErrorMemoryAllocation
+        err_rt = self.lazyInitGlobal()
+        if err_rt != cudaSuccess:
+            return err_rt
 
         err_rt = self.lazyInitDevice(0)
         if err_rt != cudaSuccess:
@@ -83,6 +69,44 @@ cdef class cudaPythonGlobal:
             return cudaErrorInitializationError
         self._cudaPythonInit = True
 
+    cdef cudaError_t lazyInitGlobal(self) nogil:
+        cdef cudaError_t err = cudaSuccess
+        if self._cudaPythonGlobalInit:
+            return err
+
+        err = <cudaError_t>ccuda._cuInit(0)
+        if err != cudaSuccess:
+            return err
+        err = <cudaError_t>ccuda._cuDeviceGetCount(&self._numDevices)
+        if err != cudaSuccess:
+            return cudaErrorInitializationError
+
+        self._driverDevice = <ccuda.CUdevice *>calloc(self._numDevices, sizeof(ccuda.CUdevice))
+        if err != cudaSuccess or self._driverDevice == NULL:
+            err = cudaErrorMemoryAllocation
+        self._driverContext = <ccuda.CUcontext *>calloc(self._numDevices, sizeof(ccuda.CUcontext))
+        if err != cudaSuccess or self._driverContext == NULL:
+            err = cudaErrorMemoryAllocation
+        self._deviceProperties = <cudaDeviceProp *>calloc(self._numDevices, sizeof(cudaDeviceProp))
+        if err != cudaSuccess or self._deviceProperties == NULL:
+            err = cudaErrorMemoryAllocation
+        self._deviceInit = <bool *>calloc(self._numDevices, sizeof(bool))
+        if err != cudaSuccess or self._deviceInit == NULL:
+            err = cudaErrorMemoryAllocation
+
+        if err != cudaSuccess:
+            if self._deviceInit is not NULL:
+                free(self._deviceInit)
+            if self._deviceProperties is not NULL:
+                free(self._deviceProperties)
+            if self._driverContext is not NULL:
+                free(self._driverContext)
+            if self._driverDevice is not NULL:
+                free(self._driverDevice)
+        else:
+            self._cudaPythonGlobalInit = True
+        return err
+
     cdef cudaError_t lazyInitDevice(self, int deviceOrdinal) nogil:
         if self._deviceInit[deviceOrdinal]:
             return cudaSuccess
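
For readers less familiar with the pattern, below is a minimal, standalone sketch of the lazy-initialization scheme the new lazyInitGlobal follows: check a guard flag, attempt every per-device allocation, and free whatever did get allocated before reporting failure, so a later retry starts from a clean state. Everything in the sketch (GlobalState, lazy_init_global, DeviceProp) is illustrative only and not part of the cuda-python sources.

# lazy_init_sketch.pyx -- illustrative only; names are not from cuda-python.
from libc.stdlib cimport calloc, free

cdef struct DeviceProp:
    int major
    int minor

cdef class GlobalState:
    cdef bint _globalInit          # guard flag, mirrors _cudaPythonGlobalInit
    cdef int _numDevices
    cdef DeviceProp* _props
    cdef bint* _deviceInit

    def __cinit__(self):
        self._globalInit = False
        self._numDevices = 0
        self._props = NULL
        self._deviceInit = NULL

    cdef int lazy_init_global(self, int numDevices) nogil:
        # Returns 0 on success, -1 on allocation failure
        # (stand-ins for cudaSuccess / cudaErrorMemoryAllocation).
        if self._globalInit:
            return 0
        self._numDevices = numDevices
        # Attempt every allocation before deciding the outcome.
        self._props = <DeviceProp*>calloc(numDevices, sizeof(DeviceProp))
        self._deviceInit = <bint*>calloc(numDevices, sizeof(bint))
        if self._props == NULL or self._deviceInit == NULL:
            # Partial failure: release whatever was allocated so a retry
            # starts from a clean slate.
            if self._props != NULL:
                free(self._props)
                self._props = NULL
            if self._deviceInit != NULL:
                free(self._deviceInit)
                self._deviceInit = NULL
            return -1
        self._globalInit = True
        return 0

In this sketch, as in the diff above, the guard flag is only set once every allocation has succeeded, which is what allows the caller to invoke the global-init step unconditionally on every entry.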