File tree Expand file tree Collapse file tree 4 files changed +79
-5
lines changed Expand file tree Collapse file tree 4 files changed +79
-5
lines changed Original file line number Diff line number Diff line change @@ -4,6 +4,7 @@ message(STATUS "Including the PI API CUDA backend.")
4
4
# we only require the CUDA driver API to be used
5
5
# CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library.
6
6
7
+ find_package (Threads REQUIRED )
7
8
find_package (CUDA 10.0 REQUIRED )
8
9
9
10
add_library (cudadrv SHARED IMPORTED )
@@ -33,7 +34,7 @@ target_include_directories(pi_cuda PRIVATE "${sycl_inc_dir}")
33
34
34
35
target_include_directories (pi_cuda INTERFACE ${CUDA_INCLUDE_DIRS} )
35
36
36
- target_link_libraries (pi_cuda PUBLIC OpenCL-Headers cudadrv )
37
+ target_link_libraries (pi_cuda PUBLIC OpenCL-Headers cudadrv Threads::Threads )
37
38
38
39
target_link_libraries (sycl INTERFACE pi_cuda )
39
40
Original file line number Diff line number Diff line change @@ -97,6 +97,31 @@ pi_result cuda_piEventRetain(pi_event event);
97
97
98
98
} // extern "C"
99
99
100
+ void worker::execute () {
101
+ bool Terminate = false ;
102
+ while (!Terminate) {
103
+ std::unique_lock<std::mutex> lock (workQueueGateMutex_);
104
+ workQueueGate_.wait (lock);
105
+ while (!workQueue_.empty ()) {
106
+ work item = workQueue_.front ();
107
+ workQueue_.pop ();
108
+ switch (item.kind_ ) {
109
+ case work::kind::complete_event:
110
+ complete_event (static_cast <pi_event>(item.content_ ));
111
+ break ;
112
+ case work::kind::terminate:
113
+ Terminate = true ;
114
+ break ;
115
+ }
116
+ }
117
+ }
118
+ }
119
+
120
// Handles one `complete_event` work item on the worker thread: marks `event`
// as complete via set_event_complete() and then calls cuda_piEventRelease()
// on it. The order matters: the event is flagged before it is released.
// NOTE(review): the release presumably drops the reference that was taken
// when the event was recorded — confirm against _pi_event::record().
// The pi_result returned by cuda_piEventRelease() is not inspected.
+ void worker::complete_event (pi_event event) {
121
+ event->set_event_complete ();
122
+ cuda_piEventRelease (event);
123
+ }
124
+
100
125
_pi_event::_pi_event (pi_command_type type, pi_context context, pi_queue queue)
101
126
: commandType_{type}, refCount_{1 }, isCompleted_{false },
102
127
isRecorded_{false },
@@ -174,8 +199,9 @@ pi_result _pi_event::record() {
174
199
cuStream,
175
200
[](void *userData) {
176
201
pi_event event = reinterpret_cast <pi_event>(userData);
177
- event->set_event_complete ();
178
- cuda_piEventRelease (event);
202
+ pi_platform platform =
203
+ event->get_context ()->get_device ()->platform_ ;
204
+ platform->worker_ .enqueue_complete_event (event);
179
205
},
180
206
this ));
181
207
} catch (...) {
Original file line number Diff line number Diff line change 17
17
#include < array>
18
18
#include < atomic>
19
19
#include < cassert>
20
+ #include < condition_variable>
20
21
#include < cstring>
21
22
#include < cuda.h>
23
+ #include < functional>
22
24
#include < limits>
25
+ #include < mutex>
23
26
#include < numeric>
27
+ #include < queue>
24
28
#include < stdint.h>
25
29
#include < string>
30
+ #include < thread>
26
31
#include < vector>
27
- #include < functional>
28
- #include < mutex>
29
32
30
33
extern " C" {
31
34
@@ -45,8 +48,48 @@ pi_result cuda_piKernelRelease(pi_kernel);
45
48
46
49
}
47
50
51
+ class worker {
52
+ public:
53
+ worker ()
54
+ : workQueue_{}, workQueueGateMutex_{}, workQueueGate_{},
55
+ workerThread_{&worker::execute, this } {}
56
+
57
+ ~worker () {
58
+ enqueue_work (work{work::kind::terminate, nullptr });
59
+ workerThread_.join ();
60
+ }
61
+
62
+ void enqueue_complete_event (pi_event event) {
63
+ enqueue_work (work{work::kind::complete_event, event});
64
+ }
65
+
66
+ private:
67
+ struct work {
68
+ enum kind { complete_event, terminate } kind_;
69
+ void *content_;
70
+
71
+ work (kind k, void *c) : kind_{k}, content_{c} {}
72
+ };
73
+
74
+ void enqueue_work (work w) {
75
+ std::unique_lock<std::mutex> lock (workQueueGateMutex_);
76
+ workQueue_.push (w);
77
+ workQueueGate_.notify_one ();
78
+ }
79
+
80
+ void complete_event (pi_event event);
81
+
82
+ void execute ();
83
+
84
+ std::queue<work> workQueue_;
85
+ std::mutex workQueueGateMutex_;
86
+ std::condition_variable workQueueGate_;
87
+ std::thread workerThread_;
88
+ };
89
+
48
90
// Per-platform state of the CUDA backend.
//  - devices_: owning pointers to the platform's _pi_device objects.
//  - worker_:  background worker that the stream callback in
//              _pi_event::record() uses to complete events.
// Member order is significant: worker_ is declared after devices_, so it is
// destroyed first, and its destructor joins the worker thread before the
// devices are destroyed.
struct _pi_platform {
49
91
std::vector<std::unique_ptr<_pi_device>> devices_;
92
+ worker worker_;
50
93
};
51
94
52
95
struct _pi_device {
Original file line number Diff line number Diff line change @@ -27,12 +27,16 @@ function(add_sycl_rt_library LIB_NAME)
27
27
set_target_properties (${LIB_NAME} PROPERTIES LINK_DEPENDS ${linker_script} )
28
28
endif ()
29
29
30
+ find_package (Threads REQUIRED )
31
+ set_property (TARGET ${LIB_NAME} PROPERTY LINK_WHAT_YOU_USE TRUE )
32
+
30
33
target_include_directories (
31
34
${LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} "${sycl_inc_dir} " )
32
35
target_link_libraries (${LIB_NAME}
33
36
PRIVATE OpenCL::Headers
34
37
PRIVATE ${OpenCL_LIBRARIES}
35
38
PRIVATE ${CMAKE_DL_LIBS}
39
+ PRIVATE Threads::Threads
36
40
)
37
41
38
42
add_common_options (${LIB_NAME} )
You can’t perform that action at this time.
0 commit comments