Skip to content

Commit b99499f

Browse files
npmiller authored and kbenzie committed
Create stream vectors in queue constructor (#17823)
This patch simplifies the stream queue constructor by using in-class member initialization where appropriate, and it moves the initialization of the stream vectors into the constructors.
1 parent 8ad50b9 commit b99499f

File tree

3 files changed

+35
-59
lines changed

3 files changed

+35
-59
lines changed

source/adapters/cuda/queue.cpp

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,8 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,
8686
}
8787
}
8888

89-
std::vector<CUstream> ComputeCuStreams(
90-
IsOutOfOrder ? ur_queue_handle_t_::DefaultNumComputeStreams : 1);
91-
std::vector<CUstream> TransferCuStreams(
92-
IsOutOfOrder ? ur_queue_handle_t_::DefaultNumTransferStreams : 0);
93-
9489
Queue = std::unique_ptr<ur_queue_handle_t_>(new ur_queue_handle_t_{
95-
std::move(ComputeCuStreams), std::move(TransferCuStreams), hContext,
96-
hDevice, Flags, URFlags, Priority});
90+
IsOutOfOrder, hContext, hDevice, Flags, URFlags, Priority});
9791

9892
*phQueue = Queue.release();
9993

@@ -205,23 +199,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
205199
else
206200
die("Unknown cuda stream");
207201

208-
std::vector<CUstream> ComputeCuStreams(1, CuStream);
209-
std::vector<CUstream> TransferCuStreams(0);
210-
211202
auto isNativeHandleOwned =
212203
pProperties ? pProperties->isNativeHandleOwned : false;
213204

214-
// Create queue and set num_compute_streams to 1, as computeCuStreams has
215-
// valid stream
216-
*phQueue = new ur_queue_handle_t_{std::move(ComputeCuStreams),
217-
std::move(TransferCuStreams),
218-
hContext,
219-
hDevice,
220-
CuFlags,
221-
Flags,
222-
/*priority*/ 0,
223-
/*backend_owns*/ isNativeHandleOwned};
224-
(*phQueue)->NumComputeStreams = 1;
205+
// Create queue from a native stream
206+
*phQueue = new ur_queue_handle_t_{CuStream, hContext, hDevice,
207+
CuFlags, Flags, isNativeHandleOwned};
225208

226209
return UR_RESULT_SUCCESS;
227210
}

source/adapters/hip/queue.cpp

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,8 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,
7575
pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
7676
: false;
7777

78-
std::vector<hipStream_t> ComputeHipStreams(
79-
IsOutOfOrder ? ur_queue_handle_t_::DefaultNumComputeStreams : 1);
80-
std::vector<hipStream_t> TransferHipStreams(
81-
IsOutOfOrder ? ur_queue_handle_t_::DefaultNumTransferStreams : 0);
82-
8378
QueueImpl = std::unique_ptr<ur_queue_handle_t_>(new ur_queue_handle_t_{
84-
std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext,
85-
hDevice, Flags, URFlags, Priority});
79+
IsOutOfOrder, hContext, hDevice, Flags, URFlags, Priority});
8680

8781
*phQueue = QueueImpl.release();
8882

@@ -239,23 +233,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
239233
else
240234
die("Unknown hip stream");
241235

242-
std::vector<hipStream_t> ComputeHIPStreams(1, HIPStream);
243-
std::vector<hipStream_t> TransferHIPStreams(0);
244-
245236
auto isNativeHandleOwned =
246237
pProperties ? pProperties->isNativeHandleOwned : false;
247238

248239
// Create queue and set num_compute_streams to 1, as computeHIPStreams has
249240
// valid stream
250-
*phQueue = new ur_queue_handle_t_{std::move(ComputeHIPStreams),
251-
std::move(TransferHIPStreams),
252-
hContext,
253-
hDevice,
254-
HIPFlags,
255-
Flags,
256-
/*priority*/ 0,
257-
/*backend_owns*/ isNativeHandleOwned};
258-
(*phQueue)->NumComputeStreams = 1;
241+
*phQueue = new ur_queue_handle_t_{HIPStream, hContext, hDevice,
242+
HIPFlags, Flags, isNativeHandleOwned};
259243

260244
return UR_RESULT_SUCCESS;
261245
}

source/common/cuda-hip/stream_queue.hpp

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@ template <typename ST, int CS, int TS> struct stream_queue_t {
4242
std::vector<bool> TransferAppliedBarrier;
4343
ur_context_handle_t_ *Context;
4444
ur_device_handle_t_ *Device;
45-
std::atomic_uint32_t RefCount;
46-
std::atomic_uint32_t EventCount;
47-
std::atomic_uint32_t ComputeStreamIndex;
48-
std::atomic_uint32_t TransferStreamIndex;
49-
unsigned int NumComputeStreams;
50-
unsigned int NumTransferStreams;
51-
unsigned int LastSyncComputeStreams;
52-
unsigned int LastSyncTransferStreams;
45+
std::atomic_uint32_t RefCount{1};
46+
std::atomic_uint32_t EventCount{0};
47+
std::atomic_uint32_t ComputeStreamIndex{0};
48+
std::atomic_uint32_t TransferStreamIndex{0};
49+
unsigned int NumComputeStreams{0};
50+
unsigned int NumTransferStreams{0};
51+
unsigned int LastSyncComputeStreams{0};
52+
unsigned int LastSyncTransferStreams{0};
5353
unsigned int Flags;
5454
ur_queue_flags_t URFlags;
5555
int Priority;
@@ -62,20 +62,29 @@ template <typename ST, int CS, int TS> struct stream_queue_t {
6262
std::mutex BarrierMutex;
6363
bool HasOwnership;
6464

65-
stream_queue_t(std::vector<native_type> &&ComputeStreams,
66-
std::vector<native_type> &&TransferStreams,
67-
ur_context_handle_t_ *Context, ur_device_handle_t_ *Device,
68-
unsigned int Flags, ur_queue_flags_t URFlags, int Priority,
69-
bool BackendOwns = true)
70-
: ComputeStreams{std::move(ComputeStreams)},
71-
TransferStreams{std::move(TransferStreams)},
65+
stream_queue_t(bool IsOutOfOrder, ur_context_handle_t_ *Context,
66+
ur_device_handle_t_ *Device, unsigned int Flags,
67+
ur_queue_flags_t URFlags, int Priority)
68+
: ComputeStreams(IsOutOfOrder ? DefaultNumComputeStreams : 1),
69+
TransferStreams(IsOutOfOrder ? DefaultNumTransferStreams : 0),
7270
DelayCompute(this->ComputeStreams.size(), false),
7371
ComputeAppliedBarrier(this->ComputeStreams.size()),
7472
TransferAppliedBarrier(this->TransferStreams.size()), Context{Context},
75-
Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIndex{0},
76-
TransferStreamIndex{0}, NumComputeStreams{0}, NumTransferStreams{0},
77-
LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags),
78-
URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} {
73+
Device{Device}, Flags(Flags), URFlags(URFlags), Priority(Priority),
74+
HasOwnership{true} {
75+
urContextRetain(Context);
76+
}
77+
78+
// Create a queue from a native handle
79+
stream_queue_t(native_type stream, ur_context_handle_t_ *Context,
80+
ur_device_handle_t_ *Device, unsigned int Flags,
81+
ur_queue_flags_t URFlags, bool BackendOwns)
82+
: ComputeStreams(1, stream), TransferStreams(0),
83+
DelayCompute(this->ComputeStreams.size(), false),
84+
ComputeAppliedBarrier(this->ComputeStreams.size()),
85+
TransferAppliedBarrier(this->TransferStreams.size()), Context{Context},
86+
Device{Device}, NumComputeStreams{1}, Flags(Flags), URFlags(URFlags),
87+
Priority(0), HasOwnership{BackendOwns} {
7988
urContextRetain(Context);
8089
}
8190

0 commit comments

Comments
 (0)