Skip to content

Commit b205652

Browse files
authored
Merge branch 'adapters' into l0_usm_error_checking_2
2 parents f056f97 + be53fb3 commit b205652

File tree

12 files changed

+1282
-707
lines changed

12 files changed

+1282
-707
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 438 additions & 138 deletions
Large diffs are not rendered by default.

source/adapters/cuda/command_buffer.hpp

Lines changed: 217 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,222 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include <ur/ur.hpp>
12+
#include <ur_api.h>
1213

13-
/// Stub implementation of command-buffers for CUDA
14+
#include "context.hpp"
#include <cuda.h>

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <memory>
#include <unordered_map>
#include <utility>
1417

15-
struct ur_exp_command_buffer_handle_t_ {};
18+
// Translate a UR result code into the spelling of its enumerator.
//
// @param Result Result code to translate.
// @return Pointer to a string literal naming the enumerator; any code that has
// no entry in the switch below yields "UR_RESULT_ERROR_UNKNOWN".
static inline const char *getUrResultString(ur_result_t Result) {
  // Start from the fallback so that unlisted codes need no extra handling.
  const char *Name = "UR_RESULT_ERROR_UNKNOWN";

  switch (Result) {
  case UR_RESULT_SUCCESS: Name = "UR_RESULT_SUCCESS"; break;
  case UR_RESULT_ERROR_INVALID_OPERATION: Name = "UR_RESULT_ERROR_INVALID_OPERATION"; break;
  case UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: Name = "UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES"; break;
  case UR_RESULT_ERROR_INVALID_QUEUE: Name = "UR_RESULT_ERROR_INVALID_QUEUE"; break;
  case UR_RESULT_ERROR_INVALID_VALUE: Name = "UR_RESULT_ERROR_INVALID_VALUE"; break;
  case UR_RESULT_ERROR_INVALID_CONTEXT: Name = "UR_RESULT_ERROR_INVALID_CONTEXT"; break;
  case UR_RESULT_ERROR_INVALID_PLATFORM: Name = "UR_RESULT_ERROR_INVALID_PLATFORM"; break;
  case UR_RESULT_ERROR_INVALID_BINARY: Name = "UR_RESULT_ERROR_INVALID_BINARY"; break;
  case UR_RESULT_ERROR_INVALID_PROGRAM: Name = "UR_RESULT_ERROR_INVALID_PROGRAM"; break;
  case UR_RESULT_ERROR_INVALID_SAMPLER: Name = "UR_RESULT_ERROR_INVALID_SAMPLER"; break;
  case UR_RESULT_ERROR_INVALID_BUFFER_SIZE: Name = "UR_RESULT_ERROR_INVALID_BUFFER_SIZE"; break;
  case UR_RESULT_ERROR_INVALID_MEM_OBJECT: Name = "UR_RESULT_ERROR_INVALID_MEM_OBJECT"; break;
  case UR_RESULT_ERROR_INVALID_EVENT: Name = "UR_RESULT_ERROR_INVALID_EVENT"; break;
  case UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: Name = "UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST"; break;
  case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: Name = "UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET"; break;
  case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: Name = "UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE"; break;
  case UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE: Name = "UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE"; break;
  case UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: Name = "UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE"; break;
  case UR_RESULT_ERROR_DEVICE_NOT_FOUND: Name = "UR_RESULT_ERROR_DEVICE_NOT_FOUND"; break;
  case UR_RESULT_ERROR_INVALID_DEVICE: Name = "UR_RESULT_ERROR_INVALID_DEVICE"; break;
  case UR_RESULT_ERROR_DEVICE_LOST: Name = "UR_RESULT_ERROR_DEVICE_LOST"; break;
  case UR_RESULT_ERROR_DEVICE_REQUIRES_RESET: Name = "UR_RESULT_ERROR_DEVICE_REQUIRES_RESET"; break;
  case UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE: Name = "UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE"; break;
  case UR_RESULT_ERROR_DEVICE_PARTITION_FAILED: Name = "UR_RESULT_ERROR_DEVICE_PARTITION_FAILED"; break;
  case UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT: Name = "UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT"; break;
  case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE: Name = "UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE"; break;
  case UR_RESULT_ERROR_INVALID_WORK_DIMENSION: Name = "UR_RESULT_ERROR_INVALID_WORK_DIMENSION"; break;
  case UR_RESULT_ERROR_INVALID_KERNEL_ARGS: Name = "UR_RESULT_ERROR_INVALID_KERNEL_ARGS"; break;
  case UR_RESULT_ERROR_INVALID_KERNEL: Name = "UR_RESULT_ERROR_INVALID_KERNEL"; break;
  case UR_RESULT_ERROR_INVALID_KERNEL_NAME: Name = "UR_RESULT_ERROR_INVALID_KERNEL_NAME"; break;
  case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX: Name = "UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX"; break;
  case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE: Name = "UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE"; break;
  case UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE: Name = "UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE"; break;
  case UR_RESULT_ERROR_INVALID_IMAGE_SIZE: Name = "UR_RESULT_ERROR_INVALID_IMAGE_SIZE"; break;
  case UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: Name = "UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR"; break;
  case UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED: Name = "UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED"; break;
  case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: Name = "UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE"; break;
  case UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE: Name = "UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE"; break;
  case UR_RESULT_ERROR_UNINITIALIZED: Name = "UR_RESULT_ERROR_UNINITIALIZED"; break;
  case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: Name = "UR_RESULT_ERROR_OUT_OF_HOST_MEMORY"; break;
  case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY: Name = "UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY"; break;
  case UR_RESULT_ERROR_OUT_OF_RESOURCES: Name = "UR_RESULT_ERROR_OUT_OF_RESOURCES"; break;
  case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE: Name = "UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE"; break;
  case UR_RESULT_ERROR_PROGRAM_LINK_FAILURE: Name = "UR_RESULT_ERROR_PROGRAM_LINK_FAILURE"; break;
  case UR_RESULT_ERROR_UNSUPPORTED_VERSION: Name = "UR_RESULT_ERROR_UNSUPPORTED_VERSION"; break;
  case UR_RESULT_ERROR_UNSUPPORTED_FEATURE: Name = "UR_RESULT_ERROR_UNSUPPORTED_FEATURE"; break;
  case UR_RESULT_ERROR_INVALID_ARGUMENT: Name = "UR_RESULT_ERROR_INVALID_ARGUMENT"; break;
  case UR_RESULT_ERROR_INVALID_NULL_HANDLE: Name = "UR_RESULT_ERROR_INVALID_NULL_HANDLE"; break;
  case UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE: Name = "UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE"; break;
  case UR_RESULT_ERROR_INVALID_NULL_POINTER: Name = "UR_RESULT_ERROR_INVALID_NULL_POINTER"; break;
  case UR_RESULT_ERROR_INVALID_SIZE: Name = "UR_RESULT_ERROR_INVALID_SIZE"; break;
  case UR_RESULT_ERROR_UNSUPPORTED_SIZE: Name = "UR_RESULT_ERROR_UNSUPPORTED_SIZE"; break;
  case UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT: Name = "UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT"; break;
  case UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT: Name = "UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT"; break;
  case UR_RESULT_ERROR_INVALID_ENUMERATION: Name = "UR_RESULT_ERROR_INVALID_ENUMERATION"; break;
  case UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION: Name = "UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION"; break;
  case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT: Name = "UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT"; break;
  case UR_RESULT_ERROR_INVALID_NATIVE_BINARY: Name = "UR_RESULT_ERROR_INVALID_NATIVE_BINARY"; break;
  case UR_RESULT_ERROR_INVALID_GLOBAL_NAME: Name = "UR_RESULT_ERROR_INVALID_GLOBAL_NAME"; break;
  case UR_RESULT_ERROR_INVALID_FUNCTION_NAME: Name = "UR_RESULT_ERROR_INVALID_FUNCTION_NAME"; break;
  case UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION: Name = "UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION"; break;
  case UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION: Name = "UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION"; break;
  case UR_RESULT_ERROR_PROGRAM_UNLINKED: Name = "UR_RESULT_ERROR_PROGRAM_UNLINKED"; break;
  case UR_RESULT_ERROR_OVERLAPPING_REGIONS: Name = "UR_RESULT_ERROR_OVERLAPPING_REGIONS"; break;
  case UR_RESULT_ERROR_INVALID_HOST_PTR: Name = "UR_RESULT_ERROR_INVALID_HOST_PTR"; break;
  case UR_RESULT_ERROR_INVALID_USM_SIZE: Name = "UR_RESULT_ERROR_INVALID_USM_SIZE"; break;
  case UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE: Name = "UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE"; break;
  case UR_RESULT_ERROR_ADAPTER_SPECIFIC: Name = "UR_RESULT_ERROR_ADAPTER_SPECIFIC"; break;
  default: break;
  }

  return Name;
}
160+
161+
// Evaluate an internal UR call and store its result code in `Result`, which
// must be an assignable expression supplied by the caller. When PrintTrace is
// true, a "UR --->" line is written to stderr before the call and a "UR <---"
// line, including the textual result code, after it.
#define UR_CALL(Call, Result)                                                  \
  {                                                                            \
    if (PrintTrace) {                                                          \
      fprintf(stderr, "UR ---> %s\n", #Call);                                  \
    }                                                                          \
    Result = (Call);                                                           \
    if (PrintTrace) {                                                          \
      fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result));   \
    }                                                                          \
  }

// Trace an internal UR call whose result the caller does not need: the result
// code is captured in a block-local variable through UR_CALL and dropped when
// the block ends.
#define UR_TRACE(Call)                                                         \
  {                                                                            \
    ur_result_t Result;                                                        \
    UR_CALL(Call, Result);                                                     \
  }
177+
178+
struct ur_exp_command_buffer_handle_t_ {
179+
180+
ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext,
181+
ur_device_handle_t hDevice);
182+
183+
~ur_exp_command_buffer_handle_t_();
184+
185+
void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
186+
std::shared_ptr<CUgraphNode> CuNode) {
187+
SyncPoints[SyncPoint] = CuNode;
188+
NextSyncPoint++;
189+
}
190+
191+
ur_exp_command_buffer_sync_point_t GetNextSyncPoint() const {
192+
return NextSyncPoint;
193+
}
194+
195+
// Helper to register next sync point
196+
// @param CuNode Node to register as next sycn point
197+
// @return Pointer to the sync that registers the Node
198+
ur_exp_command_buffer_sync_point_t
199+
AddSyncPoint(std::shared_ptr<CUgraphNode> CuNode) {
200+
ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint;
201+
RegisterSyncPoint(SyncPoint, CuNode);
202+
return SyncPoint;
203+
}
204+
205+
// UR context associated with this command-buffer
206+
ur_context_handle_t Context;
207+
// Device associated with this command buffer
208+
ur_device_handle_t Device;
209+
// Cuda Graph handle
210+
CUgraph CudaGraph;
211+
// Cuda Graph Exec handle
212+
CUgraphExec CudaGraphExec;
213+
// Atomic variable counting the number of reference to this command_buffer
214+
// using std::atomic prevents data race when incrementing/decrementing.
215+
std::atomic_uint32_t RefCount;
216+
217+
// Map of sync_points to ur_events
218+
std::unordered_map<ur_exp_command_buffer_sync_point_t,
219+
std::shared_ptr<CUgraphNode>>
220+
SyncPoints;
221+
// Next sync_point value (may need to consider ways to reuse values if 32-bits
222+
// is not enough)
223+
ur_exp_command_buffer_sync_point_t NextSyncPoint;
224+
225+
// Used when retaining an object.
226+
uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
227+
// Used when releasing an object.
228+
uint32_t decrementReferenceCount() noexcept { return --RefCount; }
229+
uint32_t getReferenceCount() const noexcept { return RefCount; }
230+
};

source/adapters/cuda/device.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
613613

614614
std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups ";
615615
SupportedExtensions += "pi_ext_intel_devicelib_assert ";
616+
// Report support for the UR command-buffer experimental feature
617+
SupportedExtensions += "ur_exp_command_buffer ";
616618
SupportedExtensions += " ";
617619

618620
int Major = 0;

0 commit comments

Comments
 (0)