Skip to content

Commit 289597c

Browse files
committed
SYCL: Avoid using SYCL-Graph for unsupported nodes
Currently, when using SYCL with a CUDA backend, running `GGML_SYCL_DISABLE_GRAPH=0 ./bin/test-backend-ops -b SYCL0` hits two operations that throw an exception because of blocking waits during queue recording:

* `-o CONCAT`: Use of blocking waits on a queue that's being recorded https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-sycl/concat.cpp#L185-L187
* `-o MUL_MAT_ID`: Blocking wait on a recording queue for a copy to host memory https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-sycl/ggml-sycl.cpp#L3072-L3074

We've noticed that `ggml-cuda.cu` has the [check_node_graph_compatibility_and_refresh_copy_ops](https://github.com/ggml-org/llama.cpp/blob/39e73ae0d69f882d7e29cecc6dd8f5052fca6731/ggml/src/ggml-cuda/ggml-cuda.cu#L2458-L2458) method for checking whether a graph can be used, even when graphs are enabled. I've taken a similar approach in this PR by adding a method to `ggml-sycl.cpp` that checks whether a graph can be used for the given operations, even if the user has asked for graphs to be enabled.
1 parent 0a338ed commit 289597c

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3810,11 +3810,38 @@ static void ggml_backend_sycl_graph_compute_impl(ggml_backend_sycl_context * syc
38103810
}
38113811
}
38123812

3813+
#ifdef GGML_SYCL_GRAPH
3814+
// Scans every node of `cgraph` and reports whether the whole graph may be
// executed through a SYCL graph.
//
// Returns false as soon as a node whose op is GGML_OP_CONCAT or
// GGML_OP_MUL_MAT_ID is found (both cases share the same exit path via the
// fallthrough); returns true when no such node exists, including when the
// graph has zero nodes. In builds without NDEBUG a debug message is logged
// before returning false.
static bool check_node_graph_compatibility(ggml_cgraph * cgraph) {
3815+
for (int i = 0; i < cgraph->n_nodes; i++) {
3816+
ggml_tensor * node = cgraph->nodes[i];
3817+
switch (node->op) {
3818+
default:
3819+
break;
3820+
case GGML_OP_CONCAT:
3821+
// ggml_sycl_op_concat() does a blocking host wait after memcpy operations,
3822+
// but wait() can't be called on the events returned by a queue recording
3823+
// to a graph.
3824+
[[fallthrough]];
3825+
case GGML_OP_MUL_MAT_ID:
3826+
// ggml_sycl_mul_mat_id() does a blocking host wait on the sycl queue after
3827+
// submitting a memcpy operation, but wait() can't be called on a queue that
3828+
// is recording to a graph.
3829+
# ifndef NDEBUG
3830+
GGML_LOG_DEBUG("%s: disabling SYCL graphs due to unsupported node type\n", __func__);
3831+
# endif
3832+
return false;
3833+
}
3834+
}
3835+
return true;
3836+
}
3837+
#endif
3838+
38133839
static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
38143840
auto * sycl_ctx = static_cast<ggml_backend_sycl_context *>(backend->context);
38153841

38163842
#ifdef GGML_SYCL_GRAPH
3817-
if (!g_ggml_sycl_disable_graph) {
3843+
bool use_sycl_graph = !g_ggml_sycl_disable_graph && check_node_graph_compatibility(cgraph);
3844+
if (use_sycl_graph) {
38183845
const bool graph_support = dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_limited_graph);
38193846
if (!graph_support) {
38203847
GGML_SYCL_DEBUG("[SYCL-GRAPH] can not use graphs on device:%d\n", sycl_ctx->device);

0 commit comments

Comments
 (0)