Skip to content

Commit bb590f1

Browse files
committed
Workaround to set node->backend
1 parent 9106232 commit bb590f1

File tree

3 files changed

+15
-3
lines changed

3 files changed

+15
-3
lines changed

ggml-opencl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,8 +1599,8 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
15991599
// TODO: find the optimal values for these
16001600
if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
16011601
src1->type == GGML_TYPE_F32 &&
1602-
dst->type == GGML_TYPE_F32 &&
1603-
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)) {
1602+
dst->type == GGML_TYPE_F32 /*&&
1603+
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)*/) {
16041604
return true;
16051605
}
16061606

ggml.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15938,6 +15938,18 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
1593815938

1593915939
struct ggml_task_stage *stages = node->task_profile.stages;
1594015940

15941+
// Workaround to set node->backend.
15942+
for (int j = 0; j < 3; j++) {
15943+
if (node->backend == GGML_BACKEND_CPU &&
15944+
(stages[j].backend & GGML_TASK_BACKEND_GPU)) {
15945+
if (ggml_cpu_has_cublas() || ggml_cpu_has_clblast()) {
15946+
node->backend = GGML_BACKEND_GPU;
15947+
} else {
15948+
GGML_ASSERT(false);
15949+
}
15950+
}
15951+
}
15952+
1594115953
// compute stage n_tasks.
1594215954
int n_tasks = stages[1].parallel ? n_threads : 1;
1594315955

@@ -16008,6 +16020,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
1600816020

1600916021
if (comp_backend == GGML_TASK_BACKEND_GPU_CL) {
1601016022
#if defined(GGML_USE_CLBLAST)
16023+
GGML_ASSERT(ggml_cl_can_mul_mat(node->src0, node->src1, node));
1601116024
cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
1601216025
#else
1601316026
GGML_ASSERT(false);

tests/test-ggml-tune.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ ggml_task_profiles_mock_qxx_provider(struct ggml_tensor *node,
8585
struct ggml_task_profile *profiles) {
8686
UNUSED(node);
8787
profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU;
88-
profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU;
8988
profiles[0].stages[1].backend = GGML_TASK_BACKEND_CPU;
9089
profiles[1].stages[0].backend = GGML_TASK_BACKEND_CPU;
9190
profiles[1].stages[1].backend = GGML_TASK_BACKEND_CPU_BLAS;

0 commit comments

Comments
 (0)