Skip to content

Commit ce2afc5

Browse files
committed
kompute : disable LLAMA_SPLIT_LAYER after ggml-org#5321
1 parent 6be1fda commit ce2afc5

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/llama.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6951,6 +6951,7 @@ static bool llm_load_tensors(
69516951
model.buft_layer[i] = llama_default_buffer_type_cpu(true);
69526952
}
69536953

6954+
#ifndef GGML_USE_KOMPUTE
69546955
if (split_mode == LLAMA_SPLIT_MODE_LAYER) {
69556956
// calculate the split points
69566957
int device_count = llama_get_device_count(model);
@@ -6988,7 +6989,9 @@ static bool llm_load_tensors(
69886989
} else {
69896990
model.buft_output = llama_default_buffer_type_cpu(true);
69906991
}
6991-
} else {
6992+
} else
6993+
#endif
6994+
{
69926995
ggml_backend_buffer_type_t split_buft;
69936996
if (split_mode == LLAMA_SPLIT_MODE_ROW) {
69946997
split_buft = llama_default_buffer_type_split(model, main_gpu, tensor_split);

0 commit comments

Comments (0)