Skip to content

Commit c00ff52

Browse files
rgerganov and slaren
authored and committed
rpc : fix segfault with nkvo (llama/9389)
* rpc : fix nkvo

* rpc : buf_size must not be static

ref: #9337

---------

Co-authored-by: slaren <[email protected]>
1 parent dbd164c commit c00ff52

File tree

3 files changed

+16
-10
lines changed

3 files changed

+16
-10
lines changed

ggml/src/ggml-cuda.cu

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2579,7 +2579,11 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
25792579
for (int i = 0; i < cgraph->n_nodes; i++) {
25802580
ggml_tensor * node = cgraph->nodes[i];
25812581

2582-
if (node->src[0] && ggml_backend_buffer_is_cuda_split(node->src[0]->buffer)) {
2582+
if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
2583+
continue;
2584+
}
2585+
2586+
if (node->src[0] && node->src[0]->buffer && ggml_backend_buffer_is_cuda_split(node->src[0]->buffer)) {
25832587
use_cuda_graph = false; // Split buffers are not supported by CUDA graph capture
25842588
#ifndef NDEBUG
25852589
GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to split buffer\n", __func__);

ggml/src/ggml-rpc.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -884,15 +884,17 @@ ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rp
884884
}
885885
result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer);
886886
if (result->buffer && buffers.find(result->buffer) == buffers.end()) {
887-
return nullptr;
887+
result->buffer = nullptr;
888888
}
889889

890-
// require that the tensor data does not go beyond the buffer end
891-
uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
892-
uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
893-
uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
894-
GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow
895-
GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size);
890+
if (result->buffer) {
891+
// require that the tensor data does not go beyond the buffer end
892+
uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
893+
uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
894+
uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
895+
GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow
896+
GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size);
897+
}
896898

897899
result->op = (ggml_op) tensor->op;
898900
for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
@@ -1061,7 +1063,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<u
10611063
const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
10621064
GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
10631065

1064-
static size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
1066+
size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
10651067
struct ggml_init_params params = {
10661068
/*.mem_size =*/ buf_size,
10671069
/*.mem_buffer =*/ NULL,

ggml/src/ggml.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3850,7 +3850,7 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
38503850

38513851
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
38523852
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
3853-
__func__, cur_end + size_needed, ctx->mem_size);
3853+
__func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
38543854
assert(false);
38553855
return NULL;
38563856
}

0 commit comments

Comments (0)