
Commit 83c96d5

sync : ggml-cuda
ggml-ci
1 parent: 4fe646f

File tree

2 files changed: +14, -5 lines


ggml-cuda.cu

Lines changed: 12 additions & 4 deletions
@@ -81,6 +81,7 @@
 
 #include "ggml-cuda.h"
 #include "ggml.h"
+#include "ggml-backend-impl.h"
 
 #define MIN_CC_DP4A 610 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products
 #define CC_VOLTA 700
@@ -7649,11 +7650,11 @@ static size_t g_temp_tensor_extra_index = 0;
 
 static ggml_tensor_extra_gpu * ggml_cuda_alloc_temp_tensor_extra() {
     if (g_temp_tensor_extras == nullptr) {
-        g_temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_MAX_NODES];
+        g_temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_DEFAULT_GRAPH_SIZE];
     }
 
     size_t alloc_index = g_temp_tensor_extra_index;
-    g_temp_tensor_extra_index = (g_temp_tensor_extra_index + 1) % GGML_MAX_NODES;
+    g_temp_tensor_extra_index = (g_temp_tensor_extra_index + 1) % GGML_DEFAULT_GRAPH_SIZE;
     ggml_tensor_extra_gpu * extra = &g_temp_tensor_extras[alloc_index];
     memset(extra, 0, sizeof(*extra));
 
@@ -7960,11 +7961,11 @@ struct ggml_backend_buffer_context_cuda {
 
     ggml_tensor_extra_gpu * ggml_cuda_alloc_temp_tensor_extra() {
         if (temp_tensor_extras == nullptr) {
-            temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_MAX_NODES];
+            temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_DEFAULT_GRAPH_SIZE];
        }
 
        size_t alloc_index = temp_tensor_extra_index;
-        temp_tensor_extra_index = (temp_tensor_extra_index + 1) % GGML_MAX_NODES;
+        temp_tensor_extra_index = (temp_tensor_extra_index + 1) % GGML_DEFAULT_GRAPH_SIZE;
        ggml_tensor_extra_gpu * extra = &temp_tensor_extras[alloc_index];
        memset(extra, 0, sizeof(*extra));
 
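Both hunks make the same change: the round-robin pool of temporary ggml_tensor_extra_gpu entries is now sized by GGML_DEFAULT_GRAPH_SIZE instead of the removed GGML_MAX_NODES. Below is a minimal, standalone sketch of that allocation pattern, with simplified names and the pool size hard-coded for illustration; it is not the backend's actual types or constants.

#include <cstddef>
#include <cstring>

// Stand-in for GGML_DEFAULT_GRAPH_SIZE; the real constant lives in ggml.h.
static const size_t kPoolSize = 2048;

struct tensor_extra {
    void * data_device[8]; // simplified: one device pointer slot per GPU
};

static tensor_extra * g_pool       = nullptr;
static size_t         g_pool_index = 0;

// Lazily allocate a fixed-size pool and hand entries out round-robin.
// Each entry is zeroed on handout and is reused after kPoolSize allocations,
// so a pointer is only valid for the current graph evaluation.
static tensor_extra * alloc_temp_extra() {
    if (g_pool == nullptr) {
        g_pool = new tensor_extra[kPoolSize];
    }
    tensor_extra * extra = &g_pool[g_pool_index];
    g_pool_index = (g_pool_index + 1) % kPoolSize;
    std::memset(extra, 0, sizeof(*extra));
    return extra;
}

Because entries cycle and get overwritten, the pool can stay small and fixed; it only has to outlast a single graph's worth of temporary tensors.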
@@ -8050,7 +8051,12 @@ static ggml_backend_buffer_t ggml_backend_cuda_alloc_buffer(ggml_backend_t backe
     ggml_cuda_set_device(g_main_device);
 
     ggml_backend_buffer_context_cuda * ctx = new ggml_backend_buffer_context_cuda;
+
+    size = std::max(size, (size_t)1); // cudaMalloc returns null for size 0
+
+    ggml_cuda_set_device(g_main_device);
     CUDA_CHECK(cudaMalloc(&ctx->device, size));
+
     return ggml_backend_buffer_init(backend, cuda_backend_buffer_interface, ctx, size);
 }
 
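The added comment states the motivation: cudaMalloc hands back a null pointer for a zero-byte request, and a null device pointer would later be indistinguishable from a failed allocation, so the requested size is clamped to at least one byte. A minimal sketch of the same guard in isolation, using plain cudaError_t handling instead of the backend's CUDA_CHECK macro:

#include <algorithm>
#include <cstdio>
#include <cuda_runtime.h>

// Allocate at least one byte so that a size-0 request does not come back as
// a null pointer that later looks like an allocation failure.
static void * cuda_alloc_nonzero(size_t size) {
    size = std::max(size, (size_t) 1);

    void * ptr = nullptr;
    cudaError_t err = cudaMalloc(&ptr, size);
    if (err != cudaSuccess) {
        std::fprintf(stderr, "cudaMalloc of %zu bytes failed: %s\n", size, cudaGetErrorString(err));
        return nullptr;
    }
    return ptr;
}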
@@ -8117,6 +8123,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
     for (int i = 0; i < cgraph->n_nodes; i++) {
         ggml_tensor * node = cgraph->nodes[i];
 
+        if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE)
+            continue;
         assert(node->backend == GGML_BACKEND_GPU);
         for (int j = 0; j < GGML_MAX_SRC; j++) {
             if (node->src[j] != nullptr) {
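GGML_OP_RESHAPE, GGML_OP_TRANSPOSE, GGML_OP_VIEW, and GGML_OP_PERMUTE only rearrange tensor metadata and launch no kernels, so the compute loop now skips them before asserting that a node is assigned to the GPU backend. A small hypothetical helper (not part of this patch) expressing the same check:

#include "ggml.h"

// Hypothetical helper mirroring the added condition: view-like ops carry no
// work of their own, so ggml_backend_cuda_graph_compute can skip them.
static bool ggml_cuda_op_is_noop(enum ggml_op op) {
    return op == GGML_OP_RESHAPE || op == GGML_OP_TRANSPOSE ||
           op == GGML_OP_VIEW    || op == GGML_OP_PERMUTE;
}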

ggml.c

Lines changed: 2 additions & 1 deletion
@@ -100,7 +100,8 @@ typedef void * thread_ret_t;
 #include <hbwmalloc.h>
 #endif
 
-#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
+    (!defined(TARGET_OS_TV))
 
 #include <sys/wait.h>
 
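The reworked condition keeps the guarded POSIX includes, starting with sys/wait.h, out of any build where TARGET_OS_TV is defined; on the listed platforms nothing else changes. A compressed sketch of the resulting guard, assuming TARGET_OS_TV is made visible by the build (for example via Apple's TargetConditionals.h or a -D flag) when targeting tvOS:

/* Compressed sketch of the guard after this change (assumption: the build
   defines TARGET_OS_TV when targeting tvOS). The POSIX include compiles only
   on the listed platforms and only while TARGET_OS_TV is not defined. */
#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || \
     defined(__NetBSD__) || defined(__OpenBSD__)) && \
    (!defined(TARGET_OS_TV))
#include <sys/wait.h>
#endif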