Commit 1103bdb

Author: Lorenzo Toniazzi (committed)
Fixed buffer allocation
1 parent 028d3f7 commit 1103bdb

File tree

2 files changed: +25 -51 lines changed

examples/main/main.cpp

Lines changed: 0 additions & 46 deletions
@@ -139,52 +139,6 @@ void verify_tensor_allocation(struct ggml_context * ctx, ggml_backend_buffer_t b
 
 int main(int argc, char ** argv) {
 
-
-    // The library allows the user to define a certain function using the available tensor operations. This function
-    // definition is represented internally via a computation graph. Each tensor operation in the function definition
-    // corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
-    // function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
-    // using one of the available optimization algorithms.
-    //
-    // For example, here we define the function: f(x) = a*x^2 + b
-
-    // memory allocation happens here
-    // Create context, allocating memory
-    struct ggml_init_params _params = {
-        .mem_size   = 16*1024*1024,
-        .mem_buffer = NULL,
-        .no_alloc   = true,
-    };
-    struct ggml_context * _ctx = ggml_init(_params);
-
-    struct ggml_tensor * x = ggml_new_tensor_1d(_ctx, GGML_TYPE_F32, 1);
-
-    // ggml_set_param(_ctx, x); // x is an input variable
-
-    // struct ggml_tensor * a = ggml_new_tensor_1d(_ctx, GGML_TYPE_F32, 1);
-    // struct ggml_tensor * b = ggml_new_tensor_1d(_ctx, GGML_TYPE_F32, 1);
-    // struct ggml_tensor * x2 = ggml_mul(_ctx, x, x);
-    // struct ggml_tensor * f = ggml_add(_ctx, ggml_mul(_ctx, a, x2), b);
-
-    // struct ggml_cgraph * gf = ggml_new_graph(_ctx);
-
-    // // ggml_backend_alloc_ctx_tensors_from_buft(_ctx, ggml_backend_cpu_buffer_type());
-    // // ggml_backend_alloc_ctx_tensors_from_buft(_ctx, ggml_backend_metal_buffer_type());
-    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(_ctx, ggml_backend_metal_buffer_type());
-    if (buf == nullptr) {
-        throw std::runtime_error("unable to allocate backend buffer");
-    }
-    else {
-        size_t buffer_size = ggml_backend_buft_get_max_size(ggml_backend_metal_buffer_type());
-
-        // Verify tensor allocations
-        verify_tensor_allocation(_ctx, buf, buffer_size);
-    }
-    ggml_used_mem(_ctx);
-    //
-
-
-
     gpt_params params;
     g_params = &params;
 
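Note: the comments removed above describe ggml's computation-graph workflow (build f(x) = a*x^2 + b as a graph, allocate its tensors in a backend buffer, then evaluate it). Below is a minimal standalone sketch of that workflow, using the CPU backend instead of Metal so it builds anywhere; the file name, values, and variable names are illustrative and not part of this commit.

#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main(void) {
    // context holds only tensor/graph metadata; the data lives in a backend buffer
    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = true,
    };
    struct ggml_context * ctx = ggml_init(params);

    // f(x) = a*x^2 + b
    struct ggml_tensor * x  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
    struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, f);

    // allocate every tensor in the context inside a single backend buffer
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
    if (buf == NULL) {
        fprintf(stderr, "unable to allocate backend buffer\n");
        return 1;
    }

    // upload input values through the backend API, not through tensor->data
    const float xv = 2.0f, av = 3.0f, bv = 4.0f;
    ggml_backend_tensor_set(x, &xv, 0, sizeof(xv));
    ggml_backend_tensor_set(a, &av, 0, sizeof(av));
    ggml_backend_tensor_set(b, &bv, 0, sizeof(bv));

    ggml_backend_graph_compute(backend, gf);

    float fv = 0.0f;
    ggml_backend_tensor_get(f, &fv, 0, sizeof(fv));
    printf("f(2) = 3*2^2 + 4 = %.1f\n", fv); // expect 16.0

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
    ggml_free(ctx);
    return 0;
}

Because the context is created with no_alloc = true, ggml_backend_alloc_ctx_tensors places the actual tensor storage in the backend buffer, which is why inputs are written with ggml_backend_tensor_set rather than by dereferencing tensor->data. The llama.cpp changes below apply the same pattern to the LoRA loader.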

llama.cpp

Lines changed: 25 additions & 5 deletions
@@ -150,6 +150,11 @@ struct lora_data {
     struct lora_info info;
     std::vector<uint8_t> data;
     struct ggml_context * ctx;
+    // the backend to perform the computation (CPU, CUDA, METAL)
+    ggml_backend_t backend = NULL;
+
+    // the backend buffer to store the tensor data of a and b
+    ggml_backend_buffer_t buffer;
 
     uint32_t lora_r;
     uint32_t lora_alpha;
@@ -253,9 +258,17 @@ static struct lora_data * load_lora(struct lora_info * info) {
     struct lora_data * result = new struct lora_data;
     result->info = *info;
     result->ctx = NULL;
+    result->backend = NULL;
+    result->buffer = NULL;
     result->lora_r = 1;
     result->lora_alpha = 1;
 
+    fprintf(stderr, "%s: using Metal backend\n", __func__);
+    result->backend = ggml_backend_metal_init();
+    if (!result->backend) {
+        fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
+    }
+
     struct llama_file_lora file(info->filename.c_str(), "rb");
     if (file.fp == NULL) {
         fprintf(stderr, "warning: Could not open lora adapter '%s'. Ignoring this adapter.\n",
@@ -307,9 +320,10 @@ static struct lora_data * load_lora(struct lora_info * info) {
         tensors_offset.push_back(offset);
         file.seek(nbytes, SEEK_CUR);
     }
+    result->buffer = ggml_backend_alloc_ctx_tensors(result->ctx, result->backend);
 
-    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_metal_buffer_type());
-    if (!buf) {
+    // ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_metal_buffer_type());
+    if (!result->buffer) {
         LLAMA_LOG_ERROR("%s: failed to allocate buffer for lora tensors\n", __func__);
     }
     // read tensor data
@@ -321,9 +335,15 @@ static struct lora_data * load_lora(struct lora_info * info) {
         size_t nbytes = ggml_nbytes(tensor);
         size_t nbytes_pad = ggml_nbytes_pad(tensor);
         file.seek(offset, SEEK_SET);
-        tensor->data = result->data.data() + data_offset;
-        file.read_raw(tensor->data, nbytes);
-        data_offset += nbytes_pad;
+
+        std::vector<char> read_buf;
+        read_buf.resize(ggml_nbytes(tensor));
+        file.read_raw(read_buf.data(), ggml_nbytes(tensor));
+        ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
+        // tensor_tmp->data = result->data.data() + data_offset;
+        // file.read_raw(tensor_tmp->data, nbytes);
+        // data_offset += nbytes_pad;
+        // ggml_backend_tensor_set(tensor, tensor_tmp->data, 0, ggml_nbytes(tensor));
     }
     return result;
 }
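Note: this last hunk is the heart of "Fixed buffer allocation". Once ggml_backend_alloc_ctx_tensors() has placed the tensors in a backend buffer, their storage may not be host-addressable, so tensor->data can no longer be pointed at result->data. The loader instead stages each tensor in a temporary host buffer and uploads it with ggml_backend_tensor_set(). A stripped-down sketch of that pattern with a hypothetical helper (not part of the commit; assumes <cstdio>, <vector>, and the ggml headers already included by llama.cpp):

// Read one tensor's payload from an open file into a host staging buffer,
// then copy it into whichever backend buffer the tensor was allocated in.
static bool read_tensor_into_backend(FILE * fp, long offset, struct ggml_tensor * tensor) {
    std::vector<char> read_buf(ggml_nbytes(tensor)); // host staging buffer
    if (fseek(fp, offset, SEEK_SET) != 0) {
        return false;
    }
    if (fread(read_buf.data(), 1, read_buf.size(), fp) != read_buf.size()) {
        return false;
    }
    // dispatches to the buffer's own copy routine (CPU memcpy, Metal upload, ...)
    ggml_backend_tensor_set(tensor, read_buf.data(), 0, read_buf.size());
    return true;
}

The same staged-copy pattern works unchanged whether the buffer lives on the CPU, Metal, or CUDA backend.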
