@@ -150,6 +150,11 @@ struct lora_data {
150
150
struct lora_info info;
151
151
std::vector<uint8_t> data;
152
152
struct ggml_context * ctx;
153
+ // the backend to perform the computation (CPU, CUDA, METAL)
154
+ ggml_backend_t backend = NULL;
155
+
156
+ // the backend buffer used to store the tensor data of a and b
157
+ ggml_backend_buffer_t buffer;
153
158
154
159
uint32_t lora_r;
155
160
uint32_t lora_alpha;
@@ -253,9 +258,17 @@ static struct lora_data * load_lora(struct lora_info * info) {
253
258
struct lora_data * result = new struct lora_data;
254
259
result->info = *info;
255
260
result->ctx = NULL;
261
+ result->backend = NULL;
262
+ result->buffer = NULL;
256
263
result->lora_r = 1;
257
264
result->lora_alpha = 1;
258
265
266
+ fprintf(stderr, "%s: using Metal backend\n", __func__);
267
+ result->backend = ggml_backend_metal_init();
268
+ if (!result->backend) {
269
+ fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
270
+ }
271
+
259
272
struct llama_file_lora file(info->filename.c_str(), "rb");
260
273
if (file.fp == NULL) {
261
274
fprintf(stderr, "warning: Could not open lora adapter '%s'. Ignoring this adapter.\n",
@@ -307,9 +320,10 @@ static struct lora_data * load_lora(struct lora_info * info) {
307
320
tensors_offset.push_back(offset);
308
321
file.seek(nbytes, SEEK_CUR);
309
322
}
323
+ result->buffer = ggml_backend_alloc_ctx_tensors(result->ctx, result->backend);
310
324
311
- ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_metal_buffer_type());
312
- if (!buf ) {
325
+ // ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_metal_buffer_type());
326
+ if (!result->buffer ) {
313
327
LLAMA_LOG_ERROR("%s: failed to allocate buffer for lora tensors\n", __func__);
314
328
}
315
329
// read tensor data
@@ -321,9 +335,15 @@ static struct lora_data * load_lora(struct lora_info * info) {
321
335
size_t nbytes = ggml_nbytes(tensor);
322
336
size_t nbytes_pad = ggml_nbytes_pad(tensor);
323
337
file.seek(offset, SEEK_SET);
324
- tensor->data = result->data.data() + data_offset;
325
- file.read_raw(tensor->data, nbytes);
326
- data_offset += nbytes_pad;
338
+
339
+ std::vector<char> read_buf;
340
+ read_buf.resize(ggml_nbytes(tensor));
341
+ file.read_raw(read_buf.data(), ggml_nbytes(tensor));
342
+ ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
343
+ // tensor_tmp->data = result->data.data() + data_offset;
344
+ // file.read_raw(tensor_tmp->data, nbytes);
345
+ // data_offset += nbytes_pad;
346
+ // ggml_backend_tensor_set(tensor, tensor_tmp->data, 0, ggml_nbytes(tensor));
327
347
}
328
348
return result;
329
349
}
0 commit comments