Skip to content

Commit f00780b

Browse files
authored
llama : sync gguf-llama.cpp with latest llama.cpp (#2608)
* llama : sync gguf-llama.cpp with latest llama.cpp
* minor : indentation + assert
* llama : refactor gguf_buffer and gguf_ctx_buffer
* llama : minor
1 parent 6f64b6c commit f00780b

File tree

6 files changed

+688
-459
lines changed

6 files changed

+688
-459
lines changed

examples/gguf/gguf.cpp

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,14 +8,19 @@
88
#include <sstream>
99
#include <fstream>
1010
#include <vector>
11-
/*
11+
12+
// Remove any MIN/MAX macros that may already be defined so that the
// definitions below are the ones in effect from this point on.
#undef MIN
#undef MAX

// Function-like macros: the selected argument is evaluated twice, so do not
// pass expressions with side effects (e.g. MIN(i++, n)).
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
16+
1217
// Converts a value to its textual form by streaming it into a
// std::stringstream with operator<< (default stream formatting) and
// returning the accumulated string.
//
// T must be a type for which `std::ostream << T` is defined.
template<typename T>
static std::string to_string(const T & val) {
    std::stringstream ss;
    ss << val;
    return ss.str();
}
18-
*/
23+
1924
void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
2025
const int32_t n = val.size();
2126
fout.write((const char *) &n, sizeof(n));
@@ -377,28 +382,28 @@ bool gguf_ex_read_2(const std::string & fname) {
377382

378383
struct gguf_file file(fname.c_str(), "rb");
379384
gguf_mmap data_mmap(&file, 0, false);
385+
380386
const int n_tensors = gguf_get_n_tensors(ctx);
381387

382388
for (int i = 0; i < n_tensors; ++i) {
383-
const char * name = gguf_get_tensor_name(ctx, i);
384-
const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i);
389+
const char * name = gguf_get_tensor_name(ctx, i);
390+
const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i);
391+
385392
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
386393

387394
cur->data = static_cast<char *>(data_mmap.addr) + offset;
388395

389396
// print first 10 elements
390-
const float * data = (const float *) cur->data;
397+
const float * data = (const float *) cur->data;
391398

392399
printf("%s data[:10] : ", name);
393-
394-
for (int j = 0; j < 10; ++j) {
400+
for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
395401
printf("%f ", data[j]);
396402
}
397-
398403
printf("\n\n");
399404
}
400405

401-
fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
406+
fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
402407

403408
ggml_free(ctx_data);
404409
gguf_free(ctx);

ggml-metal.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,9 @@ struct ggml_metal_context;
3838
// NOTE(review): presumably creates a Metal context configured with n_cb
// command buffers - the implementation is not visible here, confirm.
struct ggml_metal_context * ggml_metal_init(int n_cb);

// releases a context (the implementation ends by calling free(ctx))
void ggml_metal_free(struct ggml_metal_context * ctx);

// allocate n bytes of page-aligned host memory; returns NULL on failure
void * ggml_metal_host_malloc(size_t n);

// release memory returned by ggml_metal_host_malloc
void ggml_metal_host_free (void * data);

// set the number of command buffers to use
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
4346

ggml-metal.m

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,21 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
224224
free(ctx);
225225
}
226226

227+
// Allocates n bytes of host memory aligned to the system page size.
//
// Returns the allocation on success. If posix_memalign fails, an error is
// printed to stderr and NULL is returned. The memory is released with
// free(), which is what ggml_metal_host_free does.
void * ggml_metal_host_malloc(size_t n) {
    void * data = NULL;

    // posix_memalign reports failure through its return value (0 == success)
    const int result = posix_memalign(&data, getpagesize(), n);
    if (result != 0) {
        fprintf(stderr, "%s: error: posix_memalign failed\n", __func__);
        return NULL;
    }

    return data;
}
237+
238+
// Releases memory obtained from ggml_metal_host_malloc by forwarding the
// pointer to free(); passing NULL is therefore a no-op.
void ggml_metal_host_free(void * data) {
    free(data);
}
241+
227242
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
228243
ctx->n_cb = n_cb;
229244
}

0 commit comments

Comments
 (0)