Commit 8d117a5

llama : model loader

ggml-ci

1 parent 736e692

File tree

7 files changed: +1264 -1164 lines changed


src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -20,6 +20,7 @@ add_library(llama
             llama-kv-cache.cpp
             llama-mmap.cpp
             llama-model.cpp
+            llama-model-loader.cpp
             llama-sampling.cpp
             llama-vocab.cpp
             unicode.h

src/llama-hparams.h

Lines changed: 7 additions & 6 deletions

@@ -69,12 +69,13 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;
 
-    float rope_attn_factor = 1.0f;
-    float rope_freq_base_train;
-    float rope_freq_scale_train;
-    uint32_t n_ctx_orig_yarn;
-    float rope_yarn_log_mul;
-    int rope_sections[4]; // TODO: actually this should be std::array (I was wrong)
+    float rope_attn_factor = 1.0f;
+    float rope_freq_base_train;
+    float rope_freq_scale_train;
+    uint32_t n_ctx_orig_yarn;
+    float rope_yarn_log_mul;
+
+    std::array<int, 4> rope_sections;
 
     // for State Space Models
     uint32_t ssm_d_conv = 0;
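
A note on the rope_sections change above: replacing the raw int rope_sections[4] with std::array<int, 4> resolves the TODO on the removed line and gives the field regular value semantics: it can be assigned and compared as a whole, reports its own size, and does not decay to a pointer when handed to helper functions. A minimal sketch of the difference, using a made-up hparams_sketch struct rather than the real llama_hparams:

    // hparams_sketch is hypothetical; it only illustrates what the std::array member buys.
    #include <array>
    #include <cstdio>

    struct hparams_sketch {
        std::array<int, 4> rope_sections = {}; // value-initialized to all zeros
    };

    int main() {
        hparams_sketch a;
        a.rope_sections = {16, 24, 24, 0};     // whole-object assignment; a raw int[4] cannot do this

        hparams_sketch b = a;
        bool same = (a.rope_sections == b.rope_sections); // element-wise comparison via operator==

        std::printf("n_sections = %zu, copies %s\n",
                    a.rope_sections.size(), same ? "match" : "differ");
        return 0;
    }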

src/llama-impl.cpp

Lines changed: 74 additions & 0 deletions

@@ -2,9 +2,11 @@
 
 #include "llama.h"
 
+#include <cinttypes>
 #include <climits>
 #include <cstdarg>
 #include <vector>
+#include <sstream>
 
 struct llama_logger_state {
     ggml_log_callback log_callback = llama_log_callback_default;
@@ -89,3 +91,75 @@ std::string format(const char * fmt, ...) {
     va_end(ap);
     return std::string(buf.data(), size);
 }
+
+std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%5" PRId64, ne.at(0));
+    for (size_t i = 1; i < ne.size(); i++) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, ne.at(i));
+    }
+    return buf;
+}
+
+std::string llama_format_tensor_shape(const struct ggml_tensor * t) {
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
+    }
+    return buf;
+}
+
+static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
+    switch (type) {
+        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
+        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
+        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
+        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
+        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
+        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
+        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
+        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
+        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
+        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
+        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
+        default:                return format("unknown type %d", type);
+    }
+}
+
+std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
+    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+    switch (type) {
+        case GGUF_TYPE_STRING:
+            return gguf_get_val_str(ctx_gguf, i);
+        case GGUF_TYPE_ARRAY:
+            {
+                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
+                int arr_n = gguf_get_arr_n(ctx_gguf, i);
+                const void * data = gguf_get_arr_data(ctx_gguf, i);
+                std::stringstream ss;
+                ss << "[";
+                for (int j = 0; j < arr_n; j++) {
+                    if (arr_type == GGUF_TYPE_STRING) {
+                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
+                        // escape quotes
+                        replace_all(val, "\\", "\\\\");
+                        replace_all(val, "\"", "\\\"");
+                        ss << '"' << val << '"';
+                    } else if (arr_type == GGUF_TYPE_ARRAY) {
+                        ss << "???";
+                    } else {
+                        ss << gguf_data_to_str(arr_type, data, j);
+                    }
+                    if (j < arr_n - 1) {
+                        ss << ", ";
+                    }
+                }
+                ss << "]";
+                return ss.str();
+            }
+        default:
+            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
+    }
+}
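
Taken together, the helpers added to llama-impl.cpp give callers a compact way to print tensor shapes and to render arbitrary GGUF key/value pairs (including arrays) as strings. A rough usage sketch follows; it assumes the gguf C API (gguf_init_from_file, gguf_get_n_kv, gguf_get_key, gguf_free) and a two-field gguf_init_params, so treat the details as illustrative rather than exact, and model.gguf is a hypothetical path:

    // Illustrative only: verify the gguf_* signatures against the ggml headers in your tree.
    #include "llama-impl.h"
    #include "ggml.h" // older trees declare the gguf API here; newer ones split it into gguf.h

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static void dump_gguf_metadata(const char * fname) {
        gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
        gguf_context * ctx = gguf_init_from_file(fname, params);
        if (ctx == nullptr) {
            std::fprintf(stderr, "failed to open %s\n", fname);
            return;
        }

        // every key/value pair goes through gguf_kv_to_str, which handles arrays and strings
        const int n_kv = gguf_get_n_kv(ctx);
        for (int i = 0; i < n_kv; i++) {
            std::printf("%s = %s\n", gguf_get_key(ctx, i), gguf_kv_to_str(ctx, i).c_str());
        }

        gguf_free(ctx);
    }

    int main() {
        // the vector overload formats a shape without needing a live ggml_tensor
        std::vector<int64_t> ne = { 4096, 32000 };
        std::printf("shape: [%s]\n", llama_format_tensor_shape(ne).c_str());

        dump_gguf_metadata("model.gguf"); // hypothetical file name
        return 0;
    }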

src/llama-impl.h

Lines changed: 12 additions & 0 deletions

@@ -3,6 +3,7 @@
 #include "ggml.h" // for ggml_log_level
 
 #include <string>
+#include <vector>
 
 #ifdef __GNUC__
 #ifdef __MINGW32__
@@ -33,6 +34,12 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
 // helpers
 //
 
+template <typename T>
+struct no_init {
+    T value;
+    no_init() { /* do nothing */ }
+};
+
 struct time_meas {
     time_meas(int64_t & t_acc, bool disable = false);
     ~time_meas();
@@ -47,3 +54,8 @@ void replace_all(std::string & s, const std::string & search, const std::string
 // TODO: rename to llama_format ?
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
 std::string format(const char * fmt, ...);
+
+std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
+std::string llama_format_tensor_shape(const struct ggml_tensor * t);
+
+std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i);
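
The no_init wrapper declared above is a small allocation trick: because its default constructor is user-provided and empty, value-initializing a no_init<T> leaves value untouched, so containers of no_init<uint8_t> can be resized without the usual zero-fill. A self-contained sketch of the idea (the loader code that actually uses it is not part of this diff):

    // Sketch of what no_init<T> buys you; the buffer fill step is a stand-in for
    // however the loader actually writes the data (mmap copy, fread, ...).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    template <typename T>
    struct no_init {
        T value;
        no_init() { /* intentionally leave `value` uninitialized */ }
    };

    int main() {
        // resize() value-initializes new elements: for plain uint8_t that is a zero-fill,
        // which is wasted work when the next step overwrites the whole buffer anyway
        std::vector<uint8_t> zeroed;
        zeroed.resize(64 * 1024 * 1024);

        // with no_init<uint8_t>, value-initialization runs the empty user-provided
        // constructor, so the bytes are left untouched (and must be written before use)
        std::vector<no_init<uint8_t>> raw;
        raw.resize(64 * 1024 * 1024);

        // e.g. read tensor data straight into raw.data() here, then consume it
        std::printf("allocated %zu bytes without zero-fill\n", raw.size() * sizeof(raw[0]));
        return 0;
    }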
