Commit a7df071

llama : impl
ggml-ci
1 parent b0d6b66

16 files changed: +230 additions, −209 deletions

common/common.h

Lines changed: 7 additions & 3 deletions
@@ -638,6 +638,10 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
 // Split utils
 //

-static const char * const LLM_KV_SPLIT_NO            = "split.no";
-static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
-static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
+namespace {
+
+const char * const LLM_KV_SPLIT_NO            = "split.no";
+const char * const LLM_KV_SPLIT_COUNT         = "split.count";
+const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
+
+}
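
The change above replaces per-declaration `static` with an unnamed namespace. Both spellings give the constants internal linkage (each translation unit that includes common.h gets its own copy), so behavior is unchanged; the namespace form simply avoids repeating the keyword and covers any future additions automatically. A minimal standalone sketch of the equivalence, with illustrative names not taken from the commit:

    // internal_linkage.cpp - both declarations below have internal linkage
    #include <cstdio>

    static const char * const KV_OLD_STYLE = "split.no";   // explicit `static`

    namespace {                                            // unnamed namespace
    const char * const KV_NEW_STYLE = "split.count";
    } // namespace

    int main() {
        std::printf("%s %s\n", KV_OLD_STYLE, KV_NEW_STYLE);
        return 0;
    }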

examples/gguf-split/gguf-split.cpp

Lines changed: 3 additions & 4 deletions
@@ -2,15 +2,14 @@
 #include "common.h"

 #include <algorithm>
-#include <cmath>
 #include <cstdlib>
 #include <fstream>
 #include <string>
 #include <vector>
-
-#include <stdio.h>
-#include <string.h>
 #include <climits>
+
+#include <cstdio>
+#include <cstring>
 #include <stdexcept>

 #if defined(_WIN32)
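
The include cleanup above also swaps the C headers <stdio.h> and <string.h> for their C++ counterparts <cstdio> and <cstring>, the idiomatic spelling in C++ sources: the wrapper headers are guaranteed to declare the functions in namespace std (and in practice also in the global namespace). A tiny sketch, not part of the commit:

    #include <cstdio>   // declares std::printf
    #include <cstring>  // declares std::strlen

    int main() {
        const char * msg = "hello";
        std::printf("%zu\n", std::strlen(msg));  // std-qualified names are always available
        return 0;
    }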

examples/quantize-stats/quantize-stats.cpp

Lines changed: 7 additions & 9 deletions
@@ -1,19 +1,17 @@
-#include "common.h"
 #include "ggml.h"
 #include "llama.h"
-#include "llama-impl.h"
+#include "llama-context.h"
+#include "common.h"

 #include <algorithm>
 #include <cassert>
 #include <cinttypes>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
-#include <map>
 #include <numeric>
 #include <regex>
 #include <string>
-#include <unordered_map>
 #include <vector>
 #include <thread>
 #include <mutex>
@@ -330,13 +328,13 @@ int main(int argc, char ** argv) {
         }
     }

-    const auto &tensors = llama_internal_get_tensor_map(ctx);
+    const auto & tensors = llama_internal_get_tensor_map(ctx);

     // check layer tensors
     int included_layers = 0;
     int64_t max_nelements = 0;
     bool is_f16 = false;
-    for (const auto& kv_tensor : tensors) {
+    for (const auto & kv_tensor : tensors) {
         if (!layer_included(params, kv_tensor.first)) {
             continue;
         }
@@ -371,8 +369,8 @@ int main(int argc, char ** argv) {
         if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
             continue;
         }
-        const auto * qfns = ggml_get_type_traits(type);
-        const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
+        const auto * qfns     = ggml_get_type_traits(type);
+        const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
         if (qfns_cpu->from_float && qfns->to_float) {
             if (params.verbose) {
                 printf("testing %s ...\n", ggml_type_name(type));
@@ -382,7 +380,7 @@ int main(int argc, char ** argv) {

     error_stats global_stats {};

-    for (const auto& kv_tensor : tensors) {
+    for (const auto & kv_tensor : tensors) {
         if (!layer_included(params, kv_tensor.first)) {
             continue;
         }

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ add_library(llama
             llama-chat.cpp
             llama-context.cpp
             llama-hparams.cpp
+            llama-impl.cpp
             llama-grammar.cpp
             llama-kv-cache.cpp
             llama-mmap.cpp

src/llama-adapter.cpp

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <map>
 #include <cassert>
+#include <stdexcept>

 // vec

src/llama-batch.h

Lines changed: 4 additions & 2 deletions
@@ -26,7 +26,9 @@ struct llama_ubatch {

 struct llama_sbatch_seq {
     int32_t n_seq_id;
+
     llama_seq_id * seq_id;
+
     size_t offset;
     size_t length;
 };
@@ -112,8 +114,8 @@ struct llama_sbatch {
         if (ubatch.equal_seqs) {
             for (size_t i = 0; i < length; ++i) {
                 memcpy(
-                    ubatch.embd + n_embd * (ubatch.n_tokens + i),
-                    batch->embd + n_embd * ids[seq.offset + i],
+                    ubatch.embd + (n_embd * (ubatch.n_tokens + i)),
+                    batch->embd + (n_embd * ids[seq.offset + i]),
                     n_embd * sizeof(float)
                 );
             }
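
The extra parentheses added to the memcpy source and destination expressions above do not change the arithmetic, since multiplication already binds tighter than the pointer addition; they only make the element-offset grouping explicit for the reader.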

src/llama-context.cpp

Lines changed: 7 additions & 0 deletions
@@ -1,5 +1,7 @@
 #include "llama-context.h"

+#include <stdexcept>
+
 // deprecated
 size_t llama_get_state_size(struct llama_context * ctx) {
     return llama_state_get_size(ctx);
@@ -968,3 +970,8 @@ size_t llama_state_seq_load_file(struct llama_context * ctx, const char * filepa
     }
 }

+const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
+    struct llama_context * ctx
+) {
+    return ctx->model.tensors_by_name;
+}
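
The new out-of-line definition exposes the model's name/tensor pairs so that the quantize-stats example can iterate them without reaching into llama internals directly. A hedged sketch of a caller, assuming `ctx` is a valid llama_context created elsewhere (the helper name dump_tensor_names is illustrative, not from the commit):

    #include "llama-context.h"
    #include "ggml.h"

    #include <cstdio>

    // Print every named tensor in the loaded model along with its element count.
    static void dump_tensor_names(struct llama_context * ctx) {
        const auto & tensors = llama_internal_get_tensor_map(ctx);
        for (const auto & kv : tensors) {
            // kv.first is the tensor name, kv.second the ggml tensor
            std::printf("%s: %lld\n", kv.first.c_str(), (long long) ggml_nelements(kv.second));
        }
    }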

src/llama-context.h

Lines changed: 4 additions & 0 deletions
@@ -219,3 +219,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
         out_ids.clear();
     }
 }
+
+// For internal test use
+// TODO: remove
+const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(struct llama_context * ctx);

src/llama-grammar.cpp

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 #include "llama-grammar.h"

+#include "llama-impl.h"
 #include "llama-vocab.h"
 #include "llama-sampling.h"

src/llama-grammar.h

Lines changed: 3 additions & 1 deletion
@@ -1,8 +1,10 @@
 #pragma once

-#include "llama-impl.h"
+#include "llama.h"

 #include <map>
+#include <string>
+#include <vector>

 struct llama_vocab;

src/llama-impl.cpp

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+#include "llama-impl.h"
+
+#include "llama.h"
+
+#include <cstdarg>
+
+struct llama_logger_state {
+    ggml_log_callback log_callback = llama_log_callback_default;
+    void * log_callback_user_data = nullptr;
+};
+
+static llama_logger_state g_logger_state;
+
+time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
+
+time_meas::~time_meas() {
+    if (t_start_us >= 0) {
+        t_acc += ggml_time_us() - t_start_us;
+    }
+}
+
+void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+    if (search.empty()) {
+        return;
+    }
+    std::string builder;
+    builder.reserve(s.length());
+    size_t pos = 0;
+    size_t last_pos = 0;
+    while ((pos = s.find(search, last_pos)) != std::string::npos) {
+        builder.append(s, last_pos, pos - last_pos);
+        builder.append(replace);
+        last_pos = pos + search.length();
+    }
+    builder.append(s, last_pos, std::string::npos);
+    s = std::move(builder);
+}
+
+void llama_log_set(ggml_log_callback log_callback, void * user_data) {
+    ggml_log_set(log_callback, user_data);
+    g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
+    g_logger_state.log_callback_user_data = user_data;
+}
+
+static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
+    va_list args_copy;
+    va_copy(args_copy, args);
+    char buffer[128];
+    int len = vsnprintf(buffer, 128, format, args);
+    if (len < 128) {
+        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
+    } else {
+        char * buffer2 = new char[len + 1];
+        vsnprintf(buffer2, len + 1, format, args_copy);
+        buffer2[len] = 0;
+        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
+        delete[] buffer2;
+    }
+    va_end(args_copy);
+}
+
+void llama_log_internal(ggml_log_level level, const char * format, ...) {
+    va_list args;
+    va_start(args, format);
+    llama_log_internal_v(level, format, args);
+    va_end(args);
+}
+
+void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
+    (void) level;
+    (void) user_data;
+    fputs(text, stderr);
+    fflush(stderr);
+}
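
The new translation unit centralizes llama's logging state: llama_log_set forwards the callback to ggml and keeps a local copy for llama's own messages, while llama_log_internal_v formats into a 128-byte stack buffer and falls back to a heap allocation only for longer messages (hence the va_copy, since the va_list may be consumed twice). A hedged usage sketch of the public entry point; the callback and file name are illustrative:

    #include "llama.h"

    #include <cstdio>

    // Illustrative callback: forward all llama/ggml log output to a file.
    static void log_to_file(enum ggml_log_level level, const char * text, void * user_data) {
        (void) level;
        std::fputs(text, (FILE *) user_data);
    }

    int main() {
        FILE * f = std::fopen("llama.log", "w");  // hypothetical log destination
        if (f != nullptr) {
            llama_log_set(log_to_file, f);        // replaces llama_log_callback_default
        }
        // ... call llama APIs here; their log messages now land in llama.log ...
        if (f != nullptr) {
            std::fclose(f);
        }
        return 0;
    }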
