Skip to content

Commit 90568a6

Browse files
author
mike dupont
committed
now server has it
1 parent e8e94f4 commit 90568a6

File tree

2 files changed

+131
-10
lines changed

2 files changed

+131
-10
lines changed

binding.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414
cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"
1515

1616
fileList = [
17-
"ggml.cpp",
18-
"llama.cpp"
17+
# "ggml.cpp",
18+
# "llama.cpp",
19+
"examples/server/server.cpp",
1920
]
2021

2122
typeList = [
@@ -224,10 +225,11 @@ def parse_type_str(typeStr):
224225

225226
def traverse(node, namespace, main_file):
226227
# only scan the elements of the file we parsed
227-
#print("FILE", node.location.file )
228+
228229

229230
if node.kind == clang.cindex.CursorKind.STRUCT_DECL or node.kind == clang.cindex.CursorKind.CLASS_DECL:
230231
fullStructName = "::".join([*namespace, node.displayname])
232+
print("#FILE", node.location.file )
231233
print("REFL_TYPE(" + fullStructName + ")")
232234

233235
structFields = []
@@ -247,14 +249,15 @@ def traverse(node, namespace, main_file):
247249
"type": struct_type,
248250
})
249251
# replica read changes introduced duplicate get requests
250-
if any(map(lambda op: op['name'] == fullStructName, opTypes)):
251-
return
252+
#if any(map(lambda op: op['name'] == fullStructName, opTypes)):
253+
# return
252254

253-
opTypes.append({
254-
"name": fullStructName,
255-
"fields": structFields,
256-
})
255+
#opTypes.append({
256+
# "name": fullStructName,
257+
# "fields": structFields,
258+
#})
257259
print("REFL_END")
260+
258261

259262
if node.kind == clang.cindex.CursorKind.TYPE_ALIAS_DECL:
260263
fullStructName = "::".join([*namespace, node.displayname])

examples/server/server.cpp

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <thread>
2525
#include <mutex>
2626
#include <chrono>
27+
#include "print.hpp"
2728

2829
#ifndef SERVER_VERBOSE
2930
#define SERVER_VERBOSE 1
@@ -33,6 +34,9 @@
3334

3435
using json = nlohmann::json;
3536

37+
REFL_TYPE(std::less< ::nlohmann::detail::value_t>)
38+
REFL_END
39+
3640
struct server_params
3741
{
3842
std::string hostname = "127.0.0.1";
@@ -41,6 +45,13 @@ struct server_params
4145
int32_t read_timeout = 600;
4246
int32_t write_timeout = 600;
4347
};
48+
REFL_TYPE(server_params)
49+
REFL_FIELD(hostname)
50+
REFL_FIELD(public_path)
51+
REFL_FIELD(port)
52+
REFL_FIELD(read_timeout)
53+
REFL_FIELD(write_timeout)
54+
REFL_END
4455

4556
static bool server_verbose = false;
4657

@@ -157,6 +168,15 @@ struct task_server {
157168
bool embedding_mode = false;
158169
};
159170

171+
REFL_TYPE(task_server)
172+
REFL_FIELD(id)
173+
REFL_FIELD(target_id)
174+
REFL_FIELD(type)
175+
REFL_FIELD(data)
176+
REFL_FIELD(infill_mode)
177+
REFL_FIELD(embedding_mode)
178+
REFL_END
179+
160180
struct task_result {
161181
int id;
162182
bool stop;
@@ -193,6 +213,18 @@ struct slot_params
193213
json input_suffix;
194214
};
195215

216+
REFL_TYPE(slot_params)
217+
REFL_FIELD(stream)
218+
REFL_FIELD(cache_prompt)
219+
REFL_FIELD(seed)
220+
REFL_FIELD(n_keep)
221+
REFL_FIELD(n_predict)
222+
REFL_FIELD(antiprompt)
223+
REFL_FIELD(input_prefix)
224+
REFL_FIELD(input_suffix)
225+
REFL_END
226+
227+
196228
struct slot_image
197229
{
198230
int32_t id;
@@ -220,6 +252,17 @@ struct completion_token_output
220252
std::string text_to_send;
221253
};
222254

255+
REFL_TYPE(completion_token_output)
256+
REFL_FIELD(probs)
257+
REFL_FIELD(tok)
258+
REFL_FIELD(text_to_send)
259+
REFL_END
260+
261+
REFL_TYPE(completion_token_output::token_prob)
262+
REFL_FIELD(tok)
263+
REFL_FIELD(prob)
264+
REFL_END
265+
223266
static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
224267
{
225268
size_t i;
@@ -496,6 +539,51 @@ struct llama_client_slot
496539
}
497540
};
498541

542+
//REFL_TYPE(llama_client_slot::llama_sampling_params)
543+
//REFL_END
544+
545+
REFL_TYPE(llama_client_slot)
546+
REFL_FIELD(id)
547+
REFL_FIELD(task_id)
548+
REFL_FIELD(params)
549+
REFL_FIELD(state)
550+
REFL_FIELD(command)
551+
REFL_FIELD(t_last_used)
552+
REFL_FIELD(n_ctx)
553+
REFL_FIELD(n_past)
554+
REFL_FIELD(n_decoded)
555+
REFL_FIELD(n_remaining)
556+
REFL_FIELD(i_batch)
557+
REFL_FIELD(num_prompt_tokens)
558+
REFL_FIELD(num_prompt_tokens_processed)
559+
REFL_FIELD(multibyte_pending)
560+
REFL_FIELD(prompt)
561+
REFL_FIELD(generated_text)
562+
REFL_FIELD(sampled)
563+
REFL_FIELD(cache_tokens)
564+
REFL_FIELD(generated_token_probs)
565+
REFL_FIELD(infill)
566+
REFL_FIELD(embedding)
567+
REFL_FIELD(has_next_token)
568+
REFL_FIELD(truncated)
569+
REFL_FIELD(stopped_eos)
570+
REFL_FIELD(stopped_word)
571+
REFL_FIELD(stopped_limit)
572+
REFL_FIELD(oaicompat)
573+
REFL_FIELD(oaicompat_model)
574+
REFL_FIELD(stopping_word)
575+
REFL_FIELD(sparams)
576+
REFL_FIELD(ctx_sampling)
577+
REFL_FIELD(images)
578+
REFL_FIELD(sent_count)
579+
REFL_FIELD(sent_token_probs_index)
580+
REFL_FIELD(t_start_process_prompt)
581+
REFL_FIELD(t_start_genereration)
582+
REFL_FIELD(t_prompt_processing)
583+
REFL_FIELD(t_token_generation)
584+
REFL_END
585+
586+
499587
struct llama_server_context
500588
{
501589
llama_model *model = nullptr;
@@ -878,7 +966,7 @@ struct llama_server_context
878966
all_slots_are_idle = false;
879967

880968
LOG_TEE("slot %i is processing [task id: %i]\n", slot->id, slot->task_id);
881-
969+
print_fields(*slot);
882970
return true;
883971
}
884972

@@ -1787,6 +1875,31 @@ struct llama_server_context
17871875
}
17881876
};
17891877

1878+
REFL_TYPE(llama_server_context)
1879+
REFL_FIELD(model)
1880+
REFL_FIELD(ctx)
1881+
REFL_FIELD(clp_ctx)
1882+
REFL_FIELD(params)
1883+
REFL_FIELD(batch)
1884+
REFL_FIELD(multimodal)
1885+
REFL_FIELD(clean_kv_cache)
1886+
REFL_FIELD(all_slots_are_idle)
1887+
REFL_FIELD(add_bos_token)
1888+
REFL_FIELD(id_gen)
1889+
REFL_FIELD(n_ctx)
1890+
REFL_FIELD(system_need_update)
1891+
REFL_FIELD(system_prompt)
1892+
REFL_FIELD(system_tokens)
1893+
REFL_FIELD(name_user)
1894+
REFL_FIELD(name_assistant)
1895+
REFL_FIELD(slots)
1896+
REFL_FIELD(queue_tasks)
1897+
REFL_FIELD(queue_results)
1898+
REFL_FIELD(mutex_tasks)
1899+
REFL_FIELD(mutex_results)
1900+
REFL_END
1901+
1902+
17901903
static void server_print_usage(const char *argv0, const gpt_params &params,
17911904
const server_params &sparams)
17921905
{
@@ -2497,6 +2610,11 @@ struct token_translator
24972610
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
24982611
};
24992612

2613+
2614+
REFL_TYPE(token_translator)
2615+
REFL_FIELD(ctx)
2616+
REFL_END
2617+
25002618
static void append_to_generated_text_from_generated_token_probs(llama_server_context &llama, llama_client_slot *slot)
25012619
{
25022620
auto & gtps = slot->generated_token_probs;

0 commit comments

Comments
 (0)