24
24
#include < thread>
25
25
#include < mutex>
26
26
#include < chrono>
27
+ #include " print.hpp"
27
28
28
29
#ifndef SERVER_VERBOSE
29
30
#define SERVER_VERBOSE 1
33
34
34
35
using json = nlohmann::json;
35
36
37
// Reflection registration for std::less< ::nlohmann::detail::value_t > with an
// empty field list (nothing sits between REFL_TYPE and REFL_END).
// NOTE(review): the REFL_* macros are not defined in this file; presumably they
// arrive through "print.hpp" — confirm. Presumably this empty entry exists only
// so that reflecting over json-typed members compiles — confirm it is needed.
+ REFL_TYPE (std::less< ::nlohmann::detail::value_t >)
38
+ REFL_END
39
+
36
40
struct server_params
37
41
{
38
42
std::string hostname = " 127.0.0.1" ;
@@ -41,6 +45,13 @@ struct server_params
41
45
int32_t read_timeout = 600 ;
42
46
int32_t write_timeout = 600 ;
43
47
};
48
// Reflection registration for server_params.
// Lists the members hostname, public_path, port, read_timeout and
// write_timeout, in that order, between REFL_TYPE and REFL_END.
// NOTE(review): each REFL_FIELD name presumably has to match a member of the
// struct exactly; keep this list in sync when server_params changes — confirm.
+ REFL_TYPE (server_params)
49
+ REFL_FIELD(hostname)
50
+ REFL_FIELD(public_path)
51
+ REFL_FIELD(port)
52
+ REFL_FIELD(read_timeout)
53
+ REFL_FIELD(write_timeout)
54
+ REFL_END
44
55
45
56
static bool server_verbose = false;
46
57
@@ -157,6 +168,15 @@ struct task_server {
157
168
bool embedding_mode = false ;
158
169
};
159
170
171
// Reflection registration for task_server.
// Lists the members id, target_id, type, data, infill_mode and embedding_mode,
// in that order, between REFL_TYPE and REFL_END.
// NOTE(review): each REFL_FIELD name presumably has to match a member of the
// struct exactly; keep this list in sync when task_server changes — confirm.
+ REFL_TYPE (task_server)
172
+ REFL_FIELD(id)
173
+ REFL_FIELD(target_id)
174
+ REFL_FIELD(type)
175
+ REFL_FIELD(data)
176
+ REFL_FIELD(infill_mode)
177
+ REFL_FIELD(embedding_mode)
178
+ REFL_END
179
+
160
180
struct task_result {
161
181
int id;
162
182
bool stop;
@@ -193,6 +213,18 @@ struct slot_params
193
213
json input_suffix;
194
214
};
195
215
216
// Reflection registration for slot_params.
// Lists the members stream, cache_prompt, seed, n_keep, n_predict, antiprompt,
// input_prefix and input_suffix, in that order, between REFL_TYPE and REFL_END.
// NOTE(review): each REFL_FIELD name presumably has to match a member of the
// struct exactly; keep this list in sync when slot_params changes — confirm.
+ REFL_TYPE (slot_params)
217
+ REFL_FIELD(stream)
218
+ REFL_FIELD(cache_prompt)
219
+ REFL_FIELD(seed)
220
+ REFL_FIELD(n_keep)
221
+ REFL_FIELD(n_predict)
222
+ REFL_FIELD(antiprompt)
223
+ REFL_FIELD(input_prefix)
224
+ REFL_FIELD(input_suffix)
225
+ REFL_END
226
+
227
+
196
228
struct slot_image
197
229
{
198
230
int32_t id;
@@ -220,6 +252,17 @@ struct completion_token_output
220
252
std::string text_to_send;
221
253
};
222
254
255
// Reflection registration for completion_token_output.
// Lists the members probs, tok and text_to_send, in that order, between
// REFL_TYPE and REFL_END.
// NOTE(review): the nested type completion_token_output::token_prob gets its
// own separate registration; presumably both are needed for probs to be
// traversed — confirm.
+ REFL_TYPE (completion_token_output)
256
+ REFL_FIELD(probs)
257
+ REFL_FIELD(tok)
258
+ REFL_FIELD(text_to_send)
259
+ REFL_END
260
+
261
// Reflection registration for the nested type
// completion_token_output::token_prob.
// Lists the members tok and prob, in that order, between REFL_TYPE and REFL_END.
+ REFL_TYPE(completion_token_output::token_prob)
262
+ REFL_FIELD(tok)
263
+ REFL_FIELD(prob)
264
+ REFL_END
265
+
223
266
static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
224
267
{
225
268
size_t i;
@@ -496,6 +539,51 @@ struct llama_client_slot
496
539
}
497
540
};
498
541
542
+ // REFL_TYPE(llama_client_slot::llama_sampling_params)
543
+ // REFL_END
544
+
545
// Reflection registration for llama_client_slot.
// Every REFL_FIELD below names one member, in the order listed.
// This file calls print_fields(*slot) right after logging
// "slot %i is processing"; presumably that call consumes this registration
// to dump the slot's state — confirm.
// NOTE(review): "t_start_genereration" is spelled this way here; the name
// presumably has to match the struct member exactly, so check the struct
// before "fixing" the spelling.
// NOTE(review): sparams and ctx_sampling are listed, while the registration
// for llama_client_slot::llama_sampling_params just above is commented out —
// confirm the printer can handle those member types.
+ REFL_TYPE (llama_client_slot)
546
+ REFL_FIELD(id)
547
+ REFL_FIELD(task_id)
548
+ REFL_FIELD(params)
549
+ REFL_FIELD(state)
550
+ REFL_FIELD(command)
551
+ REFL_FIELD(t_last_used)
552
+ REFL_FIELD(n_ctx)
553
+ REFL_FIELD(n_past)
554
+ REFL_FIELD(n_decoded)
555
+ REFL_FIELD(n_remaining)
556
+ REFL_FIELD(i_batch)
557
+ REFL_FIELD(num_prompt_tokens)
558
+ REFL_FIELD(num_prompt_tokens_processed)
559
+ REFL_FIELD(multibyte_pending)
560
+ REFL_FIELD(prompt)
561
+ REFL_FIELD(generated_text)
562
+ REFL_FIELD(sampled)
563
+ REFL_FIELD(cache_tokens)
564
+ REFL_FIELD(generated_token_probs)
565
+ REFL_FIELD(infill)
566
+ REFL_FIELD(embedding)
567
+ REFL_FIELD(has_next_token)
568
+ REFL_FIELD(truncated)
569
+ REFL_FIELD(stopped_eos)
570
+ REFL_FIELD(stopped_word)
571
+ REFL_FIELD(stopped_limit)
572
+ REFL_FIELD(oaicompat)
573
+ REFL_FIELD(oaicompat_model)
574
+ REFL_FIELD(stopping_word)
575
+ REFL_FIELD(sparams)
576
+ REFL_FIELD(ctx_sampling)
577
+ REFL_FIELD(images)
578
+ REFL_FIELD(sent_count)
579
+ REFL_FIELD(sent_token_probs_index)
580
+ REFL_FIELD(t_start_process_prompt)
581
+ REFL_FIELD(t_start_genereration)
582
+ REFL_FIELD(t_prompt_processing)
583
+ REFL_FIELD(t_token_generation)
584
+ REFL_END
585
+
586
+
499
587
struct llama_server_context
500
588
{
501
589
llama_model *model = nullptr ;
@@ -878,7 +966,7 @@ struct llama_server_context
878
966
all_slots_are_idle = false ;
879
967
880
968
LOG_TEE (" slot %i is processing [task id: %i]\n " , slot->id , slot->task_id );
881
-
969
+ print_fields (*slot);
882
970
return true ;
883
971
}
884
972
@@ -1787,6 +1875,31 @@ struct llama_server_context
1787
1875
}
1788
1876
};
1789
1877
1878
// Reflection registration for llama_server_context.
// Every REFL_FIELD below names one member, in the order listed.
// NOTE(review): mutex_tasks and mutex_results look like synchronization
// primitives, and queue_tasks / queue_results / slots like state guarded by
// them (judging by name only). Confirm that whatever walks this registration
// can handle those member types and does not read the queues unlocked.
+ REFL_TYPE (llama_server_context)
1879
+ REFL_FIELD(model)
1880
+ REFL_FIELD(ctx)
1881
+ REFL_FIELD(clp_ctx)
1882
+ REFL_FIELD(params)
1883
+ REFL_FIELD(batch)
1884
+ REFL_FIELD(multimodal)
1885
+ REFL_FIELD(clean_kv_cache)
1886
+ REFL_FIELD(all_slots_are_idle)
1887
+ REFL_FIELD(add_bos_token)
1888
+ REFL_FIELD(id_gen)
1889
+ REFL_FIELD(n_ctx)
1890
+ REFL_FIELD(system_need_update)
1891
+ REFL_FIELD(system_prompt)
1892
+ REFL_FIELD(system_tokens)
1893
+ REFL_FIELD(name_user)
1894
+ REFL_FIELD(name_assistant)
1895
+ REFL_FIELD(slots)
1896
+ REFL_FIELD(queue_tasks)
1897
+ REFL_FIELD(queue_results)
1898
+ REFL_FIELD(mutex_tasks)
1899
+ REFL_FIELD(mutex_results)
1900
+ REFL_END
1901
+
1902
+
1790
1903
static void server_print_usage(const char *argv0, const gpt_params ¶ms,
1791
1904
const server_params &sparams)
1792
1905
{
@@ -2497,6 +2610,11 @@ struct token_translator
2497
2610
std::string operator ()(const completion_token_output &cto) const { return (*this )(cto.tok ); }
2498
2611
};
2499
2612
2613
+
2614
// Reflection registration for token_translator.
// Lists a single member, ctx, between REFL_TYPE and REFL_END.
+ REFL_TYPE (token_translator)
2615
+ REFL_FIELD(ctx)
2616
+ REFL_END
2617
+
2500
2618
static void append_to_generated_text_from_generated_token_probs(llama_server_context &llama, llama_client_slot *slot)
2501
2619
{
2502
2620
auto & gtps = slot->generated_token_probs ;
0 commit comments