 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+#include "print.hpp"
+
 static llama_context           ** g_ctx;
 static llama_model             ** g_model;
 static gpt_params               * g_params;
@@ -99,6 +101,7 @@ static void sigint_handler(int signo) {
     }
 }
 #endif
+using namespace refl;
 
 int main(int argc, char ** argv) {
     gpt_params params;
@@ -117,7 +120,8 @@ int main(int argc, char ** argv) {
 
     // TODO: Dump params ?
     //LOG("Params perplexity: %s\n", LOG_TOSTR(params.perplexity));
-
+    print_fields(params);
+
     // save choice to use color for later
     // (note for later: this is a slightly awkward choice)
     console::init(params.simple_io, params.use_color);
@@ -234,6 +238,8 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
+    print_fields(*model);
+
     if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
         LOG("tokenize the prompt\n");
         if (params.chatml) {
@@ -277,7 +283,8 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
         return 1;
     }
-
+    print_fields(*ctx);
+    //print_fields(session_tokens);
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
     if (!session_tokens.empty()) {
@@ -365,6 +372,10 @@ int main(int argc, char ** argv) {
         for (int i = 0; i < (int) guidance_inp.size(); i++) {
             LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
         }
+
+        print_fields(*ctx_guidance);
+
+
     }
 
     if (params.n_keep > 0) {
@@ -473,7 +484,8 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd_guidance;
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
-
+    print_fields(*ctx_sampling);
+
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (!embd.empty()) {
@@ -508,6 +520,7 @@ int main(int argc, char ** argv) {
                 LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
                     n_past, n_left, n_ctx, params.n_keep, n_discard);
 
+                print_fields(*ctx);
                 llama_kv_cache_seq_rm   (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
                 llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 
@@ -624,7 +637,7 @@ int main(int argc, char ** argv) {
             }
 
             const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);
-
+            //print_fields(id);
             llama_sampling_accept(ctx_sampling, ctx, id, true);
 
             LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());