@@ -70,7 +70,7 @@ struct callback_data {
70
70
t_layer->data = malloc (n_bytes); // TODO @ngxson : get rid of this malloc somehow
71
71
ggml_backend_tensor_get (t, t_layer->data , 0 , n_bytes);
72
72
ggml_set_name (t_layer, ggml_get_name (t));
73
- print_debug_tensor (t_layer);
73
+ // print_debug_tensor(t_layer);
74
74
75
75
if (is_eval_pos) {
76
76
v_pos.push_back (t_layer);
@@ -99,7 +99,7 @@ struct callback_data {
99
99
100
100
// delete zero rows from a given 2D tensor
101
101
struct ggml_tensor * filter_nonzero_rows (struct ggml_tensor * a) {
102
- printf (" filter_nonzero_rows\n " );
102
+ // printf("filter_nonzero_rows\n");
103
103
auto is_row_all_zeros = [](struct ggml_tensor * t, int row, float eps) -> bool {
104
104
// check whether the given row contains only zero elements
105
105
int n_cols = t->ne [0 ]; // hint: should be equal to n_embd
@@ -119,7 +119,7 @@ struct callback_data {
119
119
120
120
// get "n_nonzero_rows" for the output "diff_filtered"
121
121
int n_nonzero_rows = rows_to_copy.size ();
122
- printf (" n_nonzero_rows: %d\n " , n_nonzero_rows);
122
+ // printf("n_nonzero_rows: %d\n", n_nonzero_rows);
123
123
int n_embd = a->ne [0 ];
124
124
GGML_ASSERT (n_nonzero_rows > 0 );
125
125
@@ -138,7 +138,7 @@ struct callback_data {
138
138
}
139
139
}
140
140
141
- print_debug_tensor (diff_filtered);
141
+ // print_debug_tensor(diff_filtered);
142
142
143
143
return diff_filtered;
144
144
}
@@ -169,7 +169,8 @@ struct train_context {
169
169
170
170
// each element of the vector correspond to one layer
171
171
// NOTE: the last layer is discarded; therefore, we will have (n_layers - 1) elements here
172
- std::vector<struct ggml_tensor *> v_diff; // vector of matrices of size [n_embd, m] where m ~ n_tokens * n_completions (v_diff contains no zero-rows)
172
+ // NOTE (2): v_diff is transposed from v_diff_tmp
173
+ std::vector<struct ggml_tensor *> v_diff; // vector of matrices of size [m, n_embd] where m ~ n_tokens * n_completions (v_diff contains no zero-rows)
173
174
std::vector<struct ggml_tensor *> v_final; // vector of vectors of size [n_embd] to be written to file
174
175
175
176
// to easily re-alloc when concatenating v_diff, we temporarily store v_diff in a vector instead of a tensor
@@ -196,7 +197,7 @@ struct train_context {
196
197
197
198
// add new rows into existing tensor in v_diff_tmp
198
199
void concat_diff_tmp (const std::vector<struct ggml_tensor *> & diff_filtered) {
199
- GGML_ASSERT (diff_filtered.size () == n_layers - 1 );
200
+ GGML_ASSERT (( int ) diff_filtered.size () == n_layers - 1 );
200
201
for (int il = 0 ; il < n_layers - 1 ; il++) {
201
202
auto t = diff_filtered[il];
202
203
auto & diff_tmp = v_diff_tmp[il];
@@ -206,32 +207,46 @@ struct train_context {
206
207
}
207
208
}
208
209
209
- // build the v_diff tensors from v_diff_tmp
210
+ // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
210
211
void build_v_diff () {
212
+ printf (" build_v_diff\n " );
211
213
for (int il = 0 ; il < n_layers - 1 ; il++) {
212
214
auto & diff_tmp = v_diff_tmp[il];
213
215
int n_elem = diff_tmp.size () / sizeof (float );
216
+ GGML_ASSERT (n_elem % n_embd == 0 );
214
217
int n_rows = n_elem / n_embd;
215
218
struct ggml_tensor * diff = ggml_new_tensor_2d (ctx_ggml, GGML_TYPE_F32, n_rows, n_embd);
216
219
ggml_set_name (diff, (std::string (" diff_" ) + std::to_string (il)).c_str ());
217
- // TODO: IMPORTANT!! transpose diff
218
- diff->data = diff_tmp.data ();
220
+ // copy data & transpose
221
+ diff->data = malloc (ggml_nbytes (diff)); // TODO: get rid of this malloc if possible
222
+ float * arr = (float *) diff_tmp.data ();
223
+ for (int ir = 0 ; ir < n_rows; ++ir) {
224
+ for (int ic = 0 ; ic < n_embd; ++ic) {
225
+ float f = arr[ir*n_embd + ic];
226
+ // std::cout << ir << "," << ic << " = " << f << "\n";
227
+ ggml_set_f32_nd (diff, ir, ic, 0 , 0 , f);
228
+ }
229
+ }
219
230
v_diff.push_back (diff);
231
+ print_debug_tensor (diff);
232
+ // free memory of diff_tmp
233
+ diff_tmp.resize (0 );
220
234
}
221
235
}
222
236
223
237
// Destructor: releases the tensor data buffers referenced by v_final and
// v_diff, then frees the ggml context.
~train_context() {
    // data buffers of the output tensors are released with free()
    for (struct ggml_tensor * t_final : v_final) {
        free(t_final->data);
    }
    // same for the per-layer diff tensors
    for (struct ggml_tensor * t_diff : v_diff) {
        free(t_diff->data);
    }
    // v_diff_tmp needs no explicit free here, since malloc was not used for it
    ggml_free(ctx_ggml);
}
228
243
};
229
244
230
245
struct ctrl_params {
231
246
/* default meta parameters */
232
- bool always_reload = false ;
233
247
int n_completions = 64 ;
234
- int n_threads = 8 ;
248
+ int n_pca_batch = 5 ;
249
+ int n_pca_iterations = 1000 ;
235
250
236
251
/* default filepaths */
237
252
std::string outfile = " control_vector.gguf" ;
@@ -295,9 +310,10 @@ static void print_usage(const char * executable) {
295
310
printf (" default: 'examples/control-vector-generator/completions.txt'\n " );
296
311
printf (" -nc, --num-completions N number of lines of completions file to use\n " );
297
312
printf (" default: 64\n " );
298
- printf (" -t, --num-threads N number of threads to use (do not confuse with gpt-opts -t)\n " );
299
- printf (" default: 8\n " );
300
- printf (" --always-reload reload the model for every new template to parse (not recommended)\n " );
313
+ printf (" --batch-pca N batch size used for PCA\n " );
314
+ printf (" default: 5\n " );
315
+ printf (" --iter-pca N number of iterations used for PCA\n " );
316
+ printf (" default: 1000\n " );
301
317
printf (" \n " );
302
318
printf (" gpt-opts:\n " );
303
319
printf (" other options from main\n " );
@@ -370,10 +386,10 @@ static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params)
370
386
throw std::invalid_argument (" error: missing argument for " + arg);
371
387
}
372
388
}
373
- if (arg == " --num-threads " || arg == " -t " ) {
389
+ if (arg == " --pca-batch " ) {
374
390
if (++arg_idx < argc && strncmp (argv[arg_idx], arg_prefix.c_str (), 2 ) != 0 ) {
375
391
try {
376
- params.n_threads = std::stoi (argv[arg_idx]);
392
+ params.n_pca_batch = std::stoi (argv[arg_idx]);
377
393
}
378
394
catch (const std::invalid_argument & ex) {
379
395
throw std::invalid_argument (" error: invalid argument for " + arg);
@@ -383,9 +399,18 @@ static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params)
383
399
throw std::invalid_argument (" error: missing argument for " + arg);
384
400
}
385
401
}
386
- if (arg == " --always-reload" ) {
387
- params.always_reload = true ;
388
- skipme += 1 ;
402
+ if (arg == " --pca-iter" ) {
403
+ if (++arg_idx < argc && strncmp (argv[arg_idx], arg_prefix.c_str (), 2 ) != 0 ) {
404
+ try {
405
+ params.n_pca_iterations = std::stoi (argv[arg_idx]);
406
+ }
407
+ catch (const std::invalid_argument & ex) {
408
+ throw std::invalid_argument (" error: invalid argument for " + arg);
409
+ }
410
+ skipme += 2 ;
411
+ } else {
412
+ throw std::invalid_argument (" error: missing argument for " + arg);
413
+ }
389
414
}
390
415
// TODO it might be nice QoL to have single positive/negative args
391
416
// we do not handle any other unknown arguments here because they will be handled by gpt_parse_params
@@ -427,7 +452,7 @@ static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool
427
452
428
453
static bool cb_eval (struct ggml_tensor * t, bool ask, void * user_data) {
429
454
auto * cb_data = (callback_data *) user_data;
430
- auto ggml_ne_string = [](const ggml_tensor * t) -> std::string {
455
+ /* auto ggml_ne_string = [](const ggml_tensor * t) -> std::string {
431
456
std::string str;
432
457
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
433
458
str += std::to_string(t->ne[i]);
@@ -436,7 +461,7 @@ static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
436
461
}
437
462
}
438
463
return str;
439
- };
464
+ };*/
440
465
441
466
static const char * l_out_name = " l_out" ;
442
467
const bool is_l_out = strncmp (t->name , l_out_name, strlen (l_out_name)) == 0 ;
@@ -473,6 +498,7 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const
473
498
474
499
for (size_t i = 0 ; i < v_ctrl.size (); ++i) {
475
500
gguf_add_tensor (ctx, v_ctrl[i]);
501
+ print_debug_tensor (v_ctrl[i]);
476
502
printf (" Added tensor: %s\n " , v_ctrl[i]->name );
477
503
}
478
504
@@ -489,7 +515,7 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const
489
515
* Load prompt files and completion file.
490
516
* Then format each pair of prompt + completion to make an entry.
491
517
*/
492
- int prepare_entries (ctrl_params & cparams) {
518
+ static int prepare_entries (ctrl_params & cparams) {
493
519
// load prompts
494
520
std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file (cparams.positive_prompts_file );
495
521
std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file (cparams.negative_prompts_file );
@@ -511,7 +537,7 @@ int prepare_entries(ctrl_params & cparams) {
511
537
// TODO make this dynamic - allow the user to change it somehow - and adapt based on model
512
538
return persona + " " + suffix; // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]"
513
539
};
514
- for (int i = 0 ; i < positive_prompts.size (); ++i) {
540
+ for (size_t i = 0 ; i < positive_prompts.size (); ++i) {
515
541
for (auto & cmpl : completions) {
516
542
// TODO replicate the truncations done by the python implementation
517
543
cparams.positive_entries .push_back (format_template (positive_prompts[i], cmpl));
@@ -553,7 +579,7 @@ int main(int argc, char ** argv) {
553
579
llama_context * ctx;
554
580
std::tie (model, ctx) = llama_init_from_gpt_params (params);
555
581
556
- int n_ctx = llama_n_ctx (ctx);
582
+ // int n_ctx = llama_n_ctx(ctx);
557
583
int n_layers = llama_n_layer (model);
558
584
int n_embd = llama_n_embd (model);
559
585
// get model hint param (a.k.a model arch name)
@@ -574,29 +600,13 @@ int main(int argc, char ** argv) {
574
600
// init train_context
575
601
train_context ctx_train (n_embd, n_layers);
576
602
577
- int token_ct = 0 ;
578
-
579
603
for (size_t i = 0 ; i < cparams.positive_entries .size (); ++i) {
580
604
tokenized_prompt t = tokenized_prompts[i];
581
605
cb_data.n_layers = n_layers;
582
606
cb_data.n_tokens = t.max_seq_len ;
583
607
584
- // need to reload the model so it doesn't run out of context
585
- // this should scale with -c option passed by main
586
- token_ct += 2 * t.max_seq_len ;
587
- if (token_ct > n_ctx || cparams.always_reload ) {
588
- // break;
589
- llama_free (ctx);
590
- llama_free_model (model);
591
- std::tie (model, ctx) = llama_init_from_gpt_params (params);
592
- token_ct = 2 * t.max_seq_len ;
593
- }
594
- if (token_ct > n_ctx) {
595
- fprintf (stderr, " context size exceeded on iteration %zu\n " , i);
596
- break ;
597
- }
598
-
599
- printf (" Evaluating prompt: \" %s\" - \" %s\" (%ld tokens)\n " ,
608
+ printf (" Evaluating prompt[%ld/%ld]: \" %s\" - \" %s\" (%ld tokens)\n " ,
609
+ i+1 , t.tokens_pos .size (),
600
610
tokens_to_str (ctx, t.tokens_pos .cbegin (), t.tokens_pos .cend ()).c_str (),
601
611
tokens_to_str (ctx, t.tokens_neg .cbegin (), t.tokens_neg .cend ()).c_str (),
602
612
t.max_seq_len );
@@ -610,12 +620,10 @@ int main(int argc, char ** argv) {
610
620
auto v_diff_filtered = cb_data.calc_diff ();
611
621
612
622
// save & concat the filtered v_diff to ctx_train
613
- printf (" concat_diff_tmp\n " );
614
623
ctx_train.concat_diff_tmp (v_diff_filtered);
615
624
616
625
// reset for next iteration
617
626
cb_data.reset ();
618
- printf (" reset\n " );
619
627
}
620
628
621
629
// done with the model, we can now free it to regain some memory
@@ -628,8 +636,10 @@ int main(int argc, char ** argv) {
628
636
629
637
// run PCA
630
638
PCA::pca_params pca_params;
639
+ pca_params.n_threads = params.n_threads ;
640
+ pca_params.n_batch = cparams.n_pca_batch ;
641
+ pca_params.n_iterations = cparams.n_pca_iterations ;
631
642
PCA::run_pca (pca_params, ctx_train.v_diff , ctx_train.v_final );
632
- exit (0 ); // TODO: REMOVE ME !!!!!!!!!!!!!!!!!!!!!!!!
633
643
634
644
// write output vectors to gguf
635
645
export_gguf (ctx_train.v_final , cparams.outfile , model_hint);
0 commit comments