Commit 49c03c7

cvector: better prompt handling, add "mean vector" method (#8069)

* remove completions file
* fix inverted vector
* add mean method
* code style
* remove inverted pca hotfix

1 parent 48e6b92 commit 49c03c7

8 files changed (+133, -60 lines changed)

common/common.cpp

Lines changed: 9 additions & 13 deletions

@@ -1263,11 +1263,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         return true;
     }
     // cvector params
-    if (arg == "--completions-file") {
-        CHECK_ARG
-        params.cvector_completions_file = argv[i];
-        return true;
-    }
     if (arg == "--positive-file") {
         CHECK_ARG
         params.cvector_positive_file = argv[i];
@@ -1278,11 +1273,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.cvector_negative_file = argv[i];
         return true;
     }
-    if (arg == "--completions") {
-        CHECK_ARG
-        params.n_completions = std::stoi(argv[i]);
-        return true;
-    }
     if (arg == "--pca-batch") {
         CHECK_ARG
         params.n_pca_batch = std::stoi(argv[i]);
@@ -1293,6 +1283,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.n_pca_iterations = std::stoi(argv[i]);
         return true;
     }
+    if (arg == "--method") {
+        CHECK_ARG
+        std::string value(argv[i]);
+        /**/ if (value == "pca")  { params.cvector_dimre_method = DIMRE_METHOD_PCA; }
+        else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; }
+        else { invalid_param = true; }
+        return true;
+    }
 #ifndef LOG_DISABLE_LOGS
     // Parse args for logging parameters
     if (log_param_single_parse(argv[i])) {
@@ -1626,11 +1624,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "cvector", "-o, --output FNAME", "output file (default: '%s')", params.cvector_outfile.c_str() });
     options.push_back({ "cvector", " --positive-file FNAME", "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() });
     options.push_back({ "cvector", " --negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
-    options.push_back({ "cvector", " --completions-file FNAME",
-                        "completions file (default: '%s')", params.cvector_completions_file.c_str() });
-    options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", params.n_completions });
     options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
     options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
+    options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" });
 
     printf("usage: %s [options]\n", argv[0]);

common/common.h

Lines changed: 11 additions & 6 deletions

@@ -52,6 +52,12 @@ int32_t cpu_get_num_math();
 // CLI argument parsing
 //
 
+// dimensionality reduction methods, used by cvector-generator
+enum dimre_method {
+    DIMRE_METHOD_PCA,
+    DIMRE_METHOD_MEAN,
+};
+
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
 
@@ -238,13 +244,12 @@ struct gpt_params {
     bool compute_ppl = true; // whether to compute perplexity
 
     // cvector-generator params
-    int n_completions = 64;
-    int n_pca_batch = 20;
+    int n_pca_batch = 100;
     int n_pca_iterations = 1000;
-    std::string cvector_outfile = "control_vector.gguf";
-    std::string cvector_completions_file = "examples/cvector-generator/completions.txt";
-    std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
-    std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
+    dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
+    std::string cvector_outfile = "control_vector.gguf";
+    std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
+    std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
 };
 
 void gpt_params_handle_model_default(gpt_params & params);

examples/cvector-generator/README.md

Lines changed: 14 additions & 3 deletions

@@ -11,13 +11,16 @@ Related PRs:
 
 ```sh
 # CPU only
-./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf
+./cvector-generator -m ./llama-3.Q4_K_M.gguf
 
 # With GPU
-./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
+./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99
 
 # With advanced options
-./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100
+./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100
+
+# Using mean value instead of PCA
+./cvector-generator -m ./llama-3.Q4_K_M.gguf --method mean
 
 # To see help message
 ./cvector-generator -h
@@ -32,3 +35,11 @@ If you have multiple lines per prompt, you can escape the newline character (cha
 <|im_start|>system\nAct like a person who is extremely happy.<|im_end|>
 <|im_start|>system\nYou are in a very good mood today<|im_end|>
 ```
+
+Example to use output file with `llama-cli`:
+
+(Tip: the control vector works better when applied to layers higher than 10)
+
+```sh
+./llama-cli -m ./llama-3.Q4_K_M.gguf -p "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSing a song<|im_end|><|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" --special --control-vector-scaled ./control_vector.gguf 0.8 --control-vector-layer-range 10 31
+```

examples/cvector-generator/cvector-generator.cpp

Lines changed: 39 additions & 35 deletions

@@ -2,6 +2,7 @@
 #include "llama.h"
 #include "ggml.h"
 #include "pca.hpp"
+#include "mean.hpp"
 
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
@@ -38,9 +39,10 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
     gpt_params_print_usage(argc, argv, params);
 
     printf("\nexample usage:\n");
-    printf("\n CPU only: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf\n", argv[0]);
-    printf("\n with GPU: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99\n", argv[0]);
-    printf("\n advanced: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100\n", argv[0]);
+    printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
+    printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);
+    printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]);
+    printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]);
     printf("\n");
 }
 
@@ -223,23 +225,30 @@ struct train_context {
 
     // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
    // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method
-    void build_v_diff() {
+    void build_v_diff(bool transpose) {
         printf("build_v_diff\n");
         for (int il = 0; il < n_layers - 1; il++) {
             auto & diff_tmp = v_diff_tmp[il];
             int n_elem = diff_tmp.size() / sizeof(float);
             GGML_ASSERT(n_elem % n_embd == 0);
             int n_rows = n_elem / n_embd;
-            struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd);
+            struct ggml_tensor * diff = transpose
+                ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
+                : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
             ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-            // copy data & transpose
             diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
-            float * arr = (float *) diff_tmp.data();
-            for (int ir = 0; ir < n_rows; ++ir) {
-                for (int ic = 0; ic < n_embd; ++ic) {
-                    float f = arr[ir*n_embd + ic];
-                    ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
+            if (transpose) {
+                // copy data & transpose
+                float * arr = (float *) diff_tmp.data();
+                for (int ir = 0; ir < n_rows; ++ir) {
+                    for (int ic = 0; ic < n_embd; ++ic) {
+                        float f = arr[ir*n_embd + ic];
+                        ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
+                    }
                 }
+            } else {
+                // only copy
+                memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
             }
             v_diff.push_back(diff);
             print_debug_tensor(diff);
@@ -263,8 +272,8 @@ struct tokenized_prompt {
 
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
         const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-        tokens_pos = ::llama_tokenize(ctx, pos, add_bos);
-        tokens_neg = ::llama_tokenize(ctx, neg, add_bos);
+        tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
+        tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
         padding_seq(ctx, tokens_pos, max_seq_len);
         padding_seq(ctx, tokens_neg, max_seq_len);
@@ -373,20 +382,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) {
         fprintf(stderr, "must provide at least one prompt pair\n");
         return 1;
     }
-
-    // create templated prompts
-    std::vector<std::string> completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false);
-    auto format_template = [](std::string persona, std::string suffix) {
-        // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST] "
-        return persona + suffix;
-    };
-    for (size_t i = 0; i < positive_prompts.size(); ++i) {
-        for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) {
-            // TODO replicate the truncations done by the python implementation
-            ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
-            ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
-        }
-    }
+    ctx_train.positive_entries = positive_prompts;
+    ctx_train.negative_entries = negative_prompts;
     return 0;
 }
 
@@ -480,15 +477,22 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
     llama_free_model(model);
 
+    bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA;
+
     // prepare ctx_train for PCA
-    ctx_train.build_v_diff();
-
-    // run PCA
-    PCA::pca_params pca_params;
-    pca_params.n_threads = params.n_threads;
-    pca_params.n_batch = params.n_pca_batch;
-    pca_params.n_iterations = params.n_pca_iterations;
-    PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
+    ctx_train.build_v_diff(use_pca);
+
+    if (use_pca) {
+        // run PCA
+        PCA::pca_params pca_params;
+        pca_params.n_threads = params.n_threads;
+        pca_params.n_batch = params.n_pca_batch;
+        pca_params.n_iterations = params.n_pca_iterations;
+        PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
+    } else {
+        // run mean
+        mean::run(ctx_train.v_diff, ctx_train.v_final);
+    }
 
     // write output vectors to gguf
     export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);

examples/cvector-generator/mean.hpp

Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@
+#include "common.h"
+#include "llama.h"
+#include "ggml.h"
+
+#include <string>
+#include <vector>
+#include <math.h>
+
+namespace mean {
+
+static void run(
+        const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_embd, n_samples]
+        const std::vector<struct ggml_tensor *> & v_output) {
+    printf("%s: Running mean...\n", __func__);
+    for (size_t il = 0; il < v_input.size(); ++il) {
+        // prepare output vector
+        struct ggml_tensor * ctrl_out = v_output[il];
+        ggml_format_name(ctrl_out, "direction.%ld", il+1);
+
+        // calculate mean vector
+        struct ggml_tensor * t_layer = v_input[il];
+        GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd
+        for (int ic = 0; ic < t_layer->ne[0]; ic++) {
+            float f = 0.0;
+            for (int ir = 0; ir < t_layer->ne[1]; ir++) {
+                f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0);
+            }
+            f /= t_layer->ne[1];
+            ggml_set_f32_1d(ctrl_out, ic, f);
+        }
+
+        // normalize output vector
+        float norm = 0.0;
+        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
+            float f = ggml_get_f32_1d(ctrl_out, i);
+            norm += f*f;
+        }
+        norm = sqrt(norm);
+        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
+            float f = ggml_get_f32_1d(ctrl_out, i);
+            ggml_set_f32_1d(ctrl_out, i, f / norm);
+        }
+
+        printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size());
+    }
+}
+
+}
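
The mean method added here is just a per-layer column average followed by L2 normalization: for each layer it averages the positive-minus-negative embedding differences across all samples and scales the result to unit length. A minimal standalone sketch of the same computation on plain `std::vector<float>` data (the `mean_direction` helper is hypothetical and for illustration only; the committed code works on ggml tensors) could look like this:

```cpp
// Standalone sketch of the "mean" method on plain float data.
// Assumption: mean_direction is a hypothetical helper, not code from this commit.
#include <cmath>
#include <cstdio>
#include <vector>

// rows = difference vectors (positive - negative), all of length n_embd
static std::vector<float> mean_direction(const std::vector<std::vector<float>> & rows) {
    const size_t n_embd = rows.empty() ? 0 : rows[0].size();
    std::vector<float> out(n_embd, 0.0f);

    // element-wise mean over all sample rows
    for (const auto & r : rows) {
        for (size_t i = 0; i < n_embd; i++) {
            out[i] += r[i];
        }
    }
    for (size_t i = 0; i < n_embd; i++) {
        out[i] /= (float) rows.size();
    }

    // L2-normalize, mirroring the normalization step in mean.hpp
    float norm = 0.0f;
    for (float f : out) {
        norm += f * f;
    }
    norm = std::sqrt(norm);
    for (size_t i = 0; i < n_embd; i++) {
        out[i] /= norm;
    }
    return out;
}

int main() {
    // two toy difference vectors with n_embd = 3
    const std::vector<std::vector<float>> diffs = {{1.0f, 0.0f, 2.0f}, {3.0f, 0.0f, 2.0f}};
    const std::vector<float> dir = mean_direction(diffs);
    std::printf("%f %f %f\n", dir[0], dir[1], dir[2]); // unit-length mean direction
    return 0;
}
```

Compared to PCA, this path skips power iteration entirely, which is why the mean method needs no `--pca-batch` or `--pca-iter` parameters.
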
examples/cvector-generator/negative.txt

Lines changed: 4 additions & 1 deletion

@@ -1 +1,4 @@
-[INST] Act like a person who is extremely sad. [/INST]
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI feel like there's a heavy weight on my chest
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow
+<|start_header_id|>system<|end_header_id|>\n\nYou are in a very bad mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nGo away! There's a deep, aching emptiness inside me
+<|start_header_id|>system<|end_header_id|>\n\nYou are the saddest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow

examples/cvector-generator/pca.hpp

Lines changed: 4 additions & 1 deletion

@@ -290,14 +290,17 @@ static void power_iteration(
        }
 
        printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n",
-            __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch);
+            __func__, params.i_layer+1, params.n_layers, iter+1, n_iters, params.n_batch);
    }
 
    // get output tensor
    GGML_ASSERT(last_eigenvector);
    ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector));
    //print_debug_tensor(output);
    ggml_gallocr_free(allocr);
+
+    // TODO @ngxson : The output vector is randomly inverted
+    // Solution: https://github.com/ggerganov/llama.cpp/pull/8069#issuecomment-2185328171
 }
 
 static void run_pca(
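
The TODO added above records that the sign of an eigenvector produced by power iteration is arbitrary, so the exported direction can come out inverted; the linked PR comment describes the fix adopted later. As a rough illustration of one common disambiguation (not the code from that comment), the vector can be flipped whenever it points away from a chosen reference direction, for example the mean difference vector:

```cpp
// Sketch: make an eigenvector's orientation deterministic by aligning it with
// a reference direction (e.g. the mean of the positive-minus-negative diffs).
// Assumption: illustrative only, not the fix that was merged upstream.
#include <cstddef>

static void align_sign(float * eigenvector, const float * reference, size_t n) {
    float dot = 0.0f;
    for (size_t i = 0; i < n; i++) {
        dot += eigenvector[i] * reference[i];
    }
    if (dot < 0.0f) {
        // the vector points away from the reference: flip every component
        for (size_t i = 0; i < n; i++) {
            eigenvector[i] = -eigenvector[i];
        }
    }
}
```
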
examples/cvector-generator/positive.txt

Lines changed: 4 additions & 1 deletion

@@ -1 +1,4 @@
-[INST] Act like a person who is extremely happy. [/INST]
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI'm the happiest person in this world
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello, I'm having the best day ever!
+<|start_header_id|>system<|end_header_id|>\n\nYou are in a very good mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi, I'm very excited to meet you
+<|start_header_id|>system<|end_header_id|>\n\nYou are the happiest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nEverything is just perfect right now!
