Commit 8388aaa

cleanup and stuff
1 parent 021e6d9 commit 8388aaa

4 files changed: +48 -37 lines changed


examples/common.cpp

Lines changed: 6 additions & 2 deletions

@@ -362,12 +362,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.steering_mul = std::stof(argv[i]);
-        } else if (arg == "--steering-lyr") {
+        } else if (arg == "--steering-layer") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            params.steering_lyr = std::stoi(argv[i]);
+            params.steering_layer = std::stoi(argv[i]);
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             gpt_print_usage(argc, argv, default_params);
@@ -454,6 +454,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     }
     fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
     fprintf(stderr, "                        number of layers to store in VRAM\n");
+    fprintf(stderr, "  --steering-add        add positive steering prompt\n");
+    fprintf(stderr, "  --steering-sub        add negative steering prompt\n");
+    fprintf(stderr, "  --steering-mul        set steering strength (negative is reverse, default %.1f)\n", params.steering_mul);
+    fprintf(stderr, "  --steering-layer      set layer for steering (default %d)\n", params.steering_layer);
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");

examples/common.h

Lines changed: 3 additions & 3 deletions

@@ -73,10 +73,10 @@ struct gpt_params {
     bool mem_test       = false; // compute maximum memory usage
     bool verbose_prompt = false; // print prompt tokens before generation
 
-    std::string steering_add = "";
-    std::string steering_sub = "";
+    std::string steering_add;
+    std::string steering_sub;
     float steering_mul = 1.0f;
-    int steering_lyr = 20;
+    int steering_layer = 15;
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
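Two notes on this hunk: dropping the explicit `= ""` initializers is behavior-neutral, since a default-constructed std::string is already empty (the `.empty()` checks added in main.cpp below rely on exactly that), and the default steering layer moves from 20 to 15.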

examples/main/main.cpp

Lines changed: 26 additions & 22 deletions

@@ -136,28 +136,6 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
-    if (params.steering_add.size() || params.steering_sub.size())
-    {
-        auto steering_add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
-        auto steering_sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
-
-        if (steering_add_tokens.size() != steering_sub_tokens.size()) {
-            llama_token space;
-            llama_tokenize(ctx, " ", &space, 1, 0);
-
-            while (steering_add_tokens.size() < steering_sub_tokens.size()) steering_add_tokens.push_back(space);
-            while (steering_sub_tokens.size() < steering_add_tokens.size()) steering_sub_tokens.push_back(space);
-        }
-
-        llama_set_steering_write(ctx, params.steering_lyr, params.steering_mul/2);
-        llama_eval(ctx, steering_add_tokens.data(), std::min((int)steering_add_tokens.size(), params.n_ctx), 0, params.n_threads);
-
-        llama_set_steering_write(ctx, params.steering_lyr, -params.steering_mul/2);
-        llama_eval(ctx, steering_sub_tokens.data(), std::min((int)steering_sub_tokens.size(), params.n_ctx), 0, params.n_threads);
-
-        llama_set_steering_read(ctx, params.steering_lyr, 1);
-    }
-
     // Add a space in front of the first character to match OG llama tokenizer behavior
     params.prompt.insert(0, 1, ' ');
 
@@ -196,6 +174,32 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (!params.steering_add.empty() || !params.steering_sub.empty())
+    {
+        params.steering_add.insert(0, 1, ' ');
+        params.steering_sub.insert(0, 1, ' ');
+
+        auto add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
+        auto sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
+
+        //if (add_tokens.size() != sub_tokens.size()) {
+        //    while (add_tokens.size() < sub_tokens.size()) {
+        //        add_tokens.push_back(llama_token_nl());
+        //    }
+        //    while (sub_tokens.size() < add_tokens.size()) {
+        //        sub_tokens.push_back(llama_token_nl());
+        //    }
+        //}
+        //const int N = embd_inp.size();
+        llama_set_steering_write(ctx, params.steering_layer, +1.0f);
+        llama_eval(ctx, add_tokens.data(), std::min((int)add_tokens.size(), n_ctx), 0, params.n_threads);
+
+        llama_set_steering_write(ctx, params.steering_layer, -1.0f);
+        llama_eval(ctx, sub_tokens.data(), std::min((int)sub_tokens.size(), n_ctx), 0, params.n_threads);
+
+        llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);
+    }
+
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
     if (session_tokens.size()) {
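Besides moving the block after session setup, this changes where the multiplier lives: the write passes now bake in a fixed +/-1.0 and params.steering_mul is applied only at read time, so during generation the hidden state at the steering layer effectively receives steering_mul * (a - s), where a and s are the activations captured from the positive and negative prompts. Condensed, the protocol is (a sketch reusing the identifiers from the diff above; the n_ctx clamping and the commented-out length padding are omitted):

    // 1. capture: run each steering prompt once, accumulating its activations
    //    at the chosen layer into the context's steering buffer with weight +1 / -1
    llama_set_steering_write(ctx, params.steering_layer, +1.0f);
    llama_eval(ctx, add_tokens.data(), (int)add_tokens.size(), 0, params.n_threads);

    llama_set_steering_write(ctx, params.steering_layer, -1.0f);
    llama_eval(ctx, sub_tokens.data(), (int)sub_tokens.size(), 0, params.n_threads);

    // 2. apply: every later eval adds steering_mul * (recorded difference)
    //    back into the hidden state at the same layer
    llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);

Keeping the recorded vector strength-independent means only the read-time scale decides how hard to steer.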

llama.cpp

Lines changed: 13 additions & 10 deletions

@@ -287,6 +287,9 @@ void llama_set_steering_read(struct llama_context * ctx, int layer, float mul) {
     ctx->steering_mode = STEERING_READ;
     ctx->steering_mul = mul;
     ctx->steering_layer = layer;
+    //FILE* steeringbin = fopen("steering.bin", "wb");
+    //fwrite(ctx->steering_vector.data(), sizeof(float), ctx->steering_vector.size(), steeringbin);
+    //fclose(steeringbin);
 }
 
 template <typename T>
@@ -1163,8 +1166,9 @@ static bool llama_eval_internal(
 
     struct ggml_tensor * steer;
     if (lctx.steering_mode != STEERING_OFF) {
-        steer = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_ctx, n_embd);
-        memcpy(steer->data, lctx.steering_vector.data(), ggml_nbytes(steer));
+        steer = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
+        //steer->data = lctx.steering_vector.data() + n_past * n_embd * sizeof(float);
+        memcpy(steer->data, lctx.steering_vector.data() + n_past * n_embd * sizeof(float), ggml_nbytes(steer));
     }
 
     struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
@@ -1177,15 +1181,14 @@ static bool llama_eval_internal(
         lctx.use_buf(ctx0, 0);
 
         if (lctx.steering_mode != STEERING_OFF && il == lctx.steering_layer) {
-            steer->data = lctx.steering_vector.data();
-
-            struct ggml_tensor * src = ggml_scale(ctx0, inpL, ggml_new_f32(ctx0, lctx.steering_mul));
-            struct ggml_tensor * dst = ggml_view_2d(ctx0, steer, n_embd, N, n_embd * sizeof(float), n_past * n_embd * sizeof(float));
+            struct ggml_tensor * scal = ggml_new_f32(ctx0, lctx.steering_mul);
             if (lctx.steering_mode == STEERING_WRITE) {
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, ggml_add(ctx0, src, dst), dst));
-            } else {
-                inpL = src;
+                ggml_build_forward_expand(&gf, ggml_cpy(ctx0,
+                    ggml_add(ctx0, ggml_scale(ctx0, inpL, scal), steer), steer));
+                break;
             }
+
+            inpL = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpL);
         }
 
         // norm
@@ -1403,7 +1406,7 @@ static bool llama_eval_internal(
 
 
     if (lctx.steering_mode == STEERING_WRITE) {
-        memcpy(lctx.steering_vector.data(), steer->data, ggml_nbytes(steer));
+        memcpy(lctx.steering_vector.data() + n_past * n_embd * sizeof(float), steer->data, ggml_nbytes(steer));
    }
 
 
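Net effect of the shape change: steer is now an n_embd x N slice aligned to the current batch (offset by n_past positions into the context-long buffer) rather than a copy of the whole n_ctx x n_embd buffer, and the read path adds the scaled slice instead of replacing inpL. A plain-C++ sketch of what the two modes compute at the steering layer (illustrative, not the actual ggml graph code; it assumes float-element indexing into the buffer and leaves out the added `break` that ends a WRITE pass once the layer has been recorded):

    #include <cstddef>

    // h: the N current hidden rows (N x n_embd); steering: per-position buffer
    // covering the whole context (n_ctx x n_embd), indexed from n_past.
    void apply_steering(float * h, float * steering, int n_past,
                        int N, int n_embd, float mul, bool write) {
        for (int t = 0; t < N; ++t) {
            for (int i = 0; i < n_embd; ++i) {
                float & s = steering[(std::size_t)(n_past + t) * n_embd + i];
                if (write) s += mul * h[(std::size_t)t * n_embd + i]; // WRITE: record mul*h
                else       h[(std::size_t)t * n_embd + i] += mul * s; // READ: add mul*steering
            }
        }
    }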