@@ -3176,84 +3176,6 @@ int main(int argc, char ** argv) {
3176
3176
res.status = 200 ; // HTTP OK
3177
3177
};
3178
3178
3179
- const auto handle_get_control_vectors = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
3180
- json vectors = json::array ();
3181
-
3182
- for (const auto & vec : ctx_server.params .control_vectors ) {
3183
- vectors.push_back (json {
3184
- { " fname" , vec.fname },
3185
- { " strength" , vec.strength }
3186
- });
3187
- }
3188
- json data = {
3189
- { " vectors" , vectors },
3190
- { " layer_start" , ctx_server.params .control_vector_layer_start },
3191
- { " layer_end" , ctx_server.params .control_vector_layer_end }
3192
- };
3193
- res.set_content (data.dump (), " application/json; charset=utf-8" );
3194
- };
3195
-
3196
- const auto handle_set_control_vectors = [&ctx_server, &res_error, &handle_get_control_vectors](const httplib::Request & req, httplib::Response & res) {
3197
- res.set_header (" Access-Control-Allow-Origin" , req.get_header_value (" Origin" ));
3198
-
3199
- json data = json::parse (req.body );
3200
- std::vector<llama_control_vector_load_info> vec_params;
3201
-
3202
- if (data.contains (" vectors" ) && data[" vectors" ].is_array ()) {
3203
- for (const auto &item : data[" vectors" ]) {
3204
- auto v = item.get <llama_control_vector_load_info>();
3205
- std::cout << " Add vector: " << v.fname << " " << v.strength << " \n " ;
3206
- vec_params.push_back (v);
3207
- }
3208
- } else {
3209
- std::cerr << " No vectors passed\n " ;
3210
- res_error (res, format_error_response (" No vectors passed" , ERROR_TYPE_SERVER));
3211
- return ;
3212
- }
3213
- const auto cvec = llama_control_vector_load (vec_params);
3214
- if (cvec.n_embd == -1 ) {
3215
- std::cerr << " Could not load control vector\n " ;
3216
- res_error (res, format_error_response (" Could not load control vector" , ERROR_TYPE_SERVER));
3217
- return ;
3218
- }
3219
-
3220
- if (ctx_server.params .control_vector_layer_start <= 0 ) {
3221
- ctx_server.params .control_vector_layer_start = 1 ;
3222
- }
3223
- if (ctx_server.params .control_vector_layer_end <= 0 ){
3224
- ctx_server.params .control_vector_layer_end = llama_n_layer (ctx_server.model );
3225
- }
3226
- int err = llama_control_vector_apply (ctx_server.ctx ,
3227
- cvec.data .data (),
3228
- cvec.data .size (),
3229
- cvec.n_embd ,
3230
- ctx_server.params .control_vector_layer_start ,
3231
- ctx_server.params .control_vector_layer_end );
3232
- if (err) {
3233
- std::cerr << " Could not apply control vector\n " ;
3234
- res_error (res, format_error_response (" Could not apply control vector" , ERROR_TYPE_SERVER));
3235
- return ;
3236
- }
3237
- ctx_server.params .control_vectors .clear ();
3238
- for (auto v : vec_params) {
3239
- // std::cout << "set vector param: " << v.fname << " " << v.strength << "\n";
3240
- ctx_server.params .control_vectors .push_back (v);
3241
- }
3242
-
3243
- /* std::cerr << "Maybe we need to do this initiation ritual before it werks?\n"; // No, it's still all garbled bullshit.
3244
-
3245
- std::vector<llama_token> tmp = { llama_token_bos(ctx_server.model), llama_token_eos(ctx_server.model), };
3246
- std::cerr << "decode, bro\n";
3247
- llama_decode(ctx_server.ctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) ctx_server.params.n_batch), 0, 0));
3248
- std::cerr << "clear that fucking cache\n";
3249
- llama_kv_cache_clear(ctx_server.ctx);
3250
- std::cerr << "symcr0nice or what\n";
3251
- llama_synchronize(ctx_server.ctx);
3252
- std::cerr << "time will tell\n";
3253
- llama_reset_timings(ctx_server.ctx);*/
3254
- handle_get_control_vectors (req, res);
3255
- };
3256
-
3257
3179
const auto handle_props = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
3258
3180
res.set_header (" Access-Control-Allow-Origin" , req.get_header_value (" Origin" ));
3259
3181
json data = {
@@ -3603,10 +3525,8 @@ int main(int argc, char ** argv) {
3603
3525
svr->Get (" /health" , handle_health);
3604
3526
svr->Get (" /slots" , handle_slots);
3605
3527
svr->Get (" /metrics" , handle_metrics);
3606
- svr->Get (" /control-vectors" , handle_get_control_vectors);
3607
3528
svr->Get (" /props" , handle_props);
3608
3529
svr->Get (" /v1/models" , handle_models);
3609
- svr->Post (" /control-vectors" , handle_set_control_vectors);
3610
3530
svr->Post (" /completion" , handle_completions); // legacy
3611
3531
svr->Post (" /completions" , handle_completions);
3612
3532
svr->Post (" /v1/completions" , handle_completions);
@@ -3681,3 +3601,4 @@ int main(int argc, char ** argv) {
3681
3601
3682
3602
return 0 ;
3683
3603
}
3604
+
0 commit comments