|
1 |
| -#include <chrono> |
| 1 | +#include "frontend.h" |
| 2 | +#include "common.h" |
| 3 | +#include "llama.h" |
| 4 | + |
2 | 5 | #include "../server/httplib.h"
|
3 | 6 | #include "../server/json.hpp"
|
| 7 | + |
4 | 8 | #include <iostream>
|
5 | 9 | #include <sstream>
|
6 | 10 | #include <thread>
|
7 | 11 | #include <vector>
|
8 |
| -#include "frontend.h" |
9 |
| -#include "common.h" |
10 |
| -#include "llama.h" |
| 12 | +#include <chrono> |
11 | 13 |
|
12 | 14 | using namespace httplib;
|
13 | 15 | using namespace std;
|
@@ -241,9 +243,7 @@ struct server_parallel_context {
|
241 | 243 | string prompt = data.value("prompt", "");
|
242 | 244 | for (llama_client_slot & slot : slots)
|
243 | 245 | {
|
244 |
| - if ( |
245 |
| - slot_id == -1 && slot.available() || |
246 |
| - slot.id == slot_id) |
| 246 | + if ((slot_id == -1 && slot.available()) || slot.id == slot_id) |
247 | 247 | {
|
248 | 248 | slot.start(prompt, temperature);
|
249 | 249 | LOG_TEE("slot %i is processing\n", slot.id);
|
@@ -429,8 +429,6 @@ struct server_parallel_context {
|
429 | 429 | slot.generated_text += token_str;
|
430 | 430 | slot.sampled = id;
|
431 | 431 |
|
432 |
| - size_t pos = 0; |
433 |
| - |
434 | 432 | size_t stop_pos =
|
435 | 433 | findStoppingStrings(slot.generated_text, token_str.size(), STOP_FULL);
|
436 | 434 |
|
@@ -740,20 +738,34 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
740 | 738 | else if (arg == "--numa")
|
741 | 739 | {
|
742 | 740 | params.numa = true;
|
743 |
| - } else if (arg == "-cb" || arg == "--cont-batching") { |
| 741 | + } else if (arg == "-cb" || arg == "--cont-batching") |
| 742 | + { |
744 | 743 | params.cont_batching = true;
|
745 |
| - } else if (arg == "-np" || arg == "--parallel") { |
746 |
| - if (++i >= argc) { |
| 744 | + } |
| 745 | + else if (arg == "-np" || arg == "--parallel") |
| 746 | + { |
| 747 | + if (++i >= argc) |
| 748 | + { |
747 | 749 | invalid_param = true;
|
748 | 750 | break;
|
749 | 751 | }
|
750 | 752 | params.n_parallel = std::stoi(argv[i]);
|
751 |
| - } else if (arg == "-n" || arg == "--n-predict") { |
752 |
| - if (++i >= argc) { |
| 753 | + } else if (arg == "-n" || arg == "--n-predict") |
| 754 | + { |
| 755 | + if (++i >= argc) |
| 756 | + { |
753 | 757 | invalid_param = true;
|
754 | 758 | break;
|
755 | 759 | }
|
756 | 760 | params.n_predict = std::stoi(argv[i]);
|
| 761 | + } else if (arg == "-r" || arg == "--reverse-prompt") |
| 762 | + { |
| 763 | + if (++i >= argc) |
| 764 | + { |
| 765 | + invalid_param = true; |
| 766 | + break; |
| 767 | + } |
| 768 | + params.antiprompt.push_back(argv[i]); |
757 | 769 | }
|
758 | 770 | else
|
759 | 771 | {
|
|
0 commit comments