Skip to content

Commit 005bd75

Browse files
z80maniacjordankanter
authored and committed
server : add "samplers" param to control the samplers order (ggml-org#5494)
1 parent 8ef167a commit 005bd75

File tree

6 files changed

+74
-30
lines changed

6 files changed

+74
-30
lines changed

common/common.cpp

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
341341
break;
342342
}
343343
const auto sampler_names = string_split(argv[i], ';');
344-
sparams.samplers_sequence = sampler_types_from_names(sampler_names);
344+
sparams.samplers_sequence = sampler_types_from_names(sampler_names, true);
345345
} else if (arg == "--sampling-seq") {
346346
if (++i >= argc) {
347347
invalid_param = true;
@@ -964,7 +964,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
964964
printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
965965
printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
966966
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
967-
printf(" --samplers samplers that will be used for generation in the order, separated by \';\' (default: %s)\n", sampler_type_names.c_str());
967+
printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n");
968+
printf(" (default: %s)\n", sampler_type_names.c_str());
968969
printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
969970
printf(" --top-k N top-k sampling (default: %d, 0 = disabled)\n", sparams.top_k);
970971
printf(" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p);
@@ -1133,34 +1134,50 @@ std::vector<std::string> string_split(std::string input, char separator) {
11331134
return parts;
11341135
}
11351136

1136-
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names) {
1137+
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
1138+
std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
1139+
{"top_k", llama_sampler_type::TOP_K},
1140+
{"top_p", llama_sampler_type::TOP_P},
1141+
{"typical_p", llama_sampler_type::TYPICAL_P},
1142+
{"min_p", llama_sampler_type::MIN_P},
1143+
{"tfs_z", llama_sampler_type::TFS_Z},
1144+
{"temperature", llama_sampler_type::TEMPERATURE}
1145+
};
1146+
11371147
// since sampler names are written multiple ways
11381148
// make it ready for both system names and input names
1139-
std::unordered_map<std::string, llama_sampler_type> sampler_name_map {
1140-
{"top_k", llama_sampler_type::TOP_K},
1149+
std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
11411150
{"top-k", llama_sampler_type::TOP_K},
1142-
{"top_p", llama_sampler_type::TOP_P},
11431151
{"top-p", llama_sampler_type::TOP_P},
11441152
{"nucleus", llama_sampler_type::TOP_P},
1145-
{"typical_p", llama_sampler_type::TYPICAL_P},
11461153
{"typical-p", llama_sampler_type::TYPICAL_P},
11471154
{"typical", llama_sampler_type::TYPICAL_P},
1148-
{"min_p", llama_sampler_type::MIN_P},
11491155
{"min-p", llama_sampler_type::MIN_P},
1150-
{"tfs_z", llama_sampler_type::TFS_Z},
11511156
{"tfs-z", llama_sampler_type::TFS_Z},
11521157
{"tfs", llama_sampler_type::TFS_Z},
1153-
{"temp", llama_sampler_type::TEMP},
1154-
{"temperature", llama_sampler_type::TEMP}
1158+
{"temp", llama_sampler_type::TEMPERATURE}
11551159
};
11561160

11571161
std::vector<llama_sampler_type> sampler_types;
11581162
sampler_types.reserve(names.size());
1159-
for (const auto& name : names) {
1160-
const auto sampler_item = sampler_name_map.find(name);
1161-
if (sampler_item != sampler_name_map.end()) {
1163+
for (const auto & name : names)
1164+
{
1165+
auto sampler_item = sampler_canonical_name_map.find(name);
1166+
if (sampler_item != sampler_canonical_name_map.end())
1167+
{
11621168
sampler_types.push_back(sampler_item->second);
11631169
}
1170+
else
1171+
{
1172+
if (allow_alt_names)
1173+
{
1174+
sampler_item = sampler_alt_name_map.find(name);
1175+
if (sampler_item != sampler_alt_name_map.end())
1176+
{
1177+
sampler_types.push_back(sampler_item->second);
1178+
}
1179+
}
1180+
}
11641181
}
11651182
return sampler_types;
11661183
}
@@ -1172,7 +1189,7 @@ std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & nam
11721189
{'y', llama_sampler_type::TYPICAL_P},
11731190
{'m', llama_sampler_type::MIN_P},
11741191
{'f', llama_sampler_type::TFS_Z},
1175-
{'t', llama_sampler_type::TEMP}
1192+
{'t', llama_sampler_type::TEMPERATURE}
11761193
};
11771194

11781195
std::vector<llama_sampler_type> sampler_types;
@@ -1188,12 +1205,12 @@ std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & nam
11881205

11891206
std::string sampler_type_to_name_string(llama_sampler_type sampler_type) {
11901207
switch (sampler_type) {
1191-
case llama_sampler_type::TOP_K: return "top_k";
1192-
case llama_sampler_type::TFS_Z: return "tfs_z";
1193-
case llama_sampler_type::TYPICAL_P: return "typical_p";
1194-
case llama_sampler_type::TOP_P: return "top_p";
1195-
case llama_sampler_type::MIN_P: return "min_p";
1196-
case llama_sampler_type::TEMP: return "temp";
1208+
case llama_sampler_type::TOP_K: return "top_k";
1209+
case llama_sampler_type::TFS_Z: return "tfs_z";
1210+
case llama_sampler_type::TYPICAL_P: return "typical_p";
1211+
case llama_sampler_type::TOP_P: return "top_p";
1212+
case llama_sampler_type::MIN_P: return "min_p";
1213+
case llama_sampler_type::TEMPERATURE: return "temperature";
11971214
default : return "";
11981215
}
11991216
}

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ void process_escapes(std::string& input);
165165
// String utils
166166
//
167167

168-
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names);
168+
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
169169
std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
170170
std::vector<std::string> string_split(std::string input, char separator);
171171
std::string sampler_type_to_name_string(llama_sampler_type sampler_type);

common/sampling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ static void sampler_queue(
139139
case llama_sampler_type::TYPICAL_P: llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break;
140140
case llama_sampler_type::TOP_P : llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break;
141141
case llama_sampler_type::MIN_P : llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break;
142-
case llama_sampler_type::TEMP:
142+
case llama_sampler_type::TEMPERATURE:
143143
if (dynatemp_range > 0) {
144144
float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
145145
float dynatemp_max = std::max(0.0f, temp + dynatemp_range);

common/sampling.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010

1111
// sampler types
1212
enum class llama_sampler_type : char {
13-
TOP_K = 'k',
14-
TOP_P = 'p',
15-
MIN_P = 'm',
16-
TFS_Z = 'f',
17-
TYPICAL_P = 'y',
18-
TEMP = 't'
13+
TOP_K = 'k',
14+
TOP_P = 'p',
15+
MIN_P = 'm',
16+
TFS_Z = 'f',
17+
TYPICAL_P = 'y',
18+
TEMPERATURE = 't'
1919
};
2020

2121
// sampling parameters
@@ -45,7 +45,7 @@ typedef struct llama_sampling_params {
4545
llama_sampler_type::TYPICAL_P,
4646
llama_sampler_type::TOP_P,
4747
llama_sampler_type::MIN_P,
48-
llama_sampler_type::TEMP
48+
llama_sampler_type::TEMPERATURE
4949
};
5050

5151
std::string grammar; // optional BNF-like grammar to constrain sampling

examples/server/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ node index.js
204204

205205
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
206206

207+
`samplers`: The order in which the samplers are applied, given as an array of strings naming sampler types. A sampler that is omitted from the array is not used; a sampler that is listed more than once is applied multiple times. Entries that are not strings or that do not match one of the names below are ignored. (default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values)
208+
207209
### Result JSON
208210

209211
- Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.

examples/server/server.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,24 @@ struct llama_server_context
672672
}
673673
}
674674

675+
const auto &samplers_sequence = data.find("samplers");
676+
if (samplers_sequence != data.end() && samplers_sequence->is_array())
677+
{
678+
std::vector<std::string> sampler_names;
679+
for (const auto &sampler_name : *samplers_sequence)
680+
{
681+
if (sampler_name.is_string())
682+
{
683+
sampler_names.emplace_back(sampler_name);
684+
}
685+
}
686+
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
687+
}
688+
else
689+
{
690+
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
691+
}
692+
675693
if (multimodal)
676694
{
677695
const auto &images_data = data.find("image_data");
@@ -1026,6 +1044,12 @@ struct llama_server_context
10261044
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
10271045
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
10281046
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
1047+
std::vector<std::string> samplers_sequence;
1048+
for (const auto &sampler_type : slot.sparams.samplers_sequence)
1049+
{
1050+
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
1051+
}
1052+
10291053
return json {
10301054
{"n_ctx", slot.n_ctx},
10311055
{"model", params.model_alias},
@@ -1056,6 +1080,7 @@ struct llama_server_context
10561080
{"logit_bias", slot.sparams.logit_bias},
10571081
{"n_probs", slot.sparams.n_probs},
10581082
{"grammar", slot.sparams.grammar},
1083+
{"samplers", samplers_sequence}
10591084
};
10601085
}
10611086

0 commit comments

Comments
 (0)