Skip to content

Commit 31e7903

Browse files
m18coppola and Michael Coppola authored
server : add dynatemp_range and dynatemp_exponent (#5352)
* server: added `dynatemp_range` and `dynatemp_exponent`
* Update README.md

---------

Co-authored-by: Michael Coppola <[email protected]>
1 parent 4ffc7a1 commit 31e7903

File tree

2 files changed

+29
-21
lines changed

2 files changed

+29
-21
lines changed

examples/server/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ node index.js
137137

138138
`temperature`: Adjust the randomness of the generated text (default: 0.8).
139139

140+
`dynatemp_range`: Dynamic temperature range (default: 0.0, 0.0 = disabled).
141+
142+
`dynatemp_exponent`: Dynamic temperature exponent (default: 1.0).
143+
140144
`top_k`: Limit the next token selection to the K most probable tokens (default: 40).
141145

142146
`top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).

examples/server/server.cpp

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -524,27 +524,29 @@ struct llama_server_context
524524
slot->oaicompat_model = "";
525525
}
526526

527-
slot->params.stream = json_value(data, "stream", false);
528-
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
529-
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
530-
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
531-
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
532-
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
533-
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
534-
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
535-
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
536-
slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
537-
slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
538-
slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
539-
slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
540-
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
541-
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
542-
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
543-
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
544-
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
545-
slot->params.seed = json_value(data, "seed", default_params.seed);
546-
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
547-
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
527+
slot->params.stream = json_value(data, "stream", false);
528+
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
529+
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
530+
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
531+
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
532+
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
533+
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
534+
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
535+
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
536+
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
537+
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
538+
slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
539+
slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
540+
slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
541+
slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
542+
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
543+
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
544+
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
545+
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
546+
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
547+
slot->params.seed = json_value(data, "seed", default_params.seed);
548+
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
549+
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
548550

549551
// infill
550552
if (data.count("input_prefix") != 0)
@@ -1002,6 +1004,8 @@ struct llama_server_context
10021004
{"model", params.model_alias},
10031005
{"seed", slot.params.seed},
10041006
{"temperature", slot.sparams.temp},
1007+
{"dynatemp_range", slot.sparams.dynatemp_range},
1008+
{"dynatemp_exponent", slot.sparams.dynatemp_exponent},
10051009
{"top_k", slot.sparams.top_k},
10061010
{"top_p", slot.sparams.top_p},
10071011
{"min_p", slot.sparams.min_p},

0 commit comments

Comments
 (0)