Commit ec42af1

cont : drop "penalty prompt" support (#3727)
ggml-ci
1 parent e1ece90 commit ec42af1

File tree

6 files changed: 4 additions, 61 deletions

common/sampling.cpp

Lines changed: 1 addition & 1 deletion
@@ -354,7 +354,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
     llama_token_data_array cur_p = { cur.data(), cur.size(), false };
 
     // apply penalties
-    const auto& penalty_tokens = params.use_penalty_prompt_tokens ? params.penalty_prompt_tokens : prev;
+    const auto & penalty_tokens = prev;
     const int penalty_tokens_used_size = std::min((int)penalty_tokens.size(), penalty_last_n);
     if (penalty_tokens_used_size) {
         const float nl_logit = logits[llama_token_nl(llama_get_model(ctx_main))];
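
A standalone sketch of the penalty step this hunk touches may help: with the "penalty prompt" gone, the window of tokens that gets penalized is always the tail of `prev` (the tokens sampled so far), capped at `penalty_last_n`. The helper below is a simplified illustration of that logic, not the llama.cpp implementation; its name and signature are invented.

```cpp
#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

using llama_token = int32_t;

// Simplified sketch (not the actual llama.cpp code): apply repeat/frequency/
// presence penalties to raw logits, looking only at the last `penalty_last_n`
// previously sampled tokens.
static void apply_penalties_sketch(
        std::vector<float> & logits,            // one logit per vocab id
        const std::vector<llama_token> & prev,  // tokens sampled so far
        int   penalty_last_n,
        float penalty_repeat,
        float penalty_freq,
        float penalty_present) {
    const int used = std::min((int) prev.size(), penalty_last_n);
    if (used <= 0) {
        return;
    }

    // count how often each token appears inside the penalty window
    std::unordered_map<llama_token, int> counts;
    for (size_t i = prev.size() - used; i < prev.size(); ++i) {
        counts[prev[i]]++;
    }

    for (const auto & [tok, cnt] : counts) {
        float & logit = logits[tok];

        // repetition penalty: shrink positive logits, grow negative ones
        logit = logit > 0.0f ? logit / penalty_repeat : logit * penalty_repeat;

        // frequency and presence penalties (presence applies once per seen token)
        logit -= (float) cnt * penalty_freq + penalty_present;
    }
}
```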

common/sampling.h

Lines changed: 0 additions & 3 deletions
@@ -56,9 +56,6 @@ typedef struct gpt_sampling_params {
     float cfg_scale = 1.f; // how strong is guidance
 
     std::vector<llama_logit_bias> logit_bias; // logit biases to apply
-
-    std::vector<llama_token> penalty_prompt_tokens;
-    bool use_penalty_prompt_tokens = false;
 } gpt_sampling_params;
 
 // general sampler context

examples/server/README.md

Lines changed: 1 addition & 5 deletions
@@ -424,8 +424,6 @@ node index.js
 
 `frequency_penalty`: Repeat alpha frequency penalty. Default: `0.0`, which is disabled.
 
-`penalty_prompt`: This will replace the `prompt` for the purpose of the penalty evaluation. Can be either `null`, a string or an array of numbers representing tokens. Default: `null`, which is to use the original `prompt`.
-
 `mirostat`: Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
 
 `mirostat_tau`: Set the Mirostat target entropy, parameter tau. Default: `5.0`

@@ -672,7 +670,6 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte
       "stopping_word": ""
     },
     "penalize_nl": true,
-    "penalty_prompt_tokens": [],
     "presence_penalty": 0.0,
     "prompt": "Say hello to llama.cpp",
     "repeat_last_n": 64,

@@ -696,8 +693,7 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte
     "tfs_z": 1.0,
     "top_k": 40,
     "top_p": 0.949999988079071,
-    "typical_p": 1.0,
-    "use_penalty_prompt_tokens": false
+    "typical_p": 1.0
   }
 ]
 ```
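
With `penalty_prompt` removed, repetition in a `/completion` request is shaped only by the remaining documented fields, and since the server code that parsed `penalty_prompt` is deleted below, the field is now ignored if a client still sends it. For illustration only, a request body using the surviving penalty parameters might look like this (the values are arbitrary examples):

```json
{
  "prompt": "Say hello to llama.cpp",
  "n_predict": 64,
  "repeat_last_n": 64,
  "repeat_penalty": 1.1,
  "frequency_penalty": 0.0,
  "presence_penalty": 0.0
}
```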

examples/server/server.cpp

Lines changed: 0 additions & 52 deletions
@@ -985,51 +985,6 @@ struct server_context {
             }
         }
 
-        // penalize user-provided tokens
-        {
-            slot.sparams.penalty_prompt_tokens.clear();
-            slot.sparams.use_penalty_prompt_tokens = false;
-
-            const auto & penalty_prompt = data.find("penalty_prompt");
-
-            if (penalty_prompt != data.end()) {
-                if (penalty_prompt->is_string()) {
-                    const auto penalty_prompt_string = penalty_prompt->get<std::string>();
-                    slot.sparams.penalty_prompt_tokens = llama_tokenize(model, penalty_prompt_string, false);
-
-                    if (slot.params.n_predict > 0) {
-                        slot.sparams.penalty_prompt_tokens.reserve(slot.sparams.penalty_prompt_tokens.size() + slot.params.n_predict);
-                    }
-                    slot.sparams.use_penalty_prompt_tokens = true;
-
-                    LOG_VERBOSE("penalty_prompt_tokens", {
-                        {"id_slot", slot.id},
-                        {"tokens",  slot.sparams.penalty_prompt_tokens},
-                    });
-                }
-                else if (penalty_prompt->is_array()) {
-                    const auto n_tokens = penalty_prompt->size();
-                    slot.sparams.penalty_prompt_tokens.reserve(n_tokens + std::max(0, slot.params.n_predict));
-
-                    const int n_vocab = llama_n_vocab(model);
-                    for (const auto & penalty_token : *penalty_prompt) {
-                        if (penalty_token.is_number_integer()) {
-                            const auto tok = penalty_token.get<llama_token>();
-                            if (tok >= 0 && tok < n_vocab) {
-                                slot.sparams.penalty_prompt_tokens.push_back(tok);
-                            }
-                        }
-                    }
-                    slot.sparams.use_penalty_prompt_tokens = true;
-
-                    LOG_VERBOSE("penalty_prompt_tokens", {
-                        {"id_slot", slot.id},
-                        {"tokens",  slot.sparams.penalty_prompt_tokens},
-                    });
-                }
-            }
-        }
-
         {
             slot.sparams.logit_bias.clear();
 

@@ -1191,11 +1146,6 @@ struct server_context {
         slot.generated_text += token_str;
         slot.has_next_token = true;
 
-        if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1) {
-            // we can change penalty_prompt_tokens because it is always created from scratch each request
-            slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
-        }
-
         // check if there is incomplete UTF-8 character at the end
         bool incomplete = false;
         for (unsigned i = 1; i < 5 && i <= slot.generated_text.size(); ++i) {

@@ -1336,8 +1286,6 @@ struct server_context {
             {"repeat_penalty",    slot.sparams.penalty_repeat},
             {"presence_penalty",  slot.sparams.penalty_present},
             {"frequency_penalty", slot.sparams.penalty_freq},
-            {"penalty_prompt_tokens",     slot.sparams.penalty_prompt_tokens},
-            {"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
             {"mirostat",          slot.sparams.mirostat},
             {"mirostat_tau",      slot.sparams.mirostat_tau},
             {"mirostat_eta",      slot.sparams.mirostat_eta},

include/llama.h

Lines changed: 1 addition & 0 deletions
@@ -386,6 +386,7 @@ extern "C" {
         bool ignore_eos; // ignore the end-of-sequence token
 
         const char * grammar;
+        const char * grammar_root;
 
         int32_t n_logit_bias;
         const llama_logit_bias * logit_bias;

src/llama.cpp

Lines changed: 1 addition & 0 deletions
@@ -16904,6 +16904,7 @@ struct llama_sampling_params llama_sampling_default_params() {
         /*.penalize_nl  =*/ false,
         /*.ignore_eos   =*/ false,
         /*.grammar      =*/ nullptr,
+        /*.grammar_root =*/ nullptr,
         /*.n_logit_bias =*/ 0,
         /*.logit_bias   =*/ nullptr,
     };
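
Besides dropping the penalty prompt, the commit adds a `grammar_root` field next to `grammar` in the sampling parameters (see the `llama.h` and `llama.cpp` hunks above). A minimal sketch of how a caller might fill it in, assuming those declarations; the grammar text and root-rule name below are invented examples, not part of this commit:

```cpp
#include "llama.h"

int main() {
    // start from the library defaults shown in llama_sampling_default_params()
    struct llama_sampling_params sparams = llama_sampling_default_params();

    // hypothetical GBNF grammar; grammar_root presumably names the start rule
    sparams.grammar      = "root ::= \"yes\" | \"no\"";
    sparams.grammar_root = "root";

    (void) sparams; // sketch only: pass sparams on to the sampler as usual
    return 0;
}
```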
