Commit 37246b1

K-Mistele and ggerganov authored
common : revert showing control tokens by default for server (#6860)
* fix: revert showing control tokens by default

* feat: revert changes to default behavior of llama_token_to_piece; provide overridden declaration to receive "bool special" param to toggle showing control tokens

* feat: use the overridden declaration of llama_token_to_piece from common/common.cpp to specify "false" so that control tokens are not shown in chat completion responses

* common : simplify

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 28103f4 commit 37246b1

File tree

3 files changed: +7, -6 lines


common/common.cpp

Lines changed: 3 additions & 3 deletions

```diff
@@ -2328,12 +2328,12 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }
 
-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
         GGML_ASSERT(check == -n_tokens);
     } else {
         result.resize(n_tokens);
```
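The wrapper keeps the two-pass buffer pattern visible in the hunk above: it starts with an 8-byte buffer, and when the piece does not fit, the underlying call reports the required size as a negative return value, so the buffer is resized once and the call retried. A minimal self-contained sketch of that contract, with a hypothetical `fake_token_to_piece` standing in for the real llama.cpp C API:

```cpp
#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in for the C API call: writes `piece` into `buf` and
// returns the number of bytes written, or the negative of the required size
// when `length` is too small -- the same contract the wrapper relies on.
static int fake_token_to_piece(const std::string & piece, char * buf, int length) {
    if ((int) piece.size() > length) {
        return -(int) piece.size();
    }
    piece.copy(buf, piece.size());
    return (int) piece.size();
}

static std::string token_to_piece(const std::string & piece) {
    std::vector<char> result(8, 0);
    int n_tokens = fake_token_to_piece(piece, result.data(), (int) result.size());
    if (n_tokens < 0) {
        // first call reported the required size; grow and retry once
        result.resize(-n_tokens);
        const int check = fake_token_to_piece(piece, result.data(), (int) result.size());
        assert(check == -n_tokens);
        n_tokens = check;
    }
    return std::string(result.data(), n_tokens);
}

int main() {
    // "<|im_end|>" is 10 bytes, so the initial 8-byte buffer triggers the resize path
    printf("%s\n", token_to_piece("<|im_end|>").c_str());
    return 0;
}
```

The assert plays the role of the GGML_ASSERT in the hunk: the retry must report exactly the size the first call asked for.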

common/common.h

Lines changed: 3 additions & 2 deletions

```diff
@@ -237,11 +237,12 @@ std::vector<llama_token> llama_tokenize(
         bool   add_special,
         bool   parse_special = false);
 
-// tokenizes a token into a piece
+// tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
         const struct llama_context * ctx,
-        llama_token   token);
+        llama_token   token,
+        bool          special = true);
 
 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 // that takes into account the tokenizer type and decides how to handle the leading space
```
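Since the reinstated `special` parameter defaults to `true`, existing call sites compile and behave as before, with control tokens rendered; only callers that want them hidden opt out explicitly. A hypothetical call-site sketch, assuming llama.cpp's common.h and a `ctx`/`tok` pair obtained elsewhere (`print_piece_both_ways` is not part of the codebase):

```cpp
#include "common.h" // llama.cpp common helpers; declares the wrapper above

#include <cstdio>
#include <string>

// Hypothetical helper: assumes `ctx` comes from a loaded model and `tok`
// from a sampling step.
static void print_piece_both_ways(const llama_context * ctx, llama_token tok) {
    // default argument (special = true): control tokens are rendered,
    // so existing callers keep their behavior with no source change
    const std::string rendered = llama_token_to_piece(ctx, tok);

    // explicit false: control tokens are not shown, which is what the
    // server now passes for chat completion responses
    const std::string hidden = llama_token_to_piece(ctx, tok, false);

    printf("rendered: '%s' | hidden: '%s'\n", rendered.c_str(), hidden.c_str());
}
```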

examples/server/server.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -1117,7 +1117,7 @@ struct server_context {
 
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
         slot.sampled = result.tok;
 
         // search stop word and delete it
```
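The effect on the server path: every sampled token is stringified with `special = false`, so control tokens contribute nothing to the accumulated response text that stop-word search and the client see. A toy, self-contained illustration; the vocabulary, token ids, and `toy_token_to_piece` helper are invented for the example, not llama.cpp's:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Invented toy vocabulary: ids >= 100 act as control tokens.
static std::string toy_token_to_piece(int tok, bool special) {
    if (tok >= 100) {
        // control token: only rendered when `special` is true
        return special ? "<|im_end|>" : "";
    }
    static const std::vector<std::string> pieces = { "Hello", ",", " world" };
    return pieces[tok];
}

int main() {
    const std::vector<int> sampled = { 0, 1, 2, 100 }; // ends with a control token

    std::string response;
    for (const int tok : sampled) {
        // mirrors process_token(): special = false keeps control tokens
        // out of the user-facing completion text
        response += toy_token_to_piece(tok, false);
    }
    printf("%s\n", response.c_str()); // prints "Hello, world" with no <|im_end|>
    return 0;
}
```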
