
Commit 0cc6593

Author: jaime-m-p (committed)

Remove previous space

1 parent 503b753 commit 0cc6593

3 files changed, +45 -54 lines changed

common/common.cpp

Lines changed: 2 additions & 2 deletions
@@ -2908,10 +2908,10 @@ std::vector<llama_token> llama_tokenize(
 std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::string piece;
     piece.resize(piece.capacity());  // using string internal cache, 15 bytes + '\n'
-    const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), special);
+    const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
     if (n_chars < 0) {
         piece.resize(-n_chars);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), special);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
         GGML_ASSERT(check == -n_chars);
     }
     else {
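
Note on the convention this wrapper relies on: when the destination buffer is too small, llama_token_to_piece returns the negative of the required length, and the caller resizes and retries. A minimal self-contained sketch of that grow-and-retry pattern (illustrative only; fake_to_piece is a hypothetical stand-in, not the library call):

#include <cstring>
#include <iostream>
#include <string>

// Stand-in for llama_token_to_piece(): copies `piece` into `buf` if it fits,
// otherwise returns the negative of the required size (same convention as above).
static int fake_to_piece(const std::string & piece, char * buf, int length) {
    if (length < (int) piece.size()) {
        return -(int) piece.size();
    }
    std::memcpy(buf, piece.data(), piece.size());
    return (int) piece.size();
}

int main() {
    std::string out;
    out.resize(out.capacity());  // start with the string's small internal buffer
    int n = fake_to_piece(" HelloWorldThisIsALongPiece", &out[0], (int) out.size());
    if (n < 0) {
        out.resize(-n);          // -n is the exact number of bytes needed
        n = fake_to_piece(" HelloWorldThisIsALongPiece", &out[0], (int) out.size());
    }
    out.resize(n);               // trim to what was actually written
    std::cout << out << "\n";
}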

llama.cpp

Lines changed: 41 additions & 51 deletions
@@ -1835,18 +1835,19 @@ using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;
 
 // NOTE: avoid ever using this except for building the token_to_piece caches
 static std::string llama_token_to_piece(const struct llama_model * model, llama_token token, bool special) {
-    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), special);
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_token_to_piece(model, token, result.data(), result.size(), special);
-        GGML_ASSERT(check == -n_tokens);
+    std::string piece;
+    piece.resize(piece.capacity());  // using string internal cache
+    const int n_chars = llama_token_to_piece(model, token, &piece[0], piece.size(), 0, special);
+    if (n_chars < 0) {
+        piece.resize(-n_chars);
+        int check = llama_token_to_piece(model, token, &piece[0], piece.size(), 0, special);
+        GGML_ASSERT(check == -n_chars);
     }
     else {
-        result.resize(n_tokens);
+        piece.resize(n_chars);
     }
 
-    return std::string(result.data(), result.size());
+    return piece;
 }
 
 static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer) {
@@ -18418,23 +18419,33 @@ static std::string llama_decode_text(const std::string & text) {
 }
 
 // does not write null-terminator to buf
-int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length, bool special) {
+int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length, int32_t lstrip, bool special) {
     // ref: https://github.com/ggerganov/llama.cpp/pull/7587#discussion_r1620983843
     if (!special && llama_is_control_token(model->vocab, token)) {
         return 0;
     }
 
+    // copy piece chars to output text buffer
+    // skip up to 'lstrip' leading spaces before copying
+    auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+        for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
+            token++;
+            size--;
+        }
+        if (length < (int32_t)size) {
+            return (int32_t) -size;
+        }
+        memcpy(buf, token, size);
+        return (int32_t) size;
+    };
+
     // if we have a cache - use it
     {
         const auto & cache = model->vocab.cache_token_to_piece;
 
         if (!cache.empty()) {
-            const auto & res = cache.at(token);
-            if (length < (int) res.size()) {
-                return -(int) res.size();
-            }
-            memcpy(buf, res.c_str(), res.size());
-            return res.size();
+            const auto & result = cache.at(token);
+            return _try_copy(result.data(), result.size());
         }
     }
 
@@ -18447,55 +18458,31 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
             if (llama_is_normal_token(model->vocab, token)) {
                 std::string result = model->vocab.id_to_token[token].text;
                 llama_unescape_whitespace(result);
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
+                return _try_copy(result.data(), result.size());
             } else if (
                 (llama_is_user_defined_token(model->vocab, token)) ||
                 (llama_is_control_token (model->vocab, token) && special)) {
-                std::string result = model->vocab.id_to_token[token].text;
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
-            } else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
-                if (length < 3) {
-                    return -3;
-                }
-                memcpy(buf, "\xe2\x96\x85", 3);
-                return 3;
+                const std::string & result = model->vocab.id_to_token[token].text;
+                return _try_copy(result.data(), result.size());
+            /**/ } else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
+            /**/     return _try_copy("\xe2\x96\x85", 3);
             } else if (llama_is_byte_token(model->vocab, token)) {
-                if (length < 1) {
-                    return -1;
-                }
-                buf[0] = llama_token_to_byte(model->vocab, token);
-                return 1;
+                char byte = (char) llama_token_to_byte(model->vocab, token);
+                return _try_copy((char*)&byte, 1);
             }
             break;
         }
         case LLAMA_VOCAB_TYPE_BPE: {
             // NOTE: we accept all unsupported token types,
             // suppressing them like CONTROL tokens.
             if (llama_is_normal_token(model->vocab, token)) {
-                std::string result = model->vocab.id_to_token[token].text;
-                result = llama_decode_text(result);
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
+                std::string result = llama_decode_text(model->vocab.id_to_token[token].text);
+                return _try_copy(result.data(), result.size());
             } else if (
                 (llama_is_user_defined_token(model->vocab, token)) ||
                 (llama_is_control_token (model->vocab, token) && special)) {
-                std::string result = model->vocab.id_to_token[token].text;
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
+                const std::string & result = model->vocab.id_to_token[token].text;
+                return _try_copy(result.data(), result.size());
             }
             break;
         }
@@ -18513,12 +18500,15 @@ int32_t llama_detokenize(
         char * text,
         int32_t text_len_max,
         bool special) {
+    // remove the leading space of the first non-control token
+    bool remove_space = model->vocab.tokenizer_add_space_prefix;
     int32_t avail = text_len_max;
     int32_t total = 0;
 
     for (int32_t i = 0; i < n_tokens; ++i) {
         GGML_ASSERT(avail >= 0);
-        int32_t n_chars = llama_token_to_piece(model, tokens[i], text, avail, special);
+        int32_t n_chars = llama_token_to_piece(model, tokens[i], text, avail, remove_space, special);
+        remove_space = remove_space && llama_is_control_token(model->vocab, tokens[i]); // until non-control token
         if (n_chars < 0) {
             avail = 0;
             total -= n_chars;
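
To make the new lstrip semantics concrete: the _try_copy lambda added above drops at most lstrip leading spaces from the piece before copying, and still reports the required size as a negative value when the buffer is too small. A standalone sketch of the same behaviour (illustrative; try_copy is a local stand-in, not the library code itself):

#include <cassert>
#include <cstddef>
#include <cstring>

// Mirrors the `_try_copy` behaviour shown in the diff: drop up to `lstrip`
// leading spaces, then copy into `buf`, or return the negative required size
// if the piece does not fit.
static int try_copy(const char * token, size_t size, char * buf, int length, int lstrip) {
    for (int i = 0; i < lstrip && size && *token == ' '; ++i) {
        token++;
        size--;
    }
    if (length < (int) size) {
        return -(int) size;
    }
    std::memcpy(buf, token, size);
    return (int) size;
}

int main() {
    char buf[16];
    // With lstrip = 1 the SPM-style space prefix of the first piece is dropped.
    int n = try_copy(" Hello", 6, buf, sizeof(buf), /*lstrip=*/1);
    assert(n == 5 && std::memcmp(buf, "Hello", 5) == 0);
    // With lstrip = 0 the piece is copied verbatim.
    n = try_copy(" Hello", 6, buf, sizeof(buf), /*lstrip=*/0);
    assert(n == 6 && std::memcmp(buf, " Hello", 6) == 0);
    return 0;
}

In llama_detokenize above, this is what allows the leading space added by 'add_space_prefix' tokenizers to be removed from the first non-control token while later pieces keep their spaces.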

llama.h

Lines changed: 2 additions & 1 deletion
@@ -888,13 +888,14 @@ extern "C" {
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
     // Does not write null terminator to the buffer.
-    // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+    // User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')
     // @param special If true, special tokens are rendered in the output.
     LLAMA_API int32_t llama_token_to_piece(
         const struct llama_model * model,
         llama_token token,
         char * buf,
         int32_t length,
+        int32_t lstrip,
         bool special);
 
     /// @details Convert the provided tokens into text.
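
A hedged caller-side sketch of the declaration as it stands after this change (assumes a loaded model and tokens produced by llama_tokenize; detokenize_sketch is a hypothetical helper, and the real llama_detokenize shown above additionally consults the vocab's add_space_prefix flag, checks for control tokens, and tracks the total required length):

#include "llama.h"

#include <cstdint>
#include <string>
#include <vector>

// Illustrative only: stitch a token sequence back into text, skipping the
// leading space that 'add_space_prefix' tokenizers attach to the first piece.
// Error handling for too-small buffers (negative return) is omitted here.
static std::string detokenize_sketch(const struct llama_model * model,
                                     const std::vector<llama_token> & tokens,
                                     bool special) {
    std::string text;
    int32_t lstrip = 1;  // strip the space prefix of the first emitted piece
    for (llama_token tok : tokens) {
        char buf[128];
        const int32_t n = llama_token_to_piece(model, tok, buf, (int32_t) sizeof(buf), lstrip, special);
        if (n > 0) {
            text.append(buf, n);
            lstrip = 0;  // later pieces keep their leading spaces
        }
    }
    return text;
}

Passing lstrip = 0 everywhere reproduces the previous behaviour, so existing callers only need to add the extra argument.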
