Skip to content

Commit a94897b

Browse files
committed
whisper : by default disable non-speech tokens suppression (#473)
This seems to be causing hallucinations at the end of the audio, e.g. "Thank you for listening", "Amen", etc.
1 parent 2407ae8 commit a94897b

File tree

1 file changed

+10
-16
lines changed

1 file changed

+10
-16
lines changed

whisper.cpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2936,7 +2936,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
29362936
/*.language =*/ "en",
29372937

29382938
/*.suppress_blank =*/ true,
2939-
/*.suppress_non_speech_tokens =*/true,
2939+
/*.suppress_non_speech_tokens =*/ false,
29402940

29412941
/*.temperature =*/ 0.0f,
29422942
/*.max_initial_ts =*/ 1.0f,
@@ -3078,8 +3078,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, int max_len, bool
30783078
return res;
30793079
}
30803080

3081-
static const std::vector<std::string> non_speech_tokens
3082-
{
3081+
static const std::vector<std::string> non_speech_tokens = {
30833082
"\"", "#", "(", ")", "*", "+", "/", ":", ";", "<", "=", ">", "@", "[", "\\", "]", "^",
30843083
"_", "`", "{", "|", "}", "~", "「", "」", "『", "』", "<<", ">>", "<<<", ">>>", "--",
30853084
"---", "-(", "-[", "('", "(\"", "((", "))", "(((", ")))", "[[", "]]", "{{", "}}", "♪♪",
@@ -3149,26 +3148,21 @@ static void whisper_process_logits(
31493148

31503149
// suppress non-speech tokens
31513150
// ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
3152-
if (params.suppress_non_speech_tokens)
3153-
{
3154-
for (const std::string &token : non_speech_tokens)
3155-
{
3156-
std::string suppress_tokens[] = {token, " " + token};
3157-
for (const std::string &suppress_token : suppress_tokens)
3158-
{
3159-
if (vocab.token_to_id.find(suppress_token) != vocab.token_to_id.end())
3160-
{
3151+
if (params.suppress_non_speech_tokens) {
3152+
for (const std::string & token : non_speech_tokens) {
3153+
const std::string suppress_tokens[] = {token, " " + token};
3154+
for (const std::string & suppress_token : suppress_tokens) {
3155+
if (vocab.token_to_id.find(suppress_token) != vocab.token_to_id.end()) {
31613156
logits[vocab.token_to_id.at(suppress_token)] = -INFINITY;
31623157
}
31633158
}
31643159
}
3160+
31653161
// allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word
3166-
if (vocab.token_to_id.find(" -") != vocab.token_to_id.end())
3167-
{
3162+
if (vocab.token_to_id.find(" -") != vocab.token_to_id.end()) {
31683163
logits[vocab.token_to_id.at(" -")] = -INFINITY;
31693164
}
3170-
if (vocab.token_to_id.find(" '") != vocab.token_to_id.end())
3171-
{
3165+
if (vocab.token_to_id.find(" '") != vocab.token_to_id.end()) {
31723166
logits[vocab.token_to_id.at(" '")] = -INFINITY;
31733167
}
31743168
}

0 commit comments

Comments
 (0)