Skip to content

Commit c8e1ed6

Browse files
committed
Fix incorrect translate/transcribe token IDs: they differ between model types, so they can no longer be static const
1 parent 62c851b commit c8e1ed6

File tree

4 files changed

+26
-23
lines changed

4 files changed

+26
-23
lines changed

bindings/go/whisper.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,12 @@ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
271271

272272
// Task tokens
273273
func Whisper_token_translate() Token {
274-
return Token(C.whisper_token_translate())
274+
return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
275275
}
276276

277277
// Task tokens
278278
func Whisper_token_transcribe() Token {
279-
return Token(C.whisper_token_transcribe())
279+
return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
280280
}
281281

282282
// Performance information

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,8 @@ public interface WhisperCppJnaLibrary extends Library {
224224
int whisper_token_lang(Pointer ctx, int lang_id);
225225

226226
// Task tokens
227-
int whisper_token_translate();
228-
int whisper_token_transcribe();
227+
int whisper_token_translate (Pointer ctx);
228+
int whisper_token_transcribe(Pointer ctx);
229229

230230
// Performance information from the default state.
231231
void whisper_print_timings(Pointer ctx);

whisper.cpp

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -380,16 +380,17 @@ struct whisper_vocab {
380380
std::map<token, id> token_to_id;
381381
std::map<id, token> id_to_token;
382382

383-
id token_eot = 50256;
384-
id token_sot = 50257;
385-
id token_solm = 50359; // ?? TODO@Akash - rename appropriately
386-
id token_prev = 50360;
387-
id token_not = 50362; // no timestamps
388-
id token_beg = 50363; // begin timestamps
389-
390-
// available tasks
391-
static const id token_translate = 50358; // TODO@Akash - technically it's 50357 for .en models
392-
static const id token_transcribe = 50359; // TODO@Akash - technically it's 50358 for .en models
383+
// reference: https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L334-L349
384+
id token_eot = 50256;
385+
id token_sot = 50257;
386+
// task tokens (used only for multilingual models)
387+
id token_translate = 50357;
388+
id token_transcribe = 50358;
389+
// other special tokens
390+
id token_solm = 50359; // ?? TODO@Akash - rename appropriately
391+
id token_prev = 50360;
392+
id token_not = 50362; // no timestamps
393+
id token_beg = 50363; // begin timestamps
393394

394395
bool is_multilingual() const {
395396
return n_vocab == 51865;
@@ -966,8 +967,10 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
966967
if (vocab.is_multilingual()) {
967968
vocab.token_eot++;
968969
vocab.token_sot++;
969-
vocab.token_prev++;
970+
vocab.token_translate++;
971+
vocab.token_transcribe++;
970972
vocab.token_solm++;
973+
vocab.token_prev++;
971974
vocab.token_not++;
972975
vocab.token_beg++;
973976
}
@@ -3228,12 +3231,12 @@ whisper_token whisper_token_lang(struct whisper_context * ctx, int lang_id) {
32283231
return whisper_token_sot(ctx) + 1 + lang_id;
32293232
}
32303233

3231-
whisper_token whisper_token_translate(void) {
3232-
return whisper_vocab::token_translate;
3234+
whisper_token whisper_token_translate(struct whisper_context * ctx) {
3235+
return ctx->vocab.token_translate;
32333236
}
32343237

3235-
whisper_token whisper_token_transcribe(void) {
3236-
return whisper_vocab::token_transcribe;
3238+
whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
3239+
return ctx->vocab.token_transcribe;
32373240
}
32383241

32393242
void whisper_print_timings(struct whisper_context * ctx) {
@@ -4018,9 +4021,9 @@ int whisper_full_with_state(
40184021
state->lang_id = lang_id;
40194022
prompt_init.push_back(whisper_token_lang(ctx, lang_id));
40204023
if (params.translate) {
4021-
prompt_init.push_back(whisper_token_translate());
4024+
prompt_init.push_back(whisper_token_translate(ctx));
40224025
} else {
4023-
prompt_init.push_back(whisper_token_transcribe());
4026+
prompt_init.push_back(whisper_token_transcribe(ctx));
40244027
}
40254028
}
40264029

whisper.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,8 @@ extern "C" {
284284
WHISPER_API whisper_token whisper_token_lang(struct whisper_context * ctx, int lang_id);
285285

286286
// Task tokens
287-
WHISPER_API whisper_token whisper_token_translate (void);
288-
WHISPER_API whisper_token whisper_token_transcribe(void);
287+
WHISPER_API whisper_token whisper_token_translate (struct whisper_context * ctx);
288+
WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
289289

290290
// Performance information from the default state.
291291
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);

0 commit comments

Comments
 (0)