Skip to content

Commit 62c851b

Browse files
committed
Support tinydiarize (tdrz) speaker-turn detection via a simple hack that overrides the solm token
1 parent 7f0dc9b commit 62c851b

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

whisper.cpp

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -382,14 +382,14 @@ struct whisper_vocab {
382382

383383
id token_eot = 50256;
384384
id token_sot = 50257;
385+
id token_solm = 50359; // ?? TODO@Akash - rename appropriately
385386
id token_prev = 50360;
386-
id token_solm = 50361; // ??
387387
id token_not = 50362; // no timestamps
388-
id token_beg = 50363;
388+
id token_beg = 50363; // begin timestamps
389389

390390
// available tasks
391-
static const id token_translate = 50358;
392-
static const id token_transcribe = 50359;
391+
static const id token_translate = 50358; // TODO@Akash - technically it's 50357 for .en models
392+
static const id token_transcribe = 50359; // TODO@Akash - technically it's 50358 for .en models
393393

394394
bool is_multilingual() const {
395395
return n_vocab == 51865;
@@ -3521,7 +3521,7 @@ static void whisper_process_logits(
35213521

35223522
// suppress sot and solm tokens
35233523
logits[vocab.token_sot] = -INFINITY;
3524-
logits[vocab.token_solm] = -INFINITY;
3524+
// logits[vocab.token_solm] = -INFINITY;
35253525

35263526
// suppress task tokens
35273527
logits[vocab.token_translate] = -INFINITY;
@@ -4500,7 +4500,6 @@ int whisper_full_with_state(
45004500
prompt_past.push_back(tokens_cur[i].id);
45014501
}
45024502

4503-
// store the text from this iteration
45044503
if (!tokens_cur.empty() && ctx->model.n_loaded > 0) {
45054504
int i0 = 0;
45064505
auto t0 = seek + 2*(tokens_cur.front().tid - whisper_token_beg(ctx));
@@ -4517,6 +4516,10 @@ int whisper_full_with_state(
45174516
text += whisper_token_to_str(ctx, tokens_cur[i].id);
45184517
}
45194518

4519+
if (tokens_cur[i].id == whisper_token_solm(ctx)){
4520+
text += " [SPEAKER TURN]";
4521+
};
4522+
45204523
if (tokens_cur[i].id > whisper_token_beg(ctx) && !params.single_segment) {
45214524
const auto t1 = seek + 2*(tokens_cur[i].tid - whisper_token_beg(ctx));
45224525

0 commit comments

Comments
 (0)