Skip to content

Commit b948361

Browse files
authored
examples : add tinydiarization support for streaming (ggml-org#1137)
1 parent a792c40 commit b948361

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

examples/stream/stream.cpp

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ struct whisper_params {
4747
bool print_special = false;
4848
bool no_context = true;
4949
bool no_timestamps = false;
50+
bool tinydiarize = false;
5051

5152
std::string language = "en";
5253
std::string model = "models/ggml-base.en.bin";
@@ -80,6 +81,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
8081
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
8182
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
8283
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
84+
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
85+
8386
else {
8487
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
8588
whisper_print_usage(argc, argv, params);
@@ -113,6 +116,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
113116
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
114117
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
115118
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
119+
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
116120
fprintf(stderr, "\n");
117121
}
118122

@@ -299,6 +303,8 @@ int main(int argc, char ** argv) {
299303
wparams.audio_ctx = params.audio_ctx;
300304
wparams.speed_up = params.speed_up;
301305

306+
wparams.tdrz_enable = params.tinydiarize; // [TDRZ]
307+
302308
// disable temperature fallback
303309
//wparams.temperature_inc = -1.0f;
304310
wparams.temperature_inc = params.no_fallback ? 0.0f : wparams.temperature_inc;
@@ -344,10 +350,19 @@ int main(int argc, char ** argv) {
344350
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
345351
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
346352

347-
printf ("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
353+
std::string output = "[" + to_timestamp(t0) + " --> " + to_timestamp(t1) + "] " + text;
354+
355+
if (whisper_full_get_segment_speaker_turn_next(ctx, i)) {
356+
output += " [SPEAKER_TURN]";
357+
}
358+
359+
output += "\n";
360+
361+
printf("%s", output.c_str());
362+
fflush(stdout);
348363

349364
if (params.fname_out.length() > 0) {
350-
fout << "[" << to_timestamp(t0) << " --> " << to_timestamp(t1) << "] " << text << std::endl;
365+
fout << output;
351366
}
352367
}
353368
}

0 commit comments

Comments
 (0)