@@ -87,6 +87,7 @@ struct whisper_params {
87
87
bool print_colors = false ;
88
88
bool print_progress = false ;
89
89
bool no_timestamps = false ;
90
+ bool log_score = false ;
90
91
91
92
std::string language = " en" ;
92
93
std::string prompt;
@@ -159,6 +160,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
159
160
else if (arg == " -m" || arg == " --model" ) { params.model = argv[++i]; }
160
161
else if (arg == " -f" || arg == " --file" ) { params.fname_inp .emplace_back (argv[++i]); }
161
162
else if (arg == " -oved" || arg == " --ov-e-device" ) { params.openvino_encode_device = argv[++i]; }
163
+ else if (arg == " -ls" || arg == " --log-score" ) { params.log_score = true ; }
162
164
else {
163
165
fprintf (stderr, " error: unknown argument: %s\n " , arg.c_str ());
164
166
whisper_print_usage (argc, argv, params);
@@ -212,6 +214,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
212
214
fprintf (stderr, " -m FNAME, --model FNAME [%-7s] model path\n " , params.model .c_str ());
213
215
fprintf (stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n " , " " );
214
216
fprintf (stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n " , params.openvino_encode_device .c_str ());
217
+ fprintf (stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n " , params.log_score ?" true" :" false" );
215
218
fprintf (stderr, " \n " );
216
219
}
217
220
@@ -486,6 +489,25 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
486
489
return true ;
487
490
}
488
491
492
+ bool output_score (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
493
+ std::ofstream fout (fname);
494
+ fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
495
+
496
+ const int n_segments = whisper_full_n_segments (ctx);
497
+ // fprintf(stderr,"segments: %d\n",n_segments);
498
+ for (int i = 0 ; i < n_segments; ++i) {
499
+ const int n_tokens = whisper_full_n_tokens (ctx, i);
500
+ // fprintf(stderr,"tokens: %d\n",n_tokens);
501
+ for (int j = 0 ; j < n_tokens; j++) {
502
+ auto token = whisper_full_get_token_text (ctx, i, j);
503
+ auto probability = whisper_full_get_token_p (ctx, i, j);
504
+ fout << token << ' \t ' << probability << std::endl;
505
+ // fprintf(stderr,"token: %s %f\n",token,probability);
506
+ }
507
+ }
508
+ return true ;
509
+ }
510
+
489
511
bool output_json (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
490
512
std::ofstream fout (fname);
491
513
int indent = 0 ;
@@ -982,6 +1004,12 @@ int main(int argc, char ** argv) {
982
1004
const auto fname_lrc = fname_out + " .lrc" ;
983
1005
output_lrc (ctx, fname_lrc.c_str (), params, pcmf32s);
984
1006
}
1007
+
1008
+ // output to score file
1009
+ if (params.log_score ) {
1010
+ const auto fname_score = fname_out + " .score.txt" ;
1011
+ output_score (ctx, fname_score.c_str (), params, pcmf32s);
1012
+ }
985
1013
}
986
1014
}
987
1015
0 commit comments