Skip to content

Commit 6559b53

Browse files
author
Oleg Sidorov
authored
server : backport .srt output format (#1565)
This commit adds support for the .srt output format to the Whisper server. The code is effectively backported from examples/main. The output MIME type is set to application/x-subrip, as per https://en.wikipedia.org/wiki/SubRip.

Example usage:

curl 127.0.0.1:8080/inference \
  -H "Content-Type: multipart/form-data" \
  -F file="@<file-path>" \
  -F temperature="0.2" \
  -F response-format="srt"
1 parent 73d5005 commit 6559b53

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

examples/server/server.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <thread>
1212
#include <vector>
1313
#include <cstring>
14+
#include <sstream>
1415

1516
#if defined(_MSC_VER)
1617
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -657,6 +658,27 @@ int main(int argc, char ** argv) {
657658
std::string results = output_str(ctx, params, pcmf32s);
658659
res.set_content(results.c_str(), "text/html");
659660
}
661+
else if (params.response_format == srt_format)
662+
{
663+
std::stringstream ss;
664+
const int n_segments = whisper_full_n_segments(ctx);
665+
for (int i = 0; i < n_segments; ++i) {
666+
const char * text = whisper_full_get_segment_text(ctx, i);
667+
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
668+
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
669+
std::string speaker = "";
670+
671+
if (params.diarize && pcmf32s.size() == 2)
672+
{
673+
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
674+
}
675+
676+
ss << i + 1 + params.offset_n << "\n";
677+
ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
678+
ss << speaker << text << "\n\n";
679+
}
680+
res.set_content(ss.str(), "application/x-subrip");
681+
}
660682
// TODO add more output formats
661683
else
662684
{

0 commit comments

Comments
 (0)