Skip to content

Commit fa38bc2

Browse files
ggerganov and jbrough authored
whisper : allow whisper_full from mel spectrogram - no audio (ggml-org#1214)
Co-authored-by: jbrough <[email protected]>
1 parent af65ab5 commit fa38bc2

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

whisper.cpp

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3140,7 +3140,6 @@ int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, i
31403140
return false;
31413141
}
31423142

3143-
31443143
if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
31453144
log("%s: failed to eval\n", __func__);
31463145
return 1;
@@ -3374,7 +3373,6 @@ float * whisper_get_logits(struct whisper_context * ctx) {
33743373
return ctx->state->logits.data();
33753374
}
33763375

3377-
33783376
float * whisper_get_logits_from_state(struct whisper_state * state) {
33793377
return state->logits.data();
33803378
}
@@ -4087,15 +4085,17 @@ int whisper_full_with_state(
40874085

40884086
result_all.clear();
40894087

4090-
// compute log mel spectrogram
4091-
if (params.speed_up) {
4092-
// TODO: Replace PV with more advanced algorithm
4093-
log("%s: failed to compute log mel spectrogram\n", __func__);
4094-
return -1;
4095-
} else {
4096-
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
4088+
if (n_samples > 0) {
4089+
// compute log mel spectrogram
4090+
if (params.speed_up) {
4091+
// TODO: Replace PV with more advanced algorithm
40974092
log("%s: failed to compute log mel spectrogram\n", __func__);
4098-
return -2;
4093+
return -1;
4094+
} else {
4095+
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
4096+
log("%s: failed to compute log mel spectrogram\n", __func__);
4097+
return -2;
4098+
}
40994099
}
41004100
}
41014101

@@ -4121,7 +4121,9 @@ int whisper_full_with_state(
41214121
state->t_beg = 0;
41224122
state->t_last = 0;
41234123
state->tid_last = 0;
4124-
state->energy = get_signal_energy(samples, n_samples, 32);
4124+
if (n_samples > 0) {
4125+
state->energy = get_signal_energy(samples, n_samples, 32);
4126+
}
41254127
}
41264128

41274129
const int seek_start = params.offset_ms/10;
@@ -4258,7 +4260,7 @@ int whisper_full_with_state(
42584260
while (true) {
42594261
if (params.progress_callback) {
42604262
const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
4261-
4263+
42624264
params.progress_callback(
42634265
ctx, ctx->state, progress_cur, params.progress_callback_user_data);
42644266
}
@@ -4813,7 +4815,6 @@ int whisper_full_with_state(
48134815
return 0;
48144816
}
48154817

4816-
48174818
int whisper_full(
48184819
struct whisper_context * ctx,
48194820
struct whisper_full_params params,
@@ -4890,7 +4891,6 @@ int whisper_full_parallel(
48904891
result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
48914892
result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
48924893

4893-
48944894
// make sure that segments are not overlapping
48954895
if (!ctx->state->result_all.empty()) {
48964896
result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);

0 commit comments

Comments
 (0)