This commit is contained in:
Martin Destagnol 2026-04-20 08:04:56 +00:00 committed by GitHub
commit 1a94c31fcb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 38 additions and 43 deletions

View File

@ -6662,8 +6662,8 @@ static bool whisper_vad(
if (vad_segments->data.size() > 0) {
state->has_vad_segments = true;
ctx->state->vad_segments.clear();
ctx->state->vad_segments.reserve(vad_segments->data.size());
state->vad_segments.clear();
state->vad_segments.reserve(vad_segments->data.size());
// Initialize the time mapping table
state->vad_mapping_table.clear();
@ -6736,7 +6736,7 @@ static bool whisper_vad(
WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
__func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
ctx->state->vad_segments.push_back(segment);
state->vad_segments.push_back(segment);
// Copy this speech segment
memcpy(filtered_samples.data() + offset, samples + segment_start_samples, segment_length * sizeof(float));
@ -6790,19 +6790,41 @@ static bool whisper_vad(
}
int whisper_full_with_state(
struct whisper_context * ctx,
struct whisper_state * state,
struct whisper_full_params params,
const float * samples,
int n_samples) {
struct whisper_context *ctx,
struct whisper_state *state,
struct whisper_full_params params,
const float *samples,
int n_samples)
{
std::vector<float> vad_samples;
if (params.vad)
{
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
if (!whisper_vad(ctx, state, params, samples, n_samples, vad_samples))
{
WHISPER_LOG_ERROR("%s: failed to compute VAD\n", __func__);
return -1;
}
if (vad_samples.empty())
{
state->result_all.clear();
return 0;
}
samples = vad_samples.data();
n_samples = vad_samples.size();
}
// clear old results
auto & result_all = state->result_all;
auto &result_all = state->result_all;
result_all.clear();
if (n_samples > 0) {
if (n_samples > 0)
{
// compute log mel spectrogram
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0)
{
WHISPER_LOG_ERROR("%s: failed to compute log mel spectrogram\n", __func__);
return -2;
}
@ -7741,25 +7763,11 @@ int whisper_full_with_state(
}
int whisper_full(
struct whisper_context * ctx,
struct whisper_full_params params,
const float * samples,
int n_samples) {
std::vector<float> vad_samples;
if (params.vad) {
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
if (!whisper_vad(ctx, ctx->state, params, samples, n_samples, vad_samples)) {
WHISPER_LOG_ERROR("%s: failed to compute VAD\n", __func__);
return -1;
}
if (vad_samples.empty()) {
ctx->state->result_all.clear();
return 0;
}
samples = vad_samples.data();
n_samples = vad_samples.size();
}
struct whisper_context *ctx,
struct whisper_full_params params,
const float *samples,
int n_samples)
{
return whisper_full_with_state(ctx, ctx->state, params, samples, n_samples);
}
@ -7774,19 +7782,6 @@ int whisper_full_parallel(
return whisper_full(ctx, params, samples, n_samples);
}
std::vector<float> vad_samples;
if (params.vad) {
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
if (!whisper_vad(ctx, ctx->state, params, samples, n_samples, vad_samples)) {
WHISPER_LOG_ERROR("%s: failed to compute VAD\n", __func__);
return -1;
}
if (vad_samples.empty()) {
return 0;
}
samples = vad_samples.data();
n_samples = vad_samples.size();
}
int ret = 0;
// prepare separate states for each thread