From ccfea358a889bb689b8c3a1f852732094bb17d0a Mon Sep 17 00:00:00 2001 From: Achyut Krishna Byanjankar Date: Sat, 18 Apr 2026 17:56:58 -0700 Subject: [PATCH] whisper : respect offset_ms in language auto-detection The auto-detect call in whisper_full_with_state passed a hard-coded offset of 0 to whisper_lang_auto_detect_with_state, so language detection always analyzed the first window of audio regardless of the caller's offset_ms. On audio like "1 minute of French then 30 minutes of German" with offset_ms=60000, transcription correctly started at the 1-minute mark but language detection still returned French from the prefix. Pass params.offset_ms through. Auto-detect now reads the same window that decoding will start from. Fixes #1831 --- src/whisper.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 2f356da0..d20469dd 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -6812,7 +6812,21 @@ int whisper_full_with_state( if (params.language == nullptr || strlen(params.language) == 0 || strcmp(params.language, "auto") == 0 || params.detect_language) { std::vector probs(whisper_lang_max_id() + 1, 0.0f); - const auto lang_id = whisper_lang_auto_detect_with_state(ctx, state, 0, params.n_threads, probs.data()); + // Respect the user's offset_ms when choosing the window for language + // detection. Previously this was hardcoded to 0, which meant that on + // audio like "1 minute of French then 30 minutes of German" with + // offset_ms=60000, transcription correctly started at the 1-minute + // mark but language detection still picked the French prefix. + // If the offset falls past the end of the audio (including short + // files where offset_ms exceeds total duration), fall back to 0 so + // auto-detect still returns a valid language. The too-short-audio + // guard later in this function will then handle the empty-decode + // case cleanly. + // ref: https://github.com/ggml-org/whisper.cpp/issues/1831 + const int detect_offset_ms = (params.offset_ms > 0 && (params.offset_ms / 10) < state->mel.n_len_org) + ? params.offset_ms + : 0; + const auto lang_id = whisper_lang_auto_detect_with_state(ctx, state, detect_offset_ms, params.n_threads, probs.data()); if (lang_id < 0) { WHISPER_LOG_ERROR("%s: failed to auto-detect language\n", __func__); return -3;