From 5ee29c5e5bafd9fdd47b8614438dcdab002ed748 Mon Sep 17 00:00:00 2001 From: corvo007 Date: Fri, 6 Feb 2026 21:46:28 +0800 Subject: [PATCH] fix: handle both UTF-8 and ANSI model paths on Windows Replace `std::codecvt_utf8` with `MultiByteToWideChar` for converting model file paths to wide strings on Windows. The previous code assumed `path_model` was always UTF-8 encoded, but when whisper-cli is invoked via `main(argc, argv)`, the MSVC C runtime converts the UTF-16 command line to the system ANSI code page (e.g. CP936 for Chinese Windows), not UTF-8. Passing these ANSI bytes to `codecvt_utf8::from_bytes()` causes `std::range_error`, which triggers STATUS_STACK_BUFFER_OVERRUN (0xC0000409) and crashes the process. The fix tries `MultiByteToWideChar(CP_UTF8)` first, and if the string is not valid UTF-8, falls back to `MultiByteToWideChar(CP_ACP)`. This correctly handles both: - UTF-8 paths (from manifest-enabled or Unicode-aware callers) - ANSI paths (from the default MSVC main() using the system code page) Also changes the guard from `_MSC_VER` to `_WIN32` to cover MinGW/Clang on Windows, and removes the deprecated `<codecvt>` header dependency. Fixes model loading crashes for users with non-ASCII characters in their model file paths (e.g. Chinese, Japanese, Korean, Hebrew, Arabic). 
--- src/whisper.cpp | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 796bccfb4..1f1adb6b7 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -34,8 +34,8 @@ #include #include -#ifdef _MSC_VER -#include +#ifdef _WIN32 +#include #endif #if defined(WHISPER_BIG_ENDIAN) @@ -3623,10 +3623,23 @@ struct whisper_context_params whisper_context_default_params() { struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params) { WHISPER_LOG_INFO("%s: loading model from '%s'\n", __func__, path_model); -#ifdef _MSC_VER - // Convert UTF-8 path to wide string (UTF-16) for Windows, resolving character encoding issues. - std::wstring_convert> converter; - std::wstring path_model_wide = converter.from_bytes(path_model); +#ifdef _WIN32 + // Convert path to wide string (UTF-16) for Windows. + // Try UTF-8 first; if invalid, fall back to the system ANSI code page (e.g. CP936). + // This handles both UTF-8 paths (from manifest-enabled or Unicode-aware callers) + // and ANSI paths (from the default MSVC main() which uses the system code page). 
+ std::wstring path_model_wide; + int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path_model, -1, nullptr, 0); + if (wlen > 0) { + path_model_wide.resize(wlen - 1); + MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path_model, -1, &path_model_wide[0], wlen); + } else { + wlen = MultiByteToWideChar(CP_ACP, 0, path_model, -1, nullptr, 0); + if (wlen > 0) { + path_model_wide.resize(wlen - 1); + MultiByteToWideChar(CP_ACP, 0, path_model, -1, &path_model_wide[0], wlen); + } + } auto fin = std::ifstream(path_model_wide, std::ios::binary); #else auto fin = std::ifstream(path_model, std::ios::binary); @@ -4718,9 +4731,19 @@ struct whisper_vad_context * whisper_vad_init_from_file_with_params( const char * path_model, struct whisper_vad_context_params params) { WHISPER_LOG_INFO("%s: loading VAD model from '%s'\n", __func__, path_model); -#ifdef _MSC_VER - std::wstring_convert> converter; - std::wstring path_model_wide = converter.from_bytes(path_model); +#ifdef _WIN32 + std::wstring path_model_wide; + int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path_model, -1, nullptr, 0); + if (wlen > 0) { + path_model_wide.resize(wlen - 1); + MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path_model, -1, &path_model_wide[0], wlen); + } else { + wlen = MultiByteToWideChar(CP_ACP, 0, path_model, -1, nullptr, 0); + if (wlen > 0) { + path_model_wide.resize(wlen - 1); + MultiByteToWideChar(CP_ACP, 0, path_model, -1, &path_model_wide[0], wlen); + } + } auto fin = std::ifstream(path_model_wide, std::ios::binary); #else auto fin = std::ifstream(path_model, std::ios::binary);