whisper-server : expose --seg-len-hint as CLI flag and POST form field

The initial --seg-len-hint commit wired the flag into whisper-cli but not
whisper-server. This change mirrors the existing best_of / beam_size pattern
at server.cpp:221-222 (CLI) and :505-511 (POST form field), and assigns the
value to wparams.seg_len_hint during inference setup.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Liz Fong-Jones 2026-04-10 14:53:49 -07:00
parent 24a436d350
commit 4f2b6ff9ea
1 changed file with 9 additions and 0 deletions

View File

@ -78,6 +78,7 @@ struct whisper_params {
int32_t progress_step = 5;
int32_t max_context = -1;
int32_t max_len = 0;
int32_t seg_len_hint = 0;
int32_t best_of = 2;
int32_t beam_size = -1;
int32_t audio_ctx = 0;
@ -146,6 +147,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
fprintf(stderr, " -slh N, --seg-len-hint N [%-7d] target segment length in ms\n", params.seg_len_hint);
fprintf(stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n", params.split_on_word ? "true" : "false");
fprintf(stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n", params.best_of);
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
@ -218,6 +220,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
else if (arg == "-slh" || arg == "--seg-len-hint") { params.seg_len_hint = std::stoi(argv[++i]); }
else if (arg == "-bo" || arg == "--best-of") { params.best_of = std::stoi(argv[++i]); }
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
@ -502,6 +505,10 @@ void get_req_parameters(const Request & req, whisper_params & params)
{
params.max_len = std::stoi(req.get_file_value("max_len").content);
}
if (req.has_file("seg_len_hint"))
{
params.seg_len_hint = std::stoi(req.get_file_value("seg_len_hint").content);
}
if (req.has_file("best_of"))
{
params.best_of = std::stoi(req.get_file_value("best_of").content);
@ -932,6 +939,8 @@ int main(int argc, char ** argv) {
wparams.greedy.best_of = params.best_of;
wparams.beam_search.beam_size = params.beam_size;
wparams.seg_len_hint = params.seg_len_hint;
wparams.temperature = params.temperature;
wparams.no_speech_thold = params.no_speech_thold;
wparams.temperature_inc = params.temperature_inc;