Merge 5dd6ab05a0 into fc674574ca
This commit is contained in:
commit
7f9f3362d9
|
|
@ -37,6 +37,7 @@ struct whisper_params {
|
|||
bool save_audio = false; // save audio to wav file
|
||||
bool use_gpu = true;
|
||||
bool flash_attn = true;
|
||||
int file_mode = 0; // 0=raw, 1=newline
|
||||
|
||||
std::string language = "en";
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
|
|
@ -45,6 +46,23 @@ struct whisper_params {
|
|||
|
||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||
|
||||
// no helpers needed for file write modes when using raw/newline only
|
||||
|
||||
static int file_mode_from_string(const std::string & mode) {
|
||||
if (mode == "raw") return 0;
|
||||
if (mode == "newline") return 1;
|
||||
fprintf(stderr, "error: unknown --file-mode '%s' (expected: raw|newline)\n", mode.c_str());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static const char * file_mode_to_cstr(int mode) {
|
||||
switch (mode) {
|
||||
case 0: return "raw";
|
||||
case 1: return "newline";
|
||||
default: return "raw";
|
||||
}
|
||||
}
|
||||
|
||||
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
|
@ -70,6 +88,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
|
|||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
|
||||
else if ( arg == "--file-mode") { params.file_mode = file_mode_from_string(argv[++i]); }
|
||||
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
|
||||
else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
|
|
@ -109,6 +128,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
fprintf(stderr, " --file-mode MODE [%-7s] file write mode: raw|newline (default raw)\n", file_mode_to_cstr(params.file_mode));
|
||||
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
|
||||
fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
|
||||
|
|
@ -211,6 +231,7 @@ int main(int argc, char ** argv) {
|
|||
bool is_running = true;
|
||||
|
||||
std::ofstream fout;
|
||||
std::string file_buffer;
|
||||
if (params.fname_out.length() > 0) {
|
||||
fout.open(params.fname_out);
|
||||
if (!fout.is_open()) {
|
||||
|
|
@ -231,6 +252,11 @@ int main(int argc, char ** argv) {
|
|||
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
|
||||
}
|
||||
printf("[Start speaking]\n");
|
||||
if (params.fname_out.length() > 0) {
|
||||
// Mirror the start signal into the file for parity with stdout
|
||||
fout << "[Start speaking]\n";
|
||||
fout.flush();
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
auto t_last = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -345,6 +371,10 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// print result;
|
||||
{
|
||||
if (params.fname_out.length() > 0 && (params.file_mode != 0)) {
|
||||
file_buffer.clear();
|
||||
}
|
||||
|
||||
if (!use_vad) {
|
||||
printf("\33[2K\r");
|
||||
|
||||
|
|
@ -370,7 +400,11 @@ int main(int argc, char ** argv) {
|
|||
fflush(stdout);
|
||||
|
||||
if (params.fname_out.length() > 0) {
|
||||
fout << text;
|
||||
if (params.file_mode != 0) {
|
||||
file_buffer += text;
|
||||
} else {
|
||||
fout << text;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
||||
|
|
@ -388,13 +422,21 @@ int main(int argc, char ** argv) {
|
|||
fflush(stdout);
|
||||
|
||||
if (params.fname_out.length() > 0) {
|
||||
fout << output;
|
||||
if (params.file_mode != 0) {
|
||||
file_buffer += output;
|
||||
} else {
|
||||
fout << output;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (params.fname_out.length() > 0) {
|
||||
fout << std::endl;
|
||||
if (params.file_mode != 0) {
|
||||
file_buffer.push_back('\n');
|
||||
} else {
|
||||
fout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_vad) {
|
||||
|
|
@ -405,6 +447,16 @@ int main(int argc, char ** argv) {
|
|||
|
||||
++n_iter;
|
||||
|
||||
if ((params.file_mode != 0) && params.fname_out.length() > 0) {
|
||||
if (use_vad || (!use_vad && (n_iter % n_new_line) == 0)) {
|
||||
if (!file_buffer.empty()) {
|
||||
fout << file_buffer;
|
||||
fout.flush();
|
||||
file_buffer.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!use_vad && (n_iter % n_new_line) == 0) {
|
||||
printf("\n");
|
||||
|
||||
|
|
@ -430,6 +482,12 @@ int main(int argc, char ** argv) {
|
|||
|
||||
audio.pause();
|
||||
|
||||
if ((params.file_mode != 0) && params.fname_out.length() > 0 && !file_buffer.empty()) {
|
||||
fout << file_buffer;
|
||||
fout.flush();
|
||||
file_buffer.clear();
|
||||
}
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue