stream: remove duplicates on file

This commit is contained in:
Seunghyun Lee 2025-10-12 22:59:29 +09:00
parent a91dd3be72
commit 43160aec96
1 changed files with 71 additions and 3 deletions

View File

@ -11,6 +11,7 @@
#include <cstdio>
#include <fstream>
#include <string>
#include <cctype>
#include <thread>
#include <vector>
@ -37,6 +38,7 @@ struct whisper_params {
bool save_audio = false; // save audio to wav file
bool use_gpu = true;
bool flash_attn = true;
bool file_on_new_line = false;
std::string language = "en";
std::string model = "models/ggml-base.en.bin";
@ -45,6 +47,14 @@ struct whisper_params {
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
static std::string trim_copy(const std::string & s) {
size_t b = 0;
while (b < s.size() && std::isspace(static_cast<unsigned char>(s[b]))) b++;
size_t e = s.size();
while (e > b && std::isspace(static_cast<unsigned char>(s[e - 1]))) e--;
return s.substr(b, e - b);
}
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
@ -70,6 +80,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
else if ( arg == "--file-on-newline"){ params.file_on_new_line = true; }
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
@ -109,6 +120,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
fprintf(stderr, " --file-on-newline [%-7s] write to file only when newline is printed\n", params.file_on_new_line ? "true" : "false");
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
@ -211,6 +223,8 @@ int main(int argc, char ** argv) {
bool is_running = true;
std::ofstream fout;
std::string file_buffer;
std::string last_flushed_output;
if (params.fname_out.length() > 0) {
fout.open(params.fname_out);
if (!fout.is_open()) {
@ -345,6 +359,11 @@ int main(int argc, char ** argv) {
// print result;
{
const bool capture_iteration = params.fname_out.length() > 0 && params.file_on_new_line;
if (capture_iteration) {
file_buffer.clear();
}
if (!use_vad) {
printf("\33[2K\r");
@ -370,7 +389,11 @@ int main(int argc, char ** argv) {
fflush(stdout);
if (params.fname_out.length() > 0) {
fout << text;
if (capture_iteration) {
file_buffer += text;
} else {
fout << text;
}
}
} else {
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
@ -388,13 +411,21 @@ int main(int argc, char ** argv) {
fflush(stdout);
if (params.fname_out.length() > 0) {
fout << output;
if (capture_iteration) {
file_buffer += output;
} else {
fout << output;
}
}
}
}
if (params.fname_out.length() > 0) {
fout << std::endl;
if (capture_iteration) {
file_buffer.push_back('\n');
} else {
fout << std::endl;
}
}
if (use_vad) {
@ -405,6 +436,27 @@ int main(int argc, char ** argv) {
++n_iter;
if (params.file_on_new_line && params.fname_out.length() > 0) {
if (use_vad || (!use_vad && (n_iter % n_new_line) == 0)) {
if (!file_buffer.empty()) {
const std::string candidate = file_buffer;
// compute incremental suffix compared to last flushed snapshot
size_t lcp = 0;
const size_t maxcp = std::min(candidate.size(), last_flushed_output.size());
while (lcp < maxcp && candidate[lcp] == last_flushed_output[lcp]) {
++lcp;
}
const std::string to_append = candidate.substr(lcp);
if (!trim_copy(to_append).empty()) {
fout << to_append;
fout.flush();
}
last_flushed_output = candidate;
file_buffer.clear();
}
}
}
if (!use_vad && (n_iter % n_new_line) == 0) {
printf("\n");
@ -430,6 +482,22 @@ int main(int argc, char ** argv) {
audio.pause();
if (params.file_on_new_line && params.fname_out.length() > 0 && !file_buffer.empty()) {
const std::string candidate = file_buffer;
size_t lcp = 0;
const size_t maxcp = std::min(candidate.size(), last_flushed_output.size());
while (lcp < maxcp && candidate[lcp] == last_flushed_output[lcp]) {
++lcp;
}
const std::string to_append = candidate.substr(lcp);
if (!trim_copy(to_append).empty()) {
fout << to_append;
fout.flush();
}
last_flushed_output = candidate;
file_buffer.clear();
}
whisper_print_timings(ctx);
whisper_free(ctx);