From 8a73e99137b9fc0ba1242aa23581b0bc6994f9d2 Mon Sep 17 00:00:00 2001 From: Marco Manino Date: Mon, 2 Feb 2026 15:44:59 +0100 Subject: [PATCH] feat(cli): allow ffmpeg decoding from stdin --- examples/common-whisper.cpp | 13 +++++++--- examples/ffmpeg-transcode.cpp | 48 ++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/examples/common-whisper.cpp b/examples/common-whisper.cpp index 6218a882..bfa19d49 100644 --- a/examples/common-whisper.cpp +++ b/examples/common-whisper.cpp @@ -35,8 +35,9 @@ #include #ifdef WHISPER_FFMPEG -// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support -extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector & wav_data); +// as implemented in ffmpeg-transcode.cpp only embedded in common lib if whisper built with ffmpeg support +extern bool ffmpeg_decode_audio(uint8_t * idata, size_t isize, std::vector & wav_data); +extern bool ffmpeg_decode_audio_file(const std::string & ifname, std::vector & wav_data); #endif bool read_audio_data(const std::string & fname, std::vector& pcmf32, std::vector>& pcmf32s, bool stereo) { @@ -62,7 +63,13 @@ bool read_audio_data(const std::string & fname, std::vector& pcmf32, std: } audio_data.insert(audio_data.end(), buf, buf + n); } +#if defined(WHISPER_FFMPEG) + if (ffmpeg_decode_audio(audio_data.data(), audio_data.size(), audio_data) != 0) { + fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str()); + return false; + } +#endif if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) { fprintf(stderr, "Error: failed to open audio data from stdin (%s)\n", ma_result_description(result)); @@ -74,7 +81,7 @@ bool read_audio_data(const std::string & fname, std::vector& pcmf32, std: } else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) { #if defined(WHISPER_FFMPEG) - if (ffmpeg_decode_audio(fname, audio_data) != 0) { + if (ffmpeg_decode_audio_file(fname, audio_data) != 0) { fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str()); return false; diff --git a/examples/ffmpeg-transcode.cpp b/examples/ffmpeg-transcode.cpp index 1fae58a4..1696de7f 100644 --- a/examples/ffmpeg-transcode.cpp +++ b/examples/ffmpeg-transcode.cpp @@ -187,9 +187,13 @@ static bool is_audio_stream(const AVStream *stream) // audio_buffer: input memory // data: decoded output audio data (wav file) // size: size of output data -static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) +int ffmpeg_decode_audio(u8 *idata, size_t isize, std::vector &wav_data) { - LOG("decode_audio: input size: %d\n", audio_buf->size); + struct audio_buffer inaudio_buf; + inaudio_buf.ptr = idata; + inaudio_buf.size = isize; + + LOG("ffmpeg_decode_audio: input size: %d\n", inaudio_buf.size); AVFormatContext *fmt_ctx; AVIOContext *avio_ctx; AVStream *stream; @@ -207,7 +211,7 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) fmt_ctx = avformat_alloc_context(); avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ); LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ); - avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, audio_buf, &read_packet, NULL, NULL); + avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, &inaudio_buf, &read_packet, NULL, NULL); fmt_ctx->pb = avio_ctx; // open the input stream and read header @@ -291,8 +295,8 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) } /* iterate through frames */ - *data = NULL; - *size = 0; + s16 *odata = NULL; + int osize = 0; while (av_read_frame(fmt_ctx, packet) >= 0) { avcodec_send_packet(codec, packet); @@ -300,10 +304,10 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) if (err == AVERROR(EAGAIN)) continue; - convert_frame(swr, codec, frame, data, size, false); + convert_frame(swr, codec, frame, &odata, &osize, false); } /* Flush any remaining conversion buffers... */ - convert_frame(swr, codec, frame, data, size, true); + convert_frame(swr, codec, frame, &odata, &osize, true); av_packet_free(&packet); av_frame_free(&frame); @@ -318,6 +322,17 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) av_freep(&avio_ctx); } + wave_hdr wh; + const size_t outdatasize = osize * sizeof(s16); + set_wave_hdr(wh, outdatasize); + wav_data.resize(sizeof(wave_hdr) + outdatasize); + // header: + memcpy(wav_data.data(), &wh, sizeof(wave_hdr)); + // the data: + memcpy(wav_data.data() + sizeof(wave_hdr), odata, osize* sizeof(s16)); + + free(odata); + return 0; } @@ -325,8 +340,8 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) // ifname: input file path // owav_data: in mem wav file. Can be forwarded as it to whisper/drwav // return 0 on success -int ffmpeg_decode_audio(const std::string &ifname, std::vector& owav_data) { - LOG("ffmpeg_decode_audio: %s\n", ifname.c_str()); +int ffmpeg_decode_audio_file(const std::string &ifname, std::vector& owav_data) { + LOG("ffmpeg_decode_audio_file: %s\n", ifname.c_str()); int ifd = open(ifname.c_str(), O_RDONLY); if (ifd == -1) { fprintf(stderr, "Couldn't open input file %s\n", ifname.c_str()); @@ -340,14 +355,10 @@ int ffmpeg_decode_audio(const std::string &ifname, std::vector& owav_da return err; } LOG("Mapped input file: %s size: %d\n", ibuf, (int) ibuf_size); - struct audio_buffer inaudio_buf; - inaudio_buf.ptr = ibuf; - inaudio_buf.size = ibuf_size; - s16 *odata=NULL; int osize=0; - err = decode_audio(&inaudio_buf, &odata, &osize); + err = ffmpeg_decode_audio(ibuf, ibuf_size, owav_data); LOG("decode_audio returned %d \n", err); if (err != 0) { LOG("decode_audio failed\n"); @@ -355,14 +366,5 @@ int ffmpeg_decode_audio(const std::string &ifname, std::vector& owav_da } LOG("decode_audio output size: %d\n", osize); - wave_hdr wh; - const size_t outdatasize = osize * sizeof(s16); - set_wave_hdr(wh, outdatasize); - owav_data.resize(sizeof(wave_hdr) + outdatasize); - // header: - memcpy(owav_data.data(), &wh, sizeof(wave_hdr)); - // the data: - memcpy(owav_data.data() + sizeof(wave_hdr), odata, osize* sizeof(s16)); - return 0; }