Merge 8a73e99137 into fc674574ca

2026-04-20 14:01:34 +00:00 · 2026-04-20 14:01:34 +00:00 · 95da95eb8a
parent fc674574ca 8a73e99137
commit 95da95eb8a
2 changed files with 35 additions and 26 deletions
--- a/examples/common-whisper.cpp
+++ b/examples/common-whisper.cpp
@ -35,8 +35,9 @@
 #include <fstream>

 #ifdef WHISPER_FFMPEG
-// as implemented in ffmpeg_trancode.cpp only embedded in common lib if whisper built with ffmpeg support
-extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
+// implemented in ffmpeg-transcode.cpp, which is only built into the common lib when whisper is built with ffmpeg support
+extern bool ffmpeg_decode_audio(uint8_t * idata, size_t isize, std::vector<uint8_t> & wav_data);
+extern bool ffmpeg_decode_audio_file(const std::string & ifname, std::vector<uint8_t> & wav_data);
 #endif

 bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
@ -62,7 +63,13 @@ bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std:
 			}
 			audio_data.insert(audio_data.end(), buf, buf + n);
 		}
+#if defined(WHISPER_FFMPEG)
+		if (ffmpeg_decode_audio(audio_data.data(), audio_data.size(), audio_data) != 0) {
+			fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());

+			return false;
+		}
+#endif
 		if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {

 			fprintf(stderr, "Error: failed to open audio data from stdin (%s)\n", ma_result_description(result));
@ -74,7 +81,7 @@ bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std:
    }
    else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
 #if defined(WHISPER_FFMPEG)
-		if (ffmpeg_decode_audio(fname, audio_data) != 0) {
+		if (ffmpeg_decode_audio_file(fname, audio_data) != 0) {
 			fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());

 			return false;
--- a/examples/ffmpeg-transcode.cpp
+++ b/examples/ffmpeg-transcode.cpp
@ -187,9 +187,13 @@ static bool is_audio_stream(const AVStream *stream)
 // audio_buffer: input memory
 // data: decoded output audio data (wav file)
 // size: size of output data
-static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
+int ffmpeg_decode_audio(u8 *idata, size_t isize, std::vector<uint8_t> &wav_data)
 {
-    LOG("decode_audio: input size: %d\n", audio_buf->size);
+	struct audio_buffer inaudio_buf;
+    inaudio_buf.ptr = idata;
+    inaudio_buf.size = isize;
+
+    LOG("ffmpeg_decode_audio: input size: %d\n", inaudio_buf.size);
 	AVFormatContext *fmt_ctx;
 	AVIOContext *avio_ctx;
 	AVStream *stream;
@ -207,7 +211,7 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
    fmt_ctx = avformat_alloc_context();
    avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ);
    LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ);
-    avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, audio_buf, &read_packet, NULL, NULL);
+    avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, &inaudio_buf, &read_packet, NULL, NULL);
 	fmt_ctx->pb = avio_ctx;

    // open the input stream and read header
@ -291,8 +295,8 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
 	}

 	/* iterate through frames */
-	*data = NULL;
-	*size = 0;
+	s16 *odata = NULL;
+	int osize = 0;
 	while (av_read_frame(fmt_ctx, packet) >= 0) {
 		avcodec_send_packet(codec, packet);

@ -300,10 +304,10 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
 		if (err == AVERROR(EAGAIN))
 			continue;

-		convert_frame(swr, codec, frame, data, size, false);
+		convert_frame(swr, codec, frame, &odata, &osize, false);
 	}
 	/* Flush any remaining conversion buffers... */
-	convert_frame(swr, codec, frame, data, size, true);
+	convert_frame(swr, codec, frame, &odata, &osize, true);

 	av_packet_free(&packet);
 	av_frame_free(&frame);
@ -318,6 +322,17 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
 		av_freep(&avio_ctx);
 	}

+    wave_hdr wh;
+    const size_t outdatasize = osize * sizeof(s16);
+    set_wave_hdr(wh, outdatasize);
+    wav_data.resize(sizeof(wave_hdr) + outdatasize);
+    // header:
+    memcpy(wav_data.data(), &wh, sizeof(wave_hdr));
+    // the data:
+    memcpy(wav_data.data() + sizeof(wave_hdr), odata, osize* sizeof(s16));
+
+	free(odata);
+
 	return 0;
 }

@ -325,8 +340,8 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
 // ifname: input file path
 // owav_data: in mem wav file. Can be forwarded as it to whisper/drwav
 // return 0 on success
-int ffmpeg_decode_audio(const std::string &ifname, std::vector<uint8_t>& owav_data) {
-    LOG("ffmpeg_decode_audio: %s\n", ifname.c_str());
+int ffmpeg_decode_audio_file(const std::string &ifname, std::vector<uint8_t>& owav_data) {
+    LOG("ffmpeg_decode_audio_file: %s\n", ifname.c_str());
    int ifd = open(ifname.c_str(), O_RDONLY);
    if (ifd == -1) {
        fprintf(stderr, "Couldn't open input file %s\n", ifname.c_str());
@ -340,14 +355,10 @@ int ffmpeg_decode_audio(const std::string &ifname, std::vector<uint8_t>& owav_da
        return err;
    }
    LOG("Mapped input file: %s size: %d\n", ibuf, (int) ibuf_size);
-    struct audio_buffer inaudio_buf;
-    inaudio_buf.ptr = ibuf;
-    inaudio_buf.size = ibuf_size;
-
    s16 *odata=NULL;
    int osize=0;

-    err = decode_audio(&inaudio_buf, &odata, &osize);
+    err = ffmpeg_decode_audio(ibuf, ibuf_size, owav_data);
    LOG("decode_audio returned %d \n", err);
    if (err != 0) {
        LOG("decode_audio failed\n");
@ -355,14 +366,5 @@ int ffmpeg_decode_audio(const std::string &ifname, std::vector<uint8_t>& owav_da
    }
    LOG("decode_audio output size: %d\n", osize);

-    wave_hdr wh;
-    const size_t outdatasize = osize * sizeof(s16);
-    set_wave_hdr(wh, outdatasize);
-    owav_data.resize(sizeof(wave_hdr) + outdatasize);
-    // header:
-    memcpy(owav_data.data(), &wh, sizeof(wave_hdr));
-    // the data:
-    memcpy(owav_data.data() + sizeof(wave_hdr), odata, osize* sizeof(s16));
-
    return 0;
 }