From a7a8169b99ee2d4859178b93a9d0bdfcaf99bd10 Mon Sep 17 00:00:00 2001 From: ciricc Date: Wed, 21 Jan 2026 03:44:02 +0300 Subject: [PATCH] feat: sync with upstream --- .gitignore | 1 + bindings/go/Makefile | 23 ++------ bindings/go/params.go | 6 ++ bindings/go/pkg/whisper/context_test.go | 57 ------------------- bindings/go/pkg/whisper/interface.go | 18 ++++++ bindings/go/pkg/whisper/params_wrap.go | 1 + bindings/go/pkg/whisper/stateful_context.go | 41 +++++++++++++ bindings/go/pkg/whisper/stateless_context.go | 41 +++++++++++++ bindings/go/pkg/whisper/util_test.go | 6 +- bindings/go/pkg/whisper/whisper_ctx_test.go | 2 +- bindings/go/pkg/whisper/whisper_state_test.go | 2 +- bindings/go/whisper_test.go | 4 +- 12 files changed, 120 insertions(+), 82 deletions(-) diff --git a/.gitignore b/.gitignore index 957eeb75..ffb3ccbc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.d .cache/ .coreml/ +pkg/ .test/ .venv/ .vs/ diff --git a/bindings/go/Makefile b/bindings/go/Makefile index fb57d0fc..3bf158bf 100644 --- a/bindings/go/Makefile +++ b/bindings/go/Makefile @@ -35,7 +35,7 @@ whisper: mkdir -DBUILD_SHARED_LIBS=OFF cmake --build ../../${BUILD_DIR} --target whisper -test: model-small whisper modtidy +test: model-tiny whisper modtidy ifeq ($(UNAME_S),Darwin) @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v . @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/... 
@@ -46,18 +46,15 @@ endif examples: $(EXAMPLES_DIR) -benchmark: model-small whisper modtidy +benchmark: model-tiny whisper modtidy ifeq ($(UNAME_S),Darwin) - @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench=BenchmarkContextProcess -benchmem -run '^$$' ./pkg/whisper/... + @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench='BenchmarkContextProcessCPU$$' -benchtime=1x -benchmem -run '^$$' ./pkg/whisper/... else - @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -benchmem -run '^$$' ./pkg/whisper/... + @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -bench='BenchmarkContextProcessCPU$$' -benchtime=1x -benchmem -run '^$$' ./pkg/whisper/... endif -model-small: mkdir examples/go-model-download - @${BUILD_DIR}/go-model-download -out models ggml-small.en.bin - -model-small-tdrz: mkdir examples/go-model-download - @${BUILD_DIR}/go-model-download -out models ggml-small.en-tdrz.bin +model-tiny: mkdir examples/go-model-download + @${BUILD_DIR}/go-model-download -out models ggml-tiny.en.bin $(EXAMPLES_DIR): mkdir whisper modtidy @echo Build example $(notdir $@) @@ -67,14 +64,6 @@ else @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@ endif -.PHONY: samples -samples: - @echo "Downloading samples..." 
- @mkdir -p samples - @wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3 - @ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav - @rm samples/a13.mp3 - mkdir: @echo Mkdir ${BUILD_DIR} @install -d ${BUILD_DIR} diff --git a/bindings/go/params.go b/bindings/go/params.go index 07801649..a51055c0 100644 --- a/bindings/go/params.go +++ b/bindings/go/params.go @@ -189,6 +189,12 @@ func (p *Params) SetInitialPrompt(prompt string) { p.initial_prompt = C.CString(prompt) } +// SetCarryInitialPrompt controls whether initial_prompt is always prepended to every decode window +// (enabling it may reduce conditioning on previous text). +func (p *Params) SetCarryInitialPrompt(v bool) { + p.carry_initial_prompt = toBool(v) +} + /////////////////////////////////////////////////////////////////////////////// // PRIVATE METHODS diff --git a/bindings/go/pkg/whisper/context_test.go b/bindings/go/pkg/whisper/context_test.go index f18238ba..2a4b8a49 100644 --- a/bindings/go/pkg/whisper/context_test.go +++ b/bindings/go/pkg/whisper/context_test.go @@ -324,63 +324,6 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) { assert.NoError(err) } -func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) { - data := helperLoadSample(t, MultiSpeakerSamplePath) - - model, err := whisper.NewModelContext(ModelTinydiarizePath) - require.NoError(t, err) - defer func() { _ = model.Close() }() - - params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) { - p.SetDiarize(true) - p.SetVAD(false) - p.SetSplitOnWord(true) - p.SetMaxSegmentLength(1) - p.SetMaxTokensPerSegment(64) - p.SetTokenTimestamps(true) - }) - require.NoError(t, err) - - // diarize ON with beam search and tighter segmentation - ctxOn, err := whisper.NewStatefulContext(model, params) - require.NoError(t, err) - defer func() { 
_ = ctxOn.Close() }() - - require.NoError(t, ctxOn.Process(data, nil, nil, nil)) - var turnsOn int - for { - seg, err := ctxOn.NextSegment() - if err == io.EOF { - break - } - require.NoError(t, err) - if seg.SpeakerTurnNext { - turnsOn++ - } - } - require.Greater(t, turnsOn, 0, "expected speaker turn boundaries with diarization enabled") - - // diarize OFF baseline with same segmentation and beam - ctxOff, err := whisper.NewStatefulContext(model, params) - require.NoError(t, err) - defer func() { _ = ctxOff.Close() }() - - require.NoError(t, ctxOff.Process(data, nil, nil, nil)) - var turnsOff int - for { - seg, err := ctxOff.NextSegment() - if err == io.EOF { - break - } - require.NoError(t, err) - if seg.SpeakerTurnNext { - turnsOff++ - } - } - - require.GreaterOrEqual(t, turnsOn, turnsOff, "diarization should not reduce turn boundaries") -} - func TestContext_SpeakerTurnNext_Field_Present(t *testing.T) { assert := assert.New(t) diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go index eabdb2db..e17d7fee 100644 --- a/bindings/go/pkg/whisper/interface.go +++ b/bindings/go/pkg/whisper/interface.go @@ -111,6 +111,24 @@ type Context interface { // Get detected language DetectedLanguage() string + // Voice Activity Detection (VAD) methods + // Deprecated: Use Params().SetVAD() instead + SetVAD(bool) + // Deprecated: Use Params().SetVADModelPath() instead + SetVADModelPath(string) + // Deprecated: Use Params().SetVADThreshold() instead + SetVADThreshold(float32) + // Deprecated: Use Params().SetVADMinSpeechMs() instead + SetVADMinSpeechMs(int) + // Deprecated: Use Params().SetVADMinSilenceMs() instead + SetVADMinSilenceMs(int) + // Deprecated: Use Params().SetVADMaxSpeechSec() instead + SetVADMaxSpeechSec(float32) + // Deprecated: Use Params().SetVADSpeechPadMs() instead + SetVADSpeechPadMs(int) + // Deprecated: Use Params().SetVADSamplesOverlap() instead + SetVADSamplesOverlap(float32) + // Process mono audio data and return any 
errors. // If defined, newly generated segments are passed to the // callback function during processing. diff --git a/bindings/go/pkg/whisper/params_wrap.go b/bindings/go/pkg/whisper/params_wrap.go index 44b74562..a4b18572 100644 --- a/bindings/go/pkg/whisper/params_wrap.go +++ b/bindings/go/pkg/whisper/params_wrap.go @@ -64,6 +64,7 @@ func (w *Parameters) SetMaxContext(n int) { w.p.SetMaxContext(n) } func (w *Parameters) SetBeamSize(n int) { w.p.SetBeamSize(n) } func (w *Parameters) SetEntropyThold(t float32) { w.p.SetEntropyThold(t) } func (w *Parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) } +func (w *Parameters) SetCarryInitialPrompt(v bool) { w.p.SetCarryInitialPrompt(v) } func (w *Parameters) SetTemperature(t float32) { w.p.SetTemperature(t) } func (w *Parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) } func (w *Parameters) SetNoContext(v bool) { w.p.SetNoContext(v) } diff --git a/bindings/go/pkg/whisper/stateful_context.go b/bindings/go/pkg/whisper/stateful_context.go index 08e04094..c8fe5ef9 100644 --- a/bindings/go/pkg/whisper/stateful_context.go +++ b/bindings/go/pkg/whisper/stateful_context.go @@ -392,6 +392,47 @@ func (context *StatefulContext) SetTranslate(v bool) { context.params.SetTranslate(v) } +// VAD methods - implement Context interface +// Deprecated: Use Params().SetVAD() instead +func (context *StatefulContext) SetVAD(v bool) { + context.params.SetVAD(v) +} + +// Deprecated: Use Params().SetVADModelPath() instead +func (context *StatefulContext) SetVADModelPath(path string) { + context.params.SetVADModelPath(path) +} + +// Deprecated: Use Params().SetVADThreshold() instead +func (context *StatefulContext) SetVADThreshold(t float32) { + context.params.SetVADThreshold(t) +} + +// Deprecated: Use Params().SetVADMinSpeechMs() instead +func (context *StatefulContext) SetVADMinSpeechMs(ms int) { + context.params.SetVADMinSpeechMs(ms) +} + +// Deprecated: Use Params().SetVADMinSilenceMs() 
instead +func (context *StatefulContext) SetVADMinSilenceMs(ms int) { + context.params.SetVADMinSilenceMs(ms) +} + +// Deprecated: Use Params().SetVADMaxSpeechSec() instead +func (context *StatefulContext) SetVADMaxSpeechSec(s float32) { + context.params.SetVADMaxSpeechSec(s) +} + +// Deprecated: Use Params().SetVADSpeechPadMs() instead +func (context *StatefulContext) SetVADSpeechPadMs(ms int) { + context.params.SetVADSpeechPadMs(ms) +} + +// Deprecated: Use Params().SetVADSamplesOverlap() instead +func (context *StatefulContext) SetVADSamplesOverlap(sec float32) { + context.params.SetVADSamplesOverlap(sec) +} + // Make stateful context compatible with the old deprecated interface for // the simple migration into multi-threaded processing. var _ Context = (*StatefulContext)(nil) diff --git a/bindings/go/pkg/whisper/stateless_context.go b/bindings/go/pkg/whisper/stateless_context.go index 7dbe8be2..5c0ee35c 100644 --- a/bindings/go/pkg/whisper/stateless_context.go +++ b/bindings/go/pkg/whisper/stateless_context.go @@ -374,4 +374,45 @@ func (context *StatelessContext) SetTranslate(v bool) { context.params.SetTranslate(v) } +// VAD methods - implement Context interface +// Deprecated: Use Params().SetVAD() instead +func (context *StatelessContext) SetVAD(v bool) { + context.params.SetVAD(v) +} + +// Deprecated: Use Params().SetVADModelPath() instead +func (context *StatelessContext) SetVADModelPath(path string) { + context.params.SetVADModelPath(path) +} + +// Deprecated: Use Params().SetVADThreshold() instead +func (context *StatelessContext) SetVADThreshold(t float32) { + context.params.SetVADThreshold(t) +} + +// Deprecated: Use Params().SetVADMinSpeechMs() instead +func (context *StatelessContext) SetVADMinSpeechMs(ms int) { + context.params.SetVADMinSpeechMs(ms) +} + +// Deprecated: Use Params().SetVADMinSilenceMs() instead +func (context *StatelessContext) SetVADMinSilenceMs(ms int) { + context.params.SetVADMinSilenceMs(ms) +} + +// Deprecated: Use 
Params().SetVADMaxSpeechSec() instead +func (context *StatelessContext) SetVADMaxSpeechSec(s float32) { + context.params.SetVADMaxSpeechSec(s) +} + +// Deprecated: Use Params().SetVADSpeechPadMs() instead +func (context *StatelessContext) SetVADSpeechPadMs(ms int) { + context.params.SetVADSpeechPadMs(ms) +} + +// Deprecated: Use Params().SetVADSamplesOverlap() instead +func (context *StatelessContext) SetVADSamplesOverlap(sec float32) { + context.params.SetVADSamplesOverlap(sec) +} + var _ Context = (*StatelessContext)(nil) diff --git a/bindings/go/pkg/whisper/util_test.go b/bindings/go/pkg/whisper/util_test.go index a2fadca5..fa6c24e5 100644 --- a/bindings/go/pkg/whisper/util_test.go +++ b/bindings/go/pkg/whisper/util_test.go @@ -6,10 +6,8 @@ import ( ) const ( - ModelPath = "../../models/ggml-small.en.bin" - ModelTinydiarizePath = "../../models/ggml-small.en-tdrz.bin" - SamplePath = "../../samples/jfk.wav" - MultiSpeakerSamplePath = "../../samples/a13.wav" + ModelPath = "../../models/ggml-tiny.en.bin" + SamplePath = "../../samples/jfk.wav" ) func TestMain(m *testing.M) { diff --git a/bindings/go/pkg/whisper/whisper_ctx_test.go b/bindings/go/pkg/whisper/whisper_ctx_test.go index d308f11d..5353a540 100644 --- a/bindings/go/pkg/whisper/whisper_ctx_test.go +++ b/bindings/go/pkg/whisper/whisper_ctx_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" ) -const testModelPathCtx = "../../models/ggml-small.en.bin" +const testModelPathCtx = "../../models/ggml-tiny.en.bin" func TestWhisperCtx_NilWrapper(t *testing.T) { wctx := newCtxAccessor(nil) diff --git a/bindings/go/pkg/whisper/whisper_state_test.go b/bindings/go/pkg/whisper/whisper_state_test.go index 2c4c6dd3..b676d492 100644 --- a/bindings/go/pkg/whisper/whisper_state_test.go +++ b/bindings/go/pkg/whisper/whisper_state_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" ) -const testModelPathState = "../../models/ggml-small.en.bin" +const testModelPathState = 
"../../models/ggml-tiny.en.bin" func TestWhisperState_NilWrapper(t *testing.T) { ws := newWhisperState(nil) diff --git a/bindings/go/whisper_test.go b/bindings/go/whisper_test.go index 330981fb..0fc0ef8e 100644 --- a/bindings/go/whisper_test.go +++ b/bindings/go/whisper_test.go @@ -15,12 +15,12 @@ import ( ) const ( - ModelPath = "models/ggml-small.en.bin" + ModelPath = "models/ggml-tiny.en.bin" SamplePath = "samples/jfk.wav" ) func TestMain(m *testing.M) { - whisper.DisableLogs() + // whisper.DisableLogs() // temporarily disabled to see error messages os.Exit(m.Run()) }