feat: sync with upstream

This commit is contained in:
ciricc 2026-01-21 03:44:02 +03:00
parent 3d5d94412c
commit a7a8169b99
12 changed files with 120 additions and 82 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*.d
.cache/
.coreml/
pkg/
.test/
.venv/
.vs/

View File

@ -35,7 +35,7 @@ whisper: mkdir
-DBUILD_SHARED_LIBS=OFF
cmake --build ../../${BUILD_DIR} --target whisper
test: model-small whisper modtidy
test: model-tiny whisper modtidy
ifeq ($(UNAME_S),Darwin)
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
@ -46,18 +46,15 @@ endif
examples: $(EXAMPLES_DIR)
# benchmark: run the Go benchmarks of ./pkg/whisper/... once the model,
# whisper and modtidy prerequisites are done. The Darwin branch additionally
# passes GGML_METAL_PATH_RESOURCES and the -extldflags linker flags.
# -run '^$$' reaches go test as '^$', so no regular tests are selected.
# NOTE(review): several lines here appear in two variants (this view looks like
# a rendered diff showing previous and updated lines together) - confirm which
# ones are current before editing.
benchmark: model-small whisper modtidy
benchmark: model-tiny whisper modtidy
ifeq ($(UNAME_S),Darwin)
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench=BenchmarkContextProcess -benchmem -run '^$$' ./pkg/whisper/...
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench='BenchmarkContextProcessCPU$$' -benchtime=1x -benchmem -run '^$$' ./pkg/whisper/...
else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -benchmem -run '^$$' ./pkg/whisper/...
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -bench='BenchmarkContextProcessCPU$$' -benchtime=1x -benchmem -run '^$$' ./pkg/whisper/...
endif
# model-small: run ${BUILD_DIR}/go-model-download with "-out models" for
# ggml-small.en.bin (the tool is built by the examples/go-model-download
# prerequisite).
model-small: mkdir examples/go-model-download
@${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
# model-small-tdrz: run ${BUILD_DIR}/go-model-download with "-out models" for
# ggml-small.en-tdrz.bin.
model-small-tdrz: mkdir examples/go-model-download
@${BUILD_DIR}/go-model-download -out models ggml-small.en-tdrz.bin
# model-tiny: run ${BUILD_DIR}/go-model-download with "-out models" for
# ggml-tiny.en.bin.
model-tiny: mkdir examples/go-model-download
@${BUILD_DIR}/go-model-download -out models ggml-tiny.en.bin
$(EXAMPLES_DIR): mkdir whisper modtidy
@echo Build example $(notdir $@)
@ -67,14 +64,6 @@ else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
endif
# samples: download the Apollo 13 clip from Wikimedia as samples/a13.mp3,
# convert its first 30 seconds to a 16 kHz, mono, signed 16-bit PCM file at
# samples/a13.wav, then delete the intermediate MP3. Requires wget and ffmpeg.
# NOTE(review): "-loglevel -0" is an unusual value - confirm ffmpeg interprets
# it as intended.
.PHONY: samples
samples:
@echo "Downloading samples..."
@mkdir -p samples
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
@rm samples/a13.mp3
# mkdir: print the build directory name and create ${BUILD_DIR} with
# "install -d".
mkdir:
@echo Mkdir ${BUILD_DIR}
@install -d ${BUILD_DIR}

View File

@ -189,6 +189,12 @@ func (p *Params) SetInitialPrompt(prompt string) {
p.initial_prompt = C.CString(prompt)
}
// SetCarryInitialPrompt stores v, converted with toBool, in the
// carry_initial_prompt parameter. If true, initial_prompt is always prepended
// to every decode window (may reduce conditioning on previous text).
func (p *Params) SetCarryInitialPrompt(v bool) {
p.carry_initial_prompt = toBool(v)
}
///////////////////////////////////////////////////////////////////////////////
// PRIVATE METHODS

View File

@ -324,63 +324,6 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) {
assert.NoError(err)
}
// TestDiarization_TwoSpeakers_Boundaries processes the multi-speaker sample
// twice with the tinydiarize model: once with diarization enabled and once
// with it disabled. It requires at least one speaker turn in the enabled run
// and no fewer turns than in the disabled baseline.
func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) {
	data := helperLoadSample(t, MultiSpeakerSamplePath)

	model, err := whisper.NewModelContext(ModelTinydiarizePath)
	require.NoError(t, err)
	defer func() { _ = model.Close() }()

	// countTurns runs one greedy pass over data with the given diarization
	// setting and returns how many segments are flagged SpeakerTurnNext.
	// Both runs share the same tight segmentation so they differ only in
	// the diarize flag.
	countTurns := func(diarize bool) int {
		params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) {
			p.SetDiarize(diarize)
			p.SetVAD(false)
			p.SetSplitOnWord(true)
			p.SetMaxSegmentLength(1)
			p.SetMaxTokensPerSegment(64)
			p.SetTokenTimestamps(true)
		})
		require.NoError(t, err)

		ctx, err := whisper.NewStatefulContext(model, params)
		require.NoError(t, err)
		defer func() { _ = ctx.Close() }()

		require.NoError(t, ctx.Process(data, nil, nil, nil))

		turns := 0
		for {
			seg, err := ctx.NextSegment()
			if err == io.EOF {
				// All segments consumed.
				return turns
			}
			require.NoError(t, err)
			if seg.SpeakerTurnNext {
				turns++
			}
		}
	}

	// Diarization ON: the two-speaker sample must yield at least one turn.
	turnsOn := countTurns(true)
	require.Greater(t, turnsOn, 0, "expected speaker turn boundaries with diarization enabled")

	// Diarization OFF baseline. The previous version reused the diarize=true
	// parameters here, which made the comparison below trivially true.
	turnsOff := countTurns(false)
	require.GreaterOrEqual(t, turnsOn, turnsOff, "diarization should not reduce turn boundaries")
}
func TestContext_SpeakerTurnNext_Field_Present(t *testing.T) {
assert := assert.New(t)

View File

@ -111,6 +111,24 @@ type Context interface {
// Get detected language
DetectedLanguage() string
// Voice Activity Detection (VAD) methods
// Deprecated: Use Params().SetVAD() instead
SetVAD(bool)
// Deprecated: Use Params().SetVADModelPath() instead
SetVADModelPath(string)
// Deprecated: Use Params().SetVADThreshold() instead
SetVADThreshold(float32)
// Deprecated: Use Params().SetVADMinSpeechMs() instead
SetVADMinSpeechMs(int)
// Deprecated: Use Params().SetVADMinSilenceMs() instead
SetVADMinSilenceMs(int)
// Deprecated: Use Params().SetVADMaxSpeechSec() instead
SetVADMaxSpeechSec(float32)
// Deprecated: Use Params().SetVADSpeechPadMs() instead
SetVADSpeechPadMs(int)
// Deprecated: Use Params().SetVADSamplesOverlap() instead
SetVADSamplesOverlap(float32)
// Process mono audio data and return any errors.
// If defined, newly generated segments are passed to the
// callback function during processing.

View File

@ -64,6 +64,7 @@ func (w *Parameters) SetMaxContext(n int) { w.p.SetMaxContext(n) }
// The setters below are thin wrappers: each one passes its argument straight
// to the method of the same name on w.p.

func (w *Parameters) SetBeamSize(n int) { w.p.SetBeamSize(n) }
func (w *Parameters) SetEntropyThold(t float32) { w.p.SetEntropyThold(t) }
func (w *Parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) }
func (w *Parameters) SetCarryInitialPrompt(v bool) { w.p.SetCarryInitialPrompt(v) }
func (w *Parameters) SetTemperature(t float32) { w.p.SetTemperature(t) }
func (w *Parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) }
func (w *Parameters) SetNoContext(v bool) { w.p.SetNoContext(v) }

View File

@ -392,6 +392,47 @@ func (context *StatefulContext) SetTranslate(v bool) {
context.params.SetTranslate(v)
}
// VAD methods - implement Context interface
// SetVAD passes v to SetVAD on the context's params.
//
// Deprecated: Use Params().SetVAD() instead.
func (context *StatefulContext) SetVAD(v bool) {
context.params.SetVAD(v)
}
// SetVADModelPath passes path to SetVADModelPath on the context's params.
//
// Deprecated: Use Params().SetVADModelPath() instead.
func (context *StatefulContext) SetVADModelPath(path string) {
context.params.SetVADModelPath(path)
}
// SetVADThreshold passes t to SetVADThreshold on the context's params.
//
// Deprecated: Use Params().SetVADThreshold() instead.
func (context *StatefulContext) SetVADThreshold(t float32) {
context.params.SetVADThreshold(t)
}
// SetVADMinSpeechMs passes ms to SetVADMinSpeechMs on the context's params.
//
// Deprecated: Use Params().SetVADMinSpeechMs() instead.
func (context *StatefulContext) SetVADMinSpeechMs(ms int) {
context.params.SetVADMinSpeechMs(ms)
}
// SetVADMinSilenceMs passes ms to SetVADMinSilenceMs on the context's params.
//
// Deprecated: Use Params().SetVADMinSilenceMs() instead.
func (context *StatefulContext) SetVADMinSilenceMs(ms int) {
context.params.SetVADMinSilenceMs(ms)
}
// SetVADMaxSpeechSec passes s to SetVADMaxSpeechSec on the context's params.
//
// Deprecated: Use Params().SetVADMaxSpeechSec() instead.
func (context *StatefulContext) SetVADMaxSpeechSec(s float32) {
context.params.SetVADMaxSpeechSec(s)
}
// SetVADSpeechPadMs passes ms to SetVADSpeechPadMs on the context's params.
//
// Deprecated: Use Params().SetVADSpeechPadMs() instead.
func (context *StatefulContext) SetVADSpeechPadMs(ms int) {
context.params.SetVADSpeechPadMs(ms)
}
// SetVADSamplesOverlap passes sec to SetVADSamplesOverlap on the context's
// params.
//
// Deprecated: Use Params().SetVADSamplesOverlap() instead.
func (context *StatefulContext) SetVADSamplesOverlap(sec float32) {
context.params.SetVADSamplesOverlap(sec)
}
// Compile-time check that StatefulContext still satisfies the old, deprecated
// Context interface, which keeps migration to multi-threaded processing simple.
var _ Context = (*StatefulContext)(nil)

View File

@ -374,4 +374,45 @@ func (context *StatelessContext) SetTranslate(v bool) {
context.params.SetTranslate(v)
}
// VAD methods - implement Context interface
// SetVAD passes v to SetVAD on the context's params.
//
// Deprecated: Use Params().SetVAD() instead.
func (context *StatelessContext) SetVAD(v bool) {
context.params.SetVAD(v)
}
// SetVADModelPath passes path to SetVADModelPath on the context's params.
//
// Deprecated: Use Params().SetVADModelPath() instead.
func (context *StatelessContext) SetVADModelPath(path string) {
context.params.SetVADModelPath(path)
}
// SetVADThreshold passes t to SetVADThreshold on the context's params.
//
// Deprecated: Use Params().SetVADThreshold() instead.
func (context *StatelessContext) SetVADThreshold(t float32) {
context.params.SetVADThreshold(t)
}
// SetVADMinSpeechMs passes ms to SetVADMinSpeechMs on the context's params.
//
// Deprecated: Use Params().SetVADMinSpeechMs() instead.
func (context *StatelessContext) SetVADMinSpeechMs(ms int) {
context.params.SetVADMinSpeechMs(ms)
}
// SetVADMinSilenceMs passes ms to SetVADMinSilenceMs on the context's params.
//
// Deprecated: Use Params().SetVADMinSilenceMs() instead.
func (context *StatelessContext) SetVADMinSilenceMs(ms int) {
context.params.SetVADMinSilenceMs(ms)
}
// SetVADMaxSpeechSec passes s to SetVADMaxSpeechSec on the context's params.
//
// Deprecated: Use Params().SetVADMaxSpeechSec() instead.
func (context *StatelessContext) SetVADMaxSpeechSec(s float32) {
context.params.SetVADMaxSpeechSec(s)
}
// SetVADSpeechPadMs passes ms to SetVADSpeechPadMs on the context's params.
//
// Deprecated: Use Params().SetVADSpeechPadMs() instead.
func (context *StatelessContext) SetVADSpeechPadMs(ms int) {
context.params.SetVADSpeechPadMs(ms)
}
// SetVADSamplesOverlap passes sec to SetVADSamplesOverlap on the context's
// params.
//
// Deprecated: Use Params().SetVADSamplesOverlap() instead.
func (context *StatelessContext) SetVADSamplesOverlap(sec float32) {
context.params.SetVADSamplesOverlap(sec)
}
var _ Context = (*StatelessContext)(nil)

View File

@ -6,10 +6,8 @@ import (
)
const (
ModelPath = "../../models/ggml-small.en.bin"
ModelTinydiarizePath = "../../models/ggml-small.en-tdrz.bin"
SamplePath = "../../samples/jfk.wav"
MultiSpeakerSamplePath = "../../samples/a13.wav"
ModelPath = "../../models/ggml-tiny.en.bin"
SamplePath = "../../samples/jfk.wav"
)
func TestMain(m *testing.M) {

View File

@ -9,7 +9,7 @@ import (
"github.com/stretchr/testify/require"
)
const testModelPathCtx = "../../models/ggml-small.en.bin"
const testModelPathCtx = "../../models/ggml-tiny.en.bin"
func TestWhisperCtx_NilWrapper(t *testing.T) {
wctx := newCtxAccessor(nil)

View File

@ -9,7 +9,7 @@ import (
"github.com/stretchr/testify/require"
)
const testModelPathState = "../../models/ggml-small.en.bin"
const testModelPathState = "../../models/ggml-tiny.en.bin"
func TestWhisperState_NilWrapper(t *testing.T) {
ws := newWhisperState(nil)

View File

@ -15,12 +15,12 @@ import (
)
const (
ModelPath = "models/ggml-small.en.bin"
ModelPath = "models/ggml-tiny.en.bin"
SamplePath = "samples/jfk.wav"
)
// TestMain runs the package's tests through m.Run and exits the process with
// the status code it returns.
func TestMain(m *testing.M) {
whisper.DisableLogs()
// whisper.DisableLogs() // temporarily disabled to see error messages
os.Exit(m.Run())
}