feat: sync with upstream
This commit is contained in:
parent
3d5d94412c
commit
a7a8169b99
|
|
@ -3,6 +3,7 @@
|
|||
*.d
|
||||
.cache/
|
||||
.coreml/
|
||||
pkg/
|
||||
.test/
|
||||
.venv/
|
||||
.vs/
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ whisper: mkdir
|
|||
-DBUILD_SHARED_LIBS=OFF
|
||||
cmake --build ../../${BUILD_DIR} --target whisper
|
||||
|
||||
test: model-small whisper modtidy
|
||||
test: model-tiny whisper modtidy
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
|
||||
|
|
@ -46,18 +46,15 @@ endif
|
|||
|
||||
examples: $(EXAMPLES_DIR)
|
||||
|
||||
benchmark: model-small whisper modtidy
|
||||
benchmark: model-tiny whisper modtidy
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench=BenchmarkContextProcess -benchmem -run '^$$' ./pkg/whisper/...
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench='BenchmarkContextProcessCPU$$' -benchtime=1x -benchmem -run '^$$' ./pkg/whisper/...
|
||||
else
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -benchmem -run '^$$' ./pkg/whisper/...
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -bench='BenchmarkContextProcessCPU$$' -benchtime=1x -benchmem -run '^$$' ./pkg/whisper/...
|
||||
endif
|
||||
|
||||
model-small: mkdir examples/go-model-download
|
||||
@${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
|
||||
|
||||
model-small-tdrz: mkdir examples/go-model-download
|
||||
@${BUILD_DIR}/go-model-download -out models ggml-small.en-tdrz.bin
|
||||
model-tiny: mkdir examples/go-model-download
|
||||
@${BUILD_DIR}/go-model-download -out models ggml-tiny.en.bin
|
||||
|
||||
$(EXAMPLES_DIR): mkdir whisper modtidy
|
||||
@echo Build example $(notdir $@)
|
||||
|
|
@ -67,14 +64,6 @@ else
|
|||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
||||
endif
|
||||
|
||||
.PHONY: samples
|
||||
samples:
|
||||
@echo "Downloading samples..."
|
||||
@mkdir -p samples
|
||||
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
|
||||
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
|
||||
@rm samples/a13.mp3
|
||||
|
||||
mkdir:
|
||||
@echo Mkdir ${BUILD_DIR}
|
||||
@install -d ${BUILD_DIR}
|
||||
|
|
|
|||
|
|
@ -189,6 +189,12 @@ func (p *Params) SetInitialPrompt(prompt string) {
|
|||
p.initial_prompt = C.CString(prompt)
|
||||
}
|
||||
|
||||
// SetCarryInitialPrompt stores v (converted with toBool) in carry_initial_prompt.
// If true, always prepend initial_prompt to every decode window
|
||||
// (may reduce conditioning on previous text).
|
||||
func (p *Params) SetCarryInitialPrompt(v bool) {
|
||||
p.carry_initial_prompt = toBool(v)
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// PRIVATE METHODS
|
||||
|
||||
|
|
|
|||
|
|
@ -324,63 +324,6 @@ func TestContext_VAD_And_Diarization_Params_DoNotPanic(t *testing.T) {
|
|||
assert.NoError(err)
|
||||
}
|
||||
|
||||
// TestDiarization_TwoSpeakers_Boundaries loads the multi-speaker sample and the
// tinydiarize model, processes the audio through two stateful contexts, and
// counts the segments whose SpeakerTurnNext flag is set in each pass. It requires
// at least one turn in the first pass and that the first pass reports at least
// as many turns as the second.
func TestDiarization_TwoSpeakers_Boundaries(t *testing.T) {
|
||||
data := helperLoadSample(t, MultiSpeakerSamplePath)
|
||||
|
||||
model, err := whisper.NewModelContext(ModelTinydiarizePath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = model.Close() }()
|
||||
|
||||
params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) {
|
||||
p.SetDiarize(true)
|
||||
p.SetVAD(false)
|
||||
p.SetSplitOnWord(true)
|
||||
p.SetMaxSegmentLength(1)
|
||||
p.SetMaxTokensPerSegment(64)
|
||||
p.SetTokenTimestamps(true)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// First pass: diarization enabled, greedy sampling (SAMPLING_GREEDY, not beam search), tight segmentation.
|
||||
ctxOn, err := whisper.NewStatefulContext(model, params)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = ctxOn.Close() }()
|
||||
|
||||
require.NoError(t, ctxOn.Process(data, nil, nil, nil))
|
||||
var turnsOn int
|
||||
for {
|
||||
seg, err := ctxOn.NextSegment()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
require.NoError(t, err)
|
||||
if seg.SpeakerTurnNext {
|
||||
turnsOn++
|
||||
}
|
||||
}
|
||||
require.Greater(t, turnsOn, 0, "expected speaker turn boundaries with diarization enabled")
|
||||
|
||||
// Second pass, intended as the diarize-OFF baseline. NOTE(review): this context is built from the same params value, which was configured with SetDiarize(true) above, so diarization does not appear to be disabled here - confirm whether a separate params with SetDiarize(false) was intended.
|
||||
ctxOff, err := whisper.NewStatefulContext(model, params)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = ctxOff.Close() }()
|
||||
|
||||
require.NoError(t, ctxOff.Process(data, nil, nil, nil))
|
||||
var turnsOff int
|
||||
for {
|
||||
seg, err := ctxOff.NextSegment()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
require.NoError(t, err)
|
||||
if seg.SpeakerTurnNext {
|
||||
turnsOff++
|
||||
}
|
||||
}
|
||||
|
||||
require.GreaterOrEqual(t, turnsOn, turnsOff, "diarization should not reduce turn boundaries")
|
||||
}
|
||||
|
||||
func TestContext_SpeakerTurnNext_Field_Present(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
|
|
|
|||
|
|
@ -111,6 +111,24 @@ type Context interface {
|
|||
// Get detected language
|
||||
DetectedLanguage() string
|
||||
|
||||
// Voice Activity Detection (VAD) methods
|
||||
// Deprecated: Use Params().SetVAD() instead
|
||||
SetVAD(bool)
|
||||
// Deprecated: Use Params().SetVADModelPath() instead
|
||||
SetVADModelPath(string)
|
||||
// Deprecated: Use Params().SetVADThreshold() instead
|
||||
SetVADThreshold(float32)
|
||||
// Deprecated: Use Params().SetVADMinSpeechMs() instead
|
||||
SetVADMinSpeechMs(int)
|
||||
// Deprecated: Use Params().SetVADMinSilenceMs() instead
|
||||
SetVADMinSilenceMs(int)
|
||||
// Deprecated: Use Params().SetVADMaxSpeechSec() instead
|
||||
SetVADMaxSpeechSec(float32)
|
||||
// Deprecated: Use Params().SetVADSpeechPadMs() instead
|
||||
SetVADSpeechPadMs(int)
|
||||
// Deprecated: Use Params().SetVADSamplesOverlap() instead
|
||||
SetVADSamplesOverlap(float32)
|
||||
|
||||
// Process mono audio data and return any errors.
|
||||
// If defined, newly generated segments are passed to the
|
||||
// callback function during processing.
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ func (w *Parameters) SetMaxContext(n int) { w.p.SetMaxContext(n) }
|
|||
// SetBeamSize forwards n to w.p.SetBeamSize.
func (w *Parameters) SetBeamSize(n int) { w.p.SetBeamSize(n) }
|
||||
// SetEntropyThold forwards t to w.p.SetEntropyThold.
func (w *Parameters) SetEntropyThold(t float32) { w.p.SetEntropyThold(t) }
|
||||
// SetInitialPrompt forwards prompt to w.p.SetInitialPrompt.
func (w *Parameters) SetInitialPrompt(prompt string) { w.p.SetInitialPrompt(prompt) }
|
||||
// SetCarryInitialPrompt forwards v to w.p.SetCarryInitialPrompt.
func (w *Parameters) SetCarryInitialPrompt(v bool) { w.p.SetCarryInitialPrompt(v) }
|
||||
// SetTemperature forwards t to w.p.SetTemperature.
func (w *Parameters) SetTemperature(t float32) { w.p.SetTemperature(t) }
|
||||
// SetTemperatureFallback forwards t to w.p.SetTemperatureFallback.
func (w *Parameters) SetTemperatureFallback(t float32) { w.p.SetTemperatureFallback(t) }
|
||||
// SetNoContext forwards v to w.p.SetNoContext.
func (w *Parameters) SetNoContext(v bool) { w.p.SetNoContext(v) }
|
||||
|
|
|
|||
|
|
@ -392,6 +392,47 @@ func (context *StatefulContext) SetTranslate(v bool) {
|
|||
context.params.SetTranslate(v)
|
||||
}
|
||||
|
||||
// VAD methods - implement Context interface
|
||||
// SetVAD forwards v to context.params.SetVAD.
//
// Deprecated: Use Params().SetVAD() instead.
|
||||
func (context *StatefulContext) SetVAD(v bool) {
|
||||
context.params.SetVAD(v)
|
||||
}
|
||||
|
||||
// SetVADModelPath forwards path to context.params.SetVADModelPath.
//
// Deprecated: Use Params().SetVADModelPath() instead.
|
||||
func (context *StatefulContext) SetVADModelPath(path string) {
|
||||
context.params.SetVADModelPath(path)
|
||||
}
|
||||
|
||||
// SetVADThreshold forwards t to context.params.SetVADThreshold.
//
// Deprecated: Use Params().SetVADThreshold() instead.
|
||||
func (context *StatefulContext) SetVADThreshold(t float32) {
|
||||
context.params.SetVADThreshold(t)
|
||||
}
|
||||
|
||||
// SetVADMinSpeechMs forwards ms to context.params.SetVADMinSpeechMs.
//
// Deprecated: Use Params().SetVADMinSpeechMs() instead.
|
||||
func (context *StatefulContext) SetVADMinSpeechMs(ms int) {
|
||||
context.params.SetVADMinSpeechMs(ms)
|
||||
}
|
||||
|
||||
// SetVADMinSilenceMs forwards ms to context.params.SetVADMinSilenceMs.
//
// Deprecated: Use Params().SetVADMinSilenceMs() instead.
|
||||
func (context *StatefulContext) SetVADMinSilenceMs(ms int) {
|
||||
context.params.SetVADMinSilenceMs(ms)
|
||||
}
|
||||
|
||||
// SetVADMaxSpeechSec forwards s to context.params.SetVADMaxSpeechSec.
//
// Deprecated: Use Params().SetVADMaxSpeechSec() instead.
|
||||
func (context *StatefulContext) SetVADMaxSpeechSec(s float32) {
|
||||
context.params.SetVADMaxSpeechSec(s)
|
||||
}
|
||||
|
||||
// SetVADSpeechPadMs forwards ms to context.params.SetVADSpeechPadMs.
//
// Deprecated: Use Params().SetVADSpeechPadMs() instead.
|
||||
func (context *StatefulContext) SetVADSpeechPadMs(ms int) {
|
||||
context.params.SetVADSpeechPadMs(ms)
|
||||
}
|
||||
|
||||
// SetVADSamplesOverlap forwards sec to context.params.SetVADSamplesOverlap.
//
// Deprecated: Use Params().SetVADSamplesOverlap() instead.
|
||||
func (context *StatefulContext) SetVADSamplesOverlap(sec float32) {
|
||||
context.params.SetVADSamplesOverlap(sec)
|
||||
}
|
||||
|
||||
// Compile-time assertion that *StatefulContext satisfies the old, deprecated
|
||||
// Context interface, which simplifies migration to multi-threaded processing.
|
||||
var _ Context = (*StatefulContext)(nil)
|
||||
|
|
|
|||
|
|
@ -374,4 +374,45 @@ func (context *StatelessContext) SetTranslate(v bool) {
|
|||
context.params.SetTranslate(v)
|
||||
}
|
||||
|
||||
// VAD methods - implement Context interface
|
||||
// SetVAD forwards v to context.params.SetVAD.
//
// Deprecated: Use Params().SetVAD() instead.
|
||||
func (context *StatelessContext) SetVAD(v bool) {
|
||||
context.params.SetVAD(v)
|
||||
}
|
||||
|
||||
// SetVADModelPath forwards path to context.params.SetVADModelPath.
//
// Deprecated: Use Params().SetVADModelPath() instead.
|
||||
func (context *StatelessContext) SetVADModelPath(path string) {
|
||||
context.params.SetVADModelPath(path)
|
||||
}
|
||||
|
||||
// SetVADThreshold forwards t to context.params.SetVADThreshold.
//
// Deprecated: Use Params().SetVADThreshold() instead.
|
||||
func (context *StatelessContext) SetVADThreshold(t float32) {
|
||||
context.params.SetVADThreshold(t)
|
||||
}
|
||||
|
||||
// SetVADMinSpeechMs forwards ms to context.params.SetVADMinSpeechMs.
//
// Deprecated: Use Params().SetVADMinSpeechMs() instead.
|
||||
func (context *StatelessContext) SetVADMinSpeechMs(ms int) {
|
||||
context.params.SetVADMinSpeechMs(ms)
|
||||
}
|
||||
|
||||
// SetVADMinSilenceMs forwards ms to context.params.SetVADMinSilenceMs.
//
// Deprecated: Use Params().SetVADMinSilenceMs() instead.
|
||||
func (context *StatelessContext) SetVADMinSilenceMs(ms int) {
|
||||
context.params.SetVADMinSilenceMs(ms)
|
||||
}
|
||||
|
||||
// SetVADMaxSpeechSec forwards s to context.params.SetVADMaxSpeechSec.
//
// Deprecated: Use Params().SetVADMaxSpeechSec() instead.
|
||||
func (context *StatelessContext) SetVADMaxSpeechSec(s float32) {
|
||||
context.params.SetVADMaxSpeechSec(s)
|
||||
}
|
||||
|
||||
// SetVADSpeechPadMs forwards ms to context.params.SetVADSpeechPadMs.
//
// Deprecated: Use Params().SetVADSpeechPadMs() instead.
|
||||
func (context *StatelessContext) SetVADSpeechPadMs(ms int) {
|
||||
context.params.SetVADSpeechPadMs(ms)
|
||||
}
|
||||
|
||||
// SetVADSamplesOverlap forwards sec to context.params.SetVADSamplesOverlap.
//
// Deprecated: Use Params().SetVADSamplesOverlap() instead.
|
||||
func (context *StatelessContext) SetVADSamplesOverlap(sec float32) {
|
||||
context.params.SetVADSamplesOverlap(sec)
|
||||
}
|
||||
|
||||
// Compile-time assertion that *StatelessContext satisfies the Context interface.
var _ Context = (*StatelessContext)(nil)
|
||||
|
|
|
|||
|
|
@ -6,10 +6,8 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
ModelPath = "../../models/ggml-small.en.bin"
|
||||
ModelTinydiarizePath = "../../models/ggml-small.en-tdrz.bin"
|
||||
SamplePath = "../../samples/jfk.wav"
|
||||
MultiSpeakerSamplePath = "../../samples/a13.wav"
|
||||
ModelPath = "../../models/ggml-tiny.en.bin"
|
||||
SamplePath = "../../samples/jfk.wav"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const testModelPathCtx = "../../models/ggml-small.en.bin"
|
||||
const testModelPathCtx = "../../models/ggml-tiny.en.bin"
|
||||
|
||||
func TestWhisperCtx_NilWrapper(t *testing.T) {
|
||||
wctx := newCtxAccessor(nil)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const testModelPathState = "../../models/ggml-small.en.bin"
|
||||
const testModelPathState = "../../models/ggml-tiny.en.bin"
|
||||
|
||||
func TestWhisperState_NilWrapper(t *testing.T) {
|
||||
ws := newWhisperState(nil)
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
ModelPath = "models/ggml-small.en.bin"
|
||||
ModelPath = "models/ggml-tiny.en.bin"
|
||||
SamplePath = "samples/jfk.wav"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
whisper.DisableLogs()
|
||||
// whisper.DisableLogs() // temporarily disabled to see error messages
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue