Update Java bindings
This commit is contained in:
parent
a91dd3be72
commit
bf4cd5428b
|
|
@ -387,4 +387,126 @@ public interface WhisperCppJnaLibrary extends Library {
|
|||
* @return The result of the benchmark as a string.
|
||||
*/
|
||||
String whisper_bench_ggml_mul_mat_str(int nThreads);
|
||||
|
||||
// ============================================================================
|
||||
// Voice Activity Detection (VAD) Functions
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Get default VAD parameters.
|
||||
*
|
||||
* @return Default VAD parameters
|
||||
*/
|
||||
// NOTE(review): declared as an untyped Pointer. If the native function returns its
// parameter struct by value (TODO confirm against whisper.h), the return type would
// need to be WhisperVADParams.ByValue for JNA to marshal it correctly.
Pointer whisper_vad_default_params();
|
||||
|
||||
/**
|
||||
* Get default VAD context parameters.
|
||||
*
|
||||
* @return Default VAD context parameters
|
||||
*/
|
||||
// NOTE(review): declared as an untyped Pointer. If the native function returns its
// parameter struct by value (TODO confirm against whisper.h), the return type would
// need to be WhisperVADContextParams.ByValue for JNA to marshal it correctly.
Pointer whisper_vad_default_context_params();
|
||||
|
||||
/**
|
||||
* Initialize VAD context from file with parameters.
|
||||
*
|
||||
* @param path_model Path to the VAD model file
|
||||
* @param params VAD context parameters
|
||||
* @return VAD context pointer on success, null on failure
|
||||
*/
|
||||
// NOTE(review): params is an untyped Pointer, presumably addressing a
// WhisperVADContextParams. Confirm whether the native signature takes the struct
// by value instead, in which case WhisperVADContextParams.ByValue is required.
Pointer whisper_vad_init_from_file_with_params(String path_model, Pointer params);
|
||||
|
||||
/**
|
||||
* Initialize VAD context with model loader and parameters.
|
||||
*
|
||||
* @param loader Model loader
|
||||
* @param params VAD context parameters
|
||||
* @return VAD context pointer on success, null on failure
|
||||
*/
|
||||
// NOTE(review): params is an untyped Pointer, presumably addressing a
// WhisperVADContextParams. Confirm whether the native signature takes the struct
// by value instead, in which case WhisperVADContextParams.ByValue is required.
Pointer whisper_vad_init_with_params(WhisperModelLoader loader, Pointer params);
|
||||
|
||||
/**
|
||||
* Detect speech in audio samples.
|
||||
*
|
||||
* @param vctx VAD context
|
||||
* @param samples Audio samples (float array)
|
||||
* @param n_samples Number of samples
|
||||
* @return true if speech detected, false otherwise
|
||||
*/
|
||||
// This declaration adds no validation of its own: presumably n_samples must not
// exceed samples.length and vctx must be a live VAD context — TODO confirm with callers.
boolean whisper_vad_detect_speech(Pointer vctx, float[] samples, int n_samples);
|
||||
|
||||
/**
|
||||
* Get number of probability values in VAD context.
|
||||
*
|
||||
* @param vctx VAD context
|
||||
* @return Number of probability values
|
||||
*/
|
||||
// vctx is presumably a context returned by one of the whisper_vad_init_* functions;
// the count presumably describes the array exposed by whisper_vad_probs — confirm.
int whisper_vad_n_probs(Pointer vctx);
|
||||
|
||||
/**
|
||||
* Get probability array from VAD context.
|
||||
*
|
||||
* @param vctx VAD context
|
||||
* @return Pointer to probability array
|
||||
*/
|
||||
// NOTE(review): the result is an untyped Pointer. Presumably it addresses
// whisper_vad_n_probs(vctx) float values owned by the context — confirm the element
// type, length and lifetime before reading or caching it.
Pointer whisper_vad_probs(Pointer vctx);
|
||||
|
||||
/**
|
||||
* Get VAD segments from pre-computed probabilities.
|
||||
*
|
||||
* @param vctx VAD context
|
||||
* @param params VAD parameters
|
||||
* @return Pointer to VAD segments
|
||||
*/
|
||||
// NOTE(review): params is an untyped Pointer, presumably to a WhisperVADParams;
// confirm whether the native signature expects the struct by value instead.
// The returned handle is presumably released with whisper_vad_free_segments.
Pointer whisper_vad_segments_from_probs(Pointer vctx, Pointer params);
|
||||
|
||||
/**
|
||||
* Get VAD segments directly from audio samples.
|
||||
*
|
||||
* @param vctx VAD context
|
||||
* @param params VAD parameters
|
||||
* @param samples Audio samples (float array)
|
||||
* @param n_samples Number of samples
|
||||
* @return Pointer to VAD segments
|
||||
*/
|
||||
// NOTE(review): params is an untyped Pointer, presumably to a WhisperVADParams;
// confirm whether the native signature expects the struct by value instead.
// No bounds check is made here on n_samples versus samples.length.
// The returned handle is presumably released with whisper_vad_free_segments.
Pointer whisper_vad_segments_from_samples(Pointer vctx, Pointer params, float[] samples, int n_samples);
|
||||
|
||||
/**
|
||||
* Get number of segments in VAD segments result.
|
||||
*
|
||||
* @param segments VAD segments pointer
|
||||
* @return Number of segments
|
||||
*/
|
||||
// segments is presumably a handle returned by whisper_vad_segments_from_probs or
// whisper_vad_segments_from_samples — TODO confirm behaviour for a null handle.
int whisper_vad_segments_n_segments(Pointer segments);
|
||||
|
||||
/**
|
||||
* Get start time of a specific segment.
|
||||
*
|
||||
* @param segments VAD segments pointer
|
||||
* @param i_segment Segment index
|
||||
* @return Start time in seconds
|
||||
*/
|
||||
// No range check is made here; presumably
// 0 <= i_segment < whisper_vad_segments_n_segments(segments).
// NOTE(review): confirm the time unit of the returned value against whisper.h.
float whisper_vad_segments_get_segment_t0(Pointer segments, int i_segment);
|
||||
|
||||
/**
|
||||
* Get end time of a specific segment.
|
||||
*
|
||||
* @param segments VAD segments pointer
|
||||
* @param i_segment Segment index
|
||||
* @return End time in seconds
|
||||
*/
|
||||
// No range check is made here; presumably
// 0 <= i_segment < whisper_vad_segments_n_segments(segments).
// NOTE(review): confirm the time unit of the returned value against whisper.h.
float whisper_vad_segments_get_segment_t1(Pointer segments, int i_segment);
|
||||
|
||||
/**
|
||||
* Free VAD segments memory.
|
||||
*
|
||||
* @param segments VAD segments pointer to free
|
||||
*/
|
||||
// Presumably the handle must not be passed to any other function after this call,
// and must be freed at most once — TODO confirm.
void whisper_vad_free_segments(Pointer segments);
|
||||
|
||||
/**
|
||||
* Free VAD context memory.
|
||||
*
|
||||
* @param ctx VAD context pointer to free
|
||||
*/
|
||||
// The parameter is named ctx here but vctx in the other VAD declarations of this
// interface; presumably both denote the same kind of VAD context handle.
// Presumably the handle must not be used after this call — TODO confirm.
void whisper_vad_free(Pointer ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -331,6 +331,38 @@ public class WhisperFullParams extends Structure {
|
|||
public long i_start_rule;
|
||||
public float grammar_penalty;
|
||||
|
||||
/** Voice Activity Detection (VAD) parameters */
|
||||
|
||||
/** Enable VAD (default = false) */
|
||||
public CBool vad;
|
||||
|
||||
/** Enable VAD */
|
||||
public void enableVAD(boolean enable) {
|
||||
vad = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Path to VAD model file */
|
||||
public String vad_model_path;
|
||||
|
||||
/** Set VAD model path */
|
||||
public void setVADModelPath(String path) {
|
||||
this.vad_model_path = path;
|
||||
}
|
||||
|
||||
/** VAD parameters */
|
||||
public WhisperVADParams.ByValue vad_params;
|
||||
|
||||
/** Set VAD parameters */
|
||||
public void setVADParams(WhisperVADParams params) {
|
||||
this.vad_params = new WhisperVADParams.ByValue();
|
||||
this.vad_params.threshold = params.threshold;
|
||||
this.vad_params.min_speech_duration_ms = params.min_speech_duration_ms;
|
||||
this.vad_params.min_silence_duration_ms = params.min_silence_duration_ms;
|
||||
this.vad_params.max_speech_duration_s = params.max_speech_duration_s;
|
||||
this.vad_params.speech_pad_ms = params.speech_pad_ms;
|
||||
this.vad_params.samples_overlap = params.samples_overlap;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
|
||||
|
|
@ -349,7 +381,8 @@ public class WhisperFullParams extends Structure {
|
|||
"encoder_begin_callback", "encoder_begin_callback_user_data",
|
||||
"abort_callback", "abort_callback_user_data",
|
||||
"logits_filter_callback", "logits_filter_callback_user_data",
|
||||
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
|
||||
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty",
|
||||
"vad", "vad_model_path", "vad_params");
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperFullParams implements Structure.ByValue {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,66 @@
|
|||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Parameters for initializing a VAD context.
|
||||
*/
|
||||
public class WhisperVADContextParams extends Structure {
|
||||
|
||||
public WhisperVADContextParams() {
|
||||
super();
|
||||
}
|
||||
|
||||
public WhisperVADContextParams(Pointer p) {
|
||||
super(p);
|
||||
}
|
||||
|
||||
/** Number of threads to use for VAD processing (default = 4) */
|
||||
public int n_threads;
|
||||
|
||||
/** Use GPU for VAD (default = true) */
|
||||
public CBool use_gpu;
|
||||
|
||||
/** CUDA device to use (default = 0) */
|
||||
public int gpu_device;
|
||||
|
||||
/**
|
||||
* Set number of threads for VAD processing.
|
||||
* @param threads Number of threads
|
||||
*/
|
||||
public void setThreads(int threads) {
|
||||
this.n_threads = threads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable or disable GPU for VAD.
|
||||
* @param enable Whether to use GPU
|
||||
*/
|
||||
public void useGpu(boolean enable) {
|
||||
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set CUDA device for VAD.
|
||||
* @param device CUDA device ID
|
||||
*/
|
||||
public void setGpuDevice(int device) {
|
||||
this.gpu_device = device;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList(
|
||||
"n_threads",
|
||||
"use_gpu",
|
||||
"gpu_device"
|
||||
);
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperVADContextParams implements Structure.ByValue {
|
||||
public ByValue() { super(); }
|
||||
public ByValue(Pointer p) { super(p); }
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Voice Activity Detection (VAD) parameters.
|
||||
* Used for detecting speech segments in audio.
|
||||
*/
|
||||
public class WhisperVADParams extends Structure {
|
||||
|
||||
public WhisperVADParams() {
|
||||
super();
|
||||
}
|
||||
|
||||
public WhisperVADParams(Pointer p) {
|
||||
super(p);
|
||||
}
|
||||
|
||||
/** Probability threshold to consider as speech (default = 0.5) */
|
||||
public float threshold;
|
||||
|
||||
/** Minimum duration for a valid speech segment in milliseconds (default = 250) */
|
||||
public int min_speech_duration_ms;
|
||||
|
||||
/** Minimum silence duration to consider speech as ended in milliseconds (default = 2000) */
|
||||
public int min_silence_duration_ms;
|
||||
|
||||
/** Maximum duration of a speech segment before forcing a new segment in seconds (default = Float.MAX_VALUE) */
|
||||
public float max_speech_duration_s;
|
||||
|
||||
/** Padding added before and after speech segments in milliseconds (default = 400) */
|
||||
public int speech_pad_ms;
|
||||
|
||||
/** Overlap in seconds when copying audio samples from speech segment (default = 1.0) */
|
||||
public float samples_overlap;
|
||||
|
||||
/**
|
||||
* Set probability threshold for speech detection.
|
||||
* @param threshold Probability threshold (0.0 to 1.0)
|
||||
*/
|
||||
public void setThreshold(float threshold) {
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set minimum speech duration.
|
||||
* @param durationMs Duration in milliseconds
|
||||
*/
|
||||
public void setMinSpeechDuration(int durationMs) {
|
||||
this.min_speech_duration_ms = durationMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set minimum silence duration.
|
||||
* @param durationMs Duration in milliseconds
|
||||
*/
|
||||
public void setMinSilenceDuration(int durationMs) {
|
||||
this.min_silence_duration_ms = durationMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set maximum speech duration.
|
||||
* @param durationS Duration in seconds
|
||||
*/
|
||||
public void setMaxSpeechDuration(float durationS) {
|
||||
this.max_speech_duration_s = durationS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set speech padding.
|
||||
* @param paddingMs Padding in milliseconds
|
||||
*/
|
||||
public void setSpeechPadding(int paddingMs) {
|
||||
this.speech_pad_ms = paddingMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set samples overlap.
|
||||
* @param overlapS Overlap in seconds
|
||||
*/
|
||||
public void setSamplesOverlap(float overlapS) {
|
||||
this.samples_overlap = overlapS;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList(
|
||||
"threshold",
|
||||
"min_speech_duration_ms",
|
||||
"min_silence_duration_ms",
|
||||
"max_speech_duration_s",
|
||||
"speech_pad_ms",
|
||||
"samples_overlap"
|
||||
);
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperVADParams implements Structure.ByValue {
|
||||
public ByValue() { super(); }
|
||||
public ByValue(Pointer p) { super(p); }
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue