Update java bindings

This commit is contained in:
Andreas Lubbe 2025-10-12 21:30:18 +02:00
parent a91dd3be72
commit bf4cd5428b
4 changed files with 325 additions and 1 deletions

View File

@ -387,4 +387,126 @@ public interface WhisperCppJnaLibrary extends Library {
* @return The result of the benchmark as a string.
*/
String whisper_bench_ggml_mul_mat_str(int nThreads);
// ============================================================================
// Voice Activity Detection (VAD) Functions
// ============================================================================
/**
 * Get default VAD parameters.
 *
 * NOTE(review): the result is mapped as a raw {@link Pointer}. If the native function returns
 * {@code struct whisper_vad_params} by value (as whisper.h appears to declare - confirm), this
 * mapping is not ABI-compatible and the return type should be a by-value structure instead.
 *
 * @return Default VAD parameters
 */
Pointer whisper_vad_default_params();
/**
 * Get default VAD context parameters.
 *
 * NOTE(review): the result is mapped as a raw {@link Pointer}. If the native function returns
 * {@code struct whisper_vad_context_params} by value (as whisper.h appears to declare - confirm),
 * this mapping is not ABI-compatible and the return type should be a by-value structure instead.
 *
 * @return Default VAD context parameters
 */
Pointer whisper_vad_default_context_params();
/**
 * Initialize VAD context from file with parameters.
 *
 * NOTE(review): {@code params} is mapped as a raw {@link Pointer}. whisper.h appears to take
 * {@code struct whisper_vad_context_params} by value - if so, a by-value structure type is
 * required here; confirm against the header.
 *
 * @param path_model Path to the VAD model file
 * @param params VAD context parameters
 * @return VAD context pointer on success, null on failure; presumably released with
 *         {@code whisper_vad_free} - confirm
 */
Pointer whisper_vad_init_from_file_with_params(String path_model, Pointer params);
/**
 * Initialize VAD context with model loader and parameters.
 *
 * NOTE(review): {@code params} is mapped as a raw {@link Pointer}. whisper.h appears to take
 * {@code struct whisper_vad_context_params} by value - if so, a by-value structure type is
 * required here; confirm against the header.
 *
 * @param loader Model loader
 * @param params VAD context parameters
 * @return VAD context pointer on success, null on failure; presumably released with
 *         {@code whisper_vad_free} - confirm
 */
Pointer whisper_vad_init_with_params(WhisperModelLoader loader, Pointer params);
/**
 * Detect speech in audio samples.
 *
 * NOTE(review): JNA converts a Java {@code boolean} return from a native {@code int} by default.
 * If the native function returns a 1-byte C {@code bool}, verify that this conversion is
 * reliable on all target platforms.
 *
 * @param vctx VAD context
 * @param samples Audio samples (float array)
 * @param n_samples Number of samples; presumably must not exceed {@code samples.length} - confirm
 * @return true if speech detected, false otherwise
 */
boolean whisper_vad_detect_speech(Pointer vctx, float[] samples, int n_samples);
/**
 * Get number of probability values in VAD context.
 *
 * Presumably the element count of the array exposed by {@code whisper_vad_probs} for the same
 * context - confirm.
 *
 * @param vctx VAD context
 * @return Number of probability values
 */
int whisper_vad_n_probs(Pointer vctx);
/**
 * Get probability array from VAD context.
 *
 * NOTE(review): the element type and the lifetime of the returned memory are not visible here;
 * presumably {@code float} values owned by the context, with the length given by
 * {@code whisper_vad_n_probs} - confirm before reading through the pointer.
 *
 * @param vctx VAD context
 * @return Pointer to probability array
 */
Pointer whisper_vad_probs(Pointer vctx);
/**
 * Get VAD segments from pre-computed probabilities.
 *
 * NOTE(review): {@code params} is mapped as a raw {@link Pointer}. whisper.h appears to take
 * {@code struct whisper_vad_params} by value - if so, a by-value structure type is required
 * here; confirm against the header.
 *
 * @param vctx VAD context
 * @param params VAD parameters
 * @return Pointer to VAD segments; presumably released with {@code whisper_vad_free_segments} -
 *         confirm
 */
Pointer whisper_vad_segments_from_probs(Pointer vctx, Pointer params);
/**
 * Get VAD segments directly from audio samples.
 *
 * NOTE(review): {@code params} is mapped as a raw {@link Pointer}. whisper.h appears to take
 * {@code struct whisper_vad_params} by value - if so, a by-value structure type is required
 * here; confirm against the header.
 *
 * @param vctx VAD context
 * @param params VAD parameters
 * @param samples Audio samples (float array)
 * @param n_samples Number of samples; presumably must not exceed {@code samples.length} - confirm
 * @return Pointer to VAD segments; presumably released with {@code whisper_vad_free_segments} -
 *         confirm
 */
Pointer whisper_vad_segments_from_samples(Pointer vctx, Pointer params, float[] samples, int n_samples);
/**
 * Get number of segments in VAD segments result.
 *
 * @param segments VAD segments pointer, as returned by one of the
 *                 {@code whisper_vad_segments_from_*} functions
 * @return Number of segments
 */
int whisper_vad_segments_n_segments(Pointer segments);
/**
 * Get start time of a specific segment.
 *
 * @param segments VAD segments pointer
 * @param i_segment Segment index; presumably in the range
 *                  {@code [0, whisper_vad_segments_n_segments(segments))} - confirm
 * @return Start time in seconds (NOTE(review): unit taken from the original comment; confirm it
 *         against the native header)
 */
float whisper_vad_segments_get_segment_t0(Pointer segments, int i_segment);
/**
 * Get end time of a specific segment.
 *
 * @param segments VAD segments pointer
 * @param i_segment Segment index; presumably in the range
 *                  {@code [0, whisper_vad_segments_n_segments(segments))} - confirm
 * @return End time in seconds (NOTE(review): unit taken from the original comment; confirm it
 *         against the native header)
 */
float whisper_vad_segments_get_segment_t1(Pointer segments, int i_segment);
/**
 * Free VAD segments memory.
 *
 * The pointer presumably must not be used after this call - confirm.
 *
 * @param segments VAD segments pointer to free
 */
void whisper_vad_free_segments(Pointer segments);
/**
 * Free VAD context memory.
 *
 * The pointer presumably must not be used after this call - confirm.
 *
 * @param ctx VAD context pointer to free
 */
void whisper_vad_free(Pointer ctx);
}

View File

@ -331,6 +331,38 @@ public class WhisperFullParams extends Structure {
public long i_start_rule;
public float grammar_penalty;
// Voice Activity Detection (VAD) parameters
/**
 * Flag that switches Voice Activity Detection on or off; set through {@code enableVAD}.
 * NOTE(review): the previous comment cited a default of false, but no initializer is visible
 * here - the effective default depends on how this structure gets populated; confirm.
 */
public CBool vad;
/**
 * Switch Voice Activity Detection on or off.
 *
 * @param enable {@code true} stores {@code CBool.TRUE} in the {@code vad} field,
 *               {@code false} stores {@code CBool.FALSE}
 */
public void enableVAD(boolean enable) {
    if (enable) {
        this.vad = CBool.TRUE;
    } else {
        this.vad = CBool.FALSE;
    }
}
/** Path to the VAD model file; set through {@code setVADModelPath}. */
public String vad_model_path;
/**
 * Store the location of the VAD model file.
 *
 * @param path value assigned to the {@code vad_model_path} field as-is (no validation is done)
 */
public void setVADModelPath(String path) {
    vad_model_path = path;
}
/** VAD tuning parameters, held as a by-value structure; set through {@code setVADParams}. */
public WhisperVADParams.ByValue vad_params;
/**
 * Replace the VAD parameters with a by-value copy of the given structure.
 *
 * <p>All six fields ({@code threshold}, {@code min_speech_duration_ms},
 * {@code min_silence_duration_ms}, {@code max_speech_duration_s}, {@code speech_pad_ms},
 * {@code samples_overlap}) are copied, so later changes to {@code params} do not affect this
 * object.
 *
 * @param params source of the values to copy; must not be {@code null}
 * @throws NullPointerException if {@code params} is {@code null}; in that case the current
 *         {@code vad_params} value is left untouched
 */
public void setVADParams(WhisperVADParams params) {
    // Validate before touching any state so a bad argument cannot wipe the current settings.
    if (params == null) {
        throw new NullPointerException("params");
    }
    WhisperVADParams.ByValue copy = new WhisperVADParams.ByValue();
    copy.threshold = params.threshold;
    copy.min_speech_duration_ms = params.min_speech_duration_ms;
    copy.min_silence_duration_ms = params.min_silence_duration_ms;
    copy.max_speech_duration_s = params.max_speech_duration_s;
    copy.speech_pad_ms = params.speech_pad_ms;
    copy.samples_overlap = params.samples_overlap;
    // Publish only the fully populated copy.
    this.vad_params = copy;
}
@Override
protected List<String> getFieldOrder() {
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
@ -349,7 +381,8 @@ public class WhisperFullParams extends Structure {
"encoder_begin_callback", "encoder_begin_callback_user_data",
"abort_callback", "abort_callback_user_data",
"logits_filter_callback", "logits_filter_callback_user_data",
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty",
"vad", "vad_model_path", "vad_params");
}
public static class ByValue extends WhisperFullParams implements Structure.ByValue {

View File

@ -0,0 +1,66 @@
package io.github.ggerganov.whispercpp.params;
import com.sun.jna.*;
import java.util.Arrays;
import java.util.List;
/**
 * JNA structure carrying the settings used when a VAD context is initialized.
 *
 * <p>The native field layout is the one reported by {@link #getFieldOrder()}:
 * {@code n_threads}, {@code use_gpu}, {@code gpu_device}.
 *
 * <p>NOTE(review): earlier comments quoted defaults (4 threads, GPU enabled, device 0). Nothing in
 * this class assigns those values, so they presumably describe the native defaults - confirm.
 */
public class WhisperVADContextParams extends Structure {

    /** Number of threads to use for VAD processing. */
    public int n_threads;

    /** Whether the GPU is used for VAD. */
    public CBool use_gpu;

    /** CUDA device to use. */
    public int gpu_device;

    /** Creates an instance through the no-argument {@code Structure} constructor. */
    public WhisperVADContextParams() {
        super();
    }

    /**
     * Creates an instance through the {@code Structure(Pointer)} constructor.
     *
     * @param p pointer handed to the superclass; presumably existing native memory to map - confirm
     */
    public WhisperVADContextParams(Pointer p) {
        super(p);
    }

    /**
     * Set number of threads for VAD processing.
     *
     * @param threads value stored in {@link #n_threads}
     */
    public void setThreads(int threads) {
        n_threads = threads;
    }

    /**
     * Enable or disable GPU for VAD.
     *
     * @param enable {@code true} stores {@code CBool.TRUE} in {@link #use_gpu},
     *               {@code false} stores {@code CBool.FALSE}
     */
    public void useGpu(boolean enable) {
        if (enable) {
            this.use_gpu = CBool.TRUE;
        } else {
            this.use_gpu = CBool.FALSE;
        }
    }

    /**
     * Set CUDA device for VAD.
     *
     * @param device value stored in {@link #gpu_device}
     */
    public void setGpuDevice(int device) {
        gpu_device = device;
    }

    /**
     * Reports the order in which the public fields are laid out.
     *
     * @return the field names {@code n_threads}, {@code use_gpu}, {@code gpu_device}, in that order
     */
    @Override
    protected List<String> getFieldOrder() {
        return Arrays.asList("n_threads", "use_gpu", "gpu_device");
    }

    /** Variant of {@link WhisperVADContextParams} tagged with {@code Structure.ByValue}. */
    public static class ByValue extends WhisperVADContextParams implements Structure.ByValue {

        /** Delegates to the no-argument constructor of the enclosing type. */
        public ByValue() {
            super();
        }

        /**
         * Delegates to the pointer constructor of the enclosing type.
         *
         * @param p pointer handed to the superclass
         */
        public ByValue(Pointer p) {
            super(p);
        }
    }
}

View File

@ -0,0 +1,103 @@
package io.github.ggerganov.whispercpp.params;
import com.sun.jna.*;
import java.util.Arrays;
import java.util.List;
/**
 * JNA structure holding the Voice Activity Detection (VAD) parameters used when detecting speech
 * segments in audio.
 *
 * <p>The native field layout is the one reported by {@link #getFieldOrder()}.
 *
 * <p>NOTE(review): earlier comments quoted defaults (threshold 0.5, min speech 250 ms, min silence
 * 2000 ms, max speech Float.MAX_VALUE, padding 400 ms, overlap 1.0 s). Nothing in this class
 * assigns those values, so a freshly constructed instance does not carry them - confirm the
 * intended defaults against the native library.
 */
public class WhisperVADParams extends Structure {

    /** Probability threshold to consider as speech. */
    public float threshold;

    /** Minimum duration for a valid speech segment, in milliseconds. */
    public int min_speech_duration_ms;

    /** Minimum silence duration to consider speech as ended, in milliseconds. */
    public int min_silence_duration_ms;

    /** Maximum duration of a speech segment before forcing a new segment, in seconds. */
    public float max_speech_duration_s;

    /** Padding added before and after speech segments, in milliseconds. */
    public int speech_pad_ms;

    /** Overlap, in seconds, when copying audio samples from a speech segment. */
    public float samples_overlap;

    /** Creates an instance through the no-argument {@code Structure} constructor. */
    public WhisperVADParams() {
        super();
    }

    /**
     * Creates an instance through the {@code Structure(Pointer)} constructor.
     *
     * @param p pointer handed to the superclass; presumably existing native memory to map - confirm
     */
    public WhisperVADParams(Pointer p) {
        super(p);
    }

    /**
     * Set probability threshold for speech detection.
     *
     * @param threshold value stored in {@link #threshold}; the original comment gives the range
     *                  as 0.0 to 1.0, but no range check is performed here
     */
    public void setThreshold(float threshold) {
        this.threshold = threshold;
    }

    /**
     * Set minimum speech duration.
     *
     * @param durationMs duration in milliseconds, stored in {@link #min_speech_duration_ms}
     */
    public void setMinSpeechDuration(int durationMs) {
        min_speech_duration_ms = durationMs;
    }

    /**
     * Set minimum silence duration.
     *
     * @param durationMs duration in milliseconds, stored in {@link #min_silence_duration_ms}
     */
    public void setMinSilenceDuration(int durationMs) {
        min_silence_duration_ms = durationMs;
    }

    /**
     * Set maximum speech duration.
     *
     * @param durationS duration in seconds, stored in {@link #max_speech_duration_s}
     */
    public void setMaxSpeechDuration(float durationS) {
        max_speech_duration_s = durationS;
    }

    /**
     * Set speech padding.
     *
     * @param paddingMs padding in milliseconds, stored in {@link #speech_pad_ms}
     */
    public void setSpeechPadding(int paddingMs) {
        speech_pad_ms = paddingMs;
    }

    /**
     * Set samples overlap.
     *
     * @param overlapS overlap in seconds, stored in {@link #samples_overlap}
     */
    public void setSamplesOverlap(float overlapS) {
        samples_overlap = overlapS;
    }

    /**
     * Reports the order in which the public fields are laid out.
     *
     * @return the six field names in declaration order, from {@code threshold} to
     *         {@code samples_overlap}
     */
    @Override
    protected List<String> getFieldOrder() {
        return Arrays.asList(
                "threshold",
                "min_speech_duration_ms",
                "min_silence_duration_ms",
                "max_speech_duration_s",
                "speech_pad_ms",
                "samples_overlap");
    }

    /** Variant of {@link WhisperVADParams} tagged with {@code Structure.ByValue}. */
    public static class ByValue extends WhisperVADParams implements Structure.ByValue {

        /** Delegates to the no-argument constructor of the enclosing type. */
        public ByValue() {
            super();
        }

        /**
         * Delegates to the pointer constructor of the enclosing type.
         *
         * @param p pointer handed to the superclass
         */
        public ByValue(Pointer p) {
            super(p);
        }
    }
}