Add Parakeet.log_set
This commit is contained in:
parent
4f53fad54e
commit
cc958f0af5
|
|
@ -49,6 +49,7 @@ extern void init_ruby_whisper_vad_params(VALUE *mVAD);
|
|||
extern void init_ruby_whisper_vad_context(VALUE *mVAD);
|
||||
extern void init_ruby_whisper_vad_segment(VALUE *mVAD);
|
||||
extern void init_ruby_whisper_vad_segments(VALUE *mVAD);
|
||||
extern void init_ruby_whisper_parakeet();
|
||||
extern void init_ruby_whisper_parakeet_params(VALUE *mParakeet);
|
||||
extern void init_ruby_whisper_parakeet_segment(VALUE *mParakeet);
|
||||
extern void init_ruby_whisper_parakeet_context(VALUE *mParakeet);
|
||||
|
|
@ -244,6 +245,7 @@ void Init_whisper() {
|
|||
init_ruby_whisper_vad_segment(&mVAD);
|
||||
init_ruby_whisper_vad_segments(&mVAD);
|
||||
init_ruby_whisper_vad_context(&mVAD);
|
||||
init_ruby_whisper_parakeet();
|
||||
init_ruby_whisper_parakeet_params(&mParakeet);
|
||||
init_ruby_whisper_parakeet_segment(&mParakeet);
|
||||
init_ruby_whisper_parakeet_context(&mParakeet);
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include <ruby/version.h>
|
||||
#include <ruby/util.h>
|
||||
#include <ruby/thread.h>
|
||||
#include <ruby/thread_native.h>
|
||||
#include <ruby/atomic.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include "whisper.h"
|
||||
|
|
@ -15,6 +16,9 @@
|
|||
int ruby_thread_has_gvl_p(void);
|
||||
#endif
|
||||
|
||||
#define LOG_QUEUE_CAPACITY 256
|
||||
#define LOG_DEFAULT_CAPACITY 1024
|
||||
|
||||
typedef struct {
|
||||
VALUE *context;
|
||||
VALUE user_data;
|
||||
|
|
@ -34,6 +38,25 @@ typedef struct ruby_whisper_parakeet_abort_callback_user_data {
|
|||
volatile rb_atomic_t is_interrupted;
|
||||
} ruby_whisper_parakeet_abort_callback_user_data;
|
||||
|
||||
typedef struct ruby_whisper_log {
|
||||
enum ggml_log_level level;
|
||||
char *text;
|
||||
size_t length;
|
||||
size_t capacity;
|
||||
} ruby_whisper_log;
|
||||
|
||||
typedef struct ruby_whisper_log_queue {
|
||||
rb_nativethread_lock_t lock;
|
||||
rb_nativethread_cond_t cond;
|
||||
|
||||
size_t head;
|
||||
size_t tail;
|
||||
size_t size;
|
||||
bool is_active;
|
||||
|
||||
ruby_whisper_log *logs;
|
||||
} ruby_whisper_log_queue;
|
||||
|
||||
typedef struct {
|
||||
struct whisper_context *context;
|
||||
} ruby_whisper;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,151 @@
|
|||
#include "ruby_whisper.h"
|
||||
|
||||
void
|
||||
ruby_whisper_log_queue_initialize(ruby_whisper_log_queue *log_queue)
|
||||
{
|
||||
rb_nativethread_lock_initialize(&log_queue->lock);
|
||||
rb_native_cond_initialize(&log_queue->cond);
|
||||
log_queue->head = 0;
|
||||
log_queue->tail = 0;
|
||||
log_queue->size = 0;
|
||||
log_queue->is_active = true;
|
||||
log_queue->logs = ALLOC_N(ruby_whisper_log, LOG_QUEUE_CAPACITY);
|
||||
for (size_t i = 0; i < LOG_QUEUE_CAPACITY; i++) {
|
||||
// we cannot call Ruby API like ALLOC_N because this slot may realloced without GVL
|
||||
// doesn't free this because log queue lives at the end of process
|
||||
char *slot = malloc(sizeof(char) * LOG_QUEUE_CAPACITY);
|
||||
if (!slot) {
|
||||
rb_raise(rb_eRuntimeError, "Could not allocate memory for log text");
|
||||
}
|
||||
ruby_whisper_log log = {
|
||||
0,
|
||||
slot,
|
||||
0,
|
||||
LOG_QUEUE_CAPACITY,
|
||||
};
|
||||
log_queue->logs[i] = log;
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
calc_enough_cap(size_t len)
|
||||
{
|
||||
size_t quot = len / LOG_DEFAULT_CAPACITY;
|
||||
size_t rem = len % LOG_DEFAULT_CAPACITY;
|
||||
|
||||
return sizeof(char) * (rem == 0 ? quot : quot + 1) * LOG_DEFAULT_CAPACITY;
|
||||
}
|
||||
|
||||
void
|
||||
ruby_whisper_log_queue_enqueue(ruby_whisper_log_queue *log_queue, enum ggml_log_level level, const char *text)
|
||||
{
|
||||
rb_nativethread_lock_lock(&log_queue->lock);
|
||||
|
||||
size_t len = strlen(text);
|
||||
ruby_whisper_log *log = &log_queue->logs[log_queue->head];
|
||||
if (len > log->capacity) {
|
||||
size_t new_cap = calc_enough_cap(len);
|
||||
// we cannot call Ruby API like REALLOC_N because this function is called without GVL
|
||||
char *slot = realloc(log->text, new_cap);
|
||||
if (!slot) {
|
||||
rb_nativethread_lock_unlock(&log_queue->lock);
|
||||
return;
|
||||
}
|
||||
log->text = slot;
|
||||
log->capacity = new_cap;
|
||||
}
|
||||
// we cannot call Ruby API like MEMCPY because this function is called without GVL
|
||||
memcpy(log->text, text, sizeof(char) * len);
|
||||
log->length = len;
|
||||
log->level = level;
|
||||
log_queue->head = (log_queue->head + 1) % LOG_QUEUE_CAPACITY;
|
||||
bool is_full = log_queue->size >= LOG_QUEUE_CAPACITY;
|
||||
log_queue->size = is_full ? LOG_QUEUE_CAPACITY : log_queue->size + 1;
|
||||
if (is_full) {
|
||||
log_queue->tail = log_queue->head;
|
||||
}
|
||||
|
||||
rb_native_cond_signal(&log_queue->cond);
|
||||
rb_nativethread_lock_unlock(&log_queue->lock);
|
||||
}
|
||||
|
||||
static void*
|
||||
ruby_whisper_log_queue_wait(void *args)
|
||||
{
|
||||
ruby_whisper_log_queue *log_queue = (ruby_whisper_log_queue *)args;
|
||||
|
||||
rb_native_cond_wait(&log_queue->cond, &log_queue->lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
ruby_whisper_log_queue_wait_ubf(void *args)
|
||||
{
|
||||
ruby_whisper_log_queue *log_queue = (ruby_whisper_log_queue *)args;
|
||||
|
||||
rb_nativethread_lock_lock(&log_queue->lock);
|
||||
|
||||
rb_native_cond_broadcast(&log_queue->cond);
|
||||
|
||||
rb_nativethread_lock_unlock(&log_queue->lock);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
enum ggml_log_level level;
|
||||
size_t length;
|
||||
char *text;
|
||||
} log_snapshot;
|
||||
|
||||
VALUE
|
||||
ruby_whisper_log_queue_drain(ruby_whisper_log_queue *log_queue)
|
||||
{
|
||||
log_snapshot logs[LOG_QUEUE_CAPACITY];
|
||||
|
||||
rb_nativethread_lock_lock(&log_queue->lock);
|
||||
|
||||
while (log_queue->size == 0 && log_queue->is_active) {
|
||||
rb_thread_call_without_gvl(ruby_whisper_log_queue_wait, (void *)log_queue, ruby_whisper_log_queue_wait_ubf, (void *)log_queue);
|
||||
}
|
||||
|
||||
if (log_queue->size == 0 && !log_queue->is_active) {
|
||||
rb_native_cond_broadcast(&log_queue->cond);
|
||||
rb_nativethread_lock_unlock(&log_queue->lock);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
size_t size = log_queue->size;
|
||||
ruby_whisper_log *log;
|
||||
size_t i;
|
||||
for (i = 0; i < size; i++) {
|
||||
log = &log_queue->logs[(log_queue->tail + i) % LOG_QUEUE_CAPACITY];
|
||||
logs[i].level = log->level;
|
||||
logs[i].length = log->length;
|
||||
char *text = malloc(log->length);
|
||||
if (!text) {
|
||||
logs[i].text = NULL;
|
||||
continue;
|
||||
}
|
||||
logs[i].text = text;
|
||||
memcpy(logs[i].text, log->text, log->length);
|
||||
}
|
||||
log_queue->size = 0;
|
||||
log_queue->tail = log_queue->head;
|
||||
|
||||
rb_native_cond_signal(&log_queue->cond);
|
||||
|
||||
rb_nativethread_lock_unlock(&log_queue->lock);
|
||||
|
||||
VALUE rb_logs = rb_ary_new2(size);
|
||||
VALUE rb_text;
|
||||
for (i = 0; i < size; i++) {
|
||||
if (!logs[i].text) {
|
||||
continue;
|
||||
}
|
||||
rb_text = rb_str_new(logs[i].text, logs[i].length);
|
||||
free(logs[i].text);
|
||||
rb_ary_push(rb_logs, rb_ary_new3(2, INT2NUM(logs[i].level), rb_text));
|
||||
}
|
||||
|
||||
return rb_logs;
|
||||
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
#include "ruby_whisper.h"
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
extern VALUE mParakeet;
|
||||
|
||||
extern void ruby_whisper_log_queue_initialize(ruby_whisper_log_queue *log_queue);
|
||||
extern void ruby_whisper_log_queue_enqueue(ruby_whisper_log_queue *log_queue, enum ggml_log_level level, const char *text);
|
||||
extern VALUE ruby_whisper_log_queue_drain(ruby_whisper_log_queue *log_queue);
|
||||
|
||||
ID id_start_log_callback_thread;
|
||||
|
||||
static ruby_whisper_log_queue parakeet_log_queue;
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_parakeet_s_drain_logs(VALUE self)
|
||||
{
|
||||
return ruby_whisper_log_queue_drain(¶keet_log_queue);
|
||||
}
|
||||
|
||||
static void
|
||||
ruby_whisper_parakeet_log_callback(enum ggml_log_level level, const char *text, void *user_data)
|
||||
{
|
||||
ruby_whisper_log_queue_enqueue(¶keet_log_queue, level, text);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_parakeet_s_log_set(VALUE self, VALUE log_callback, VALUE user_data)
|
||||
{
|
||||
if (NIL_P(log_callback)) {
|
||||
parakeet_log_set(NULL, NULL);
|
||||
} else {
|
||||
rb_iv_set(self, "@log_callback", log_callback);
|
||||
rb_iv_set(self, "@log_callback_user_data", user_data);
|
||||
parakeet_log_queue.is_active = true;
|
||||
rb_funcall(mParakeet, id_start_log_callback_thread, 0);
|
||||
parakeet_log_set(ruby_whisper_parakeet_log_callback, NULL);
|
||||
}
|
||||
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static void
|
||||
ruby_whisper_parakeet_end_proc(VALUE args)
|
||||
{
|
||||
ID id_log_callback_thread = rb_intern("@log_callback_thread");
|
||||
ID id_alive = rb_intern("alive?");
|
||||
ID id_join = rb_intern("join");
|
||||
|
||||
rb_nativethread_lock_lock(¶keet_log_queue.lock);
|
||||
|
||||
parakeet_log_queue.is_active = false;
|
||||
rb_native_cond_broadcast(¶keet_log_queue.cond);
|
||||
|
||||
rb_nativethread_lock_unlock(¶keet_log_queue.lock);
|
||||
|
||||
VALUE log_callback_thread = rb_ivar_get(mParakeet, id_log_callback_thread);
|
||||
if (!NIL_P(log_callback_thread) && RTEST(rb_funcall(log_callback_thread, id_alive, 0))) {
|
||||
rb_funcall(log_callback_thread, id_join, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_parakeet()
|
||||
{
|
||||
id_start_log_callback_thread = rb_intern("start_log_callback_thread");
|
||||
|
||||
ruby_whisper_log_queue_initialize(¶keet_log_queue);
|
||||
|
||||
rb_define_singleton_method(mParakeet, "log_set", ruby_whisper_parakeet_s_log_set, 2);
|
||||
rb_define_private_method(rb_singleton_class(mParakeet), "drain_logs", ruby_whisper_parakeet_s_drain_logs, 0);
|
||||
|
||||
rb_set_end_proc(ruby_whisper_parakeet_end_proc, Qnil);
|
||||
rb_require("whisper/parakeet");
|
||||
rb_funcall(mParakeet, id_start_log_callback_thread, 0);
|
||||
}
|
||||
|
|
@ -49,6 +49,19 @@ ruby_whisper_parakeet_context_allocate(VALUE klass)
|
|||
return obj;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
struct parakeet_context **context;
|
||||
char *model_path;
|
||||
} ruby_whisper_parakeet_context_init_args;
|
||||
|
||||
static void*
|
||||
ruby_whisper_parakeet_context_init_without_gvl(void *args)
|
||||
{
|
||||
ruby_whisper_parakeet_context_init_args *init_args = (ruby_whisper_parakeet_context_init_args *)args;
|
||||
*init_args->context = parakeet_init_from_file_with_params(init_args->model_path, parakeet_context_default_params());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_parakeet_context_initialize(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
|
|
@ -62,7 +75,11 @@ ruby_whisper_parakeet_context_initialize(int argc, VALUE *argv, VALUE self)
|
|||
if (!rb_respond_to(model_path, id_to_s)) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Parakeet::Context");
|
||||
}
|
||||
rwpc->context = parakeet_init_from_file_with_params(StringValueCStr(model_path), parakeet_context_default_params());
|
||||
ruby_whisper_parakeet_context_init_args init_args = {
|
||||
&rwpc->context,
|
||||
StringValueCStr(model_path),
|
||||
};
|
||||
rb_thread_call_without_gvl(ruby_whisper_parakeet_context_init_without_gvl, (void *)&init_args, NULL, NULL);
|
||||
if (rwpc->context == NULL) {
|
||||
rb_raise(rb_eRuntimeError, "Failed to load model");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,9 +24,7 @@ enum {
|
|||
ITERATE_PARAMS(DEF_IDX)
|
||||
ITERATE_CALLBACK_PARAMS(DEF_IDX_CALLBACK)
|
||||
ITERATE_CALLBACK_PARAMS(DEF_IDX_USER_DATA)
|
||||
#undef DEF_IDX
|
||||
#undef DEF_IDX_CALLBACK
|
||||
#undef DEF_IDX_USER_DATA
|
||||
|
||||
RUBY_WHISPER_PARAKEET_NUM_PARAMS
|
||||
};
|
||||
|
||||
|
|
@ -69,8 +67,6 @@ ruby_whisper_parakeet_params_mark(void *p)
|
|||
#define MARK_CONTAINER(name) ruby_whisper_callback_container_mark(rwpp->name##_container);
|
||||
|
||||
ITERATE_CALLBACK_PARAMS(MARK_CONTAINER)
|
||||
|
||||
#undef MARK_CONTAINER
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -84,9 +80,7 @@ ruby_whisper_parakeet_params_free(void *p)
|
|||
xfree(rwpp->name##_container); \
|
||||
}
|
||||
|
||||
ITERATE_CALLBACK_PARAMS(FREE_CONTAINER)
|
||||
|
||||
#undef FREE_CONTAINER
|
||||
ITERATE_CALLBACK_PARAMS(FREE_CONTAINER)
|
||||
}
|
||||
|
||||
static size_t
|
||||
|
|
@ -202,9 +196,7 @@ ruby_whisper_parakeet_params_initialize(int argc, VALUE *argv, VALUE self)
|
|||
|
||||
#define INIT_CONTAINER(name) rwpp->name##_container = ruby_whisper_callback_container_allocate();
|
||||
|
||||
ITERATE_CALLBACK_PARAMS(INIT_CONTAINER)
|
||||
|
||||
#undef INIT_CONTAINER
|
||||
ITERATE_CALLBACK_PARAMS(INIT_CONTAINER)
|
||||
|
||||
rb_scan_args_kw(RB_SCAN_ARGS_KEYWORDS, argc, argv, ":", &kw_hash);
|
||||
if (NIL_P(kw_hash)) {
|
||||
|
|
@ -252,27 +244,4 @@ init_ruby_whisper_parakeet_params(VALUE *mParakeet)
|
|||
rb_define_method(cParakeetParams, "on_" #name, ruby_whisper_parakeet_params_on_##name, 0);
|
||||
|
||||
ITERATE_CALLBACK_PARAMS(REGISTER_HOOK)
|
||||
|
||||
#undef REGISTER_PARAM
|
||||
#undef REGISTER_PARAM_ATTR
|
||||
#undef REGISTER_CALLBACK_PARAM_ATTR
|
||||
#undef REGISTER_USER_DATA_PARAM_ATTR
|
||||
#undef REGISTER_HOOK
|
||||
}
|
||||
|
||||
#undef VAL_TO_INT
|
||||
#undef VAL_FROM_INT
|
||||
#undef VAL_TO_BOOL
|
||||
#undef VAL_FROM_BOOL
|
||||
#undef DEF_BOOL_PARAM_ATTR
|
||||
#undef DEF_INT_PARAM_ATTR
|
||||
#undef CALLBACK_CONTAINER_NAME
|
||||
#undef DEF_CALLBACK_PARAM_ATTR
|
||||
#undef DEF_USER_DATA_PARAM_ATTR
|
||||
#undef DEF_HOOK
|
||||
#undef READER
|
||||
#undef WRITER
|
||||
#undef DEF_PARAM_ATTR
|
||||
#undef DEF_PARAM_ATTR_I
|
||||
#undef ITERATE_PARAMS
|
||||
#undef ITERATE_CALLBACK_PARAMS
|
||||
|
|
|
|||
|
|
@ -10,8 +10,6 @@ enum {
|
|||
|
||||
ITERATE_ATTRS(DEF_IDX)
|
||||
RUBY_WHISPER_PARAKEET_SEGMENT_NUM_ATTRS,
|
||||
|
||||
#undef DEF_IDX
|
||||
};
|
||||
|
||||
#define VAL_FROM_TIME(v) (LONG2NUM((v) * 10))
|
||||
|
|
@ -48,15 +46,11 @@ ruby_whisper_parakeet_segment_memsize(const void *p)
|
|||
if (!rwps) {
|
||||
return 0;
|
||||
}
|
||||
size_t size = sizeof(*rwps);
|
||||
if (rwps->index) {
|
||||
size += sizeof(rwps->index);
|
||||
}
|
||||
return size;
|
||||
return sizeof(*rwps);
|
||||
}
|
||||
|
||||
static const rb_data_type_t ruby_whisper_parakeet_segment_type = {
|
||||
"ruby_whisper_segment",
|
||||
"ruby_whisper_parakeet_segment",
|
||||
{rb_whisper_parakeet_segment_mark, RUBY_DEFAULT_FREE, ruby_whisper_parakeet_segment_memsize,},
|
||||
0, 0,
|
||||
0
|
||||
|
|
@ -117,8 +111,6 @@ ruby_whisper_parakeet_segment_deconstruct_keys(VALUE self, VALUE keys)
|
|||
}
|
||||
|
||||
ITERATE_ATTRS(CHECK_AND_SET_KEY)
|
||||
|
||||
#undef CHECK_AND_SET_KEY
|
||||
}
|
||||
|
||||
return hash;
|
||||
|
|
@ -136,13 +128,5 @@ init_ruby_whisper_parakeet_segment(VALUE *mParakeet)
|
|||
|
||||
ITERATE_ATTRS(REGISTER_ATTR)
|
||||
|
||||
#undef REGISTER_ATTR
|
||||
|
||||
rb_define_method(cParakeetSegment, "deconstruct_keys", ruby_whisper_parakeet_segment_deconstruct_keys, 1);
|
||||
}
|
||||
|
||||
#undef DEF_ATTR
|
||||
#undef READER
|
||||
#undef VAL_FROM_STRING
|
||||
#undef VAL_FROM_TIME
|
||||
#undef ITERATE_ATTRS
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ extern ID id_new;
|
|||
|
||||
extern VALUE eError;
|
||||
|
||||
static struct transcribe_without_gvl_args {
|
||||
typedef struct transcribe_without_gvl_args {
|
||||
struct parakeet_context *context;
|
||||
struct parakeet_full_params params;
|
||||
float *samples;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,32 @@
|
|||
require "mutex_m"
|
||||
|
||||
module Whisper
|
||||
module Parakeet
|
||||
extend Mutex_m
|
||||
|
||||
class << self
|
||||
def start_log_callback_thread
|
||||
return if @log_callback_thread&.alive?
|
||||
|
||||
@log_callback_thread = Thread.new {
|
||||
begin
|
||||
while logs = drain_logs
|
||||
begin
|
||||
callback, user_data = synchronize {[@log_callback, @log_callback_user_data]}
|
||||
next if callback.nil?
|
||||
|
||||
logs.each do |(level, text)|
|
||||
callback.call level, text, user_data
|
||||
end
|
||||
rescue => err
|
||||
$stderr.puts err
|
||||
end
|
||||
end
|
||||
rescue => err
|
||||
$stderr.puts err
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
require_relative "helper"
|
||||
require "stringio"
|
||||
|
||||
class TestParakeet < TestBase
|
||||
def test_log_set
|
||||
log_callback = Parakeet.instance_variable_get("@log_callback")
|
||||
user_data = Parakeet.instance_variable_get("@log_callback_user_data")
|
||||
|
||||
$stdout = StringIO.new
|
||||
Parakeet.log_set proc {|level, message, _| puts [level, message].join(": ")}, nil
|
||||
Parakeet::Context.new(File.join(__dir__, "../../../models/parakeet-tdt-0.6b-v3.bin"))
|
||||
sleep 0.1
|
||||
$stdout.rewind
|
||||
logs = $stdout.string
|
||||
assert_match /loading model from/, logs
|
||||
ensure
|
||||
$stdout = STDOUT
|
||||
Parakeet.log_set log_callback, user_data
|
||||
end
|
||||
end
|
||||
|
|
@ -1,7 +1,11 @@
|
|||
require_relative "helper"
|
||||
require "stringio"
|
||||
|
||||
class TestParakeetContext < TestBase
|
||||
def setup
|
||||
Whisper.instance_variable_set "@whisper", nil
|
||||
GC.start
|
||||
|
||||
@parakeet = Parakeet::Context.new(File.join(__dir__, "../../../models/parakeet-tdt-0.6b-v3.bin"))
|
||||
@params = Parakeet::Params.new
|
||||
end
|
||||
|
|
|
|||
Loading…
Reference in New Issue