From 3e855f63b2746e641185d6a0d5f0a6d7e5e7c778 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 22 Mar 2026 02:51:31 +0900 Subject: [PATCH 01/29] Change MemoryView example using NDAV --- bindings/ruby/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bindings/ruby/README.md b/bindings/ruby/README.md index 41e7b330..b8210b1e 100644 --- a/bindings/ruby/README.md +++ b/bindings/ruby/README.md @@ -383,16 +383,16 @@ If you can prepare audio data as C array and export it as a MemoryView, whisperc ```ruby require "torchaudio" -require "arrow-numo-narray" +require "ndav/torch/tensor" require "whisper" waveform, sample_rate = TorchAudio.load("test/fixtures/jfk.wav") -# Convert Torch::Tensor to Arrow::Array via Numo::NArray -samples = waveform.squeeze.numo.to_arrow.to_arrow_array +# Convert Torch::Tensor to NDAV +samples = waveform.squeeze.to_ndav whisper = Whisper::Context.new("base") whisper - # Arrow::Array exports MemoryView + # NDAV exports MemoryView .full(Whisper::Params.new, samples) ``` From cba11bbbbf32c78ba179fe4f10f3b28735f1db6b Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 22 Mar 2026 03:16:43 +0900 Subject: [PATCH 02/29] Add note on audio attributes for #full and #full_parallel --- bindings/ruby/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/ruby/README.md b/bindings/ruby/README.md index b8210b1e..07b81830 100644 --- a/bindings/ruby/README.md +++ b/bindings/ruby/README.md @@ -360,7 +360,7 @@ Whisper::Context.new("base") ### Low-level API to transcribe ### -You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility. +You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. 
Although `#transcribe` with audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility. Unlike `#transcribe`, these methods require 16,000 Hz, 32-bit float audio. ```ruby require "whisper" From cfdeaa25741596b9e0e4fdcd319e44b1ba7a7600 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Fri, 3 Apr 2026 07:31:33 +0900 Subject: [PATCH 03/29] Support more variants of MemoryView --- bindings/ruby/ext/ruby_whisper_context.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index 6e38ead6..8f1cacdd 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -304,11 +304,13 @@ VALUE ruby_whisper_model_type(VALUE self) static bool check_memory_view(rb_memory_view_t *memview) { - if (memview->format != NULL && strcmp(memview->format, "f") != 0) { - rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format); + if (memview->format != NULL && strcmp(memview->format, "f") != 0 && strcmp(memview->format, "e") != 0) { + // TODO: Accept other formats and convert samples + rb_warn("currently only format \"f\" and \"e\" is supported for MemoryView, but given: %s", memview->format); return false; } - if (memview->format != NULL && memview->ndim != 1) { + if (memview->format != NULL && memview->ndim != 1 && !(memview->ndim == 2 && memview->shape[1] == 1)) { + // TODO: Accept ndim == 2 with shape [n_samples, channels] and channels > 1 by averaging the samples in different channels or just taking the first channel rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim); return false; } From ea34b179975efd3d470294cb2fca70c897ecec98 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Wed, 15 Apr 2026 13:19:55 +0900 Subject: [PATCH 04/29] Use IO.popen instead of Kernel.` for Windows 
compatibility --- bindings/ruby/ext/options.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/ruby/ext/options.rb b/bindings/ruby/ext/options.rb index ede80c06..0b2f9eb7 100644 --- a/bindings/ruby/ext/options.rb +++ b/bindings/ruby/ext/options.rb @@ -18,7 +18,7 @@ class Options output = nil Dir.chdir __dir__ do - output = `#{@cmake.shellescape} -S sources -B build -L` + output = IO.popen([@cmake, "-S", "sources", "-B", "build", "-L"]).read end @cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1) .filter_map {|line| From 9ca31b69816c2ec1cc2b554696f923ed5a35f87b Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Wed, 15 Apr 2026 14:08:45 +0900 Subject: [PATCH 05/29] Use cmake's -C option instead of multiple -D options --- bindings/ruby/ext/dependencies.rb | 3 +-- bindings/ruby/ext/extconf.rb | 4 ++-- bindings/ruby/ext/options.rb | 28 +++++++++++++++++++++++----- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/bindings/ruby/ext/dependencies.rb b/bindings/ruby/ext/dependencies.rb index 2ba4b94b..e77ac0c4 100644 --- a/bindings/ruby/ext/dependencies.rb +++ b/bindings/ruby/ext/dependencies.rb @@ -36,8 +36,7 @@ class Dependencies end def generate_dot - args = ["-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF"] - args << @options.to_s unless @options.to_s.empty? 
+ args = ["-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF", "-C", @options.cache_path] system @cmake, *args, exception: true end diff --git a/bindings/ruby/ext/extconf.rb b/bindings/ruby/ext/extconf.rb index acff501a..ce9ffc0e 100644 --- a/bindings/ruby/ext/extconf.rb +++ b/bindings/ruby/ext/extconf.rb @@ -3,7 +3,7 @@ require_relative "options" require_relative "dependencies" cmake = find_executable("cmake") || abort -options = Options.new(cmake).to_s +options = Options.new(cmake) have_library("gomp") rescue nil libs = Dependencies.new(cmake, options).to_s @@ -17,7 +17,7 @@ create_makefile "whisper" do |conf| $(TARGET_SO): #{libs} #{libs}: cmake-targets cmake-targets: - #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options} + #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON -C #{options.cache_path} #{"\t"}#{cmake} --build build --config Release --target common whisper EOF end diff --git a/bindings/ruby/ext/options.rb b/bindings/ruby/ext/options.rb index 0b2f9eb7..09baf9b2 100644 --- a/bindings/ruby/ext/options.rb +++ b/bindings/ruby/ext/options.rb @@ -1,16 +1,16 @@ +require "fileutils" + class Options def initialize(cmake="cmake") @cmake = cmake @options = {} configure + write_cache_file end - def to_s - @options - .reject {|name, (type, value)| value.nil?} - .collect {|name, (type, value)| "-D #{name}=#{value == true ? "ON" : value == false ? 
"OFF" : value.shellescape}"} - .join(" ") + def cache_path + File.join(__dir__, "source", "Options.cmake") end def cmake_options @@ -82,4 +82,22 @@ class Options op[1] end end + + def write_cache_file + FileUtils.mkpath File.dirname(cache_path) + File.open cache_path, "w" do |file| + @options.reject {|name, (type, value)| value.nil?}.each do |name, (type, value)| + line = "set(CACHE{%s} TYPE %s FORCE VALUE %s)" % { + name:, + type:, + value: value == true ? "ON" : value == false ? "OFF" : escape_cmake(value) + } + file.puts line + end + end + end + + def escape_cmake(str) + str.gsub(/([\\"])/, '\\\\\1') + end end From 3ee810047e69df5d8ba9f48386290806d97f4878 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 20 Apr 2026 05:51:16 +0900 Subject: [PATCH 06/29] Fix memsize calculation --- bindings/ruby/ext/ruby_whisper_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index 8f1cacdd..eb1c102a 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -74,7 +74,7 @@ static size_t ruby_whisper_memsize(const void *p) { const ruby_whisper *rw = (const ruby_whisper *)p; - size_t size = sizeof(rw); + size_t size = sizeof(*rw); if (!rw) { return 0; } From 080d0d3658ed4a0cdd8874008e0fd22a89938310 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Wed, 22 Apr 2026 08:30:41 +0900 Subject: [PATCH 07/29] Remove unused argument --- bindings/ruby/ext/ruby_whisper_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 3e5dca9c..0fedadab 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -188,7 +188,7 @@ static bool abort_callback(void * user_data) { } static void -check_thread_safety(ruby_whisper_params *rwp, VALUE *context, int n_processors) 
+check_thread_safety(ruby_whisper_params *rwp, int n_processors) { if (n_processors == 1) { return; @@ -255,7 +255,7 @@ static void set_vad_params(ruby_whisper_params *rwp) void prepare_transcription(ruby_whisper_params *rwp, VALUE *context, int n_processors) { - check_thread_safety(rwp, context, n_processors); + check_thread_safety(rwp, n_processors); register_callbacks(rwp, context); set_vad_params(rwp); } From d7301aa79389ff7586594c3b2451bec4d3d9de7c Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Wed, 22 Apr 2026 09:04:40 +0900 Subject: [PATCH 08/29] Add is_interrupted field to abort callback container --- bindings/ruby/ext/ruby_whisper.h | 10 +++++- bindings/ruby/ext/ruby_whisper_params.c | 42 ++++++++++++++++++++----- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper.h b/bindings/ruby/ext/ruby_whisper.h index 6b0b4df7..261d305e 100644 --- a/bindings/ruby/ext/ruby_whisper.h +++ b/bindings/ruby/ext/ruby_whisper.h @@ -13,6 +13,14 @@ typedef struct { VALUE callbacks; } ruby_whisper_callback_container; +typedef struct { + VALUE *context; + VALUE user_data; + VALUE callback; + VALUE callbacks; + bool is_interrupted; +} ruby_whisper_abort_callback_container; + typedef struct { struct whisper_context *context; } ruby_whisper; @@ -27,7 +35,7 @@ typedef struct { ruby_whisper_callback_container *new_segment_callback_container; ruby_whisper_callback_container *progress_callback_container; ruby_whisper_callback_container *encoder_begin_callback_container; - ruby_whisper_callback_container *abort_callback_container; + ruby_whisper_abort_callback_container *abort_callback_container; VALUE vad_params; } ruby_whisper_params; diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 0fedadab..c0d2029a 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -97,6 +97,28 @@ rb_whisper_callback_container_allocate() { return container; } +static 
void +rb_whisper_abort_callback_container_mark(ruby_whisper_abort_callback_container *rwc) +{ + if (rwc == NULL) return; + + rb_gc_mark(rwc->user_data); + rb_gc_mark(rwc->callback); + rb_gc_mark(rwc->callbacks); +} + +static ruby_whisper_abort_callback_container* +rb_whisper_abort_callback_container_allocate() { + ruby_whisper_abort_callback_container *container; + container = ALLOC(ruby_whisper_abort_callback_container); + container->context = NULL; + container->user_data = Qnil; + container->callback = Qnil; + container->callbacks = rb_ary_new(); + container->is_interrupted = false; + return container; +} + static void new_segment_callback(struct whisper_context *ctx, struct whisper_state *state, int n_new, void *user_data) { const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; @@ -166,7 +188,12 @@ static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_s } static bool abort_callback(void * user_data) { - const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; + const ruby_whisper_abort_callback_container *container = (ruby_whisper_abort_callback_container *)user_data; + + if (container->is_interrupted) { + return true; + } + if (!NIL_P(container->callback)) { VALUE result = rb_funcall(container->callback, id_call, 1, container->user_data); if (!NIL_P(result) && Qfalse != result) { @@ -235,11 +262,10 @@ static void register_callbacks(ruby_whisper_params * rwp, VALUE * context) { rwp->params.encoder_begin_callback_user_data = rwp->encoder_begin_callback_container; } - if (!NIL_P(rwp->abort_callback_container->callback) || 0 != RARRAY_LEN(rwp->abort_callback_container->callbacks)) { - rwp->abort_callback_container->context = context; - rwp->params.abort_callback = abort_callback; - rwp->params.abort_callback_user_data = rwp->abort_callback_container; - } + rwp->abort_callback_container->context = context; + rwp->params.abort_callback = abort_callback; + 
rwp->abort_callback_container->is_interrupted = false; + rwp->params.abort_callback_user_data = rwp->abort_callback_container; } static void set_vad_params(ruby_whisper_params *rwp) @@ -267,7 +293,7 @@ rb_whisper_params_mark(void *p) rb_whisper_callbcack_container_mark(rwp->new_segment_callback_container); rb_whisper_callbcack_container_mark(rwp->progress_callback_container); rb_whisper_callbcack_container_mark(rwp->encoder_begin_callback_container); - rb_whisper_callbcack_container_mark(rwp->abort_callback_container); + rb_whisper_abort_callback_container_mark(rwp->abort_callback_container); rb_gc_mark(rwp->vad_params); } @@ -338,7 +364,7 @@ ruby_whisper_params_allocate(VALUE klass) rwp->new_segment_callback_container = rb_whisper_callback_container_allocate(); rwp->progress_callback_container = rb_whisper_callback_container_allocate(); rwp->encoder_begin_callback_container = rb_whisper_callback_container_allocate(); - rwp->abort_callback_container = rb_whisper_callback_container_allocate(); + rwp->abort_callback_container = rb_whisper_abort_callback_container_allocate(); return obj; } From b89917298ecec6d6bf89380872edc69a4f94e659 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Thu, 23 Apr 2026 06:03:35 +0900 Subject: [PATCH 09/29] Fix RBS syntax --- bindings/ruby/sig/whisper.rbs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index 3c596619..f85fd098 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -5,10 +5,10 @@ module Whisper end type log_callback = ^(Integer level, String message, Object user_data) -> void - type new_segment_callback = ^(Whisper::Context, void, Integer n_new, Object user_data) -> void - type progress_callback = ^(Whisper::Context, void, Integer progress, Object user_data) -> void - type encoder_begin_callback = ^(Whisper::Context, void, Object user_data) -> void - type abort_callback = ^(Whisper::Context, void, Object 
user_data) -> boolish + type new_segment_callback = ^(Whisper::Context, untyped, Integer n_new, Object user_data) -> void + type progress_callback = ^(Whisper::Context, untyped, Integer progress, Object user_data) -> void + type encoder_begin_callback = ^(Whisper::Context, untyped, Object user_data) -> void + type abort_callback = ^(Whisper::Context, untyped, Object user_data) -> boolish VERSION: String LOG_LEVEL_NONE: Integer From 63caba664d837d7a9b9626006de22c5721ec2539 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Thu, 23 Apr 2026 07:07:35 +0900 Subject: [PATCH 10/29] Address document comment for RDoc --- bindings/ruby/sig/whisper.rbs | 78 +++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index f85fd098..94bee4b5 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -52,7 +52,7 @@ module Whisper # puts text # end # - # If n_processors is greater than 1, you cannot set any callbacks including + # If `n_processors` is greater than 1, you cannot set any callbacks including # new_segment_callback, progress_callback, encoder_begin_callback, abort_callback, # and log_callback set by Whisper.log_set def transcribe: (path, Params, ?n_processors: Integer) -> self @@ -74,7 +74,7 @@ module Whisper # puts segment.text # end # - # Returns an Enumerator if no block given: + # Returns an `Enumerator` if no block given: # # whisper.transcribe("path/to/audio.wav", params) # enum = whisper.each_segment @@ -91,25 +91,25 @@ module Whisper # def full_lang_id: () -> Integer - # Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds). + # Start time of a segment indexed by `segment_index` in centiseconds (10 times milliseconds). 
# # full_get_segment_t0(3) # => 1668 (16680 ms) # def full_get_segment_t0: (Integer) -> Integer - # End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds). + # End time of a segment indexed by `segment_index` in centiseconds (10 times milliseconds). # # full_get_segment_t1(3) # => 1668 (16680 ms) # def full_get_segment_t1: (Integer) -> Integer - # Whether the next segment indexed by +segment_index+ is predicated as a speaker turn. + # Whether the next segment indexed by `segment_index` is predicated as a speaker turn. # # full_get_segment_speacker_turn_next(3) # => true # def full_get_segment_speaker_turn_next: (Integer) -> (true | false) - # Text of a segment indexed by +segment_index+. + # Text of a segment indexed by `segment_index`. # # full_get_segment_text(3) # => "ask not what your country can do for you, ..." # @@ -117,22 +117,22 @@ module Whisper def full_get_segment_no_speech_prob: (Integer) -> Float - # Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text - # Not thread safe for same context + # Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text + # Not thread safe for same context # Uses the specified decoding strategy to obtain the text. # - # The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data. + # The second argument `samples` must be an array of samples, respond to `:length`, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data. # def full: (Params, Array[Float] samples, ?Integer n_samples) -> self | (Params, _Samples, ?Integer n_samples) -> self - # Split the input audio in chunks and process each chunk separately using whisper_full_with_state() - # Result is stored in the default state of the context - # Not thread safe if executed in parallel on the same context. - # It seems this approach can offer some speedup in some cases. 
+ # Split the input audio in chunks and process each chunk separately using `whisper_full_with_state()` + # Result is stored in the default state of the context + # Not thread safe if executed in parallel on the same context. + # It seems this approach can offer some speedup in some cases. # However, the transcription accuracy can be worse at the beginning and end of each chunk. # - # If n_processors is greater than 1, you cannot set any callbacks including + # If `n_processors` is greater than 1, you cannot set any callbacks including # new_segment_callback, progress_callback, encoder_begin_callback, abort_callback, # and log_callback set by Whisper.log_set def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self @@ -217,35 +217,35 @@ module Whisper def translate: () -> (true | false) def no_context=: (boolish) -> boolish - # If true, does not use past transcription (if any) as initial prompt for the decoder. + # If `true`, does not use past transcription (if any) as initial prompt for the decoder. # def no_context: () -> (true | false) def single_segment=: (boolish) -> boolish - # If true, forces single segment output (useful for streaming). + # If `true`, forces single segment output (useful for streaming). # def single_segment: () -> (true | false) def print_special=: (boolish) -> boolish - # If true, prints special tokens (e.g. , , , etc.). + # If `true`, prints special tokens (e.g. , , , etc.). # def print_special: () -> (true | false) def print_progress=: (boolish) -> boolish - # If true, prints progress information. + # If `true`, prints progress information. # def print_progress: () -> (true | false) def print_realtime=: (boolish) -> boolish - # If true, prints results from within whisper.cpp. (avoid it, use callback instead) + # If `true`, prints results from within whisper.cpp. (avoid it, use callback instead) # def print_realtime: () -> (true | false) - # If true, prints timestamps for each text segment when printing realtime. 
+ # If `true`, prints timestamps for each text segment when printing realtime. # def print_timestamps=: (boolish) -> boolish @@ -253,19 +253,19 @@ module Whisper def suppress_blank=: (boolish) -> boolish - # If true, suppresses blank outputs. + # If `true`, suppresses blank outputs. # def suppress_blank: () -> (true | false) def suppress_nst=: (boolish) -> boolish - # If true, suppresses non-speech-tokens. + # If `true`, suppresses non-speech-tokens. # def suppress_nst: () -> (true | false) def token_timestamps=: (boolish) -> boolish - # If true, enables token-level timestamps. + # If `true`, enables token-level timestamps. # def token_timestamps: () -> (true | false) @@ -277,16 +277,16 @@ module Whisper def split_on_word=: (boolish) -> boolish - # If true, split on word rather than on token (when used with max_len). + # If `true`, split on word rather than on token (when used with max_len). # def split_on_word: () -> (true | false) def initial_prompt=: (_ToS) -> _ToS def carry_initial_prompt=: (boolish) -> boolish - # Tokens to provide to the whisper decoder as initial prompt - # these are prepended to any existing text context from a previous call - # use whisper_tokenize() to convert text to tokens. + # Tokens to provide to the whisper decoder as initial prompt + # these are prepended to any existing text context from a previous call + # use whisper_tokenize() to convert text to tokens. # Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224). # def initial_prompt: () -> (String | nil) @@ -294,7 +294,7 @@ module Whisper def diarize=: (boolish) -> boolish - # If true, enables diarization. + # If `true`, enables diarization. # def diarize: () -> (true | false) @@ -423,7 +423,7 @@ module Whisper # def on_new_segment: { (Segment) -> void } -> void - # Hook called on progress update. Yields each progress Integer between 0 and 100. + # Hook called on progress update. Yields each progress `Integer` between 0 and 100. 
# def on_progress: { (Integer progress) -> void } -> void @@ -431,7 +431,7 @@ module Whisper # def on_encoder_begin: { () -> void } -> void - # Call block to determine whether abort or not. Return +true+ when you want to abort. + # Call block to determine whether abort or not. Return `true` when you want to abort. # # params.abort_on do # if some_condition @@ -504,13 +504,13 @@ module Whisper # Yields each Whisper::Token: # - # whisper.each_segment.first.each_token do |token| - # p token - # end + # whisper.each_segment.first.each_token do |token| + # p token + # end # - # Returns an Enumerator if no block is given: + # Returns an `Enumerator` if no block is given: # - # whisper.each_segment.first.each_token.to_a # => [#, ...] + # whisper.each_segment.first.each_token.to_a # => [#, ...] # def each_token: { (Token) -> void } -> void | () -> Enumerator[Token] @@ -518,7 +518,7 @@ module Whisper def to_webvtt_cue: () -> String - # Possible keys: :start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next + # Possible keys: `:start_time`, `:end_time`, `:text`, `:no_speech_prob`, `:speaker_turn_next` # # whisper.each_segment do |segment| # segment => {start_time:, end_time:, text:, no_speech_prob:, speaker_turn_next:} @@ -569,7 +569,7 @@ module Whisper # [EXPERIMENTAL] Token-level timestamps with DTW # - # Do not use if you haven't computed token-level timestamps with dtw. + # Do not use if you haven't computed token-level timestamps with dtw. # Roughly corresponds to the moment in audio in which the token was output. # def t_dtw: () -> Integer @@ -580,14 +580,14 @@ module Whisper # Start time of the token. # - # Token-level timestamp data. + # Token-level timestamp data. # Do not use if you haven't computed token-level timestamps. # def start_time: () -> Integer # End time of the token. # - # Token-level timestamp data. + # Token-level timestamp data. # Do not use if you haven't computed token-level timestamps. 
# def end_time: () -> Integer From 41adde52040b7c5e49ea63e71e726be0921f2565 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Thu, 23 Apr 2026 07:19:18 +0900 Subject: [PATCH 11/29] Add .document for RDoc --- bindings/ruby/.document | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 bindings/ruby/.document diff --git a/bindings/ruby/.document b/bindings/ruby/.document new file mode 100644 index 00000000..a8e9788f --- /dev/null +++ b/bindings/ruby/.document @@ -0,0 +1,3 @@ +README.md +LICENSE +sig From 142b23d83c0af517cc381a0d22aeb912499b9761 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Thu, 23 Apr 2026 07:27:35 +0900 Subject: [PATCH 12/29] Add .rdoc_options --- bindings/ruby/.rdoc_options | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 bindings/ruby/.rdoc_options diff --git a/bindings/ruby/.rdoc_options b/bindings/ruby/.rdoc_options new file mode 100644 index 00000000..cf14aa5f --- /dev/null +++ b/bindings/ruby/.rdoc_options @@ -0,0 +1,2 @@ +title: whispercpp +main_page: README.md From eac59e05825f09cf651affdbd527f35697a70138 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Thu, 23 Apr 2026 14:24:58 +0900 Subject: [PATCH 13/29] Run #full without GVL --- bindings/ruby/ext/ruby_whisper.h | 1 + bindings/ruby/ext/ruby_whisper_context.c | 42 ++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper.h b/bindings/ruby/ext/ruby_whisper.h index 261d305e..6a2d4585 100644 --- a/bindings/ruby/ext/ruby_whisper.h +++ b/bindings/ruby/ext/ruby_whisper.h @@ -3,6 +3,7 @@ #include #include +#include #include #include "whisper.h" diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index eb1c102a..e92605db 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -47,6 +47,18 @@ typedef struct full_parallel_args { int n_processors; } full_parallel_args; +typedef struct full_without_gvl_args { + struct 
whisper_context *context; + struct whisper_full_params *params; + float *samples; + int n_samples; + int result; +} full_without_gvl_args; + +typedef struct full_ubf_args { + ruby_whisper_abort_callback_container *abort_callback_container; +} full_ubf_args; + static void ruby_whisper_free(ruby_whisper *rw) { @@ -428,6 +440,22 @@ release_samples(VALUE rb_parsed_args) return Qnil; } +static void* +full_without_gvl(void *rb_args) +{ + full_without_gvl_args *args = (full_without_gvl_args *)rb_args; + args->result = whisper_full(args->context, *args->params, args->samples, args->n_samples); + return NULL; +} + +static void +full_ubf(void *rb_args) +{ + full_ubf_args *args = (full_ubf_args *)rb_args; + + args->abort_callback_container->is_interrupted = true; +} + static VALUE full_body(VALUE rb_args) { @@ -439,9 +467,19 @@ full_body(VALUE rb_args) TypedData_Get_Struct(*args->params, ruby_whisper_params, &ruby_whisper_params_type, rwp); prepare_transcription(rwp, args->context, 1); - int result = whisper_full(rw->context, rwp->params, args->samples, args->n_samples); - return INT2NUM(result); + struct full_without_gvl_args full_without_gvl_args = { + rw->context, + &rwp->params, + args->samples, + args->n_samples, + 0, + }; + full_ubf_args full_ubf_args = { + rwp->abort_callback_container, + }; + rb_thread_call_without_gvl(full_without_gvl, (void *)&full_without_gvl_args, full_ubf, (void *)&full_ubf_args); + return INT2NUM(full_without_gvl_args.result); } /* From 946980634f50dbdcde414cabc434f00ea9d97ce8 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Thu, 23 Apr 2026 14:41:28 +0900 Subject: [PATCH 14/29] Initialize callbacks with nil --- bindings/ruby/ext/ruby_whisper_params.c | 69 ++++++++++++++++++------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index c0d2029a..1b234d54 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ 
b/bindings/ruby/ext/ruby_whisper_params.c @@ -93,7 +93,7 @@ rb_whisper_callback_container_allocate() { container->context = NULL; container->user_data = Qnil; container->callback = Qnil; - container->callbacks = rb_ary_new(); + container->callbacks = Qnil; return container; } @@ -114,11 +114,21 @@ rb_whisper_abort_callback_container_allocate() { container->context = NULL; container->user_data = Qnil; container->callback = Qnil; - container->callbacks = rb_ary_new(); + container->callbacks = Qnil; container->is_interrupted = false; return container; } +static bool +ruby_whisper_callback_container_is_present(const ruby_whisper_callback_container *container) { + return !NIL_P(container->callback) || !NIL_P(container->callbacks); +} + +static bool +abort_ruby_whisper_callback_container_is_present(ruby_whisper_abort_callback_container *container) { + return !NIL_P(container->callback) || !NIL_P(container->callbacks); +} + static void new_segment_callback(struct whisper_context *ctx, struct whisper_state *state, int n_new, void *user_data) { const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; @@ -127,6 +137,9 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta if (!NIL_P(container->callback)) { rb_funcall(container->callback, id_call, 4, *container->context, Qnil, INT2NUM(n_new), container->user_data); } + if (NIL_P(container->callbacks)) { + return; + } const long callbacks_len = RARRAY_LEN(container->callbacks); if (0 == callbacks_len) { return; @@ -150,6 +163,9 @@ static void progress_callback(struct whisper_context *ctx, struct whisper_state if (!NIL_P(container->callback)) { rb_funcall(container->callback, id_call, 4, *container->context, Qnil, progress, container->user_data); } + if (NIL_P(container->callbacks)) { + return; + } const long callbacks_len = RARRAY_LEN(container->callbacks); if (0 == callbacks_len) { return; @@ -173,15 +189,17 @@ static bool encoder_begin_callback(struct 
whisper_context *ctx, struct whisper_s is_aborted = true; } } - const long callbacks_len = RARRAY_LEN(container->callbacks); - if (0 == callbacks_len) { - return !is_aborted; - } - for (int j = 0; j < callbacks_len; j++) { - VALUE cb = rb_ary_entry(container->callbacks, j); - result = rb_funcall(cb, id_call, 0); - if (result == Qfalse) { - is_aborted = true; + if (!NIL_P(container->callbacks)) { + const long callbacks_len = RARRAY_LEN(container->callbacks); + if (0 == callbacks_len) { + return !is_aborted; + } + for (int j = 0; j < callbacks_len; j++) { + VALUE cb = rb_ary_entry(container->callbacks, j); + result = rb_funcall(cb, id_call, 0); + if (result == Qfalse) { + is_aborted = true; + } } } return !is_aborted; @@ -200,6 +218,9 @@ static bool abort_callback(void * user_data) { return true; } } + if (NIL_P(container->callbacks)) { + return false; + } const long callbacks_len = RARRAY_LEN(container->callbacks); if (0 == callbacks_len) { return false; @@ -221,19 +242,19 @@ check_thread_safety(ruby_whisper_params *rwp, int n_processors) return; } - if (!NIL_P(rwp->new_segment_callback_container->callback) || 0 != RARRAY_LEN(rwp->new_segment_callback_container->callbacks)) { + if (ruby_whisper_callback_container_is_present(rwp->new_segment_callback_container)) { rb_raise(rb_eRuntimeError, "new segment callback not supported on parallel transcription"); } - if (!NIL_P(rwp->progress_callback_container->callback) || 0 != RARRAY_LEN(rwp->progress_callback_container->callbacks)) { + if (ruby_whisper_callback_container_is_present(rwp->progress_callback_container)) { rb_raise(rb_eRuntimeError, "progress callback not supported on parallel transcription"); } - if (!NIL_P(rwp->encoder_begin_callback_container->callback) || 0 != RARRAY_LEN(rwp->encoder_begin_callback_container->callbacks)) { + if (ruby_whisper_callback_container_is_present(rwp->encoder_begin_callback_container)) { rb_raise(rb_eRuntimeError, "encoder begin callback not supported on parallel transcription"); } 
- if (!NIL_P(rwp->abort_callback_container->callback) || 0 != RARRAY_LEN(rwp->abort_callback_container->callbacks)) { + if (abort_ruby_whisper_callback_container_is_present(rwp->abort_callback_container)) { rb_raise(rb_eRuntimeError, "abort callback not supported on parallel transcription"); } @@ -244,19 +265,19 @@ check_thread_safety(ruby_whisper_params *rwp, int n_processors) } static void register_callbacks(ruby_whisper_params * rwp, VALUE * context) { - if (!NIL_P(rwp->new_segment_callback_container->callback) || 0 != RARRAY_LEN(rwp->new_segment_callback_container->callbacks)) { + if (ruby_whisper_callback_container_is_present(rwp->new_segment_callback_container)) { rwp->new_segment_callback_container->context = context; rwp->params.new_segment_callback = new_segment_callback; rwp->params.new_segment_callback_user_data = rwp->new_segment_callback_container; } - if (!NIL_P(rwp->progress_callback_container->callback) || 0 != RARRAY_LEN(rwp->progress_callback_container->callbacks)) { + if (ruby_whisper_callback_container_is_present(rwp->progress_callback_container)) { rwp->progress_callback_container->context = context; rwp->params.progress_callback = progress_callback; rwp->params.progress_callback_user_data = rwp->progress_callback_container; } - if (!NIL_P(rwp->encoder_begin_callback_container->callback) || 0 != RARRAY_LEN(rwp->encoder_begin_callback_container->callbacks)) { + if (ruby_whisper_callback_container_is_present(rwp->encoder_begin_callback_container)) { rwp->encoder_begin_callback_container->context = context; rwp->params.encoder_begin_callback = encoder_begin_callback; rwp->params.encoder_begin_callback_user_data = rwp->encoder_begin_callback_container; @@ -1328,6 +1349,9 @@ ruby_whisper_params_on_new_segment(VALUE self) ruby_whisper_params *rwp; TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp); const VALUE blk = rb_block_proc(); + if (NIL_P(rwp->new_segment_callback_container->callbacks)) { + 
rwp->new_segment_callback_container->callbacks = rb_ary_new(); + } rb_ary_push(rwp->new_segment_callback_container->callbacks, blk); return Qnil; } @@ -1348,6 +1372,9 @@ ruby_whisper_params_on_progress(VALUE self) ruby_whisper_params *rwp; TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp); const VALUE blk = rb_block_proc(); + if (NIL_P(rwp->progress_callback_container->callbacks)) { + rwp->progress_callback_container->callbacks = rb_ary_new(); + } rb_ary_push(rwp->progress_callback_container->callbacks, blk); return Qnil; } @@ -1368,6 +1395,9 @@ ruby_whisper_params_on_encoder_begin(VALUE self) ruby_whisper_params *rwp; TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp); const VALUE blk = rb_block_proc(); + if (NIL_P(rwp->encoder_begin_callback_container->callbacks)) { + rwp->encoder_begin_callback_container->callbacks = rb_ary_new(); + } rb_ary_push(rwp->encoder_begin_callback_container->callbacks, blk); return Qnil; } @@ -1392,6 +1422,9 @@ ruby_whisper_params_abort_on(VALUE self) ruby_whisper_params *rwp; TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp); const VALUE blk = rb_block_proc(); + if (NIL_P(rwp->abort_callback_container->callbacks)) { + rwp->abort_callback_container->callbacks = rb_ary_new(); + } rb_ary_push(rwp->abort_callback_container->callbacks, blk); return Qnil; } From 83237e23032c1f2bef8193d9b140d48a9d33aeb6 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 03:41:04 +0900 Subject: [PATCH 15/29] Specify implicity Whisper::Params to distinguish from Whisper::Context::Params --- bindings/ruby/sig/whisper.rbs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index 94bee4b5..a7bf44e7 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -55,8 +55,8 @@ module Whisper # If `n_processors` is greater than 1, you cannot set any 
callbacks including # new_segment_callback, progress_callback, encoder_begin_callback, abort_callback, # and log_callback set by Whisper.log_set - def transcribe: (path, Params, ?n_processors: Integer) -> self - | (path, Params, ?n_processors: Integer) { (String) -> void } -> self + def transcribe: (path, Whisper::Params, ?n_processors: Integer) -> self + | (path, Whisper::Params, ?n_processors: Integer) { (String) -> void } -> self def model_n_vocab: () -> Integer def model_n_audio_ctx: () -> Integer @@ -123,8 +123,8 @@ module Whisper # # The second argument `samples` must be an array of samples, respond to `:length`, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data. # - def full: (Params, Array[Float] samples, ?Integer n_samples) -> self - | (Params, _Samples, ?Integer n_samples) -> self + def full: (Whisper::Params, Array[Float] samples, ?Integer n_samples) -> self + | (Whisper::Params, _Samples, ?Integer n_samples) -> self # Split the input audio in chunks and process each chunk separately using `whisper_full_with_state()` # Result is stored in the default state of the context @@ -135,9 +135,8 @@ module Whisper # If `n_processors` is greater than 1, you cannot set any callbacks including # new_segment_callback, progress_callback, encoder_begin_callback, abort_callback, # and log_callback set by Whisper.log_set - def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self - | (Params, _Samples, ?Integer n_samples) -> self - | (Params, _Samples, ?Integer? 
n_samples, Integer n_processors) -> self + def full_parallel: (Whisper::Params, Array[Float], ?Integer n_samples) -> self + | (Whisper::Params, _Samples, ?Integer n_samples) -> self def to_srt: () -> String def to_webvtt: () -> String From bd5507d7f6ab4e2232f2dbe1b9c83dbb4bba88f5 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 08:43:56 +0900 Subject: [PATCH 16/29] Run callbacks without GVL --- bindings/ruby/ext/ruby_whisper_params.c | 202 ++++++++++++++++++------ bindings/ruby/sig/whisper.rbs | 4 + 2 files changed, 158 insertions(+), 48 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 1b234d54..17841a53 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -129,8 +129,18 @@ abort_ruby_whisper_callback_container_is_present(ruby_whisper_abort_callback_con return !NIL_P(container->callback) || !NIL_P(container->callbacks); } -static void new_segment_callback(struct whisper_context *ctx, struct whisper_state *state, int n_new, void *user_data) { - const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; +typedef struct { + const ruby_whisper_callback_container *container; + struct whisper_state *state; + int n_new; +} call_new_segment_callbacks_args; + +static void* +call_new_segment_callbacks(void *v_args) { + call_new_segment_callbacks_args *args = (call_new_segment_callbacks_args *)v_args; + const ruby_whisper_callback_container *container = args->container; + struct whisper_state *state = args->state; + int n_new = args->n_new; // Currently, doesn't support state because // those require to resolve GC-related problems. 
@@ -138,11 +148,11 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta rb_funcall(container->callback, id_call, 4, *container->context, Qnil, INT2NUM(n_new), container->user_data); } if (NIL_P(container->callbacks)) { - return; + return NULL; } const long callbacks_len = RARRAY_LEN(container->callbacks); if (0 == callbacks_len) { - return; + return NULL; } const int n_segments = whisper_full_n_segments_from_state(state); for (int i = n_new; i > 0; i--) { @@ -153,56 +163,165 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta rb_funcall(cb, id_call, 1, segment); } } + + return NULL; +} + +static void new_segment_callback(struct whisper_context *ctx, struct whisper_state *state, int n_new, void *user_data) { + const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; + if (!ruby_whisper_callback_container_is_present(container)) { + return; + } + + call_new_segment_callbacks_args args = { + container, + state, + n_new + }; + rb_thread_call_with_gvl(call_new_segment_callbacks, (void *)&args); +} + +typedef struct { + const ruby_whisper_callback_container *container; + struct whisper_state *state; + int progress_cur; +} call_progress_callbacks_args; + +static void* +call_progress_callbacks(void *v_args) { + call_progress_callbacks_args *args = (call_progress_callbacks_args *)v_args; + const ruby_whisper_callback_container *container = args->container; + int progress_cur = args->progress_cur; + + // Currently, doesn't support state because + // those require to resolve GC-related problems. 
+ if (!NIL_P(args->container->callback)) { + rb_funcall(container->callback, id_call, 4, *container->context, Qnil, INT2NUM(progress_cur), container->user_data); + } + if (NIL_P(container->callbacks)) { + return NULL; + } + const long callbacks_len = RARRAY_LEN(container->callbacks); + if (0 == callbacks_len) { + return NULL; + } + for (int j = 0; j < callbacks_len; j++) { + VALUE cb = rb_ary_entry(container->callbacks, j); + rb_funcall(cb, id_call, 1, INT2NUM(progress_cur)); + } + + return NULL; } static void progress_callback(struct whisper_context *ctx, struct whisper_state *state, int progress_cur, void *user_data) { const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; - const VALUE progress = INT2NUM(progress_cur); - // Currently, doesn't support state because - // those require to resolve GC-related problems. - if (!NIL_P(container->callback)) { - rb_funcall(container->callback, id_call, 4, *container->context, Qnil, progress, container->user_data); - } - if (NIL_P(container->callbacks)) { + if (!ruby_whisper_callback_container_is_present(container)) { return; } - const long callbacks_len = RARRAY_LEN(container->callbacks); - if (0 == callbacks_len) { - return; - } - for (int j = 0; j < callbacks_len; j++) { - VALUE cb = rb_ary_entry(container->callbacks, j); - rb_funcall(cb, id_call, 1, progress); - } + + call_progress_callbacks_args args = { + container, + state, + progress_cur + }; + rb_thread_call_with_gvl(call_progress_callbacks, (void *)&args); } -static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_state *state, void *user_data) { - const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; - bool is_aborted = false; - VALUE result; +typedef struct { + const ruby_whisper_callback_container *container; + struct whisper_state *state; + bool is_continued; +} call_encoder_begin_callbacks_args; + +static void* +call_encoder_begin_callbacks(void 
*v_args) { + call_encoder_begin_callbacks_args *args = (call_encoder_begin_callbacks_args *)v_args; + const ruby_whisper_callback_container *container = args->container; + VALUE result = Qnil; // Currently, doesn't support state because // those require to resolve GC-related problems. if (!NIL_P(container->callback)) { result = rb_funcall(container->callback, id_call, 3, *container->context, Qnil, container->user_data); if (result == Qfalse) { - is_aborted = true; + args->is_continued = false; + return NULL; } } if (!NIL_P(container->callbacks)) { const long callbacks_len = RARRAY_LEN(container->callbacks); if (0 == callbacks_len) { - return !is_aborted; + return NULL; } for (int j = 0; j < callbacks_len; j++) { VALUE cb = rb_ary_entry(container->callbacks, j); result = rb_funcall(cb, id_call, 0); if (result == Qfalse) { - is_aborted = true; + args->is_continued = false; + return NULL; } } } - return !is_aborted; + + return NULL; +} + +static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_state *state, void *user_data) { + const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; + if (!ruby_whisper_callback_container_is_present(container)) { + return false; + } + + call_encoder_begin_callbacks_args args = { + container, + state, + true + }; + rb_thread_call_with_gvl(call_encoder_begin_callbacks, (void *)&args); + + return args.is_continued; +} + +typedef struct { + const ruby_whisper_abort_callback_container *container; + struct whisper_state *state; + bool is_interrupted; +} call_abort_callbacks_args; + +static void* +call_abort_callbacks(void *v_args) { + call_abort_callbacks_args *args = (call_abort_callbacks_args *)v_args; + const ruby_whisper_abort_callback_container *container = args->container; + + if (container->is_interrupted) { + args->is_interrupted = true; + return NULL; + } + + if (!NIL_P(container->callback)) { + VALUE result = rb_funcall(container->callback, id_call, 1, 
container->user_data); + if (!NIL_P(result) && Qfalse != result) { + args->is_interrupted = true; + return NULL; + } + } + if (NIL_P(container->callbacks)) { + return NULL; + } + const long callbacks_len = RARRAY_LEN(container->callbacks); + if (0 == callbacks_len) { + return NULL; + } + for (int j = 0; j < callbacks_len; j++) { + VALUE cb = rb_ary_entry(container->callbacks, j); + VALUE result = rb_funcall(cb, id_call, 1, container->user_data); + if (!NIL_P(result) && Qfalse != result) { + args->is_interrupted = true; + return NULL; + } + } + + return NULL; } static bool abort_callback(void * user_data) { @@ -212,27 +331,14 @@ static bool abort_callback(void * user_data) { return true; } - if (!NIL_P(container->callback)) { - VALUE result = rb_funcall(container->callback, id_call, 1, container->user_data); - if (!NIL_P(result) && Qfalse != result) { - return true; - } - } - if (NIL_P(container->callbacks)) { - return false; - } - const long callbacks_len = RARRAY_LEN(container->callbacks); - if (0 == callbacks_len) { - return false; - } - for (int j = 0; j < callbacks_len; j++) { - VALUE cb = rb_ary_entry(container->callbacks, j); - VALUE result = rb_funcall(cb, id_call, 1, container->user_data); - if (!NIL_P(result) && Qfalse != result) { - return true; - } - } - return false; + call_abort_callbacks_args args = { + container, + NULL, + false + }; + rb_thread_call_with_gvl(call_abort_callbacks, (void *)&args); + + return args.is_interrupted; } static void diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index a7bf44e7..f7304915 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -1,3 +1,7 @@ +interface _MemoryView + def initialize: (string) -> void +end + module Whisper interface _Samples def length: () -> Integer From c33b9062430b7a17b2ce8a122ab90c360b7f62af Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 09:31:28 +0900 Subject: [PATCH 17/29] Call log callback with GVL --- 
bindings/ruby/ext/ruby_whisper.c | 50 +++++++++++++++++++++++-- bindings/ruby/ext/ruby_whisper_params.c | 14 +++++++ 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper.c b/bindings/ruby/ext/ruby_whisper.c index 5f1917ee..b1a0d6a4 100644 --- a/bindings/ruby/ext/ruby_whisper.c +++ b/bindings/ruby/ext/ruby_whisper.c @@ -29,6 +29,8 @@ ID id_cache; ID id_n_processors; static bool is_log_callback_finalized = false; +static bool is_ruby_log_callback_present = false; +static bool is_without_gvl = false; // High level API extern VALUE ruby_whisper_segment_allocate(VALUE klass); @@ -106,18 +108,56 @@ static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) { return Qnil; } +void +ruby_whisper_lock_gvl(void) +{ + is_without_gvl = true; +} + +void +ruby_whisper_unlock_gvl(void) +{ + is_without_gvl = false; +} + +typedef struct { + int level; + const char * buffer; + VALUE user_data; +} call_log_callbacks_args; + +static void* +call_log_callbacks(void *v_args) { + VALUE log_callback = rb_iv_get(mWhisper, "log_callback"); + if (NIL_P(log_callback)) { + return NULL; + } + + call_log_callbacks_args *args = (call_log_callbacks_args *)v_args; + rb_funcall(log_callback, id_call, 3, INT2NUM(args->level), rb_str_new2(args->buffer), args->user_data); + + return NULL; +} + static void ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) { if (is_log_callback_finalized) { return; } - VALUE log_callback = rb_iv_get(mWhisper, "log_callback"); - if (NIL_P(log_callback)) { + if (!is_ruby_log_callback_present) { return; } - VALUE udata = rb_iv_get(mWhisper, "user_data"); - rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata); + call_log_callbacks_args args = { + level, + buffer, + rb_iv_get(mWhisper, "user_data") + }; + if (is_without_gvl) { + rb_thread_call_with_gvl(call_log_callbacks, (void *)&args); + } else { + call_log_callbacks((void *)&args); + } } /* @@ -140,8 
+180,10 @@ static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_d if (NIL_P(log_callback)) { whisper_log_set(NULL, NULL); + is_ruby_log_callback_present = false; } else { whisper_log_set(ruby_whisper_log_callback, NULL); + is_ruby_log_callback_present = true; } return Qnil; diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 17841a53..31e86f67 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -33,6 +33,8 @@ extern VALUE mWhisper; extern ID id_call; +extern void ruby_whisper_lock_gvl(void); +extern void ruby_whisper_unlock_gvl(void); extern VALUE ruby_whisper_normalize_model_path(VALUE model_path); extern VALUE rb_whisper_segment_s_new(VALUE context, int index); extern const rb_data_type_t ruby_whisper_vad_params_type; @@ -137,6 +139,8 @@ typedef struct { static void* call_new_segment_callbacks(void *v_args) { + ruby_whisper_lock_gvl(); + call_new_segment_callbacks_args *args = (call_new_segment_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; struct whisper_state *state = args->state; @@ -179,6 +183,7 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta n_new }; rb_thread_call_with_gvl(call_new_segment_callbacks, (void *)&args); + ruby_whisper_unlock_gvl(); } typedef struct { @@ -189,6 +194,8 @@ typedef struct { static void* call_progress_callbacks(void *v_args) { + ruby_whisper_lock_gvl(); + call_progress_callbacks_args *args = (call_progress_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; int progress_cur = args->progress_cur; @@ -225,6 +232,7 @@ static void progress_callback(struct whisper_context *ctx, struct whisper_state progress_cur }; rb_thread_call_with_gvl(call_progress_callbacks, (void *)&args); + ruby_whisper_unlock_gvl(); } typedef struct { @@ -235,6 +243,8 @@ typedef struct { static void* 
call_encoder_begin_callbacks(void *v_args) { + ruby_whisper_lock_gvl(); + call_encoder_begin_callbacks_args *args = (call_encoder_begin_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; VALUE result = Qnil; @@ -278,6 +288,7 @@ static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_s true }; rb_thread_call_with_gvl(call_encoder_begin_callbacks, (void *)&args); + ruby_whisper_unlock_gvl(); return args.is_continued; } @@ -290,6 +301,8 @@ typedef struct { static void* call_abort_callbacks(void *v_args) { + ruby_whisper_lock_gvl(); + call_abort_callbacks_args *args = (call_abort_callbacks_args *)v_args; const ruby_whisper_abort_callback_container *container = args->container; @@ -337,6 +350,7 @@ static bool abort_callback(void * user_data) { false }; rb_thread_call_with_gvl(call_abort_callbacks, (void *)&args); + ruby_whisper_unlock_gvl(); return args.is_interrupted; } From d314cfe47ea8eaea4fda0e27a1754890cb90d4f8 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 09:59:46 +0900 Subject: [PATCH 18/29] Run full_parallel without GVL --- bindings/ruby/ext/ruby_whisper_context.c | 40 ++++++++++++++++++++++-- bindings/ruby/ext/ruby_whisper_params.c | 8 +++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index e92605db..65c3d906 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -23,6 +23,8 @@ extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self); extern VALUE rb_whisper_model_s_new(VALUE context); extern VALUE rb_whisper_segment_s_new(VALUE context, int index); extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context, int n_processors); +extern void ruby_whisper_lock_gvl(void); +extern void ruby_whisper_unlock_gvl(void); ID transcribe_option_names[1]; @@ -55,6 +57,15 @@ typedef struct 
full_without_gvl_args { int result; } full_without_gvl_args; +typedef struct full_parallel_without_gvl_args { + struct whisper_context *context; + struct whisper_full_params *params; + float *samples; + int n_samples; + int n_processors; + int result; +} full_parallel_without_gvl_args; + typedef struct full_ubf_args { ruby_whisper_abort_callback_container *abort_callback_container; } full_ubf_args; @@ -443,6 +454,8 @@ release_samples(VALUE rb_parsed_args) static void* full_without_gvl(void *rb_args) { + ruby_whisper_unlock_gvl(); + full_without_gvl_args *args = (full_without_gvl_args *)rb_args; args->result = whisper_full(args->context, *args->params, args->samples, args->n_samples); return NULL; @@ -479,6 +492,7 @@ full_body(VALUE rb_args) rwp->abort_callback_container, }; rb_thread_call_without_gvl(full_without_gvl, (void *)&full_without_gvl_args, full_ubf, (void *)&full_ubf_args); + ruby_whisper_lock_gvl(); return INT2NUM(full_without_gvl_args.result); } @@ -517,6 +531,16 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self) } } +static void* +full_parallel_without_gvl(void *rb_args) +{ + ruby_whisper_unlock_gvl(); + + full_parallel_without_gvl_args *args = (full_parallel_without_gvl_args *)rb_args; + args->result = whisper_full_parallel(args->context, *args->params, args->samples, args->n_samples, args->n_processors); + return NULL; +} + static VALUE full_parallel_body(VALUE rb_args) { @@ -528,9 +552,21 @@ full_parallel_body(VALUE rb_args) TypedData_Get_Struct(*args->params, ruby_whisper_params, &ruby_whisper_params_type, rwp); prepare_transcription(rwp, args->context, args->n_processors); - int result = whisper_full_parallel(rw->context, rwp->params, args->samples, args->n_samples, args->n_processors); - return INT2NUM(result); + struct full_parallel_without_gvl_args full_parallel_without_gvl_args = { + rw->context, + &rwp->params, + args->samples, + args->n_samples, + args->n_processors, + 0, + }; + full_ubf_args full_ubf_args = { + 
rwp->abort_callback_container, + }; + rb_thread_call_without_gvl(full_parallel_without_gvl, (void *)&full_parallel_without_gvl_args, full_ubf, (void *)&full_ubf_args); + ruby_whisper_lock_gvl(); + return INT2NUM(full_parallel_without_gvl_args.result); } /* diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 31e86f67..40935076 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -127,7 +127,7 @@ ruby_whisper_callback_container_is_present(const ruby_whisper_callback_container } static bool -abort_ruby_whisper_callback_container_is_present(ruby_whisper_abort_callback_container *container) { +ruby_whisper_abort_callback_container_is_present(const ruby_whisper_abort_callback_container *container) { return !NIL_P(container->callback) || !NIL_P(container->callbacks); } @@ -344,6 +344,10 @@ static bool abort_callback(void * user_data) { return true; } + if (!ruby_whisper_abort_callback_container_is_present(container)) { + return false; + } + call_abort_callbacks_args args = { container, NULL, @@ -374,7 +378,7 @@ check_thread_safety(ruby_whisper_params *rwp, int n_processors) rb_raise(rb_eRuntimeError, "encoder begin callback not supported on parallel transcription"); } - if (abort_ruby_whisper_callback_container_is_present(rwp->abort_callback_container)) { + if (ruby_whisper_abort_callback_container_is_present(rwp->abort_callback_container)) { rb_raise(rb_eRuntimeError, "abort callback not supported on parallel transcription"); } From d9b0dd1fbeb43eb719a18c711bb37454502a1120 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 10:10:58 +0900 Subject: [PATCH 19/29] Run transcribe without GVL --- bindings/ruby/ext/ruby_whisper_transcribe.cpp | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_transcribe.cpp b/bindings/ruby/ext/ruby_whisper_transcribe.cpp index 3d005660..c93b6ba3 100644 --- 
a/bindings/ruby/ext/ruby_whisper_transcribe.cpp +++ b/bindings/ruby/ext/ruby_whisper_transcribe.cpp @@ -15,8 +15,39 @@ extern ID id_call; extern ID id_to_path; extern ID transcribe_option_names[1]; -extern void -prepare_transcription(ruby_whisper_params * rwp, VALUE * self, int n_processors); +extern void prepare_transcription(ruby_whisper_params * rwp, VALUE * self, int n_processors); +extern void ruby_whisper_lock_gvl(void); +extern void ruby_whisper_unlock_gvl(void); + +typedef struct transcribe_without_gvl_args { + struct whisper_context *context; + struct whisper_full_params *params; + float *samples; + size_t n_samples; + int n_processors; + int result; +} full_parallel_without_gvl_args; + +static void* +transcribe_without_gvl(void *rb_args) +{ + transcribe_without_gvl_args *args = (transcribe_without_gvl_args *)rb_args; + args->result = whisper_full_parallel(args->context, *args->params, args->samples, args->n_samples, args->n_processors); + + return NULL; +} + +typedef struct transcribe_ubf_args { + ruby_whisper_abort_callback_container *abort_callback_container; +} full_ubf_args; + +static void +transcribe_ubf(void *rb_args) +{ + transcribe_ubf_args *args = (transcribe_ubf_args *)rb_args; + + args->abort_callback_container->is_interrupted = true; +} /* * transcribe a single file @@ -75,7 +106,19 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) { prepare_transcription(rwp, &self, n_processors); - if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), n_processors) != 0) { + transcribe_without_gvl_args args = { + rw->context, + &rwp->params, + pcmf32.data(), + pcmf32.size(), + n_processors, + 0, + }; + transcribe_ubf_args ubf_args = { + rwp->abort_callback_container, + }; + rb_thread_call_without_gvl(transcribe_without_gvl, (void *)&args, transcribe_ubf, (void *)&ubf_args); + if (args.result != 0) { fprintf(stderr, "failed to process audio\n"); return self; } From 9deffc6d4162227af86d7b2ac2d1305a82ce8f72 Mon Sep 17 
00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:22:36 +0900 Subject: [PATCH 20/29] Fix ruby_whisper_lock_gvl and ruby_whisper_unlock_gvl --- bindings/ruby/ext/ruby_whisper.c | 8 ++++---- bindings/ruby/ext/ruby_whisper_context.c | 12 +++++------ bindings/ruby/ext/ruby_whisper_params.c | 20 +++++++++---------- bindings/ruby/ext/ruby_whisper_transcribe.cpp | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper.c b/bindings/ruby/ext/ruby_whisper.c index b1a0d6a4..7478d790 100644 --- a/bindings/ruby/ext/ruby_whisper.c +++ b/bindings/ruby/ext/ruby_whisper.c @@ -109,15 +109,15 @@ static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) { } void -ruby_whisper_lock_gvl(void) +ruby_whisper_gvl_locked(void) { - is_without_gvl = true; + is_without_gvl = false; } void -ruby_whisper_unlock_gvl(void) +ruby_whisper_gvl_unlocked(void) { - is_without_gvl = false; + is_without_gvl = true; } typedef struct { diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index 65c3d906..42f6fbdb 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -23,8 +23,8 @@ extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self); extern VALUE rb_whisper_model_s_new(VALUE context); extern VALUE rb_whisper_segment_s_new(VALUE context, int index); extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context, int n_processors); -extern void ruby_whisper_lock_gvl(void); -extern void ruby_whisper_unlock_gvl(void); +extern void ruby_whisper_gvl_locked(void); +extern void ruby_whisper_gvl_unlocked(void); ID transcribe_option_names[1]; @@ -454,7 +454,7 @@ release_samples(VALUE rb_parsed_args) static void* full_without_gvl(void *rb_args) { - ruby_whisper_unlock_gvl(); + ruby_whisper_gvl_unlocked(); full_without_gvl_args *args = (full_without_gvl_args *)rb_args; args->result = whisper_full(args->context, *args->params, 
args->samples, args->n_samples); @@ -492,7 +492,7 @@ full_body(VALUE rb_args) rwp->abort_callback_container, }; rb_thread_call_without_gvl(full_without_gvl, (void *)&full_without_gvl_args, full_ubf, (void *)&full_ubf_args); - ruby_whisper_lock_gvl(); + ruby_whisper_gvl_locked(); return INT2NUM(full_without_gvl_args.result); } @@ -534,7 +534,7 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self) static void* full_parallel_without_gvl(void *rb_args) { - ruby_whisper_unlock_gvl(); + ruby_whisper_gvl_unlocked(); full_parallel_without_gvl_args *args = (full_parallel_without_gvl_args *)rb_args; args->result = whisper_full_parallel(args->context, *args->params, args->samples, args->n_samples, args->n_processors); @@ -565,7 +565,7 @@ full_parallel_body(VALUE rb_args) rwp->abort_callback_container, }; rb_thread_call_without_gvl(full_parallel_without_gvl, (void *)&full_parallel_without_gvl_args, full_ubf, (void *)&full_ubf_args); - ruby_whisper_lock_gvl(); + ruby_whisper_gvl_locked(); return INT2NUM(full_parallel_without_gvl_args.result); } diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 40935076..d8fa8e69 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -33,8 +33,8 @@ extern VALUE mWhisper; extern ID id_call; -extern void ruby_whisper_lock_gvl(void); -extern void ruby_whisper_unlock_gvl(void); +extern void ruby_whisper_gvl_locked(void); +extern void ruby_whisper_gvl_unlocked(void); extern VALUE ruby_whisper_normalize_model_path(VALUE model_path); extern VALUE rb_whisper_segment_s_new(VALUE context, int index); extern const rb_data_type_t ruby_whisper_vad_params_type; @@ -139,7 +139,7 @@ typedef struct { static void* call_new_segment_callbacks(void *v_args) { - ruby_whisper_lock_gvl(); + ruby_whisper_gvl_locked(); call_new_segment_callbacks_args *args = (call_new_segment_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; 
@@ -183,7 +183,7 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta n_new }; rb_thread_call_with_gvl(call_new_segment_callbacks, (void *)&args); - ruby_whisper_unlock_gvl(); + ruby_whisper_gvl_unlocked(); } typedef struct { @@ -194,7 +194,7 @@ typedef struct { static void* call_progress_callbacks(void *v_args) { - ruby_whisper_lock_gvl(); + ruby_whisper_gvl_locked(); call_progress_callbacks_args *args = (call_progress_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; @@ -232,7 +232,7 @@ static void progress_callback(struct whisper_context *ctx, struct whisper_state progress_cur }; rb_thread_call_with_gvl(call_progress_callbacks, (void *)&args); - ruby_whisper_unlock_gvl(); + ruby_whisper_gvl_unlocked(); } typedef struct { @@ -243,7 +243,7 @@ typedef struct { static void* call_encoder_begin_callbacks(void *v_args) { - ruby_whisper_lock_gvl(); + ruby_whisper_gvl_locked(); call_encoder_begin_callbacks_args *args = (call_encoder_begin_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; @@ -288,7 +288,7 @@ static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_s true }; rb_thread_call_with_gvl(call_encoder_begin_callbacks, (void *)&args); - ruby_whisper_unlock_gvl(); + ruby_whisper_gvl_unlocked(); return args.is_continued; } @@ -301,7 +301,7 @@ typedef struct { static void* call_abort_callbacks(void *v_args) { - ruby_whisper_lock_gvl(); + ruby_whisper_gvl_locked(); call_abort_callbacks_args *args = (call_abort_callbacks_args *)v_args; const ruby_whisper_abort_callback_container *container = args->container; @@ -354,7 +354,7 @@ static bool abort_callback(void * user_data) { false }; rb_thread_call_with_gvl(call_abort_callbacks, (void *)&args); - ruby_whisper_unlock_gvl(); + ruby_whisper_gvl_unlocked(); return args.is_interrupted; } diff --git a/bindings/ruby/ext/ruby_whisper_transcribe.cpp 
b/bindings/ruby/ext/ruby_whisper_transcribe.cpp index c93b6ba3..082b3e4a 100644 --- a/bindings/ruby/ext/ruby_whisper_transcribe.cpp +++ b/bindings/ruby/ext/ruby_whisper_transcribe.cpp @@ -16,8 +16,8 @@ extern ID id_to_path; extern ID transcribe_option_names[1]; extern void prepare_transcription(ruby_whisper_params * rwp, VALUE * self, int n_processors); -extern void ruby_whisper_lock_gvl(void); -extern void ruby_whisper_unlock_gvl(void); +extern void ruby_whisper_gvl_locked(void); +extern void ruby_whisper_gvl_unlocked(void); typedef struct transcribe_without_gvl_args { struct whisper_context *context; From 944a787efd8985ba2060539a941d3d75b1c0a834 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:28:07 +0900 Subject: [PATCH 21/29] Fix return value of encoder_begin_callback --- bindings/ruby/ext/ruby_whisper_params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index d8fa8e69..93d76486 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -279,7 +279,7 @@ call_encoder_begin_callbacks(void *v_args) { static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_state *state, void *user_data) { const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; if (!ruby_whisper_callback_container_is_present(container)) { - return false; + return true; } call_encoder_begin_callbacks_args args = { From f6da758e00b454d180e6794184a10e9fb729ba75 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:28:18 +0900 Subject: [PATCH 22/29] Report GVL unlocking from transcribe --- bindings/ruby/ext/ruby_whisper_transcribe.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bindings/ruby/ext/ruby_whisper_transcribe.cpp b/bindings/ruby/ext/ruby_whisper_transcribe.cpp index 082b3e4a..148eeae9 100644 --- 
a/bindings/ruby/ext/ruby_whisper_transcribe.cpp +++ b/bindings/ruby/ext/ruby_whisper_transcribe.cpp @@ -31,6 +31,8 @@ typedef struct transcribe_without_gvl_args { static void* transcribe_without_gvl(void *rb_args) { + ruby_whisper_gvl_unlocked(); + transcribe_without_gvl_args *args = (transcribe_without_gvl_args *)rb_args; args->result = whisper_full_parallel(args->context, *args->params, args->samples, args->n_samples, args->n_processors); @@ -118,6 +120,7 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) { rwp->abort_callback_container, }; rb_thread_call_without_gvl(transcribe_without_gvl, (void *)&args, transcribe_ubf, (void *)&ubf_args); + ruby_whisper_gvl_locked(); if (args.result != 0) { fprintf(stderr, "failed to process audio\n"); return self; From 0eb7a441ae60193e8d9df9d11cd3962e2d9885d6 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:30:21 +0900 Subject: [PATCH 23/29] Remove unused interface --- bindings/ruby/sig/whisper.rbs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index f7304915..a7bf44e7 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -1,7 +1,3 @@ -interface _MemoryView - def initialize: (string) -> void -end - module Whisper interface _Samples def length: () -> Integer From d61ba6fe347933e239b3b2c5bda35c993db74ad8 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:31:06 +0900 Subject: [PATCH 24/29] Restore overload of full_parallel --- bindings/ruby/sig/whisper.rbs | 1 + 1 file changed, 1 insertion(+) diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index a7bf44e7..cbec4803 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -137,6 +137,7 @@ module Whisper # and log_callback set by Whisper.log_set def full_parallel: (Whisper::Params, Array[Float], ?Integer n_samples) -> self | (Whisper::Params, _Samples, ?Integer n_samples) -> self + | 
(Whisper::Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self def to_srt: () -> String def to_webvtt: () -> String From 94e2163514731d515ba54e859f00ba24b5d1b510 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:32:08 +0900 Subject: [PATCH 25/29] Close process --- bindings/ruby/ext/options.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/ruby/ext/options.rb b/bindings/ruby/ext/options.rb index 09baf9b2..5fe600a6 100644 --- a/bindings/ruby/ext/options.rb +++ b/bindings/ruby/ext/options.rb @@ -18,7 +18,7 @@ class Options output = nil Dir.chdir __dir__ do - output = IO.popen([@cmake, "-S", "sources", "-B", "build", "-L"]).read + output = IO.popen([@cmake, "-S", "sources", "-B", "build", "-L"]) {|io| io.read} end @cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1) .filter_map {|line| From 35eda01cb4de7f81137bd7a703b52af49688b03d Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 21:32:14 +0900 Subject: [PATCH 26/29] Fix struct name --- bindings/ruby/ext/ruby_whisper_transcribe.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper_transcribe.cpp b/bindings/ruby/ext/ruby_whisper_transcribe.cpp index 148eeae9..e9313fc2 100644 --- a/bindings/ruby/ext/ruby_whisper_transcribe.cpp +++ b/bindings/ruby/ext/ruby_whisper_transcribe.cpp @@ -19,14 +19,14 @@ extern void prepare_transcription(ruby_whisper_params * rwp, VALUE * self, int n extern void ruby_whisper_gvl_locked(void); extern void ruby_whisper_gvl_unlocked(void); -typedef struct transcribe_without_gvl_args { +typedef struct{ struct whisper_context *context; struct whisper_full_params *params; float *samples; size_t n_samples; int n_processors; int result; -} full_parallel_without_gvl_args; +} transcribe_without_gvl_args; static void* transcribe_without_gvl(void *rb_args) @@ -39,9 +39,9 @@ transcribe_without_gvl(void *rb_args) return NULL; } 
-typedef struct transcribe_ubf_args { +typedef struct { ruby_whisper_abort_callback_container *abort_callback_container; -} full_ubf_args; +} transcribe_ubf_args; static void transcribe_ubf(void *rb_args) From a841074284135584c7af53669e90cba3b112977a Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 26 Apr 2026 22:20:44 +0900 Subject: [PATCH 27/29] Make is_without_gvl thread local --- bindings/ruby/ext/ruby_whisper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/ruby/ext/ruby_whisper.c b/bindings/ruby/ext/ruby_whisper.c index 7478d790..acced19d 100644 --- a/bindings/ruby/ext/ruby_whisper.c +++ b/bindings/ruby/ext/ruby_whisper.c @@ -30,7 +30,7 @@ ID id_n_processors; static bool is_log_callback_finalized = false; static bool is_ruby_log_callback_present = false; -static bool is_without_gvl = false; +static _Thread_local bool is_without_gvl = false; // High level API extern VALUE ruby_whisper_segment_allocate(VALUE klass); From a62c44d722c69118caf6b301634f2f86e89c45f0 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 27 Apr 2026 08:52:56 +0900 Subject: [PATCH 28/29] Use rb_thread_call_with_gvl instead of global variable --- bindings/ruby/ext/ruby_whisper.c | 19 +++---------------- bindings/ruby/ext/ruby_whisper_context.c | 8 -------- bindings/ruby/ext/ruby_whisper_params.c | 14 -------------- bindings/ruby/ext/ruby_whisper_transcribe.cpp | 5 ----- 4 files changed, 3 insertions(+), 43 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper.c b/bindings/ruby/ext/ruby_whisper.c index acced19d..9227dba6 100644 --- a/bindings/ruby/ext/ruby_whisper.c +++ b/bindings/ruby/ext/ruby_whisper.c @@ -30,7 +30,6 @@ ID id_n_processors; static bool is_log_callback_finalized = false; static bool is_ruby_log_callback_present = false; -static _Thread_local bool is_without_gvl = false; // High level API extern VALUE ruby_whisper_segment_allocate(VALUE klass); @@ -108,18 +107,6 @@ static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, 
VALUE id) { return Qnil; } -void -ruby_whisper_gvl_locked(void) -{ - is_without_gvl = false; -} - -void -ruby_whisper_gvl_unlocked(void) -{ - is_without_gvl = true; -} - typedef struct { int level; const char * buffer; @@ -153,10 +140,10 @@ ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * buffer, rb_iv_get(mWhisper, "user_data") }; - if (is_without_gvl) { - rb_thread_call_with_gvl(call_log_callbacks, (void *)&args); - } else { + if (ruby_thread_has_gvl_p()) { call_log_callbacks((void *)&args); + } else { + rb_thread_call_with_gvl(call_log_callbacks, (void *)&args); } } diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index 42f6fbdb..2428aeff 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -23,8 +23,6 @@ extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self); extern VALUE rb_whisper_model_s_new(VALUE context); extern VALUE rb_whisper_segment_s_new(VALUE context, int index); extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context, int n_processors); -extern void ruby_whisper_gvl_locked(void); -extern void ruby_whisper_gvl_unlocked(void); ID transcribe_option_names[1]; @@ -454,8 +452,6 @@ release_samples(VALUE rb_parsed_args) static void* full_without_gvl(void *rb_args) { - ruby_whisper_gvl_unlocked(); - full_without_gvl_args *args = (full_without_gvl_args *)rb_args; args->result = whisper_full(args->context, *args->params, args->samples, args->n_samples); return NULL; @@ -492,7 +488,6 @@ full_body(VALUE rb_args) rwp->abort_callback_container, }; rb_thread_call_without_gvl(full_without_gvl, (void *)&full_without_gvl_args, full_ubf, (void *)&full_ubf_args); - ruby_whisper_gvl_locked(); return INT2NUM(full_without_gvl_args.result); } @@ -534,8 +529,6 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self) static void* full_parallel_without_gvl(void *rb_args) { - ruby_whisper_gvl_unlocked(); - 
full_parallel_without_gvl_args *args = (full_parallel_without_gvl_args *)rb_args; args->result = whisper_full_parallel(args->context, *args->params, args->samples, args->n_samples, args->n_processors); return NULL; @@ -565,7 +558,6 @@ full_parallel_body(VALUE rb_args) rwp->abort_callback_container, }; rb_thread_call_without_gvl(full_parallel_without_gvl, (void *)&full_parallel_without_gvl_args, full_ubf, (void *)&full_ubf_args); - ruby_whisper_gvl_locked(); return INT2NUM(full_parallel_without_gvl_args.result); } diff --git a/bindings/ruby/ext/ruby_whisper_params.c b/bindings/ruby/ext/ruby_whisper_params.c index 93d76486..2aae7c12 100644 --- a/bindings/ruby/ext/ruby_whisper_params.c +++ b/bindings/ruby/ext/ruby_whisper_params.c @@ -33,8 +33,6 @@ extern VALUE mWhisper; extern ID id_call; -extern void ruby_whisper_gvl_locked(void); -extern void ruby_whisper_gvl_unlocked(void); extern VALUE ruby_whisper_normalize_model_path(VALUE model_path); extern VALUE rb_whisper_segment_s_new(VALUE context, int index); extern const rb_data_type_t ruby_whisper_vad_params_type; @@ -139,8 +137,6 @@ typedef struct { static void* call_new_segment_callbacks(void *v_args) { - ruby_whisper_gvl_locked(); - call_new_segment_callbacks_args *args = (call_new_segment_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; struct whisper_state *state = args->state; @@ -183,7 +179,6 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta n_new }; rb_thread_call_with_gvl(call_new_segment_callbacks, (void *)&args); - ruby_whisper_gvl_unlocked(); } typedef struct { @@ -194,8 +189,6 @@ typedef struct { static void* call_progress_callbacks(void *v_args) { - ruby_whisper_gvl_locked(); - call_progress_callbacks_args *args = (call_progress_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; int progress_cur = args->progress_cur; @@ -232,7 +225,6 @@ static void progress_callback(struct 
whisper_context *ctx, struct whisper_state progress_cur }; rb_thread_call_with_gvl(call_progress_callbacks, (void *)&args); - ruby_whisper_gvl_unlocked(); } typedef struct { @@ -243,8 +235,6 @@ typedef struct { static void* call_encoder_begin_callbacks(void *v_args) { - ruby_whisper_gvl_locked(); - call_encoder_begin_callbacks_args *args = (call_encoder_begin_callbacks_args *)v_args; const ruby_whisper_callback_container *container = args->container; VALUE result = Qnil; @@ -288,7 +278,6 @@ static bool encoder_begin_callback(struct whisper_context *ctx, struct whisper_s true }; rb_thread_call_with_gvl(call_encoder_begin_callbacks, (void *)&args); - ruby_whisper_gvl_unlocked(); return args.is_continued; } @@ -301,8 +290,6 @@ typedef struct { static void* call_abort_callbacks(void *v_args) { - ruby_whisper_gvl_locked(); - call_abort_callbacks_args *args = (call_abort_callbacks_args *)v_args; const ruby_whisper_abort_callback_container *container = args->container; @@ -354,7 +341,6 @@ static bool abort_callback(void * user_data) { false }; rb_thread_call_with_gvl(call_abort_callbacks, (void *)&args); - ruby_whisper_gvl_unlocked(); return args.is_interrupted; } diff --git a/bindings/ruby/ext/ruby_whisper_transcribe.cpp b/bindings/ruby/ext/ruby_whisper_transcribe.cpp index e9313fc2..37656af1 100644 --- a/bindings/ruby/ext/ruby_whisper_transcribe.cpp +++ b/bindings/ruby/ext/ruby_whisper_transcribe.cpp @@ -16,8 +16,6 @@ extern ID id_to_path; extern ID transcribe_option_names[1]; extern void prepare_transcription(ruby_whisper_params * rwp, VALUE * self, int n_processors); -extern void ruby_whisper_gvl_locked(void); -extern void ruby_whisper_gvl_unlocked(void); typedef struct{ struct whisper_context *context; @@ -31,8 +29,6 @@ typedef struct{ static void* transcribe_without_gvl(void *rb_args) { - ruby_whisper_gvl_unlocked(); - transcribe_without_gvl_args *args = (transcribe_without_gvl_args *)rb_args; args->result = whisper_full_parallel(args->context, *args->params, 
args->samples, args->n_samples, args->n_processors); @@ -120,7 +116,6 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) { rwp->abort_callback_container, }; rb_thread_call_without_gvl(transcribe_without_gvl, (void *)&args, transcribe_ubf, (void *)&ubf_args); - ruby_whisper_gvl_locked(); if (args.result != 0) { fprintf(stderr, "failed to process audio\n"); return self; From 59aacc8afa8f84863a2a24d538cb3f3afa415489 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 27 Apr 2026 09:19:33 +0900 Subject: [PATCH 29/29] Retrieve instance variable in GVL --- bindings/ruby/ext/ruby_whisper.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bindings/ruby/ext/ruby_whisper.c b/bindings/ruby/ext/ruby_whisper.c index 9227dba6..56fceb1c 100644 --- a/bindings/ruby/ext/ruby_whisper.c +++ b/bindings/ruby/ext/ruby_whisper.c @@ -110,7 +110,6 @@ static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) { typedef struct { int level; const char * buffer; - VALUE user_data; } call_log_callbacks_args; static void* @@ -121,7 +120,8 @@ call_log_callbacks(void *v_args) { } call_log_callbacks_args *args = (call_log_callbacks_args *)v_args; - rb_funcall(log_callback, id_call, 3, INT2NUM(args->level), rb_str_new2(args->buffer), args->user_data); + VALUE user_data = rb_iv_get(mWhisper, "user_data"); + rb_funcall(log_callback, id_call, 3, INT2NUM(args->level), rb_str_new2(args->buffer), user_data); return NULL; } @@ -138,7 +138,6 @@ ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * call_log_callbacks_args args = { level, buffer, - rb_iv_get(mWhisper, "user_data") }; if (ruby_thread_has_gvl_p()) { call_log_callbacks((void *)&args);