Merge pull request #124 from absadiki/fix/ui-freeze-gil-release

absadiki · web-flow · commit 2b09b82f681d · 2025-07-01T14:48:19.000-04:00
Fix/UI freeze gil release
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -37,7 +37,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-24.04-arm, ubuntu-latest, windows-2019, macos-latest]
+        os: [ubuntu-24.04-arm, ubuntu-latest, windows-2022, macos-latest]
 
     steps:
     - uses: actions/checkout@v4
@@ -72,12 +72,14 @@ jobs:
         path: wheelhouse/*.whl
 
   test_wheels:
-    name: Test wheels on ${{ matrix.os }}
+    name: Test wheels on ${{ matrix.os }} (Python ${{ matrix.python-version }})
     runs-on: ${{ matrix.os }}
     needs: build_wheels
     strategy:
+      fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest, ubuntu-24.04-arm]
+        python-version: [3.11, 3.12, 3.13]
 
     steps:
     - uses: actions/checkout@v4
@@ -97,7 +99,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: '3.11'
+        python-version: ${{ matrix.python-version }}
 
     - name: Install dependencies
       run: |
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 numpy
 sounddevice~=0.4.6
 webrtcvad~=2.0.10
-requests~=2.28.2
-tqdm~=4.65.0
-platformdirs~=3.1.1
+requests
+tqdm
+platformdirs
diff --git a/src/main.cpp b/src/main.cpp
@@ -221,6 +221,8 @@ int whisper_full_wrapper(
         int   n_samples){
     py::buffer_info buf = samples.request();
     float *samples_ptr = static_cast<float *>(buf.ptr);
+
+    py::gil_scoped_release release;
     return whisper_full(ctx_w->ptr, params, samples_ptr, n_samples);
 }
 
@@ -232,11 +234,14 @@ int whisper_full_parallel_wrapper(
         int n_processors){
     py::buffer_info buf = samples.request();
     float *samples_ptr = static_cast<float *>(buf.ptr);
+
+    py::gil_scoped_release release;
     return whisper_full_parallel(ctx_w->ptr, params, samples_ptr, n_samples, n_processors);
 }
 
 
 int whisper_full_n_segments_wrapper(struct whisper_context_wrapper * ctx){
+    py::gil_scoped_release release;
     return whisper_full_n_segments(ctx->ptr);
 }
 
@@ -637,14 +642,14 @@ PYBIND11_MODULE(_pywhispercpp, m) {
 
     m.def("whisper_full_default_params", &whisper_full_default_params_wrapper);
 
-    DEF_RELEASE_GIL("whisper_full", &whisper_full_wrapper, "Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text\n"
+    m.def("whisper_full", &whisper_full_wrapper, "Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text\n"
                                                  "Uses the specified decoding strategy to obtain the text.\n");
 
-    DEF_RELEASE_GIL("whisper_full_parallel", &whisper_full_parallel_wrapper, "Split the input audio in chunks and process each chunk separately using whisper_full()\n"
+    m.def("whisper_full_parallel", &whisper_full_parallel_wrapper, "Split the input audio in chunks and process each chunk separately using whisper_full()\n"
                                                                     "It seems this approach can offer some speedup in some cases.\n"
                                                                     "However, the transcription accuracy can be worse at the beginning and end of each chunk.");
 
-    DEF_RELEASE_GIL("whisper_full_n_segments", &whisper_full_n_segments_wrapper, "Number of generated text segments.\n"
+    m.def("whisper_full_n_segments", &whisper_full_n_segments_wrapper, "Number of generated text segments.\n"
                                                                        "A segment can be a few words, a sentence, or even a paragraph.\n");
 
     m.def("whisper_full_lang_id", &whisper_full_lang_id_wrapper, "Language id associated with the current context");
diff --git a/tests/test_segfault.py b/tests/test_segfault.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Transcribe the whisper.cpp sample recording and check the resulting text."""
+
+import unittest
+from pathlib import Path
+from unittest import TestCase
+
+from pywhispercpp.model import Model, Segment
+
+WHISPER_CPP_DIR = Path(__file__).parent.parent / 'whisper.cpp'
+
+
+class TestSegfault(TestCase):
+    """End-to-end transcription test using ``samples/jfk.wav``."""
+
+    audio_file = WHISPER_CPP_DIR / 'samples/jfk.wav'
+
+    def voice_to_text(self, tmp_path):
+        """Transcribe ``tmp_path`` and return the first usable segment text.
+
+        Segments that are empty or contain '(' or '[' are skipped
+        (presumably non-speech annotations -- confirm against model output).
+        Fails the test if no segment qualifies.
+        """
+        # n_threads=1 is 3x faster than n_threads=6 when running in Docker.
+        whisper_model = Model('tiny.en-q5_1', n_threads=1)
+        segments: list[Segment] = whisper_model.transcribe(tmp_path)
+        candidates = (
+            segment.text for segment in segments
+            if segment.text and '(' not in segment.text and '[' not in segment.text
+        )
+        # A default avoids a bare StopIteration error when nothing qualifies;
+        # the assertion below reports it as a readable failure instead.
+        text = next(candidates, None)
+        self.assertIsNotNone(text, f"No usable segment in transcription of '{tmp_path}'")
+        return text
+
+    def test_sample_file(self):
+        """The transcription of the sample contains the expected sentence."""
+        expected_text = "ask not what your country can do for you"
+        text = self.voice_to_text(str(self.audio_file))
+        self.assertIn(expected_text.lower(), text.lower(),
+                      f"Expected text '{expected_text}' not found in transcription: '{text}'")
+
+
+if __name__ == '__main__':
+    unittest.main()