Skip to content

Commit 2b09b82

Browse files
authored
Merge pull request #124 from absadiki/fix/ui-freeze-gil-release
Fix/UI freeze gil release
2 parents 5b34ab4 + 85e0868 commit 2b09b82

4 files changed

Lines changed: 50 additions & 9 deletions

File tree

.github/workflows/wheels.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
strategy:
3838
fail-fast: false
3939
matrix:
40-
os: [ubuntu-24.04-arm, ubuntu-latest, windows-2019, macos-latest]
40+
os: [ubuntu-24.04-arm, ubuntu-latest, windows-2022, macos-latest]
4141

4242
steps:
4343
- uses: actions/checkout@v4
@@ -72,12 +72,14 @@ jobs:
7272
path: wheelhouse/*.whl
7373

7474
test_wheels:
75-
name: Test wheels on ${{ matrix.os }}
75+
name: Test wheels on ${{ matrix.os }} (Python ${{ matrix.python-version }})
7676
runs-on: ${{ matrix.os }}
7777
needs: build_wheels
7878
strategy:
79+
fail-fast: false
7980
matrix:
8081
os: [ubuntu-latest, windows-latest, macos-latest, ubuntu-24.04-arm]
82+
python-version: [3.11, 3.12, 3.13]
8183

8284
steps:
8385
- uses: actions/checkout@v4
@@ -97,7 +99,7 @@ jobs:
9799
- name: Set up Python
98100
uses: actions/setup-python@v5
99101
with:
100-
python-version: '3.11'
102+
python-version: ${{ matrix.python-version }}
101103

102104
- name: Install dependencies
103105
run: |

requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
numpy
22
sounddevice~=0.4.6
33
webrtcvad~=2.0.10
4-
requests~=2.28.2
5-
tqdm~=4.65.0
6-
platformdirs~=3.1.1
4+
requests
5+
tqdm
6+
platformdirs

src/main.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ int whisper_full_wrapper(
221221
int n_samples){
222222
py::buffer_info buf = samples.request();
223223
float *samples_ptr = static_cast<float *>(buf.ptr);
224+
225+
py::gil_scoped_release release;
224226
return whisper_full(ctx_w->ptr, params, samples_ptr, n_samples);
225227
}
226228

@@ -232,11 +234,14 @@ int whisper_full_parallel_wrapper(
232234
int n_processors){
233235
py::buffer_info buf = samples.request();
234236
float *samples_ptr = static_cast<float *>(buf.ptr);
237+
238+
py::gil_scoped_release release;
235239
return whisper_full_parallel(ctx_w->ptr, params, samples_ptr, n_samples, n_processors);
236240
}
237241

238242

239243
// Thin binding wrapper: forwards the wrapped context pointer (ctx->ptr) to
// whisper_full_n_segments() and returns its result unchanged.
//
// `release` is a pybind11 scoped guard that drops the Python GIL for the
// lifetime of the object, i.e. for the duration of the underlying call; the
// GIL is re-acquired when `release` is destroyed on return.
//
// NOTE(review): in this commit the GIL release moved from the
// DEF_RELEASE_GIL registration macro into the wrapper body. Presumably this
// accessor is cheap, so releasing the GIL here may be unnecessary - confirm
// against the whisper.cpp implementation.
int whisper_full_n_segments_wrapper(struct whisper_context_wrapper * ctx){
244+
py::gil_scoped_release release;
240245
return whisper_full_n_segments(ctx->ptr);
241246
}
242247

@@ -637,14 +642,14 @@ PYBIND11_MODULE(_pywhispercpp, m) {
637642

638643
m.def("whisper_full_default_params", &whisper_full_default_params_wrapper);
639644

640-
DEF_RELEASE_GIL("whisper_full", &whisper_full_wrapper, "Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text\n"
645+
m.def("whisper_full", &whisper_full_wrapper, "Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text\n"
641646
"Uses the specified decoding strategy to obtain the text.\n");
642647

643-
DEF_RELEASE_GIL("whisper_full_parallel", &whisper_full_parallel_wrapper, "Split the input audio in chunks and process each chunk separately using whisper_full()\n"
648+
m.def("whisper_full_parallel", &whisper_full_parallel_wrapper, "Split the input audio in chunks and process each chunk separately using whisper_full()\n"
644649
"It seems this approach can offer some speedup in some cases.\n"
645650
"However, the transcription accuracy can be worse at the beginning and end of each chunk.");
646651

647-
DEF_RELEASE_GIL("whisper_full_n_segments", &whisper_full_n_segments_wrapper, "Number of generated text segments.\n"
652+
m.def("whisper_full_n_segments", &whisper_full_n_segments_wrapper, "Number of generated text segments.\n"
648653
"A segment can be a few words, a sentence, or even a paragraph.\n");
649654

650655
m.def("whisper_full_lang_id", &whisper_full_lang_id_wrapper, "Language id associated with the current context");

tests/test_segfault.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
5+
import unittest
6+
from pathlib import Path
7+
from unittest import TestCase
8+
9+
from pywhispercpp.model import Model, Segment
10+
11+
if __name__ == '__main__':
12+
pass
13+
14+
WHISPER_CPP_DIR = Path(__file__).parent.parent / 'whisper.cpp'
15+
16+
# Regression test: transcribes the sample file at
# WHISPER_CPP_DIR / 'samples/jfk.wav' with the 'tiny.en-q5_1' model and checks
# that the expected phrase appears in the returned text.
class TestSegfault(TestCase):
17+
audio_file = WHISPER_CPP_DIR/ 'samples/jfk.wav'
18+
19+
# Helper (name lacks the `test` prefix, so the default unittest loader does
# not collect it). Despite its name, `tmp_path` is the audio path that is
# handed straight to Model.transcribe(); the caller below passes
# str(self.audio_file).
def voice_to_text(self, tmp_path):
20+
# n_threads=1 is 3x faster than n_threads=6 when running in Docker.
21+
whisper_model = Model('tiny.en-q5_1', n_threads=1)
22+
segments: list[Segment] = whisper_model.transcribe(tmp_path)
23+
# Take the first segment whose text is non-empty and contains neither '('
# nor '['. NOTE(review): next() has no default here, so StopIteration is
# raised if no segment qualifies - presumably acceptable for this test.
text = next((segment.text for segment in segments if segment.text and '(' not in segment.text and '[' not in segment.text))
24+
return text
25+
26+
# Case-insensitive containment check of the expected phrase in the text
# returned by voice_to_text().
def test_sample_file(self):
27+
expected_text = "ask not what your country can do for you"
28+
text = self.voice_to_text(str(self.audio_file))
29+
self.assertIn(expected_text.lower(), text.lower(),
30+
f"Expected text '{expected_text}' not found in transcription: '{text}'")
31+
32+
33+
if __name__ == '__main__':
34+
unittest.main()

0 commit comments

Comments
 (0)