Merge pull request #166 from scottmonster/typing

absadiki · web-flow · commit c15fff50577f · 2026-05-18T22:05:04.000-04:00
add type support for model.py
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,4 +1,6 @@
 include README.md LICENSE pybind11/LICENSE version.txt
+include pywhispercpp/model.pyi
+include pywhispercpp/py.typed
 graft pybind11/include
 graft pybind11/tools
 graft src
diff --git a/pywhispercpp/model.pyi b/pywhispercpp/model.pyi
@@ -0,0 +1,148 @@
+from __future__ import annotations
+
+from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, TypedDict, Union
+
+import numpy as np
+import numpy.typing as npt
+
+AudioArray = npt.NDArray[np.float32]
+AudioInput = Union[str, AudioArray]
+
+
+class GreedyParams(TypedDict):  # dict shape annotating the `greedy` argument of Model.__init__ and Model.transcribe
+    best_of: int  # required key; the Model signatures use -1 as its default value
+
+
+class BeamSearchParams(TypedDict):  # dict shape annotating the `beam_search` argument of Model.__init__ and Model.transcribe
+    beam_size: int  # required key; the Model signatures use -1 as its default value
+    patience: float  # required key; the Model signatures use -1.0 as its default value
+
+
+class Segment:  # stub for the element type of Model.transcribe()'s result list; also the argument type of the new-segment callback
+    t0: int  # NOTE(review): presumably the segment start timestamp; unit is not shown in this stub, confirm in model.py
+    t1: int  # NOTE(review): presumably the segment end timestamp; confirm in model.py
+    text: str
+    probability: float  # optional in __init__, where it defaults to NaN
+
+    def __init__(self, t0: int, t1: int, text: str, probability: float = np.nan)->None: ...
+    def __str__(self)->str: ...
+    def __repr__(self)->str: ...
+
+
+class Model:
+    _new_segment_callback: Optional[Callable[[Segment], None]]
+
+    def __init__(
+        self,
+        model: str = 'tiny',
+        models_dir: Optional[str] = None,
+        params_sampling_strategy: int = 0,
+        redirect_whispercpp_logs_to: Union[bool, TextIO, str, None] = False,
+        use_openvino: bool = False,
+        openvino_model_path: Optional[str] = None,
+        openvino_device: str = 'CPU',
+        openvino_cache_dir: Optional[str] = None,
+        *,
+        n_threads: Optional[int] = None,
+        n_max_text_ctx: int = 16384,
+        offset_ms: int = 0,
+        duration_ms: int = 0,
+        translate: bool = False,
+        no_context: bool = False,
+        single_segment: bool = False,
+        print_special: bool = False,
+        print_progress: bool = True,
+        print_realtime: bool = False,
+        print_timestamps: bool = True,
+        token_timestamps: bool = False,
+        thold_pt: float = 0.01,
+        thold_ptsum: float = 0.01,
+        max_len: int = 0,
+        split_on_word: bool = False,
+        max_tokens: int = 0,
+        audio_ctx: int = 0,
+        initial_prompt: Optional[str] = None,
+        prompt_tokens: Optional[Tuple[Any, ...]] = None,
+        prompt_n_tokens: int = 0,
+        language: str = '',
+        suppress_blank: bool = True,
+        suppress_non_speech_tokens: bool = False,
+        temperature: float = 0.0,
+        max_initial_ts: float = 1.0,
+        length_penalty: float = -1.0,
+        temperature_inc: float = 0.2,
+        entropy_thold: float = 2.4,
+        logprob_thold: float = -1.0,
+        no_speech_thold: float = 0.6,
+        greedy: GreedyParams = {'best_of': -1},
+        beam_search: BeamSearchParams = {'beam_size': -1, 'patience': -1.0},
+        vad: bool = False,
+        vad_model_path: Optional[str] = None,
+        **params
+    )->None: ...
+
+    def transcribe(
+        self,
+        media: AudioInput,
+        n_processors: Optional[int] = None,
+        new_segment_callback: Optional[Callable[[Segment], None]] = None,
+        *,
+        n_threads: Optional[int] = None,
+        n_max_text_ctx: int = 16384,
+        offset_ms: int = 0,
+        duration_ms: int = 0,
+        translate: bool = False,
+        no_context: bool = False,
+        single_segment: bool = False,
+        print_special: bool = False,
+        print_progress: bool = True,
+        print_realtime: bool = False,
+        print_timestamps: bool = True,
+        token_timestamps: bool = False,
+        thold_pt: float = 0.01,
+        thold_ptsum: float = 0.01,
+        max_len: int = 0,
+        split_on_word: bool = False,
+        max_tokens: int = 0,
+        audio_ctx: int = 0,
+        initial_prompt: Optional[str] = None,
+        prompt_tokens: Optional[Tuple[Any, ...]] = None,
+        prompt_n_tokens: int = 0,
+        language: str = '',
+        suppress_blank: bool = True,
+        suppress_non_speech_tokens: bool = False,
+        temperature: float = 0.0,
+        max_initial_ts: float = 1.0,
+        length_penalty: float = -1.0,
+        temperature_inc: float = 0.2,
+        entropy_thold: float = 2.4,
+        logprob_thold: float = -1.0,
+        no_speech_thold: float = 0.6,
+        greedy: GreedyParams = {'best_of': -1},
+        beam_search: BeamSearchParams = {'beam_size': -1, 'patience': -1.0},
+        extract_probability: bool = False,
+        vad: bool = False,
+        vad_model_path: Optional[str] = None,
+        **params
+    ) -> List[Segment]: ...
+
+    def get_params(self) -> Dict[str, Any]: ...
+    @staticmethod
+    def get_params_schema() -> Dict[str, Dict[str, Any]]: ...
+    @staticmethod
+    def lang_max_id() -> int: ...
+    def print_timings(self) -> None: ...
+    @staticmethod
+    def system_info() -> Any: ...
+    @staticmethod
+    def available_languages() -> List[str]: ...
+    @staticmethod
+    def _load_audio(media_file_path: str) -> AudioArray: ...
+    def auto_detect_language(
+        self,
+        media: AudioInput,
+        offset_ms: int = 0,
+        n_threads: int = 4,
+    ) -> Tuple[Tuple[str, np.float32], Dict[str, np.float32]]: ...
+    def __del__(self) -> None: ...
+
diff --git a/pywhispercpp/py.typed b/pywhispercpp/py.typed
diff --git a/setup.py b/setup.py
@@ -258,7 +258,7 @@ def get_version() -> str:
     packages=find_packages('.'),
     package_dir={'': '.'},
     include_package_data=True,
-    package_data={'pywhispercpp': []},
+    package_data={'pywhispercpp': ["*.pyi", "py.typed"]},
     long_description_content_type="text/markdown",
     license='MIT',
     entry_points={
diff --git a/whisper.cpp b/whisper.cpp
@@ -1 +1 @@
-Subproject commit 9386f239401074690479731c1e41683fbbeac557
+Subproject commit 4979e04f5dcaccb36057e059bbaed8a2f5288315