Merge pull request #169 from RomanValov/main

absadiki · web-flow · commit 294e1e15f1fa · 2026-05-29T23:10:02.000-04:00
Don't expose extract_probability as a C API parameter
diff --git a/pywhispercpp/constants.py b/pywhispercpp/constants.py
@@ -302,12 +302,6 @@
             'options': None,
             'default': {"beam_size": -1, "patience": -1.0}
     },
-    'extract_probability': {
-            'type': bool,
-            'description': 'calculate the geometric mean of token probabilities for each segment.',
-            'options': None,
-            'default': False
-    },
     'vad': {
         'type': bool,
         'description': 'Enable VAD',
diff --git a/pywhispercpp/model.py b/pywhispercpp/model.py
@@ -181,6 +181,7 @@ def transcribe(self,
                    n_processors: Optional[int] = None,
                    new_segment_callback: Optional[Callable[[Segment], None]] = None,
                    abort_callback: Optional[Callable[[], bool]] = None,
+                   extract_probability: bool = False,
                    **params) -> List[Segment]:
         """
         Transcribes the media provided as input and returns list of `Segment` objects.
@@ -205,9 +206,6 @@ def transcribe(self,
                 raise FileNotFoundError(media)
             audio = self._load_audio(media)
 
-        # Handle extract_probability parameter
-        self.extract_probability = params.pop('extract_probability', False)
-
         # update params if any
         self._set_params(params)
 
@@ -224,7 +222,7 @@ def transcribe(self,
         # run inference
         start_time = time()
         logger.info("Transcribing ...")
-        res = self._transcribe(audio, n_processors=n_processors)
+        res = self._transcribe(audio, n_processors=n_processors, extract_probability=extract_probability)
         end_time = time()
         logger.info(f"Inference time: {end_time - start_time:.3f} s")
         return res
@@ -402,12 +400,14 @@ def _set_params(self, kwargs: dict) -> None:
         for param, value in normalized.items():
             setattr(self._params, param, value)
 
-    def _transcribe(self, audio: np.ndarray, n_processors: Optional[int] = None):
+    def _transcribe(self, audio: np.ndarray, n_processors: Optional[int] = None, extract_probability: bool = False):
         """
         Private method to call the whisper.cpp/whisper_full function
 
         :param audio: numpy array of audio data
         :param n_processors: if not None, it will run whisper.cpp/whisper_full_parallel with n_processors
+        :param extract_probability: If True, calculates the geometric mean of token probabilities for each segment,
+            providing a confidence score interpretable as a probability in [0, 1].
         :return:
         """
 
@@ -416,7 +416,7 @@ def _transcribe(self, audio: np.ndarray, n_processors: Optional[int] = None):
         else:
             pw.whisper_full(self._ctx, self._params, audio, audio.size)
         n = pw.whisper_full_n_segments(self._ctx)
-        res = Model._get_segments(self._ctx, 0, n, self.extract_probability)
+        res = Model._get_segments(self._ctx, 0, n, extract_probability)
         return res
 
     
@@ -528,4 +528,4 @@ def __del__(self):
         :return: None
         """
         if self._ctx is not None:
-            pw.whisper_free(self._ctx)
+            pw.whisper_free(self._ctx)