Preserve host structlog config on import, make getLogger idempotent, stop findCaller swallowing errors

sjrl · sjrl · commit 7eedb323034c · 2026-06-22T11:20:23.000+02:00
diff --git a/haystack/__init__.py b/haystack/__init__.py
@@ -18,8 +18,9 @@
 from haystack.version import __version__  # noqa: F401
 
 # Initialize the logging configuration
-# This is a no-op unless `structlog` is installed
-haystack.logging.configure_logging()
+# This is a no-op unless `structlog` is installed, and `force=False` makes it skip configuration if `structlog` has
+# already been configured by the host application (so importing Haystack does not overwrite their setup).
+haystack.logging.configure_logging(force=False)
 
 # Same for tracing (no op if `opentelemetry` or `ddtrace` is not installed)
 haystack.tracing.auto_enable_tracing()
diff --git a/haystack/logging.py b/haystack/logging.py
@@ -17,6 +17,10 @@
 HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON"
 HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG"
 
+# Attribute set on a logger once we have patched its methods. `logging.getLogger` returns a shared singleton, so we
+# use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls.
+_PATCHED_MARKER = "_haystack_patched"
+
 
 class PatchedLogger(typing.Protocol):
     """Class which enables using type checkers to find wrong logger usage."""
@@ -221,16 +225,13 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None:
         if not isinstance(logger, _FixedFindCallerLogger):
             return
 
-        # completely copied from structlog. We only add `haystack.logging` to the list of ignored frames
+        # Copied from structlog's `_FixedFindCallerLogger.findCaller`, only adding `haystack.logging` to the list of
+        # ignored frames so our own logging wrappers don't show up as the caller. We deliberately do not forward
+        # `stacklevel` to `_find_first_app_frame_and_name`: that parameter only exists in structlog >= 25.5.0 and
+        # structlog is an optional dependency, so forwarding it would break logging on older versions.
         def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]:  # noqa: ARG001
-            try:
-                sinfo: str | None
-                # we need to exclude `haystack.logging` from the stack
-                f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"])
-                sinfo = _format_stack(f) if stack_info else None
-            except Exception as error:
-                print(f"Error in findCaller: {error}")
-
+            f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"])
+            sinfo = _format_stack(f) if stack_info else None
             return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo
 
         logger.findCaller = findCaller  # type: ignore
@@ -248,6 +249,11 @@ def getLogger(name: str) -> PatchedLogger:
         - it makes structure logging effective, not just an available feature
     """
     logger = logging.getLogger(name)
+    if getattr(logger, _PATCHED_MARKER, False):
+        # Already patched: `logging.getLogger` returned the same singleton, so re-patching would stack the wrappers
+        # and interpolate the message more than once.
+        return typing.cast(PatchedLogger, logger)
+
     logger.debug = patch_log_method_to_kwargs_only(logger.debug)  # type: ignore
     logger.info = patch_log_method_to_kwargs_only(logger.info)  # type: ignore
     logger.warn = patch_log_method_to_kwargs_only(logger.warn)  # type: ignore
@@ -263,6 +269,8 @@ def getLogger(name: str) -> PatchedLogger:
     # We also patch the `makeRecord` method to use keyword string interpolation
     logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord)  # type: ignore
 
+    setattr(logger, _PATCHED_MARKER, True)
+
     return typing.cast(PatchedLogger, logger)
 
 
@@ -300,6 +308,7 @@ def configure_logging(
     use_json: bool | None = None,
     logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"),
     propagate: bool = True,
+    force: bool = True,
 ) -> None:
     """
     Configure logging for Haystack.
@@ -325,6 +334,11 @@ def configure_logging(
         capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its
         own logs - this avoids duplicate log lines when the host application also configures the root logger. It has
         no effect when `logger_name=""` (the root logger has no ancestors).
+    :param force:
+        Whether to (re)configure logging even if `structlog` has already been configured by someone else. The default
+        (`True`) means an explicit call always takes over. Pass `False` to make this a no-op when `structlog` is
+        already configured - this is used by the import-time call in `haystack/__init__.py` so that merely importing
+        Haystack does not overwrite a `structlog` configuration set up by the host application.
     """
     import haystack.utils.jupyter  # to avoid circular imports
 
@@ -341,6 +355,11 @@ def configure_logging(
         # If the user wants to ignore structlog, we don't configure it and fall back to standard logging
         return
 
+    # When not forcing, skip configuration if structlog is already configured (e.g. by the host application) so we
+    # leave its configuration and handlers untouched.
+    if not force and structlog.is_configured():
+        return
+
     # We roughly follow the structlog documentation here:
     # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging
     # This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`.
diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml
@@ -14,9 +14,21 @@ fixes:
   - |
     Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of
     ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace.
+  - |
+    Importing Haystack no longer overwrites a ``structlog`` configuration that the host application already set up.
+    The import-time call to ``configure_logging`` now skips configuration when ``structlog`` is already configured.
+  - |
+    ``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name
+    wrapped the already-wrapped logger methods again, so each log message was run through ``str.format`` once per
+    ``getLogger`` call. As a result, a field value containing ``{...}`` could be re-interpolated and pull in the
+    value of another field. Each logger is now patched only once.
+  - |
+    The patched ``findCaller`` used to determine the source of a log entry no longer prints to stdout and no longer
+    masks errors with a misleading ``UnboundLocalError``, matching structlog's own ``findCaller`` implementation.
 features:
   - |
-    ``haystack.logging.configure_logging`` gained two parameters: ``logger_name`` to choose which logger(s) the
-    formatting handler is attached to, and ``propagate`` to control whether Haystack's loggers propagate their
-    records to ancestor loggers. Set ``propagate=False`` to let Haystack fully own the output of its own logs and
-    avoid duplicate log lines when the host application also configures the root logger.
+    ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the
+    formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to
+    ancestor loggers, and ``force`` to control whether an existing ``structlog`` configuration is replaced. Set
+    ``propagate=False`` to let Haystack fully own the output of its own logs and avoid duplicate log lines when the
+    host application also configures the root logger.
diff --git a/test/test_logging.py b/test/test_logging.py
@@ -14,6 +14,8 @@
 
 import pytest
 import structlog
+import structlog._frames
+import structlog.stdlib
 from _pytest.capture import CaptureFixture
 from _pytest.logging import LogCaptureFixture
 from _pytest.monkeypatch import MonkeyPatch
@@ -63,6 +65,23 @@ def _snapshot(name: str) -> logging.Logger:
         logger.setLevel(level)
 
 
+@pytest.fixture()
+def restore_structlog_config() -> Generator[None, None, None]:
+    """Snapshot the global structlog configuration and restore it after the test."""
+    was_configured = structlog.is_configured()
+    config = structlog.get_config()
+    yield
+    if was_configured:
+        structlog.configure(**config)
+    else:
+        structlog.reset_defaults()
+
+
+def _sentinel_processor(logger: object, method_name: str, event_dict: dict) -> dict:
+    """A no-op processor used to detect whether an existing structlog config was left untouched."""
+    return event_dict
+
+
 @pytest.fixture()
 def set_context_var_key() -> Generator[str, None, None]:
     structlog.contextvars.bind_contextvars(context_var="value")
@@ -753,3 +772,112 @@ def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureF
         structlog.get_logger("haystack.native_filtered_level").debug("debug below the configured level")
 
         assert "debug below the configured level" not in capfd.readouterr().err
+
+
+class TestStructlogConfigIsPreserved:
+    """
+    `structlog.configure` writes to a single process-global configuration. These tests pin down that merely importing
+    Haystack (which calls `configure_logging(force=False)`) does not overwrite a structlog configuration that the host
+    application already set up, while an explicit call still takes over.
+    """
+
+    def test_not_forced_skips_when_structlog_already_configured(self, restore_structlog_config: None) -> None:
+        # Stand-in for the host application configuring structlog before Haystack is imported/configured.
+        structlog.reset_defaults()
+        structlog.configure(processors=[_sentinel_processor])
+        haystack_logger = logging.getLogger("haystack")
+        haystack_logger.handlers = []
+
+        haystack_logging.configure_logging(force=False)
+
+        # The application's structlog configuration is left untouched ...
+        assert structlog.get_config()["processors"] == [_sentinel_processor]
+        # ... and we did not attach our handler on top of their setup.
+        assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers)
+
+    def test_forced_takes_over_existing_structlog_config(self, restore_structlog_config: None) -> None:
+        structlog.reset_defaults()
+        structlog.configure(processors=[_sentinel_processor])
+        haystack_logger = logging.getLogger("haystack")
+        haystack_logger.handlers = []
+
+        haystack_logging.configure_logging(use_json=True, force=True)
+
+        assert structlog.get_config()["processors"] != [_sentinel_processor]
+        assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers)
+
+    def test_not_forced_still_configures_when_structlog_is_unconfigured(self, restore_structlog_config: None) -> None:
+        # This is the real import-time situation: nobody configured structlog yet, so we set up our nice defaults.
+        structlog.reset_defaults()
+        haystack_logger = logging.getLogger("haystack")
+        haystack_logger.handlers = []
+        assert not structlog.is_configured()
+
+        haystack_logging.configure_logging(force=False)
+
+        assert structlog.is_configured()
+        assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers)
+
+
+class TestGetLoggerIsIdempotent:
+    """
+    `logging.getLogger(name)` returns a process-wide singleton. `haystack.logging.getLogger` patches that shared
+    object in place, so calling it more than once for the same name (different modules, re-imports, ...) must not wrap
+    the already-wrapped methods again. The user-visible symptom of re-wrapping is that the message is run through
+    `str.format` once per wrap, so a field value that itself contains `{...}` gets re-interpolated.
+    """
+
+    def test_repeated_get_logger_interpolates_the_message_exactly_once(self, capfd: CaptureFixture) -> None:
+        haystack_logging.configure_logging(use_json=True)
+
+        # Two modules grabbing the same logger name is the realistic trigger for re-wrapping.
+        haystack_logging.getLogger("haystack.idempotency_test")
+        logger = haystack_logging.getLogger("haystack.idempotency_test")
+        logger.setLevel(logging.INFO)
+
+        # `a`'s value contains a `{b}` placeholder. With a single interpolation it must be left as-is; a second
+        # interpolation would expand it using `b` and leak "SECRET" into the message.
+        logger.info("Hello {a}", a="{b}", b="SECRET")
+
+        parsed_output = json.loads(capfd.readouterr().err)
+        assert parsed_output["event"] == "Hello {b}"
+        assert "SECRET" not in parsed_output["event"]
+
+    def test_repeated_get_logger_does_not_rewrap_methods(self) -> None:
+        haystack_logging.getLogger("haystack.idempotency_identity_test")
+        # Capture the patched methods after the first call, before the second one runs.
+        patched = logging.getLogger("haystack.idempotency_identity_test")
+        debug_after_first = patched.debug
+        make_record_after_first = patched.makeRecord
+
+        haystack_logging.getLogger("haystack.idempotency_identity_test")
+
+        # The second call must leave the already-patched methods in place, not wrap a fresh layer on top.
+        assert patched.debug is debug_after_first
+        assert patched.makeRecord is make_record_after_first
+
+
+class TestFindCallerMatchesStructlog:
+    """
+    `_patch_structlog_call_information` mirrors structlog's `_FixedFindCallerLogger.findCaller`, only adding
+    `haystack.logging` to the ignored frames. structlog itself does not guard the frame lookup, so neither do we: any
+    error must propagate as-is instead of being swallowed and printed to stdout.
+    """
+
+    def test_find_caller_does_not_print_or_mask_errors(self, capsys: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
+        # Force the frame lookup to fail. It is imported inside `_patch_structlog_call_information`, so we patch the
+        # module attribute before patching the logger.
+        def boom(*args: object, **kwargs: object) -> tuple:
+            raise RuntimeError("frame lookup failed")
+
+        monkeypatch.setattr(structlog._frames, "_find_first_app_frame_and_name", boom)
+
+        logger = structlog.stdlib._FixedFindCallerLogger("haystack.find_caller_test")
+        haystack_logging._patch_structlog_call_information(logger)
+
+        # The original error must propagate (not be masked by an UnboundLocalError on the unbound local `f`) ...
+        with pytest.raises(RuntimeError, match="frame lookup failed"):
+            logger.findCaller()
+
+        # ... and nothing must be written to stdout.
+        assert capsys.readouterr().out == ""