Skip to content

Commit 7eedb32

Browse files
committed
more fixes
1 parent 9825bcc commit 7eedb32

4 files changed

Lines changed: 175 additions & 15 deletions

File tree

haystack/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
from haystack.version import __version__ # noqa: F401
1919

2020
# Initialize the logging configuration
21-
# This is a no-op unless `structlog` is installed
22-
haystack.logging.configure_logging()
21+
# This is a no-op unless `structlog` is installed, and `force=False` makes it skip configuration if `structlog` has
22+
# already been configured by the host application (so importing Haystack does not overwrite their setup).
23+
haystack.logging.configure_logging(force=False)
2324

2425
# Same for tracing (no op if `opentelemetry` or `ddtrace` is not installed)
2526
haystack.tracing.auto_enable_tracing()

haystack/logging.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON"
1818
HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG"
1919

20+
# Attribute set on a logger once we have patched its methods. `logging.getLogger` returns a shared singleton, so we
21+
# use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls.
22+
_PATCHED_MARKER = "_haystack_patched"
23+
2024

2125
class PatchedLogger(typing.Protocol):
2226
"""Class which enables using type checkers to find wrong logger usage."""
@@ -221,16 +225,13 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None:
221225
if not isinstance(logger, _FixedFindCallerLogger):
222226
return
223227

224-
# completely copied from structlog. We only add `haystack.logging` to the list of ignored frames
228+
# Copied from structlog's `_FixedFindCallerLogger.findCaller`, only adding `haystack.logging` to the list of
229+
# ignored frames so our own logging wrappers don't show up as the caller. We deliberately do not forward
230+
# `stacklevel` to `_find_first_app_frame_and_name`: that parameter only exists in structlog >= 25.5.0 and
231+
# structlog is an optional dependency, so forwarding it would break logging on older versions.
225232
def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]: # noqa: ARG001
226-
try:
227-
sinfo: str | None
228-
# we need to exclude `haystack.logging` from the stack
229-
f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"])
230-
sinfo = _format_stack(f) if stack_info else None
231-
except Exception as error:
232-
print(f"Error in findCaller: {error}")
233-
233+
f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"])
234+
sinfo = _format_stack(f) if stack_info else None
234235
return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo
235236

236237
logger.findCaller = findCaller # type: ignore
@@ -248,6 +249,11 @@ def getLogger(name: str) -> PatchedLogger:
248249
- it makes structure logging effective, not just an available feature
249250
"""
250251
logger = logging.getLogger(name)
252+
if getattr(logger, _PATCHED_MARKER, False):
253+
# Already patched: `logging.getLogger` returned the same singleton, so re-patching would stack the wrappers
254+
# and interpolate the message more than once.
255+
return typing.cast(PatchedLogger, logger)
256+
251257
logger.debug = patch_log_method_to_kwargs_only(logger.debug) # type: ignore
252258
logger.info = patch_log_method_to_kwargs_only(logger.info) # type: ignore
253259
logger.warn = patch_log_method_to_kwargs_only(logger.warn) # type: ignore
@@ -263,6 +269,8 @@ def getLogger(name: str) -> PatchedLogger:
263269
# We also patch the `makeRecord` method to use keyword string interpolation
264270
logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord) # type: ignore
265271

272+
setattr(logger, _PATCHED_MARKER, True)
273+
266274
return typing.cast(PatchedLogger, logger)
267275

268276

@@ -300,6 +308,7 @@ def configure_logging(
300308
use_json: bool | None = None,
301309
logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"),
302310
propagate: bool = True,
311+
force: bool = True,
303312
) -> None:
304313
"""
305314
Configure logging for Haystack.
@@ -325,6 +334,11 @@ def configure_logging(
325334
capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its
326335
own logs - this avoids duplicate log lines when the host application also configures the root logger. It has
327336
no effect when `logger_name=""` (the root logger has no ancestors).
337+
:param force:
338+
Whether to (re)configure logging even if `structlog` has already been configured by someone else. The default
339+
(`True`) means an explicit call always takes over. Pass `False` to make this a no-op when `structlog` is
340+
already configured - this is used by the import-time call in `haystack/__init__.py` so that merely importing
341+
Haystack does not overwrite a `structlog` configuration set up by the host application.
328342
"""
329343
import haystack.utils.jupyter # to avoid circular imports
330344

@@ -341,6 +355,11 @@ def configure_logging(
341355
# If the user wants to ignore structlog, we don't configure it and fall back to standard logging
342356
return
343357

358+
# When not forcing, skip configuration if structlog is already configured (e.g. by the host application) so we
359+
# leave its configuration and handlers untouched.
360+
if not force and structlog.is_configured():
361+
return
362+
344363
# We roughly follow the structlog documentation here:
345364
# https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging
346365
# This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`.

releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,21 @@ fixes:
1414
- |
1515
Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of
1616
``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace.
17+
- |
18+
Importing Haystack no longer overwrites a ``structlog`` configuration that the host application already set up.
19+
The import-time call to ``configure_logging`` now skips configuration when ``structlog`` is already configured.
20+
- |
21+
``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name
22+
wrapped the already-wrapped logger methods again, which caused the log message to be run through ``str.format``
23+
once per call. As a result a field value containing ``{...}`` could be re-interpolated and pull in the value of
24+
another field. Each logger is now patched only once.
25+
- |
26+
The patched ``findCaller`` used to determine the source of a log entry no longer prints to stdout and no longer
27+
masks errors with a misleading ``NameError``, matching structlog's own ``findCaller`` implementation.
1728
features:
1829
- |
19-
``haystack.logging.configure_logging`` gained two parameters: ``logger_name`` to choose which logger(s) the
20-
formatting handler is attached to, and ``propagate`` to control whether Haystack's loggers propagate their
21-
records to ancestor loggers. Set ``propagate=False`` to let Haystack fully own the output of its own logs and
22-
avoid duplicate log lines when the host application also configures the root logger.
30+
``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the
31+
formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to
32+
ancestor loggers, and ``force`` to control whether an existing ``structlog`` configuration is replaced. Set
33+
``propagate=False`` to let Haystack fully own the output of its own logs and avoid duplicate log lines when the
34+
host application also configures the root logger.

test/test_logging.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
import pytest
1616
import structlog
17+
import structlog._frames
18+
import structlog.stdlib
1719
from _pytest.capture import CaptureFixture
1820
from _pytest.logging import LogCaptureFixture
1921
from _pytest.monkeypatch import MonkeyPatch
@@ -63,6 +65,23 @@ def _snapshot(name: str) -> logging.Logger:
6365
logger.setLevel(level)
6466

6567

68+
@pytest.fixture()
69+
def restore_structlog_config() -> Generator[None, None, None]:
70+
"""Snapshot the global structlog configuration and restore it after the test."""
71+
was_configured = structlog.is_configured()
72+
config = structlog.get_config()
73+
yield
74+
if was_configured:
75+
structlog.configure(**config)
76+
else:
77+
structlog.reset_defaults()
78+
79+
80+
def _sentinel_processor(logger: object, method_name: str, event_dict: dict) -> dict:
81+
"""A no-op processor used to detect whether an existing structlog config was left untouched."""
82+
return event_dict
83+
84+
6685
@pytest.fixture()
6786
def set_context_var_key() -> Generator[str, None, None]:
6887
structlog.contextvars.bind_contextvars(context_var="value")
@@ -753,3 +772,112 @@ def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureF
753772
structlog.get_logger("haystack.native_filtered_level").debug("debug below the configured level")
754773

755774
assert "debug below the configured level" not in capfd.readouterr().err
775+
776+
777+
class TestStructlogConfigIsPreserved:
778+
"""
779+
`structlog.configure` writes to a single process-global configuration. These tests pin down that merely importing
780+
Haystack (which calls `configure_logging(force=False)`) does not overwrite a structlog configuration that the host
781+
application already set up, while an explicit call still takes over.
782+
"""
783+
784+
def test_not_forced_skips_when_structlog_already_configured(self, restore_structlog_config: None) -> None:
785+
# Stand-in for the host application configuring structlog before Haystack is imported/configured.
786+
structlog.reset_defaults()
787+
structlog.configure(processors=[_sentinel_processor])
788+
haystack_logger = logging.getLogger("haystack")
789+
haystack_logger.handlers = []
790+
791+
haystack_logging.configure_logging(force=False)
792+
793+
# The application's structlog configuration is left untouched ...
794+
assert structlog.get_config()["processors"] == [_sentinel_processor]
795+
# ... and we did not attach our handler on top of their setup.
796+
assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers)
797+
798+
def test_forced_takes_over_existing_structlog_config(self, restore_structlog_config: None) -> None:
799+
structlog.reset_defaults()
800+
structlog.configure(processors=[_sentinel_processor])
801+
haystack_logger = logging.getLogger("haystack")
802+
haystack_logger.handlers = []
803+
804+
haystack_logging.configure_logging(use_json=True, force=True)
805+
806+
assert structlog.get_config()["processors"] != [_sentinel_processor]
807+
assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers)
808+
809+
def test_not_forced_still_configures_when_structlog_is_unconfigured(self, restore_structlog_config: None) -> None:
810+
# This is the real import-time situation: nobody configured structlog yet, so we set up our nice defaults.
811+
structlog.reset_defaults()
812+
haystack_logger = logging.getLogger("haystack")
813+
haystack_logger.handlers = []
814+
assert not structlog.is_configured()
815+
816+
haystack_logging.configure_logging(force=False)
817+
818+
assert structlog.is_configured()
819+
assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers)
820+
821+
822+
class TestGetLoggerIsIdempotent:
823+
"""
824+
`logging.getLogger(name)` returns a process-wide singleton. `haystack.logging.getLogger` patches that shared
825+
object in place, so calling it more than once for the same name (different modules, re-imports, ...) must not wrap
826+
the already-wrapped methods again. The user-visible symptom of re-wrapping is that the message is run through
827+
`str.format` once per wrap, so a field value that itself contains `{...}` gets re-interpolated.
828+
"""
829+
830+
def test_repeated_get_logger_interpolates_the_message_exactly_once(self, capfd: CaptureFixture) -> None:
831+
haystack_logging.configure_logging(use_json=True)
832+
833+
# Two modules grabbing the same logger name is the realistic trigger for re-wrapping.
834+
haystack_logging.getLogger("haystack.idempotency_test")
835+
logger = haystack_logging.getLogger("haystack.idempotency_test")
836+
logger.setLevel(logging.INFO)
837+
838+
# `a`'s value contains a `{b}` placeholder. With a single interpolation it must be left as-is; a second
839+
# interpolation would expand it using `b` and leak "SECRET" into the message.
840+
logger.info("Hello {a}", a="{b}", b="SECRET")
841+
842+
parsed_output = json.loads(capfd.readouterr().err)
843+
assert parsed_output["event"] == "Hello {b}"
844+
assert "SECRET" not in parsed_output["event"]
845+
846+
def test_repeated_get_logger_does_not_rewrap_methods(self) -> None:
847+
haystack_logging.getLogger("haystack.idempotency_identity_test")
848+
# Capture the patched methods after the first call, before the second one runs.
849+
patched = logging.getLogger("haystack.idempotency_identity_test")
850+
debug_after_first = patched.debug
851+
make_record_after_first = patched.makeRecord
852+
853+
haystack_logging.getLogger("haystack.idempotency_identity_test")
854+
855+
# The second call must leave the already-patched methods in place, not wrap a fresh layer on top.
856+
assert patched.debug is debug_after_first
857+
assert patched.makeRecord is make_record_after_first
858+
859+
860+
class TestFindCallerMatchesStructlog:
861+
"""
862+
`_patch_structlog_call_information` mirrors structlog's `_FixedFindCallerLogger.findCaller`, only adding
863+
`haystack.logging` to the ignored frames. structlog itself does not guard the frame lookup, so neither do we: any
864+
error must propagate as-is instead of being swallowed and printed to stdout.
865+
"""
866+
867+
def test_find_caller_does_not_print_or_mask_errors(self, capsys: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
868+
# Force the frame lookup to fail. It is imported inside `_patch_structlog_call_information`, so we patch the
869+
# module attribute before patching the logger.
870+
def boom(*args: object, **kwargs: object) -> tuple:
871+
raise RuntimeError("frame lookup failed")
872+
873+
monkeypatch.setattr(structlog._frames, "_find_first_app_frame_and_name", boom)
874+
875+
logger = structlog.stdlib._FixedFindCallerLogger("haystack.find_caller_test")
876+
haystack_logging._patch_structlog_call_information(logger)
877+
878+
# The original error must propagate (not be masked by a NameError on an unbound `f`) ...
879+
with pytest.raises(RuntimeError, match="frame lookup failed"):
880+
logger.findCaller()
881+
882+
# ... and nothing must be written to stdout.
883+
assert capsys.readouterr().out == ""

0 commit comments

Comments
 (0)