Skip to content

fix(logger): correctly pick powertools or custom handler in custom environments #4295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,4 @@ examples/**/sam/.aws-sam
cdk.out
# NOTE: different accounts will be used for E2E thus creating unnecessary git clutter
cdk.context.json
bla.py
2 changes: 1 addition & 1 deletion .markdownlint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ MD038: false
MD039: true

# MD040/fenced-code-language - Fenced code blocks should have a language specified
MD040: true
MD040: false

# MD041/first-line-heading/first-line-h1 - First line in a file should be a top-level heading
MD041:
Expand Down
3 changes: 3 additions & 0 deletions aws_lambda_powertools/logging/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
LOGGER_ATTRIBUTE_POWERTOOLS_HANDLER = "powertools_handler"
LOGGER_ATTRIBUTE_PRECONFIGURED = "init"
LOGGER_ATTRIBUTE_HANDLER = "logger_handler"
4 changes: 4 additions & 0 deletions aws_lambda_powertools/logging/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
class InvalidLoggerSamplingRateError(Exception):
pass


class OrphanedChildLoggerError(Exception):
pass
44 changes: 34 additions & 10 deletions aws_lambda_powertools/logging/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@
Optional,
TypeVar,
Union,
cast,
overload,
)

from aws_lambda_powertools.logging.constants import (
LOGGER_ATTRIBUTE_HANDLER,
LOGGER_ATTRIBUTE_POWERTOOLS_HANDLER,
LOGGER_ATTRIBUTE_PRECONFIGURED,
)
from aws_lambda_powertools.shared import constants
from aws_lambda_powertools.shared.functions import (
extract_event_from_common_models,
Expand All @@ -31,7 +37,7 @@
from aws_lambda_powertools.utilities import jmespath_utils

from ..shared.types import AnyCallableT
from .exceptions import InvalidLoggerSamplingRateError
from .exceptions import InvalidLoggerSamplingRateError, OrphanedChildLoggerError
from .filters import SuppressFilter
from .formatter import (
RESERVED_FORMATTER_CUSTOM_KEYS,
Expand Down Expand Up @@ -233,14 +239,14 @@ def __init__(
self.child = child
self.logger_formatter = logger_formatter
self._stream = stream or sys.stdout
self.logger_handler = logger_handler or logging.StreamHandler(self._stream)
self.log_uncaught_exceptions = log_uncaught_exceptions

self._is_deduplication_disabled = resolve_truthy_env_var_choice(
env=os.getenv(constants.LOGGER_LOG_DEDUPLICATION_ENV, "false"),
)
self._default_log_keys = {"service": self.service, "sampling_rate": self.sampling_rate}
self._logger = self._get_logger()
self.logger_handler = logger_handler or self._get_handler()

# NOTE: This is primarily to improve UX, so IDEs can autocomplete LambdaPowertoolsFormatter options
# previously, we masked all of them as kwargs thus limiting feature discovery
Expand All @@ -264,7 +270,7 @@ def __init__(

# Prevent __getattr__ from shielding unknown attribute errors in type checkers
# https://github.com/aws-powertools/powertools-lambda-python/issues/1660
if not TYPE_CHECKING:
if not TYPE_CHECKING: # pragma: no cover

def __getattr__(self, name):
# Proxy attributes not found to actual logger to support backward compatibility
Expand All @@ -279,6 +285,18 @@ def _get_logger(self) -> logging.Logger:

return logging.getLogger(logger_name)

def _get_handler(self) -> logging.Handler:
# is a logger handler already configured?
if getattr(self, LOGGER_ATTRIBUTE_HANDLER, None):
return self.logger_handler

# for children, use parent's handler
if self.child:
return getattr(self._logger.parent, LOGGER_ATTRIBUTE_POWERTOOLS_HANDLER, None) # type: ignore[return-value] # always checked in formatting

# otherwise, create a new stream handler (first time init)
return logging.StreamHandler(self._stream)

def _init_logger(
self,
formatter_options: Optional[Dict] = None,
Expand All @@ -292,7 +310,7 @@ def _init_logger(
# a) multiple handlers being attached
# b) different sampling mechanisms
# c) multiple messages from being logged as handlers can be duplicated
is_logger_preconfigured = getattr(self._logger, "init", False)
is_logger_preconfigured = getattr(self._logger, LOGGER_ATTRIBUTE_PRECONFIGURED, False)
if self.child or is_logger_preconfigured:
return

Expand All @@ -317,6 +335,7 @@ def _init_logger(
# std logging will return the same Logger with our attribute if name is reused
logger.debug(f"Marking logger {self.service} as preconfigured")
self._logger.init = True # type: ignore[attr-defined]
self._logger.powertools_handler = self.logger_handler # type: ignore[attr-defined]

def _configure_sampling(self) -> None:
"""Dynamically set log level based on sampling rate
Expand Down Expand Up @@ -613,7 +632,7 @@ def structure_logs(self, append: bool = False, formatter_options: Optional[Dict]

# Mode 1
log_keys = {**self._default_log_keys, **keys}
is_logger_preconfigured = getattr(self._logger, "init", False)
is_logger_preconfigured = getattr(self._logger, LOGGER_ATTRIBUTE_PRECONFIGURED, False)
if not is_logger_preconfigured:
formatter = self.logger_formatter or LambdaPowertoolsFormatter(**formatter_options, **log_keys)
self.registered_handler.setFormatter(formatter)
Expand Down Expand Up @@ -672,15 +691,20 @@ def removeFilter(self, filter: logging._FilterType) -> None: # noqa: A002 # fil
@property
def registered_handler(self) -> logging.Handler:
"""Convenience property to access the first logger handler"""
# We ignore mypy here because self.child encodes whether or not self._logger.parent is
# None, mypy can't see this from context but we can
handlers = self._logger.parent.handlers if self.child else self._logger.handlers # type: ignore[union-attr]
return handlers[0]
return self._get_handler()

@property
def registered_formatter(self) -> BasePowertoolsFormatter:
"""Convenience property to access the first logger formatter"""
return self.registered_handler.formatter # type: ignore[return-value]
handler = self.registered_handler
if handler is None:
raise OrphanedChildLoggerError(
"Orphan child loggers cannot append nor remove keys until a parent is initialized first. "
"To solve this issue, you can A) make sure a parent logger is initialized first, or B) move append/remove keys operations to a later stage." # noqa: E501
"Reference: https://docs.powertools.aws.dev/lambda/python/latest/core/logger/#reusing-logger-across-your-code",
)

return cast(BasePowertoolsFormatter, handler.formatter)

@property
def log_level(self) -> int:
Expand Down
1 change: 1 addition & 0 deletions aws_lambda_powertools/shared/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
INVALID_XRAY_NAME_CHARACTERS = r"[?;*()!$~^<>]"

# Logger constants
# maintenance: future major version should start having localized `constants.py` to ease future modularization
LOGGER_LOG_SAMPLING_RATE: str = "POWERTOOLS_LOGGER_SAMPLE_RATE"
LOGGER_LOG_EVENT_ENV: str = "POWERTOOLS_LOGGER_LOG_EVENT"
LOGGER_LOG_DEDUPLICATION_ENV: str = "POWERTOOLS_LOG_DEDUPLICATION_DISABLED"
Expand Down
58 changes: 28 additions & 30 deletions docs/core/logger.md
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,9 @@ You can use any of the following built-in JMESPath expressions as part of [injec

### Reusing Logger across your code

Similar to [Tracer](./tracer.md#reusing-tracer-across-your-code){target="_blank"}, a new instance that uses the same `service` name - env var or explicit parameter - will reuse a previous Logger instance. Just like `logging.getLogger("logger_name")` would in the standard library if called with the same logger name.
Similar to [Tracer](./tracer.md#reusing-tracer-across-your-code){target="_blank"}, a new instance that uses the same `service` name will reuse a previous Logger instance.

Notice in the CloudWatch Logs output how `payment_id` appeared as expected when logging in `collect.py`.
Notice in the CloudWatch Logs output how `payment_id` appears as expected when logging in `collect.py`.

=== "logger_reuse.py"

Expand All @@ -497,17 +497,6 @@ Notice in the CloudWatch Logs output how `payment_id` appeared as expected when
--8<-- "examples/logger/src/logger_reuse_output.json"
```

???+ note "Note: About Child Loggers"
Coming from standard library, you might be used to use `logging.getLogger(__name__)`. This will create a new instance of a Logger with a different name.

In Powertools, you can have the same effect by using `child=True` parameter: `Logger(child=True)`. This creates a new Logger instance named after `service.<module>`. All state changes will be propagated bi-directionally between Child and Parent.

For that reason, there could be side effects depending on the order the Child Logger is instantiated, because Child Loggers don't have a handler.

For example, if you instantiated a Child Logger and immediately used `logger.append_keys/remove_keys/set_correlation_id` to update logging state, this might fail if the Parent Logger wasn't instantiated.

In this scenario, you can either ensure any calls manipulating state are only called when a Parent Logger is instantiated (example above), or refrain from using `child=True` parameter altogether.

### Sampling debug logs

Use sampling when you want to dynamically change your log level to **DEBUG** based on a **percentage of your concurrent/cold start invocations**.
Expand Down Expand Up @@ -582,31 +571,40 @@ You can use import and use them as any other Logger formatter via `logger_format

### Migrating from other Loggers

If you're migrating from other Loggers, there are few key points to be aware of: [Service parameter](#the-service-parameter), [Inheriting Loggers](#inheriting-loggers), [Overriding Log records](#overriding-log-records), and [Logging exceptions](#logging-exceptions).
If you're migrating from other Loggers, there are few key points to be aware of: [Service parameter](#the-service-parameter), [Child Loggers](#child-loggers), [Overriding Log records](#overriding-log-records), and [Logging exceptions](#logging-exceptions).

#### The service parameter

Service is what defines the Logger name, including what the Lambda function is responsible for, or part of (e.g payment service).

For Logger, the `service` is the logging key customers can use to search log operations for one or more functions - For example, **search for all errors, or messages like X, where service is payment**.

#### Inheriting Loggers
#### Child Loggers

??? tip "Tip: Prefer [Logger Reuse feature](#reusing-logger-across-your-code) over inheritance unless strictly necessary, [see caveats.](#reusing-logger-across-your-code)"
<center>
```mermaid
stateDiagram-v2
direction LR
Parent: Logger()
Child: Logger(child=True)

Parent --> Child: bi-directional updates

> Python Logging hierarchy happens via the dot notation: `service`, `service.child`, `service.child_2`
Note right of Child
Both have the same service
end note

For inheritance, Logger uses a `child=True` parameter along with `service` being the same value across Loggers.
```
</center>

For child Loggers, we introspect the name of your module where `Logger(child=True, service="name")` is called, and we name your Logger as **{service}.{filename}**.
For inheritance, Logger uses `child` parameter to ensure we don't compete with its parents config. We name child Loggers following Python's convention: _`{service}`.`{filename}`_.

???+ danger
A common issue when migrating from other Loggers is that `service` might be defined in the parent Logger (no child param), and not defined in the child Logger:
Changes are bidirectional between parents and loggers. That is, appending a key in a child or parent will ensure both have them. This means, having the same `service` name is important when instantiating them.

=== "logging_inheritance_bad.py"
=== "logging_inheritance_good.py"

```python hl_lines="1 9"
--8<-- "examples/logger/src/logging_inheritance_bad.py"
--8<-- "examples/logger/src/logging_inheritance_good.py"
```

=== "logging_inheritance_module.py"
Expand All @@ -615,17 +613,17 @@ For child Loggers, we introspect the name of your module where `Logger(child=Tru
--8<-- "examples/logger/src/logging_inheritance_module.py"
```

In this case, Logger will register a Logger named `payment`, and a Logger named `service_undefined`. The latter isn't inheriting from the parent, and will have no handler, resulting in no message being logged to standard output.

???+ tip
This can be fixed by either ensuring both has the `service` value as `payment`, or simply use the environment variable `POWERTOOLS_SERVICE_NAME` to ensure service value will be the same across all Loggers when not explicitly set.
There are two important side effects when using child loggers:

Do this instead:
1. **Service name mismatch**. Logging messages will be dropped as child loggers don't have logging handlers.
* Solution: use `POWERTOOLS_SERVICE_NAME` env var. Alternatively, use the same service explicit value.
2. **Changing state before a parent instantiate**. Using `logger.append_keys` or `logger.remove_keys` without a parent Logger will lead to `OrphanedChildLoggerError` exception.
* Solution: always initialize parent Loggers first. Alternatively, move calls to `append_keys`/`remove_keys` from the child at a later stage.

=== "logging_inheritance_good.py"
=== "logging_inheritance_bad.py"

```python hl_lines="1 9"
--8<-- "examples/logger/src/logging_inheritance_good.py"
--8<-- "examples/logger/src/logging_inheritance_bad.py"
```

=== "logging_inheritance_module.py"
Expand Down
62 changes: 61 additions & 1 deletion tests/functional/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from aws_lambda_powertools import Logger, Tracer, set_package_logger_handler
from aws_lambda_powertools.logging import correlation_paths
from aws_lambda_powertools.logging.exceptions import InvalidLoggerSamplingRateError
from aws_lambda_powertools.logging.exceptions import InvalidLoggerSamplingRateError, OrphanedChildLoggerError
from aws_lambda_powertools.logging.formatter import (
BasePowertoolsFormatter,
LambdaPowertoolsFormatter,
Expand Down Expand Up @@ -1176,3 +1176,63 @@ def test_logger_json_unicode(stdout, service_name):

assert log["message"] == non_ascii_chars
assert log[japanese_field] == japanese_string


def test_logger_registered_handler_is_custom_handler(service_name):
# GIVEN a library or environment pre-setup a logger for us using the same name (see #4277)
class ForeignHandler(logging.StreamHandler): ...

foreign_handler = ForeignHandler()
logging.getLogger(service_name).addHandler(foreign_handler)

# WHEN Logger init with a custom handler
custom_handler = logging.StreamHandler()
logger = Logger(service=service_name, logger_handler=custom_handler)

# THEN registered handler should always return what we provided
assert logger.registered_handler is not foreign_handler
assert logger.registered_handler is custom_handler
assert logger.logger_handler is custom_handler
assert logger.handlers == [foreign_handler, custom_handler]


def test_child_logger_registered_handler_is_custom_handler(service_name):
# GIVEN
class ForeignHandler(logging.StreamHandler): ...

foreign_handler = ForeignHandler()
logging.getLogger(service_name).addHandler(foreign_handler)

custom_handler = logging.StreamHandler()
custom_handler.name = "CUSTOM HANDLER"
parent = Logger(service=service_name, logger_handler=custom_handler)

# WHEN a child Logger init
child = Logger(service=service_name, child=True)

# THEN child registered handler should always return what we provided in the parent
assert child.registered_handler is not foreign_handler
assert child.registered_handler is custom_handler
assert child.registered_handler is parent.registered_handler


def test_logger_handler_is_created_despite_env_pre_setup(service_name):
# GIVEN a library or environment pre-setup a logger for us using the same name
environment_handler = logging.StreamHandler()
logging.getLogger(service_name).addHandler(environment_handler)

# WHEN Logger init without a custom handler
logger = Logger(service=service_name)

# THEN registered handler should be Powertools default handler, not env
assert logger.registered_handler is not environment_handler


def test_child_logger_append_keys_before_parent(stdout, service_name):
# GIVEN a child Logger is initialized before its/without parent
child = Logger(stream=stdout, service=service_name, child=True)

# WHEN a child Logger appends a key
# THEN it will raise an AttributeError
with pytest.raises(OrphanedChildLoggerError):
child.append_keys(customer_id="value")