Skip to content

feat(logger): add new logic to sample debug logs #6142

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 27, 2025
34 changes: 30 additions & 4 deletions aws_lambda_powertools/logging/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ def __init__(
serialize_stacktrace: bool = True,
**kwargs,
) -> None:

# Used in case of sampling
self.initial_log_level = self._determine_log_level(level)

self.service = resolve_env_var_choice(
choice=service,
env=os.getenv(constants.SERVICE_NAME_ENV, "service_undefined"),
Expand Down Expand Up @@ -339,6 +343,17 @@ def _init_logger(
self._logger.init = True # type: ignore[attr-defined]
self._logger.powertools_handler = self.logger_handler # type: ignore[attr-defined]

def refresh_sample_rate_calculation(self) -> None:
    """
    Refreshes the sample rate calculation by reconfiguring logging settings.

    Restores the log level captured at construction time (``initial_log_level``)
    and then re-runs the sampling logic so a fresh random draw decides whether
    this execution switches the logger to DEBUG.

    Returns
    -------
    None
    """
    # Reset first so a previous draw that enabled DEBUG does not leak
    # into this execution's sampling decision.
    self._logger.setLevel(self.initial_log_level)
    self._configure_sampling()

def _configure_sampling(self) -> None:
    """Dynamically set log level based on sampling rate.

    Draws a uniform random number and, when it falls within the configured
    sampling rate, switches the logger to DEBUG for this execution.

    Raises
    ------
    InvalidLoggerSamplingRateError
        When the sampling rate provided cannot be converted to a float
    """
    # Guard clause: no sampling configured (None or 0) means nothing to do.
    if not self.sampling_rate:
        return

    try:
        # Values outside [0, 1] are not rejected here: rates > 1 always
        # sample and negative rates never do, so no extra branches needed.
        if random.random() <= float(self.sampling_rate):
            logger.debug("Setting log level to Debug due to sampling rate")
            self._logger.setLevel(logging.DEBUG)
    except ValueError:
        raise InvalidLoggerSamplingRateError(
            (
                f"Expected a float value ranging 0 to 1, but received {self.sampling_rate} instead."
                "Please review POWERTOOLS_LOGGER_SAMPLE_RATE environment variable or `sampling_rate` parameter."
            ),
        )

Expand Down Expand Up @@ -465,7 +484,14 @@ def decorate(event, context, *args, **kwargs):
logger.debug("Event received")
self.info(extract_event_from_common_models(event))

return lambda_handler(event, context, *args, **kwargs)
lambda_execution_return = lambda_handler(event, context, *args, **kwargs)

# Sampling rate is defined, so, we need to recalculate the sampling
# See: https://github.com/aws-powertools/powertools-lambda-python/issues/6141
if self.sampling_rate:
self.refresh_sample_rate_calculation()

return lambda_execution_return

return decorate

Expand Down
23 changes: 13 additions & 10 deletions docs/core/logger.md
Original file line number Diff line number Diff line change
Expand Up @@ -627,24 +627,27 @@ Notice in the CloudWatch Logs output how `payment_id` appears as expected when l

### Sampling debug logs

Use sampling when you want to dynamically change your log level to **DEBUG** based on a **percentage of your concurrent/cold start invocations**.
Use sampling when you want to dynamically change your log level to **DEBUG** based on a **percentage of the Lambda function executions**.

You can use values ranging from `0.0` to `1` (100%) when setting `POWERTOOLS_LOGGER_SAMPLE_RATE` env var, or `sample_rate` parameter in Logger.
You can use values ranging from `0.0` to `1` (100%) when setting `POWERTOOLS_LOGGER_SAMPLE_RATE` env var, or `sampling_rate` parameter in Logger.

???+ tip "Tip: When is this useful?"
Let's imagine a sudden spike increase in concurrency triggered a transient issue downstream. When looking into the logs you might not have enough information, and while you can adjust log levels it might not happen again.
Log sampling allows you to capture debug information for a fraction of your requests, helping you diagnose rare or intermittent issues without drowning your logs in unnecessary details.

This feature takes into account transient issues where additional debugging information can be useful.
Example: Imagine an e-commerce checkout process where you want to understand rare payment gateway errors. With 10% sampling, you'll log detailed information for a small subset of transactions, making troubleshooting easier without generating excessive logs.

Sampling decision happens at the Logger initialization. This means sampling may happen significantly more or less than depending on your traffic patterns, for example a steady low number of invocations and thus few cold starts.
The sampling decision is refreshed on every execution when you use the `@logger.inject_lambda_context` decorator or call the `refresh_sample_rate_calculation` method. If you use neither, the decision made at Logger initialization persists across invocations, which may lead to undesired sampling behavior.

???+ note
Open a [feature request](https://github.com/aws-powertools/powertools-lambda-python/issues/new?assignees=&labels=feature-request%2C+triage&template=feature_request.md&title=){target="_blank"} if you want Logger to calculate sampling for every invocation
=== "sampling_debug_logs_with_decorator.py"

=== "sampling_debug_logs.py"
```python hl_lines="5 8"
--8<-- "examples/logger/src/sampling_debug_logs_with_decorator.py"
```

```python hl_lines="6 10"
--8<-- "examples/logger/src/sampling_debug_logs.py"
=== "sampling_debug_logs_with_standalone_function.py"

```python hl_lines="5 12"
--8<-- "examples/logger/src/sampling_debug_logs_with_standalone_function.py"
```

=== "sampling_debug_logs_output.json"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from aws_lambda_powertools.utilities.typing import LambdaContext

# Sample 10% of debug logs e.g. 0.1
# NOTE: this evaluation will only occur at cold start
logger = Logger(service="payment", sample_rate=0.1)


@logger.inject_lambda_context
def lambda_handler(event: dict, context: LambdaContext):
logger.debug("Verifying whether order_id is present")
logger.info("Collecting payment")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from aws_lambda_powertools import Logger
from aws_lambda_powertools.utilities.typing import LambdaContext

# Sample 10% of debug logs e.g. 0.1
# NOTE(review): the tests in this PR pass the rate as `sampling_rate` —
# confirm `sample_rate` is an accepted alias and not silently ignored here.
logger = Logger(service="payment", sample_rate=0.1)


def lambda_handler(event: dict, context: LambdaContext):
    logger.debug("Verifying whether order_id is present")
    logger.info("Collecting payment")

    # Without the @logger.inject_lambda_context decorator, the sampling
    # decision must be refreshed manually for the next execution.
    logger.refresh_sample_rate_calculation()

    return "hello world"
125 changes: 124 additions & 1 deletion tests/functional/logger/required_dependencies/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_setup_service_env_var(monkeypatch, stdout, service_name):
assert service_name == log["service"]


def test_setup_sampling_rate_env_var(monkeypatch, stdout, service_name):
def test_setup_sampling_rate_env_var_with_100percent(monkeypatch, stdout, service_name):
# GIVEN Logger is initialized
# WHEN samping rate is explicitly set to 100% via POWERTOOLS_LOGGER_SAMPLE_RATE env
sampling_rate = "1"
Expand All @@ -103,6 +103,129 @@ def test_setup_sampling_rate_env_var(monkeypatch, stdout, service_name):
assert "I am being sampled" == log["message"]


def test_setup_sampling_rate_constructor_with_100percent(stdout, service_name):
    # GIVEN Logger is initialized
    # WHEN sampling rate is explicitly set to 100% via constructor
    sampling_rate = 1
    logger = Logger(service=service_name, sampling_rate=sampling_rate, stream=stdout)
    logger.debug("I am being sampled")

    # THEN sampling rate should equal the sampling_rate value,
    # log level should be DEBUG,
    # and debug log statements should be in stdout
    log = capture_logging_output(stdout)
    assert sampling_rate == log["sampling_rate"]
    assert "DEBUG" == log["level"]
    assert "I am being sampled" == log["message"]


def test_setup_sampling_rate_env_var_with_0percent(monkeypatch, stdout, service_name):
    # GIVEN Logger is initialized
    # WHEN sampling rate is explicitly set to 0% via POWERTOOLS_LOGGER_SAMPLE_RATE env
    sampling_rate = "0"
    monkeypatch.setenv("POWERTOOLS_LOGGER_SAMPLE_RATE", sampling_rate)
    logger = Logger(service=service_name, stream=stdout)
    logger.debug("I am being sampled")

    # THEN we should not log (stdout stays empty)
    logs = list(stdout.getvalue().strip())
    assert not logs


def test_setup_sampling_rate_constructor_with_0percent(stdout, service_name):
    # GIVEN Logger is initialized
    # WHEN sampling rate is explicitly set to 0% via constructor
    sampling_rate = 0
    logger = Logger(service=service_name, sampling_rate=sampling_rate, stream=stdout)
    logger.debug("I am being sampled")

    # THEN we should not log (stdout stays empty)
    logs = list(stdout.getvalue().strip())
    assert not logs


@pytest.mark.parametrize(
    "percent, minimum_logs, maximum_logs",
    [
        (0.5, 35, 65),
        (0.1, 0, 20),
        (0.9, 75, 115),
    ],
)
def test_setup_sampling_rate_env_var_with_percent_and_decorator(
    lambda_context,
    stdout,
    service_name,
    percent,
    minimum_logs,
    maximum_logs,
):
    # NOTE(review): despite "env_var" in the name, the rate is passed via the
    # constructor `sampling_rate` parameter, not the environment variable.
    # GIVEN the Logger is initialized with a specific sampling rate
    sampling_rate = percent
    total_runs = 100
    logger = Logger(service=service_name, level="INFO", sampling_rate=sampling_rate, stream=stdout)

    @logger.inject_lambda_context
    def handler(event, context):
        logger.debug("test")

    # WHEN a lambda handler is invoked multiple times with the decorator,
    # which refreshes the sampling decision on every invocation
    for _ in range(total_runs):
        handler({}, lambda_context)

    # THEN the number of emitted debug logs falls within the expected range
    logs = stdout.getvalue().strip().split("\n")
    assert (
        len(logs) >= minimum_logs
    ), f"Log count {len(logs)} should be at least {minimum_logs} for sampling rate {sampling_rate}"
    assert (
        len(logs) <= maximum_logs
    ), f"Log count {len(logs)} should be at most {maximum_logs} for sampling rate {sampling_rate}"


@pytest.mark.parametrize(
    "percent, minimum_logs, maximum_logs",
    [
        (0.5, 35, 65),
        (0.1, 0, 20),
        (0.9, 75, 115),
    ],
)
def test_setup_sampling_rate_env_var_with_percent_and_recalculate_manual_method(
    lambda_context,
    stdout,
    service_name,
    percent,
    minimum_logs,
    maximum_logs,
):
    # NOTE(review): despite "env_var" in the name, the rate is passed via the
    # constructor `sampling_rate` parameter, not the environment variable.
    # GIVEN the Logger is initialized with a specific sampling rate
    sampling_rate = percent
    total_runs = 100
    logger = Logger(service=service_name, level="INFO", sampling_rate=sampling_rate, stream=stdout)

    def handler(event, context):
        logger.debug("test")
        # Manually refresh the sampling decision after each execution,
        # since the decorator is not used here.
        logger.refresh_sample_rate_calculation()

    # WHEN a lambda handler is invoked multiple times with manual refresh_sample_rate_calculation()
    for _ in range(total_runs):
        handler({}, lambda_context)

    # THEN the number of emitted debug logs falls within the expected range
    logs = stdout.getvalue().strip().split("\n")
    assert (
        len(logs) >= minimum_logs
    ), f"Log count {len(logs)} should be at least {minimum_logs} for sampling rate {sampling_rate}"
    assert (
        len(logs) <= maximum_logs
    ), f"Log count {len(logs)} should be at most {maximum_logs} for sampling rate {sampling_rate}"


def test_inject_lambda_context(lambda_context, stdout, service_name):
# GIVEN Logger is initialized
logger = Logger(service=service_name, stream=stdout)
Expand Down
Loading