diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py
index 16ae3bace2c..36f4ed1f194 100644
--- a/aws_lambda_powertools/logging/logger.py
+++ b/aws_lambda_powertools/logging/logger.py
@@ -220,6 +220,10 @@ def __init__(
         serialize_stacktrace: bool = True,
         **kwargs,
     ) -> None:
+
+        # Stored upfront so sampling can restore the configured log level on each refresh
+        self.initial_log_level = self._determine_log_level(level)
+
         self.service = resolve_env_var_choice(
             choice=service,
             env=os.getenv(constants.SERVICE_NAME_ENV, "service_undefined"),
@@ -339,6 +343,17 @@ def _init_logger(
         self._logger.init = True  # type: ignore[attr-defined]
         self._logger.powertools_handler = self.logger_handler  # type: ignore[attr-defined]

+    def refresh_sample_rate_calculation(self) -> None:
+        """
+        Refresh the sampling rate calculation by resetting the log level and re-evaluating sampling.
+
+        Returns
+        -------
+        None
+        """
+        self._logger.setLevel(self.initial_log_level)
+        self._configure_sampling()
+
     def _configure_sampling(self) -> None:
         """Dynamically set log level based on sampling rate

@@ -347,15 +362,20 @@ def _configure_sampling(self) -> None:
         InvalidLoggerSamplingRateError
             When sampling rate provided is not a float
         """
+        if not self.sampling_rate:
+            return
+
         try:
-            if self.sampling_rate and random.random() <= float(self.sampling_rate):
-                logger.debug("Setting log level to Debug due to sampling rate")
+            # NOTE: values below 0 or above 1 need no extra validation here;
+            # they simply make the comparison below never or always match
+            if random.random() <= float(self.sampling_rate):
                 self._logger.setLevel(logging.DEBUG)
+                logger.debug("Setting log level to DEBUG due to sampling rate")
         except ValueError:
             raise InvalidLoggerSamplingRateError(
                 (
                     f"Expected a float value ranging 0 to 1, but received {self.sampling_rate} instead."
-                    "Please review POWERTOOLS_LOGGER_SAMPLE_RATE environment variable."
+                    "Please review the POWERTOOLS_LOGGER_SAMPLE_RATE environment variable or the `sampling_rate` parameter."
                 ),
             )

@@ -465,6 +485,12 @@ def decorate(event, context, *args, **kwargs):
                 logger.debug("Event received")
                 self.info(extract_event_from_common_models(event))
+
+            # If a sampling rate is configured and this is not a cold start,
+            # recalculate the sampling decision for this invocation
+            # See: https://github.com/aws-powertools/powertools-lambda-python/issues/6141
+            if self.sampling_rate and not cold_start:
+                self.refresh_sample_rate_calculation()

             return lambda_handler(event, context, *args, **kwargs)

         return decorate
diff --git a/docs/core/logger.md b/docs/core/logger.md
index e605855570d..3e19d8ebcd1 100644
--- a/docs/core/logger.md
+++ b/docs/core/logger.md
@@ -627,24 +627,27 @@ Notice in the CloudWatch Logs output how `payment_id` appears as expected when l

 ### Sampling debug logs

-Use sampling when you want to dynamically change your log level to **DEBUG** based on a **percentage of your concurrent/cold start invocations**.
+Use sampling when you want to dynamically change your log level to **DEBUG** based on a **percentage of your Lambda function invocations**.

-You can use values ranging from `0.0` to `1` (100%) when setting `POWERTOOLS_LOGGER_SAMPLE_RATE` env var, or `sample_rate` parameter in Logger.
+You can use values ranging from `0.0` to `1` (100%) when setting the `POWERTOOLS_LOGGER_SAMPLE_RATE` env var, or the `sampling_rate` parameter in Logger.

 ???+ tip "Tip: When is this useful?"
-    Let's imagine a sudden spike increase in concurrency triggered a transient issue downstream. When looking into the logs you might not have enough information, and while you can adjust log levels it might not happen again.
+    Log sampling allows you to capture debug information for a fraction of your requests, helping you diagnose rare or intermittent issues without increasing the overall verbosity of your logs.

-    This feature takes into account transient issues where additional debugging information can be useful.
+    Example: Imagine an e-commerce checkout process where you want to understand rare payment gateway errors. With 10% sampling, you'll log detailed information for a small subset of transactions, making troubleshooting easier without generating excessive logs.

-Sampling decision happens at the Logger initialization. This means sampling may happen significantly more or less than depending on your traffic patterns, for example a steady low number of invocations and thus few cold starts.
+The sampling decision happens automatically with each invocation when using the `@logger.inject_lambda_context` decorator. When not using the decorator, you're in charge of refreshing it via the `refresh_sample_rate_calculation` method. Skipping both may lead to unexpected sampling results.

-???+ note
-    Open a [feature request](https://github.com/aws-powertools/powertools-lambda-python/issues/new?assignees=&labels=feature-request%2C+triage&template=feature_request.md&title=){target="_blank"} if you want Logger to calculate sampling for every invocation
+=== "sampling_debug_logs_with_decorator.py"

-=== "sampling_debug_logs.py"
+    ```python hl_lines="5 8"
+    --8<-- "examples/logger/src/sampling_debug_logs_with_decorator.py"
+    ```

-    ```python hl_lines="6 10"
-    --8<-- "examples/logger/src/sampling_debug_logs.py"
+=== "sampling_debug_logs_with_standalone_function.py"
+
+    ```python hl_lines="5 12"
+    --8<-- "examples/logger/src/sampling_debug_logs_with_standalone_function.py"
     ```

 === "sampling_debug_logs_output.json"
diff --git a/examples/logger/src/sampling_debug_logs.py b/examples/logger/src/sampling_debug_logs_with_decorator.py
similarity index 87%
rename from examples/logger/src/sampling_debug_logs.py
rename to examples/logger/src/sampling_debug_logs_with_decorator.py
index 042c1f4a54a..b4b7c594255 100644
--- a/examples/logger/src/sampling_debug_logs.py
+++ b/examples/logger/src/sampling_debug_logs_with_decorator.py
@@ -2,10 +2,10 @@
 from aws_lambda_powertools.utilities.typing import LambdaContext

 # Sample 10% of debug logs e.g. 0.1
-# NOTE: this evaluation will only occur at cold start
 logger = Logger(service="payment", sampling_rate=0.1)


+@logger.inject_lambda_context
 def lambda_handler(event: dict, context: LambdaContext):
     logger.debug("Verifying whether order_id is present")
     logger.info("Collecting payment")
diff --git a/examples/logger/src/sampling_debug_logs_with_standalone_function.py b/examples/logger/src/sampling_debug_logs_with_standalone_function.py
new file mode 100644
index 00000000000..8d68eb18352
--- /dev/null
+++ b/examples/logger/src/sampling_debug_logs_with_standalone_function.py
@@ -0,0 +1,14 @@
+from aws_lambda_powertools import Logger
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+# Sample 10% of debug logs e.g. 0.1
+logger = Logger(service="payment", sampling_rate=0.1)
+
+
+def lambda_handler(event: dict, context: LambdaContext):
+    logger.debug("Verifying whether order_id is present")
+    logger.info("Collecting payment")
+
+    logger.refresh_sample_rate_calculation()
+
+    return "hello world"
diff --git a/tests/functional/logger/required_dependencies/test_logger.py b/tests/functional/logger/required_dependencies/test_logger.py
index f24866ea84b..71dfbc27638 100644
--- a/tests/functional/logger/required_dependencies/test_logger.py
+++ b/tests/functional/logger/required_dependencies/test_logger.py
@@ -86,7 +86,7 @@ def test_setup_service_env_var(monkeypatch, stdout, service_name):
     assert service_name == log["service"]


-def test_setup_sampling_rate_env_var(monkeypatch, stdout, service_name):
+def test_setup_sampling_rate_env_var_with_100percent(monkeypatch, stdout, service_name):
     # GIVEN Logger is initialized
     # WHEN sampling rate is explicitly set to 100% via POWERTOOLS_LOGGER_SAMPLE_RATE env
     sampling_rate = "1"
@@ -103,6 +103,129 @@
     assert "I am being sampled" == log["message"]


+def test_setup_sampling_rate_constructor_with_100percent(stdout, service_name):
+    # GIVEN Logger is initialized
+    # WHEN sampling rate is explicitly set to 100% via constructor
+    sampling_rate = 1
+    logger = Logger(service=service_name, sampling_rate=sampling_rate, stream=stdout)
+    logger.debug("I am being sampled")
+
+    # THEN the sampling rate should equal the sampling_rate value,
+    # the log level should be DEBUG,
+    # and debug log statements should be in stdout
+    log = capture_logging_output(stdout)
+    assert sampling_rate == log["sampling_rate"]
+    assert "DEBUG" == log["level"]
+    assert "I am being sampled" == log["message"]
+
+
+def test_setup_sampling_rate_env_var_with_0percent(monkeypatch, stdout, service_name):
+    # GIVEN Logger is initialized
+    # WHEN sampling rate is explicitly set to 0% via POWERTOOLS_LOGGER_SAMPLE_RATE env
+    sampling_rate = "0"
+    monkeypatch.setenv("POWERTOOLS_LOGGER_SAMPLE_RATE", sampling_rate)
+    logger = Logger(service=service_name, stream=stdout)
+    logger.debug("I am being sampled")
+
+    # THEN we should not log
+    logs = stdout.getvalue().strip()
+    assert not logs
+
+
+def test_setup_sampling_rate_constructor_with_0percent(stdout, service_name):
+    # GIVEN Logger is initialized
+    # WHEN sampling rate is explicitly set to 0% via constructor
+    sampling_rate = 0
+    logger = Logger(service=service_name, sampling_rate=sampling_rate, stream=stdout)
+    logger.debug("I am being sampled")
+
+    # THEN we should not log
+    logs = stdout.getvalue().strip()
+    assert not logs
+
+
+@pytest.mark.parametrize(
+    "percent, minimum_logs, maximum_logs",
+    [
+        (0.5, 35, 65),
+        (0.1, 0, 20),
+        (0.9, 75, 115),
+    ],
+)
+def test_setup_sampling_rate_constructor_with_percent_and_decorator(
+    lambda_context,
+    stdout,
+    service_name,
+    percent,
+    minimum_logs,
+    maximum_logs,
+):
+    # GIVEN the Logger is initialized with a specific sampling rate
+    sampling_rate = percent
+    total_runs = 100
+    minimum_logs_expected = minimum_logs
+    maximum_logs_expected = maximum_logs
+    logger = Logger(service=service_name, level="INFO", sampling_rate=sampling_rate, stream=stdout)
+
+    @logger.inject_lambda_context
+    def handler(event, context):
+        logger.debug("test")
+
+    # WHEN a Lambda handler is invoked multiple times with the decorator
+    for _i in range(total_runs):
+        handler({}, lambda_context)
+
+    # THEN verify the number of logs falls within the expected range
+    logs = stdout.getvalue().strip().split("\n")
+    assert (
+        len(logs) >= minimum_logs_expected
+    ), f"Log count {len(logs)} should be at least {minimum_logs_expected} for sampling rate {sampling_rate}"
+    assert (
+        len(logs) <= maximum_logs_expected
+    ), f"Log count {len(logs)} should be at most {maximum_logs_expected} for sampling rate {sampling_rate}"
+
+
+@pytest.mark.parametrize(
+    "percent, minimum_logs, maximum_logs",
+    [
+        (0.5, 35, 65),
+        (0.1, 0, 20),
+        (0.9, 75, 115),
+    ],
+)
+def test_setup_sampling_rate_constructor_with_percent_and_manual_refresh(
+    lambda_context,
+    stdout,
+    service_name,
+    percent,
+    minimum_logs,
+    maximum_logs,
+):
+    # GIVEN the Logger is initialized with a specific sampling rate
+    sampling_rate = percent
+    total_runs = 100
+    minimum_logs_expected = minimum_logs
+    maximum_logs_expected = maximum_logs
+    logger = Logger(service=service_name, level="INFO", sampling_rate=sampling_rate, stream=stdout)
+
+    def handler(event, context):
+        logger.debug("test")
+        logger.refresh_sample_rate_calculation()
+
+    # WHEN a Lambda handler is invoked multiple times with a manual refresh_sample_rate_calculation() call
+    for _i in range(total_runs):
+        handler({}, lambda_context)
+
+    # THEN verify the number of logs falls within the expected range
+    logs = stdout.getvalue().strip().split("\n")
+    assert (
+        len(logs) >= minimum_logs_expected
+    ), f"Log count {len(logs)} should be at least {minimum_logs_expected} for sampling rate {sampling_rate}"
+    assert (
+        len(logs) <= maximum_logs_expected
+    ), f"Log count {len(logs)} should be at most {maximum_logs_expected} for sampling rate {sampling_rate}"
+
+
 def test_inject_lambda_context(lambda_context, stdout, service_name):
     # GIVEN Logger is initialized
     logger = Logger(service=service_name, stream=stdout)
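
Reviewer note (illustration, not part of the patch): the sketch below re-implements the sampling flow from this diff — `initial_log_level`, `_configure_sampling`, and `refresh_sample_rate_calculation` — with only the standard library, so the per-invocation re-roll can be exercised outside Lambda. `SamplingLogger` and every name in it are hypothetical stand-ins, not the Powertools API.

```python
import logging
import random


class SamplingLogger:
    """Illustrative stand-in for the sampling logic in this diff."""

    def __init__(self, sampling_rate: float, level: int = logging.INFO) -> None:
        self.sampling_rate = sampling_rate
        # Mirrors `self.initial_log_level` in the diff: remember the configured
        # level so each refresh can reset to it before re-rolling the decision
        self.initial_log_level = level
        self._logger = logging.getLogger("sampling-sketch")
        self._logger.setLevel(level)
        self._configure_sampling()

    def _configure_sampling(self) -> None:
        # No-op when sampling is disabled (rate of 0 or None)
        if not self.sampling_rate:
            return
        # Enable DEBUG for roughly `sampling_rate` of the refreshes
        if random.random() <= float(self.sampling_rate):
            self._logger.setLevel(logging.DEBUG)

    def refresh_sample_rate_calculation(self) -> None:
        # Reset first: without this, a single successful DEBUG roll would
        # stick for every later invocation in the same execution environment
        self._logger.setLevel(self.initial_log_level)
        self._configure_sampling()


sampler = SamplingLogger(sampling_rate=0.1)
debug_hits = 0
for _ in range(10_000):  # simulate warm invocations
    sampler.refresh_sample_rate_calculation()
    debug_hits += sampler._logger.isEnabledFor(logging.DEBUG)
print(f"DEBUG enabled on {debug_hits}/10000 simulated invocations (~10% expected)")
```

The reset-then-reroll ordering is the crux of the fix: `refresh_sample_rate_calculation` restores `initial_log_level` before calling `_configure_sampling`; done the other way around, the level could only ever ratchet down to DEBUG and never recover.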
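One more aside on the `NOTE` comment in `_configure_sampling`: skipping explicit `< 0` / `> 1` validation is safe because the comparison degrades gracefully, while non-numeric values still raise through `float()`. A tiny standalone sketch of that behavior (the rates are arbitrary example values):

```python
import random

# Rates below 0 never match the comparison, rates of 1 or above always match,
# and anything in between enables DEBUG roughly proportionally
for rate in (-0.5, 0.0, 0.5, 1.0, 1.5):
    hits = sum(random.random() <= rate for _ in range(10_000))
    print(f"rate={rate:>4}: DEBUG enabled on {hits}/10000 rolls")
```

In the diff itself a rate of `0` never even reaches the comparison, because the falsy `if not self.sampling_rate` guard returns early.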