feat(metrics): allow custom timestamps for metrics (#4006)

leandrodamascena · web-flow · commit 17a7ac5c327b · 2024-03-27T11:33:34.000Z
* Initial commit

* Refactoring logic + mypy still failing

* Metrics should raise a warning when outside of constraints

* Adding timestamp to single_metric

* Adding timestamp to single_metric

* Adding test for wrong type

* Adding examples + doc

* Adding examples + doc

* Wording + TZ

* Improving doc
diff --git a/aws_lambda_powertools/metrics/base.py b/aws_lambda_powertools/metrics/base.py
@@ -17,6 +17,7 @@
     MetricValueError,
     SchemaValidationError,
 )
+from aws_lambda_powertools.metrics.functions import convert_timestamp_to_emf_format, validate_emf_timestamp
 from aws_lambda_powertools.metrics.provider import cold_start
 from aws_lambda_powertools.metrics.provider.cloudwatch_emf.constants import MAX_DIMENSIONS, MAX_METRICS
 from aws_lambda_powertools.metrics.provider.cloudwatch_emf.metric_properties import MetricResolution, MetricUnit
@@ -76,6 +77,8 @@ def __init__(
         self.namespace = resolve_env_var_choice(choice=namespace, env=os.getenv(constants.METRICS_NAMESPACE_ENV))
         self.service = resolve_env_var_choice(choice=service, env=os.getenv(constants.SERVICE_NAME_ENV))
         self.metadata_set = metadata_set if metadata_set is not None else {}
+        self.timestamp: int | None = None
+
         self._metric_units = [unit.value for unit in MetricUnit]
         self._metric_unit_valid_options = list(MetricUnit.__members__)
         self._metric_resolutions = [resolution.value for resolution in MetricResolution]
@@ -224,7 +227,7 @@ def serialize_metric_set(
 
         return {
             "_aws": {
-                "Timestamp": int(datetime.datetime.now().timestamp() * 1000),  # epoch
+                "Timestamp": self.timestamp or int(datetime.datetime.now().timestamp() * 1000),  # epoch
                 "CloudWatchMetrics": [
                     {
                         "Namespace": self.namespace,  # "test_namespace"
@@ -296,6 +299,31 @@ def add_metadata(self, key: str, value: Any) -> None:
         else:
             self.metadata_set[str(key)] = value
 
+    def set_timestamp(self, timestamp: int | datetime.datetime):
+        """
+        Set the timestamp stored on this instance for the metric.
+
+        Parameters
+        ----------
+        timestamp: int | datetime.datetime
+            The timestamp to create the metric.
+            If an integer is provided, it is assumed to be the epoch time in milliseconds.
+            If a datetime object is provided, it will be converted to epoch time in milliseconds.
+        """
+        # The timestamp must be a datetime object or an integer representing an epoch time.
+        # It should not be more than 14 days in the past or more than 2 hours in the future.
+        # Metrics failing these criteria are skipped by Amazon CloudWatch; we only warn and still store the value.
+        # See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html
+        # See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CloudWatch-Logs-Monitoring-CloudWatch-Metrics.html
+        if not validate_emf_timestamp(timestamp):
+            warnings.warn(
+                "This metric doesn't meet the requirements and will be skipped by Amazon CloudWatch. "
+                "Ensure the timestamp is within 14 days past or 2 hours future.",
+                stacklevel=2,
+            )
+
+        self.timestamp = convert_timestamp_to_emf_format(timestamp)
+
     def clear_metrics(self) -> None:
         logger.debug("Clearing out existing metric set from memory")
         self.metric_set.clear()
@@ -576,6 +604,9 @@ def single_metric(
         Metric value
     namespace: str
         Namespace for metrics
+    default_dimensions: Dict[str, str], optional
+        Metric dimensions as key=value that will always be present
+
 
     Yields
     -------
diff --git a/aws_lambda_powertools/metrics/functions.py b/aws_lambda_powertools/metrics/functions.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
 
+from datetime import datetime
+
 from aws_lambda_powertools.metrics.provider.cloudwatch_emf.exceptions import (
     MetricResolutionError,
     MetricUnitError,
 )
 from aws_lambda_powertools.metrics.provider.cloudwatch_emf.metric_properties import MetricResolution, MetricUnit
+from aws_lambda_powertools.shared import constants
 from aws_lambda_powertools.shared.types import List
 
 
@@ -69,3 +72,66 @@ def extract_cloudwatch_metric_unit_value(metric_units: List, metric_valid_option
         unit = unit.value
 
     return unit
+
+
+def validate_emf_timestamp(timestamp: int | datetime) -> bool:
+    """
+    Validates a given timestamp based on CloudWatch Timestamp guidelines.
+
+    No exception is raised for wrongly typed or out-of-window values; the function returns False instead.
+    See [Timestamps](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_concepts.html#about_timestamp)
+    for valid values.
+
+    Parameters
+    ----------
+    timestamp: int | datetime
+        Datetime object or epoch time in milliseconds representing the timestamp to validate.
+
+    Returns
+    -------
+    bool
+        True when the value is an int or datetime inside the accepted window (bounds included), else False
+    """
+
+    if not isinstance(timestamp, (int, datetime)):
+        return False
+
+    if isinstance(timestamp, datetime):
+        # Converting timestamp to epoch time in milliseconds
+        timestamp = int(timestamp.timestamp() * 1000)
+
+    # Consider current timezone when working with date and time
+    current_timezone = datetime.now().astimezone().tzinfo
+
+    current_time = int(datetime.now(current_timezone).timestamp() * 1000)
+    min_valid_timestamp = current_time - constants.EMF_MAX_TIMESTAMP_PAST_AGE
+    max_valid_timestamp = current_time + constants.EMF_MAX_TIMESTAMP_FUTURE_AGE
+
+    return min_valid_timestamp <= timestamp <= max_valid_timestamp
+
+
+def convert_timestamp_to_emf_format(timestamp: int | datetime) -> int:
+    """
+    Converts a timestamp to EMF compatible format.
+
+    Parameters
+    ----------
+    timestamp: int | datetime
+        The timestamp to convert. If already in epoch milliseconds format, returns it as is.
+        If datetime object, converts it to milliseconds since Unix epoch.
+
+    Returns
+    -------
+    int
+        The timestamp in milliseconds since Unix epoch, or 0 when the value has no `timestamp()` method.
+    """
+    if isinstance(timestamp, int):
+        return timestamp
+
+    try:
+        return int(round(timestamp.timestamp() * 1000))
+    except AttributeError:
+        # If this point is reached, timestamp has no `timestamp()` method (it is not a datetime object).
+        # Returning zero represents the initial date of epoch time, which Amazon CloudWatch skips.
+        # NOTE(review): serializers using `self.timestamp or <now>` treat 0 as unset - confirm intent.
+        return 0
diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py
@@ -125,6 +125,19 @@ def serialize_metric_set(
     def add_metadata(self, key: str, value: Any) -> None:
         self.provider.add_metadata(key=key, value=value)
 
+    def set_timestamp(self, timestamp: int):  # NOTE(review): docstring also allows datetime.datetime
+        """
+        Set the timestamp for the metric by delegating to `self.provider`.
+
+        Parameters
+        ----------
+        timestamp: int | datetime.datetime
+            The timestamp to create the metric.
+            If an integer is provided, it is assumed to be the epoch time in milliseconds.
+            If a datetime object is provided, it will be converted to epoch time in milliseconds.
+        """
+        self.provider.set_timestamp(timestamp=timestamp)
+
     def flush_metrics(self, raise_on_empty_metrics: bool = False) -> None:
         self.provider.flush_metrics(raise_on_empty_metrics=raise_on_empty_metrics)
 
diff --git a/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py b/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py
@@ -12,8 +12,10 @@
 from aws_lambda_powertools.metrics.base import single_metric
 from aws_lambda_powertools.metrics.exceptions import MetricValueError, SchemaValidationError
 from aws_lambda_powertools.metrics.functions import (
+    convert_timestamp_to_emf_format,
     extract_cloudwatch_metric_resolution_value,
     extract_cloudwatch_metric_unit_value,
+    validate_emf_timestamp,
 )
 from aws_lambda_powertools.metrics.provider.base import BaseProvider
 from aws_lambda_powertools.metrics.provider.cloudwatch_emf.constants import MAX_DIMENSIONS, MAX_METRICS
@@ -73,6 +75,7 @@ def __init__(
         self.namespace = resolve_env_var_choice(choice=namespace, env=os.getenv(constants.METRICS_NAMESPACE_ENV))
         self.service = resolve_env_var_choice(choice=service, env=os.getenv(constants.SERVICE_NAME_ENV))
         self.metadata_set = metadata_set if metadata_set is not None else {}
+        self.timestamp: int | None = None
 
         self._metric_units = [unit.value for unit in MetricUnit]
         self._metric_unit_valid_options = list(MetricUnit.__members__)
@@ -231,7 +234,7 @@ def serialize_metric_set(
 
         return {
             "_aws": {
-                "Timestamp": int(datetime.datetime.now().timestamp() * 1000),  # epoch
+                "Timestamp": self.timestamp or int(datetime.datetime.now().timestamp() * 1000),  # epoch
                 "CloudWatchMetrics": [
                     {
                         "Namespace": self.namespace,  # "test_namespace"
@@ -304,6 +307,31 @@ def add_metadata(self, key: str, value: Any) -> None:
         else:
             self.metadata_set[str(key)] = value
 
+    def set_timestamp(self, timestamp: int | datetime.datetime):
+        """
+        Set the timestamp stored on this instance for the metric.
+
+        Parameters
+        ----------
+        timestamp: int | datetime.datetime
+            The timestamp to create the metric.
+            If an integer is provided, it is assumed to be the epoch time in milliseconds.
+            If a datetime object is provided, it will be converted to epoch time in milliseconds.
+        """
+        # The timestamp must be a datetime object or an integer representing an epoch time.
+        # It should not be more than 14 days in the past or more than 2 hours in the future.
+        # Metrics failing these criteria are skipped by Amazon CloudWatch; we only warn and still store the value.
+        # See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html
+        # See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CloudWatch-Logs-Monitoring-CloudWatch-Metrics.html
+        if not validate_emf_timestamp(timestamp):
+            warnings.warn(
+                "This metric doesn't meet the requirements and will be skipped by Amazon CloudWatch. "
+                "Ensure the timestamp is within 14 days past or 2 hours future.",
+                stacklevel=2,
+            )
+
+        self.timestamp = convert_timestamp_to_emf_format(timestamp)
+
     def clear_metrics(self) -> None:
         logger.debug("Clearing out existing metric set from memory")
         self.metric_set.clear()
diff --git a/aws_lambda_powertools/shared/constants.py b/aws_lambda_powertools/shared/constants.py
@@ -39,6 +39,11 @@
 METRICS_NAMESPACE_ENV: str = "POWERTOOLS_METRICS_NAMESPACE"
 DATADOG_FLUSH_TO_LOG: str = "DD_FLUSH_TO_LOG"
 SERVICE_NAME_ENV: str = "POWERTOOLS_SERVICE_NAME"
+# If the timestamp of a log event is more than 2 hours in the future, the log event is skipped.
+# If the timestamp of a log event is more than 14 days in the past, the log event is skipped.
+# See https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AgentReference.html
+EMF_MAX_TIMESTAMP_PAST_AGE = 14 * 24 * 60 * 60 * 1000  # 14 days
+EMF_MAX_TIMESTAMP_FUTURE_AGE = 2 * 60 * 60 * 1000  # 2 hours
 
 # Parameters constants
 PARAMETERS_SSM_DECRYPT_ENV: str = "POWERTOOLS_PARAMETERS_SSM_DECRYPT"
diff --git a/docs/core/metrics.md b/docs/core/metrics.md
@@ -131,6 +131,21 @@ If you'd like to remove them at some point, you can use `clear_default_dimension
     --8<-- "examples/metrics/src/set_default_dimensions_log_metrics.py"
     ```
 
+### Changing default timestamp
+
+When creating metrics, we use the current timestamp. If you want to change the timestamp of all the metrics you create, utilize the `set_timestamp` function. You can specify a datetime object or an integer representing an epoch timestamp in milliseconds.
+
+Note that when specifying the timestamp as an integer, it must be the epoch time in milliseconds, i.e. the number of milliseconds elapsed since the Unix epoch.
+
+???+ info
+    If you need to use different timestamps across multiple metrics, opt for [single_metric](#working-with-different-timestamp).
+
+=== "set_custom_timestamp_log_metrics.py"
+
+    ```python hl_lines="15"
+    --8<-- "examples/metrics/src/set_custom_timestamp_log_metrics.py"
+    ```
+
 ### Flushing metrics
 
 As you finish adding all your metrics, you need to serialize and flush them to standard output. You can do that automatically with the `log_metrics` decorator.
@@ -224,14 +239,15 @@ You can add high-cardinality data as part of your Metrics log with `add_metadata
     --8<-- "examples/metrics/src/add_metadata_output.json"
     ```
 
-### Single metric with a different dimension
+### Single metric
 
-CloudWatch EMF uses the same dimensions across all your metrics. Use `single_metric` if you have a metric that should have different dimensions.
+CloudWatch EMF uses the same dimensions and timestamp across all your metrics. Use `single_metric` if you have a metric that should have different dimensions or timestamp.
 
-???+ info
-    Generally, this would be an edge case since you [pay for unique metric](https://aws.amazon.com/cloudwatch/pricing){target="_blank"}. Keep the following formula in mind:
+#### Working with different dimensions
 
-    **unique metric = (metric_name + dimension_name + dimension_value)**
+Generally, using different dimensions would be an edge case since you [pay for unique metric](https://aws.amazon.com/cloudwatch/pricing){target="_blank"}.
+
+Keep the following formula in mind: **unique metric = (metric_name + dimension_name + dimension_value)**
 
 === "single_metric.py"
 
@@ -259,6 +275,22 @@ By default it will skip all previously defined dimensions including default dime
     --8<-- "examples/metrics/src/single_metric_default_dimensions.py"
     ```
 
+#### Working with different timestamp
+
+When working with multiple metrics, you may need a different timestamp for each of them. In such cases, use `single_metric` to flush individual metrics with specific timestamps.
+
+=== "single_metric_with_different_timestamp.py"
+
+    ```python hl_lines="16 18"
+    --8<-- "examples/metrics/src/single_metric_with_different_timestamp.py"
+    ```
+
+=== "single_metric_with_different_timestamp_payload.json"
+
+    ```json hl_lines="5 10 15 20 25"
+    --8<-- "examples/metrics/src/single_metric_with_different_timestamp_payload.json"
+    ```
+
 ### Flushing metrics manually
 
 If you are using the [AWS Lambda Web Adapter](https://github.com/awslabs/aws-lambda-web-adapter){target="_blank"} project, or a middleware with custom metric logic, you can use `flush_metrics()`. This method will serialize, print metrics available to standard output, and clear in-memory metrics data.
diff --git a/examples/metrics/src/set_custom_timestamp_log_metrics.py b/examples/metrics/src/set_custom_timestamp_log_metrics.py
@@ -0,0 +1,15 @@
+import datetime
+
+from aws_lambda_powertools import Metrics
+from aws_lambda_powertools.metrics import MetricUnit
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+metrics = Metrics()
+
+
+@metrics.log_metrics  # ensures metrics are flushed upon request completion/failure
+def lambda_handler(event: dict, context: LambdaContext):
+    metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)
+
+    metric_timestamp = int((datetime.datetime.now() - datetime.timedelta(days=2)).timestamp() * 1000)
+    metrics.set_timestamp(metric_timestamp)  # epoch milliseconds, two days before now
diff --git a/examples/metrics/src/single_metric_with_different_timestamp.py b/examples/metrics/src/single_metric_with_different_timestamp.py
@@ -0,0 +1,18 @@
+from aws_lambda_powertools import Logger, single_metric
+from aws_lambda_powertools.metrics import MetricUnit
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+logger = Logger()
+
+
+def lambda_handler(event: dict, context: LambdaContext):
+
+    for record in event:  # NOTE(review): iterates `event` directly; the sample payload is a JSON array
+
+        record_id: str = record.get("record_id")
+        amount: int = record.get("amount")
+        timestamp: int = record.get("timestamp")  # epoch milliseconds in the sample payload
+
+        with single_metric(name="Orders", unit=MetricUnit.Count, value=amount, namespace="Powertools") as metric:
+            logger.info(f"Processing record id {record_id}")
+            metric.set_timestamp(timestamp)
diff --git a/examples/metrics/src/single_metric_with_different_timestamp_payload.json b/examples/metrics/src/single_metric_with_different_timestamp_payload.json
@@ -0,0 +1,27 @@
+[
+    {
+        "record_id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8",
+        "amount": 10,
+        "timestamp": 1648195200000
+    },
+    {
+        "record_id": "6ba7b811-9dad-11d1-80b4-00c04fd430c8",
+        "amount": 30,
+        "timestamp": 1648224000000
+    },
+    {
+        "record_id": "6ba7b812-9dad-11d1-80b4-00c04fd430c8",
+        "amount": 25,
+        "timestamp": 1648209600000
+    },
+    {
+        "record_id": "6ba7b813-9dad-11d1-80b4-00c04fd430c8",
+        "amount": 40,
+        "timestamp": 1648177200000
+    },
+    {
+        "record_id": "6ba7b814-9dad-11d1-80b4-00c04fd430c8",
+        "amount": 32,
+        "timestamp": 1648216800000
+    }
+]
diff --git a/tests/functional/metrics/test_metrics_cloudwatch_emf.py b/tests/functional/metrics/test_metrics_cloudwatch_emf.py