diff --git a/aws_lambda_powertools/metrics/base.py b/aws_lambda_powertools/metrics/base.py
index 1eece781bbf..a1ffe08caf9 100644
--- a/aws_lambda_powertools/metrics/base.py
+++ b/aws_lambda_powertools/metrics/base.py
@@ -5,7 +5,7 @@
 import os
 import pathlib
 from enum import Enum
-from typing import Dict, List, Union
+from typing import Any, Dict, List, Union
 
 import fastjsonschema
 
@@ -78,7 +78,12 @@ class MetricManager:
     """
 
     def __init__(
-        self, metric_set: Dict[str, str] = None, dimension_set: Dict = None, namespace: str = None, service: str = None
+        self,
+        metric_set: Dict[str, str] = None,
+        dimension_set: Dict = None,
+        namespace: str = None,
+        metadata_set: Dict[str, Any] = None,
+        service: str = None,
     ):
         self.metric_set = metric_set if metric_set is not None else {}
         self.dimension_set = dimension_set if dimension_set is not None else {}
@@ -86,6 +91,7 @@ def __init__(
         self.service = service or os.environ.get("POWERTOOLS_SERVICE_NAME")
         self._metric_units = [unit.value for unit in MetricUnit]
         self._metric_unit_options = list(MetricUnit.__members__)
+        self.metadata_set = metadata_set if metadata_set is not None else {}
 
     def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
         """Adds given metric
@@ -131,7 +137,7 @@ def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
             # since we could have more than 100 metrics
             self.metric_set.clear()
 
-    def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) -> Dict:
+    def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None, metadata: Dict = None) -> Dict:
         """Serializes metric and dimensions set
 
         Parameters
@@ -165,39 +171,48 @@ def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) ->
         if dimensions is None:  # pragma: no cover
             dimensions = self.dimension_set
 
+        if metadata is None:  # pragma: no cover
+            metadata = self.metadata_set
+
         if self.service and not self.dimension_set.get("service"):
self.dimension_set["service"] = self.service logger.debug("Serializing...", {"metrics": metrics, "dimensions": dimensions}) - dimension_keys: List[str] = list(dimensions.keys()) - metric_names_unit: List[Dict[str, str]] = [] - metric_set: Dict[str, str] = {} + metric_names_and_units: List[Dict[str, str]] = [] # [ { "Name": "metric_name", "Unit": "Count" } ] + metric_names_and_values: Dict[str, str] = {} # { "metric_name": 1.0 } for metric_name in metrics: metric: str = metrics[metric_name] metric_value: int = metric.get("Value", 0) metric_unit: str = metric.get("Unit", "") - metric_names_unit.append({"Name": metric_name, "Unit": metric_unit}) - metric_set.update({metric_name: metric_value}) - - metrics_definition = { - "CloudWatchMetrics": [ - {"Namespace": self.namespace, "Dimensions": [dimension_keys], "Metrics": metric_names_unit} - ] + metric_names_and_units.append({"Name": metric_name, "Unit": metric_unit}) + metric_names_and_values.update({metric_name: metric_value}) + + embedded_metrics_object = { + "_aws": { + "Timestamp": int(datetime.datetime.now().timestamp() * 1000), # epoch + "CloudWatchMetrics": [ + { + "Namespace": self.namespace, # "test_namespace" + "Dimensions": [list(dimensions.keys())], # [ "service" ] + "Metrics": metric_names_and_units, + } + ], + }, + **dimensions, # "service": "test_service" + **metadata, # "username": "test" + **metric_names_and_values, # "single_metric": 1.0 } - metrics_timestamp = {"Timestamp": int(datetime.datetime.now().timestamp() * 1000)} - metric_set["_aws"] = {**metrics_timestamp, **metrics_definition} - metric_set.update(**dimensions) try: - logger.debug("Validating serialized metrics against CloudWatch EMF schema", metric_set) - fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=metric_set) + logger.debug("Validating serialized metrics against CloudWatch EMF schema", embedded_metrics_object) + fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=embedded_metrics_object) except 
fastjsonschema.JsonSchemaException as e:
             message = f"Invalid format. Error: {e.message}, Invalid item: {e.name}"  # noqa: B306, E501
             raise SchemaValidationError(message)
 
-        return metric_set
+        return embedded_metrics_object
 
     def add_dimension(self, name: str, value: str):
         """Adds given dimension to all metrics
@@ -225,6 +240,38 @@ def add_dimension(self, name: str, value: str):
         else:
             self.dimension_set[name] = str(value)
 
+    def add_metadata(self, key: str, value: Any):
+        """Adds high-cardinality metadata for metrics object
+
+        This will not be available during metrics visualization.
+        Instead, this will be searchable through logs.
+
+        If you're looking to add metadata to filter metrics, then
+        use the add_dimension method.
+
+        Example
+        -------
+        **Add metrics metadata**
+
+            metric.add_metadata(key="booking_id", value="booking_id")
+
+        Parameters
+        ----------
+        key : str
+            Metadata key
+        value : any
+            Metadata value
+        """
+        logger.debug(f"Adding metadata: {key}:{value}")
+
+        # Cast key to str according to EMF spec
+        # Majority of keys are expected to be string already, so
+        # checking before casting improves performance in most cases
+        if isinstance(key, str):
+            self.metadata_set[key] = value
+        else:
+            self.metadata_set[str(key)] = value
+
     def __extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str:
         """Return metric value from metric unit whether that's str or MetricUnit enum
 
diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py
index 43cbeea2dc1..b16f28e92f0 100644
--- a/aws_lambda_powertools/metrics/metrics.py
+++ b/aws_lambda_powertools/metrics/metrics.py
@@ -67,20 +67,28 @@ def do_something():
 
     _metrics = {}
     _dimensions = {}
+    _metadata = {}
 
     def __init__(self, service: str = None, namespace: str = None):
         self.metric_set = self._metrics
         self.dimension_set = self._dimensions
         self.service = service
         self.namespace = namespace
+        self.metadata_set = self._metadata
+
         super().__init__(
-            metric_set=self.metric_set, 
dimension_set=self.dimension_set, namespace=self.namespace, service=self.service
+            metric_set=self.metric_set,
+            dimension_set=self.dimension_set,
+            namespace=self.namespace,
+            metadata_set=self.metadata_set,
+            service=self.service,
         )
 
     def clear_metrics(self):
         logger.debug("Clearing out existing metric set from memory")
         self.metric_set.clear()
         self.dimension_set.clear()
+        self.metadata_set.clear()
 
     def log_metrics(
         self,
diff --git a/docs/content/core/metrics.mdx b/docs/content/core/metrics.mdx
index e8767e1b6d4..7265d9b8a50 100644
--- a/docs/content/core/metrics.mdx
+++ b/docs/content/core/metrics.mdx
@@ -89,6 +89,56 @@ with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="
     ...
 ```
 
+## Adding metadata
+
+You can use `add_metadata` for advanced use cases, where you want to add metadata as part of the serialized metrics object.
+
+
+  This will not be available during metrics visualization - Use dimensions for this purpose
+
+
+```python:title=app.py
+from aws_lambda_powertools import Metrics
+from aws_lambda_powertools.metrics import MetricUnit
+
+metrics = Metrics(namespace="ExampleApplication", service="booking")
+metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)
+metrics.add_metadata(key="booking_id", value="booking_uuid") # highlight-line
+```
+
+This will be available in CloudWatch Logs to ease operations on high-cardinality data.
+
+
+Excerpt output in CloudWatch Logs
+
+```json:title=cloudwatch_logs.json
+{
+  "SuccessfulBooking": 1.0,
+  "_aws": {
+    "Timestamp": 1592234975665,
+    "CloudWatchMetrics": [
+      {
+        "Namespace": "ExampleApplication",
+        "Dimensions": [
+          [
+            "service"
+          ]
+        ],
+        "Metrics": [
+          {
+            "Name": "SuccessfulBooking",
+            "Unit": "Count"
+          }
+        ]
+      }
+    ]
+  },
+  "service": "booking",
+  "booking_id": "booking_uuid" // highlight-line
+}
+```
+
+ ## Flushing metrics As you finish adding all your metrics, you need to serialize and flush them to standard output. You can do that right before you return your response to the caller via `log_metrics`. diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py index efc93daa739..3407441a7bc 100644 --- a/tests/functional/test_metrics.py +++ b/tests/functional/test_metrics.py @@ -61,12 +61,19 @@ def service() -> str: return "test_service" +@pytest.fixture +def metadata() -> Dict[str, str]: + return {"key": "username", "value": "test"} + + @pytest.fixture def a_hundred_metrics(namespace=namespace) -> List[Dict[str, str]]: return [{"name": f"metric_{i}", "unit": "Count", "value": 1} for i in range(100)] -def serialize_metrics(metrics: List[Dict], dimensions: List[Dict], namespace: str) -> Dict: +def serialize_metrics( + metrics: List[Dict], dimensions: List[Dict], namespace: str, metadatas: List[Dict] = None +) -> Dict: """ Helper function to build EMF object from a list of metrics, dimensions """ my_metrics = MetricManager(namespace=namespace) for dimension in dimensions: @@ -75,15 +82,23 @@ def serialize_metrics(metrics: List[Dict], dimensions: List[Dict], namespace: st for metric in metrics: my_metrics.add_metric(**metric) + if metadatas is not None: + for metadata in metadatas: + my_metrics.add_metadata(**metadata) + if len(metrics) != 100: return my_metrics.serialize_metric_set() -def serialize_single_metric(metric: Dict, dimension: Dict, namespace: str) -> Dict: +def serialize_single_metric(metric: Dict, dimension: Dict, namespace: str, metadata: Dict = None) -> Dict: """ Helper function to build EMF object from a given metric, dimension and namespace """ my_metrics = MetricManager(namespace=namespace) my_metrics.add_metric(**metric) my_metrics.add_dimension(**dimension) + + if metadata is not None: + my_metrics.add_metadata(**metadata) + return my_metrics.serialize_metric_set() @@ -533,3 +548,85 @@ def lambda_handler(evt, ctx): for 
metric_record in second_output["_aws"]["CloudWatchMetrics"]: assert ["service"] in metric_record["Dimensions"] + + +def test_add_metadata_non_string_dimension_keys(service, metric, namespace): + # GIVEN Metrics is initialized + my_metrics = Metrics(service=service, namespace=namespace) + my_metrics.add_metric(**metric) + + # WHEN we utilize add_metadata with non-string keys + my_metrics.add_metadata(key=10, value="number_ten") + + # THEN we should have no exceptions + # and dimension values should be serialized as strings + expected_metadata = {"10": "number_ten"} + assert my_metrics.metadata_set == expected_metadata + + +def test_add_metadata(service, metric, namespace, metadata): + # GIVEN Metrics is initialized + my_metrics = Metrics(service=service, namespace=namespace) + my_metrics.add_metric(**metric) + + # WHEN we utilize add_metadata with non-string keys + my_metrics.add_metadata(**metadata) + + # THEN we should have no exceptions + # and dimension values should be serialized as strings + assert my_metrics.metadata_set == {metadata["key"]: metadata["value"]} + + +def test_log_metrics_with_metadata(capsys, metric, dimension, namespace, service, metadata): + # GIVEN Metrics is initialized + my_metrics = Metrics(namespace=namespace) + my_metrics.add_metric(**metric) + my_metrics.add_dimension(**dimension) + + # WHEN we utilize log_metrics to serialize and add metadata + @my_metrics.log_metrics + def lambda_handler(evt, ctx): + my_metrics.add_metadata(**metadata) + pass + + lambda_handler({}, {}) + + output = capture_metrics_output(capsys) + expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace, metadata=metadata) + + # THEN we should have no exceptions and metadata + remove_timestamp(metrics=[output, expected]) + assert expected == output + + +def test_serialize_metric_set_metric_definition(metric, dimension, namespace, service, metadata): + expected_metric_definition = { + "single_metric": 1.0, + "_aws": { + "Timestamp": 
1592237875494, + "CloudWatchMetrics": [ + { + "Namespace": "test_namespace", + "Dimensions": [["test_dimension", "service"]], + "Metrics": [{"Name": "single_metric", "Unit": "Count"}], + } + ], + }, + "service": "test_service", + "username": "test", + "test_dimension": "test", + } + + # GIVEN Metrics is initialized + my_metrics = Metrics(service=service, namespace=namespace) + my_metrics.add_metric(**metric) + my_metrics.add_dimension(**dimension) + my_metrics.add_metadata(**metadata) + + # WHEN metrics are serialized manually + metric_definition_output = my_metrics.serialize_metric_set() + + # THEN we should emit a valid embedded metric definition object + assert "Timestamp" in metric_definition_output["_aws"] + remove_timestamp(metrics=[metric_definition_output, expected_metric_definition]) + assert metric_definition_output == expected_metric_definition