Skip to content

feat: add metrics metadata #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jun 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 66 additions & 19 deletions aws_lambda_powertools/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import pathlib
from enum import Enum
from typing import Dict, List, Union
from typing import Any, Dict, List, Union

import fastjsonschema

Expand Down Expand Up @@ -78,14 +78,20 @@ class MetricManager:
"""

def __init__(
self, metric_set: Dict[str, str] = None, dimension_set: Dict = None, namespace: str = None, service: str = None
self,
metric_set: Dict[str, str] = None,
dimension_set: Dict = None,
namespace: str = None,
metadata_set: Dict[str, Any] = None,
service: str = None,
):
self.metric_set = metric_set if metric_set is not None else {}
self.dimension_set = dimension_set if dimension_set is not None else {}
self.namespace = namespace or os.getenv("POWERTOOLS_METRICS_NAMESPACE")
self.service = service or os.environ.get("POWERTOOLS_SERVICE_NAME")
self._metric_units = [unit.value for unit in MetricUnit]
self._metric_unit_options = list(MetricUnit.__members__)
self.metadata_set = metadata_set if metadata_set is not None else {}

def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
"""Adds given metric
Expand Down Expand Up @@ -131,7 +137,7 @@ def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
# since we could have more than 100 metrics
self.metric_set.clear()

def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) -> Dict:
def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None, metadata: Dict = None) -> Dict:
"""Serializes metric and dimensions set

Parameters
Expand Down Expand Up @@ -165,39 +171,48 @@ def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) ->
if dimensions is None: # pragma: no cover
dimensions = self.dimension_set

if metadata is None: # pragma: no cover
metadata = self.metadata_set

if self.service and not self.dimension_set.get("service"):
self.dimension_set["service"] = self.service

logger.debug("Serializing...", {"metrics": metrics, "dimensions": dimensions})

dimension_keys: List[str] = list(dimensions.keys())
metric_names_unit: List[Dict[str, str]] = []
metric_set: Dict[str, str] = {}
metric_names_and_units: List[Dict[str, str]] = [] # [ { "Name": "metric_name", "Unit": "Count" } ]
metric_names_and_values: Dict[str, str] = {} # { "metric_name": 1.0 }

for metric_name in metrics:
metric: str = metrics[metric_name]
metric_value: int = metric.get("Value", 0)
metric_unit: str = metric.get("Unit", "")

metric_names_unit.append({"Name": metric_name, "Unit": metric_unit})
metric_set.update({metric_name: metric_value})

metrics_definition = {
"CloudWatchMetrics": [
{"Namespace": self.namespace, "Dimensions": [dimension_keys], "Metrics": metric_names_unit}
]
metric_names_and_units.append({"Name": metric_name, "Unit": metric_unit})
metric_names_and_values.update({metric_name: metric_value})

embedded_metrics_object = {
"_aws": {
"Timestamp": int(datetime.datetime.now().timestamp() * 1000), # epoch
"CloudWatchMetrics": [
{
"Namespace": self.namespace, # "test_namespace"
"Dimensions": [list(dimensions.keys())], # [ "service" ]
"Metrics": metric_names_and_units,
}
],
},
**dimensions, # "service": "test_service"
**metadata, # "username": "test"
**metric_names_and_values, # "single_metric": 1.0
}
metrics_timestamp = {"Timestamp": int(datetime.datetime.now().timestamp() * 1000)}
metric_set["_aws"] = {**metrics_timestamp, **metrics_definition}
metric_set.update(**dimensions)

try:
logger.debug("Validating serialized metrics against CloudWatch EMF schema", metric_set)
fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=metric_set)
logger.debug("Validating serialized metrics against CloudWatch EMF schema", embedded_metrics_object)
fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=embedded_metrics_object)
except fastjsonschema.JsonSchemaException as e:
message = f"Invalid format. Error: {e.message}, Invalid item: {e.name}" # noqa: B306, E501
raise SchemaValidationError(message)
return metric_set
return embedded_metrics_object

def add_dimension(self, name: str, value: str):
"""Adds given dimension to all metrics
Expand Down Expand Up @@ -225,6 +240,38 @@ def add_dimension(self, name: str, value: str):
else:
self.dimension_set[name] = str(value)

def add_metadata(self, key: str, value: Any):
    """Adds high-cardinality metadata for metrics object

    This will not be available during metrics visualization.
    Instead, this will be searchable through logs.

    If you're looking to add metadata to filter metrics, then
    use add_dimension method.

    Example
    -------
    **Add metrics metadata**

        metric.add_metadata(key="booking_id", value="booking_id")

    Parameters
    ----------
    key : str
        Metadata key; non-string keys are cast to str per the EMF spec
    value : Any
        Metadata value
    """
    logger.debug(f"Adding metadata: {key}:{value}")

    # Cast key to str according to EMF spec
    # Majority of keys are expected to be string already, so
    # checking before casting improves performance in most cases
    if isinstance(key, str):
        self.metadata_set[key] = value
    else:
        self.metadata_set[str(key)] = value

def __extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str:
"""Return metric value from metric unit whether that's str or MetricUnit enum

Expand Down
10 changes: 9 additions & 1 deletion aws_lambda_powertools/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,28 @@ def do_something():

_metrics = {}
_dimensions = {}
_metadata = {}

def __init__(self, service: str = None, namespace: str = None):
self.metric_set = self._metrics
self.dimension_set = self._dimensions
self.service = service
self.namespace = namespace
self.metadata_set = self._metadata

super().__init__(
metric_set=self.metric_set, dimension_set=self.dimension_set, namespace=self.namespace, service=self.service
metric_set=self.metric_set,
dimension_set=self.dimension_set,
namespace=self.namespace,
metadata_set=self.metadata_set,
service=self.service,
)

def clear_metrics(self):
    """Purge all metrics, dimensions and metadata accumulated in memory."""
    logger.debug("Clearing out existing metric set from memory")
    # The three sets are class-level dicts shared across instances,
    # so they are cleared in place rather than rebound.
    for in_memory_set in (self.metric_set, self.dimension_set, self.metadata_set):
        in_memory_set.clear()

def log_metrics(
self,
Expand Down
50 changes: 50 additions & 0 deletions docs/content/core/metrics.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,56 @@ with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="
...
```

## Adding metadata

You can use `add_metadata` for advanced use cases, where you want to add metadata as part of the serialized metrics object.

<Note type="info">
<strong>This will not be available during metrics visualization</strong> - Use <strong>dimensions</strong> for this purpose
</Note><br/>

```python:title=app.py
from aws_lambda_powertools import Metrics
from aws_lambda_powertools.metrics import MetricUnit

metrics = Metrics(namespace="ExampleApplication", service="booking")
metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)
metrics.add_metadata(key="booking_id", value="booking_uuid") # highlight-line
```

This will be available in CloudWatch Logs to ease operations on high-cardinality data.

<details>
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>

```json:title=cloudwatch_logs.json
{
"SuccessfulBooking": 1.0,
"_aws": {
"Timestamp": 1592234975665,
"CloudWatchMetrics": [
{
"Namespace": "ExampleApplication",
"Dimensions": [
[
"service"
]
],
"Metrics": [
{
"Name": "SuccessfulBooking",
"Unit": "Count"
}
]
}
]
},
"service": "booking",
"booking_id": "booking_uuid" // highlight-line
}
```
</details>

## Flushing metrics

As you finish adding all your metrics, you need to serialize and flush them to standard output. You can do that right before you return your response to the caller via `log_metrics`.
Expand Down
101 changes: 99 additions & 2 deletions tests/functional/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,19 @@ def service() -> str:
return "test_service"


@pytest.fixture
def metadata() -> Dict[str, str]:
    # Keyword arguments expected by MetricManager.add_metadata(**metadata)
    return dict(key="username", value="test")


@pytest.fixture
def a_hundred_metrics(namespace=namespace) -> List[Dict[str, str]]:
    # Exactly 100 distinct Count metrics to exercise the flush-at-100 limit
    return [{"name": f"metric_{idx}", "unit": "Count", "value": 1} for idx in range(100)]


def serialize_metrics(metrics: List[Dict], dimensions: List[Dict], namespace: str) -> Dict:
def serialize_metrics(
metrics: List[Dict], dimensions: List[Dict], namespace: str, metadatas: List[Dict] = None
) -> Dict:
""" Helper function to build EMF object from a list of metrics, dimensions """
my_metrics = MetricManager(namespace=namespace)
for dimension in dimensions:
Expand All @@ -75,15 +82,23 @@ def serialize_metrics(metrics: List[Dict], dimensions: List[Dict], namespace: st
for metric in metrics:
my_metrics.add_metric(**metric)

if metadatas is not None:
for metadata in metadatas:
my_metrics.add_metadata(**metadata)

if len(metrics) != 100:
return my_metrics.serialize_metric_set()


def serialize_single_metric(metric: Dict, dimension: Dict, namespace: str) -> Dict:
def serialize_single_metric(metric: Dict, dimension: Dict, namespace: str, metadata: Dict = None) -> Dict:
""" Helper function to build EMF object from a given metric, dimension and namespace """
my_metrics = MetricManager(namespace=namespace)
my_metrics.add_metric(**metric)
my_metrics.add_dimension(**dimension)

if metadata is not None:
my_metrics.add_metadata(**metadata)

return my_metrics.serialize_metric_set()


Expand Down Expand Up @@ -533,3 +548,85 @@ def lambda_handler(evt, ctx):

for metric_record in second_output["_aws"]["CloudWatchMetrics"]:
assert ["service"] in metric_record["Dimensions"]


def test_add_metadata_non_string_dimension_keys(service, metric, namespace):
    # GIVEN Metrics is initialized
    my_metrics = Metrics(service=service, namespace=namespace)
    my_metrics.add_metric(**metric)

    # WHEN we utilize add_metadata with non-string keys
    my_metrics.add_metadata(key=10, value="number_ten")

    # THEN we should have no exceptions
    # and metadata keys should be cast to strings per the EMF spec
    expected_metadata = {"10": "number_ten"}
    assert my_metrics.metadata_set == expected_metadata


def test_add_metadata(service, metric, namespace, metadata):
    # GIVEN Metrics is initialized
    my_metrics = Metrics(service=service, namespace=namespace)
    my_metrics.add_metric(**metric)

    # WHEN we utilize add_metadata with a string key
    my_metrics.add_metadata(**metadata)

    # THEN we should have no exceptions
    # and the key/value pair should be stored in the metadata set as-is
    assert my_metrics.metadata_set == {metadata["key"]: metadata["value"]}


def test_log_metrics_with_metadata(capsys, metric, dimension, namespace, service, metadata):
    # GIVEN Metrics is initialized
    my_metrics = Metrics(namespace=namespace)
    my_metrics.add_metric(**metric)
    my_metrics.add_dimension(**dimension)

    # WHEN we utilize log_metrics to serialize, adding metadata inside the handler
    @my_metrics.log_metrics
    def handler(event, context):
        my_metrics.add_metadata(**metadata)

    handler({}, {})

    actual = capture_metrics_output(capsys)
    expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace, metadata=metadata)

    # THEN we should have no exceptions and the flushed EMF object
    # should match the manually serialized one, timestamps aside
    remove_timestamp(metrics=[actual, expected])
    assert expected == actual


def test_serialize_metric_set_metric_definition(metric, dimension, namespace, service, metadata):
    # GIVEN Metrics is initialized with a metric, a dimension and metadata
    my_metrics = Metrics(service=service, namespace=namespace)
    my_metrics.add_metric(**metric)
    my_metrics.add_dimension(**dimension)
    my_metrics.add_metadata(**metadata)

    expected_metric_definition = {
        "single_metric": 1.0,
        "_aws": {
            "Timestamp": 1592237875494,
            "CloudWatchMetrics": [
                {
                    "Namespace": "test_namespace",
                    "Dimensions": [["test_dimension", "service"]],
                    "Metrics": [{"Name": "single_metric", "Unit": "Count"}],
                }
            ],
        },
        "service": "test_service",
        "username": "test",
        "test_dimension": "test",
    }

    # WHEN metrics are serialized manually
    serialized = my_metrics.serialize_metric_set()

    # THEN we should emit a valid embedded metric definition object
    # (timestamps differ per run, so they are stripped before comparing)
    assert "Timestamp" in serialized["_aws"]
    remove_timestamp(metrics=[serialized, expected_metric_definition])
    assert serialized == expected_metric_definition