diff --git a/aws_embedded_metrics/serializers/log_serializer.py b/aws_embedded_metrics/serializers/log_serializer.py
index cf8e05a..36bbb12 100644
--- a/aws_embedded_metrics/serializers/log_serializer.py
+++ b/aws_embedded_metrics/serializers/log_serializer.py
@@ -67,15 +67,19 @@ def create_body() -> Dict[str, Any]:
 
         # Track batch number to know where to slice metric data
         i = 0
-
+        complete_metrics = set()
         while remaining_data:
             remaining_data = False
             current_body = create_body()
 
             for metric_name, metric in context.metrics.items():
+                # ensure we don't add duplicates of metrics we already completed
+                if metric_name in complete_metrics:
+                    continue
 
                 if len(metric.values) == 1:
                     current_body[metric_name] = metric.values[0]
+                    complete_metrics.add(metric_name)
                 else:
                     # Slice metric data as each batch cannot contain more than
                     # MAX_DATAPOINTS_PER_METRIC entries for a given metric
@@ -87,6 +91,8 @@ def create_body() -> Dict[str, Any]:
                 # of the metric value list
                 if len(metric.values) > end_index:
                     remaining_data = True
+                else:
+                    complete_metrics.add(metric_name)
 
                 metric_body = {"Name": metric_name, "Unit": metric.unit}
                 if metric.storage_resolution == StorageResolution.HIGH:
diff --git a/tests/serializer/test_log_serializer.py b/tests/serializer/test_log_serializer.py
index 60d1e63..ff5e77a 100644
--- a/tests/serializer/test_log_serializer.py
+++ b/tests/serializer/test_log_serializer.py
@@ -248,6 +248,33 @@ def test_serialize_with_more_than_100_metrics_and_datapoints():
     assert metric_results == expected_results
 
 
+def test_serialize_no_duplication_bug():
+    """
+    A bug existed where metrics with many values had to be broken up
+    into batches, but single-value metrics were duplicated in every
+    batch. This test verifies the fix by asserting no duplication.
+    """
+    context = get_context()
+    single_expected_result = 1
+    single_found_result = 0
+
+    # create a metric with a single value
+    single_key = "Metric-single"
+    context.put_metric(single_key, single_expected_result)
+    # add many datapoints of another metric so the log batches must be broken up
+    for i in range(1000):
+        context.put_metric("Metric-many", 0)
+
+    results = serializer.serialize(context)
+
+    # count up all values for the single metric to ensure no duplicates
+    for batch in results:
+        for metric_key, value in json.loads(batch).items():
+            if metric_key == single_key:
+                single_found_result += value
+    assert single_expected_result == single_found_result
+
+
 def test_serialize_with_multiple_metrics():
     # arrange
     metrics = 2
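
Note: the standalone sketch below is not part of the patch; batch_metric_values and its inputs are illustrative stand-ins, not library API. It isolates the batching pattern the fix relies on: without the completed-set bookkeeping, a metric whose values fit in one batch would be re-emitted in every later batch forced by a longer metric, which is exactly the duplication the new test guards against.

from typing import Any, Dict, List, Set

MAX_DATAPOINTS_PER_METRIC = 100  # mirrors the serializer's per-metric batch cap

def batch_metric_values(metrics: Dict[str, List[float]]) -> List[Dict[str, Any]]:
    batches: List[Dict[str, Any]] = []
    complete: Set[str] = set()
    i = 0
    remaining = True
    while remaining:
        remaining = False
        body: Dict[str, Any] = {}
        for name, values in metrics.items():
            if name in complete:
                continue  # already fully emitted in an earlier batch
            start = i * MAX_DATAPOINTS_PER_METRIC
            end = (i + 1) * MAX_DATAPOINTS_PER_METRIC
            body[name] = values[start:end]
            if len(values) > end:
                remaining = True  # more slices of this metric still to emit
            else:
                complete.add(name)  # done; skip this metric in later batches
        batches.append(body)
        i += 1
    return batches

# The single-value metric appears exactly once, even though the
# 1000-value metric forces ten batches.
out = batch_metric_values({"single": [1.0], "many": [0.0] * 1000})
assert len(out) == 10
assert sum("single" in b for b in out) == 1

Dropping the two complete.add(...) calls and the membership check reproduces the pre-fix behavior: "single" would then appear in all ten batches.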