Fix defect where duplicate metric data was serialized, causing incorrect metric values to be saved. #102


Merged: 1 commit, May 18, 2023
aws_embedded_metrics/serializers/log_serializer.py (7 additions, 1 deletion)

@@ -67,15 +67,19 @@ def create_body() -> Dict[str, Any]:

# Track batch number to know where to slice metric data
i = 0
complete_metrics = set()
while remaining_data:
    remaining_data = False
    current_body = create_body()

    for metric_name, metric in context.metrics.items():
        # ensure we don't add duplicates of metrics we already completed
        if metric_name in complete_metrics:
            continue

        if len(metric.values) == 1:
            current_body[metric_name] = metric.values[0]
            complete_metrics.add(metric_name)
        else:
            # Slice metric data as each batch cannot contain more than
            # MAX_DATAPOINTS_PER_METRIC entries for a given metric
@@ -87,6 +91,8 @@ def create_body() -> Dict[str, Any]:
            # of the metric value list
            if len(metric.values) > end_index:
                remaining_data = True
            else:
                complete_metrics.add(metric_name)

        metric_body = {"Name": metric_name, "Unit": metric.unit}
        if metric.storage_resolution == StorageResolution.HIGH:
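For readers outside the diff, here is a minimal standalone sketch of the batching loop above. The `batch_metrics` helper and the `MAX_DATAPOINTS_PER_METRIC = 100` value are illustrative assumptions, not the library's exact API; the real serializer also attaches metadata such as `Name` and `Unit` to each batch.

from typing import Any, Dict, List

# Assumed cap per batch; the real value lives in the library's constants.
MAX_DATAPOINTS_PER_METRIC = 100


def batch_metrics(metrics: Dict[str, List[Any]]) -> List[Dict[str, Any]]:
    """Slice each metric's values into batches, emitting every datapoint once."""
    batches = []
    complete_metrics = set()  # metrics fully serialized in an earlier batch
    i = 0  # batch number, used to compute each metric's slice window
    remaining_data = True
    while remaining_data:
        remaining_data = False
        body = {}
        for name, values in metrics.items():
            if name in complete_metrics:
                continue  # the fix: never re-emit a finished metric
            if len(values) == 1:
                body[name] = values[0]
                complete_metrics.add(name)
            else:
                start = i * MAX_DATAPOINTS_PER_METRIC
                end = start + MAX_DATAPOINTS_PER_METRIC
                body[name] = values[start:end]
                if len(values) > end:
                    remaining_data = True  # more slices of this metric remain
                else:
                    complete_metrics.add(name)
        batches.append(body)
        i += 1
    return batches

Before the fix, the single-value branch had no completed-set check, so a one-value metric was re-added to `body` on every pass of the outer loop, once per batch of the larger metric.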
tests/serializer/test_log_serializer.py (27 additions, 0 deletions)

@@ -248,6 +248,33 @@ def test_serialize_with_more_than_100_metrics_and_datapoints():
    assert metric_results == expected_results


def test_serialize_no_duplication_bug():
    """
    A bug existed where metrics with many values had to be broken up
    into multiple batches, and single-value metrics were duplicated
    across every batch. This test verifies the fix by ensuring no
    duplication.
    """
    context = get_context()
    single_expected_result = 1
    single_found_result = 0

    # create a metric with a single value
    single_key = "Metric-single"
    context.put_metric(single_key, single_expected_result)
    # add many datapoints of another metric so the log batches must be broken up
    for i in range(1000):
        context.put_metric("Metric-many", 0)

    results = serializer.serialize(context)

    # sum all values for the single metric to ensure there are no duplicates
    for batch in results:
        for metric_key, value in json.loads(batch).items():
            if metric_key == single_key:
                single_found_result += value
    assert single_expected_result == single_found_result


def test_serialize_with_multiple_metrics():
    # arrange
    metrics = 2
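The test's scenario can also be replayed against the `batch_metrics` sketch above (again a hypothetical helper, not the library's API) to see the fix in isolation:

metrics = {"Metric-single": [1], "Metric-many": [0] * 1000}
batches = batch_metrics(metrics)

# 1000 values at 100 per batch -> 10 batches
assert len(batches) == 10
# before the fix, "Metric-single" would have appeared in all 10 batches;
# with the completed-set check it is emitted exactly once
assert sum("Metric-single" in b for b in batches) == 1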