
Commit f44aaf2

liquidpele and reecepeg authored
Fix defect where duplicate data is saved if one metric requires splitting up into multiple log lines (awslabs#102)
Co-authored-by: reecepeg <[email protected]>
1 parent f1db319 commit f44aaf2

File tree: 2 files changed (+34, -1 lines)


aws_embedded_metrics/serializers/log_serializer.py

Lines changed: 7 additions & 1 deletion
@@ -67,15 +67,19 @@ def create_body() -> Dict[str, Any]:
 
         # Track batch number to know where to slice metric data
         i = 0
-
+        complete_metrics = set()
         while remaining_data:
             remaining_data = False
             current_body = create_body()
 
             for metric_name, metric in context.metrics.items():
+                # ensure we don't add duplicates of metrics we already completed
+                if metric_name in complete_metrics:
+                    continue
 
                 if len(metric.values) == 1:
                     current_body[metric_name] = metric.values[0]
+                    complete_metrics.add(metric_name)
                 else:
                     # Slice metric data as each batch cannot contain more than
                     # MAX_DATAPOINTS_PER_METRIC entries for a given metric
@@ -87,6 +91,8 @@ def create_body() -> Dict[str, Any]:
                     # of the metric value list
                     if len(metric.values) > end_index:
                         remaining_data = True
+                    else:
+                        complete_metrics.add(metric_name)
 
                 metric_body = {"Name": metric_name, "Unit": metric.unit}
                 if metric.storage_resolution == StorageResolution.HIGH:
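
In short, the change records every metric that has been fully serialized in a `complete_metrics` set, so later batches skip it instead of writing it again. The following is a minimal standalone sketch of that batching idea for readers outside the repository; it is not the library's actual code, and the `MAX_DATAPOINTS_PER_METRIC` value of 100 and the `batch_metrics` name are illustrative assumptions:

# Minimal standalone sketch (not the library's code) of the fix's batching idea:
# metrics that fit in one batch go into a "complete" set so later batches skip them.
MAX_DATAPOINTS_PER_METRIC = 100  # assumed limit, for illustration only


def batch_metrics(metrics: dict) -> list:
    """Split metric values into batches without duplicating finished metrics."""
    batches = []
    complete = set()
    i = 0  # batch index, used to slice long value lists
    remaining = True
    while remaining:
        remaining = False
        body = {}
        for name, values in metrics.items():
            if name in complete:
                continue  # already fully serialized in an earlier batch
            if len(values) == 1:
                body[name] = values[0]
                complete.add(name)
            else:
                start = i * MAX_DATAPOINTS_PER_METRIC
                end = start + MAX_DATAPOINTS_PER_METRIC
                body[name] = values[start:end]
                if len(values) > end:
                    remaining = True  # more slices of this metric still to emit
                else:
                    complete.add(name)
        batches.append(body)
        i += 1
    return batches


if __name__ == "__main__":
    out = batch_metrics({"Metric-single": [1], "Metric-many": [0] * 250})
    # "Metric-single" appears only in the first batch; "Metric-many" spans three.
    assert sum(1 for b in out for k in b if k == "Metric-single") == 1

Tracking completion explicitly is what prevents the pre-fix behavior, where a metric already emitted in full was re-added to every subsequent batch produced by the while loop.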

tests/serializer/test_log_serializer.py

Lines changed: 27 additions & 0 deletions
@@ -248,6 +248,33 @@ def test_serialize_with_more_than_100_metrics_and_datapoints():
     assert metric_results == expected_results
 
 
+def test_serialize_no_duplication_bug():
+    """
+    A bug existed where metrics with lots of values have to be broken up
+    but single value metrics got duplicated across each section.
+    This test verifies the fix to ensure no duplication.
+    """
+    context = get_context()
+    single_expected_result = 1
+    single_found_result = 0
+
+    # create a metric with a single value
+    single_key = "Metric-single"
+    context.put_metric(single_key, single_expected_result)
+    # add a lot of another metric so the log batches must be broken up
+    for i in range(1000):
+        context.put_metric("Metric-many", 0)
+
+    results = serializer.serialize(context)
+
+    # count up all values for the single metric to ensure no duplicates
+    for batch in results:
+        for metric_key, value in json.loads(batch).items():
+            if metric_key == single_key:
+                single_found_result += value
+    assert single_expected_result == single_found_result
+
+
 def test_serialize_with_multiple_metrics():
     # arrange
     metrics = 2
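
For a quick manual check outside the test suite, the new test's logic can be mirrored in a short script. This is a sketch only: `put_metric`, `serialize`, and `json.loads` on each batch appear in the diff above, while the import paths and `MetricsContext.empty()` are assumptions inferred from the file layout of this package rather than confirmed by this commit.

# Rough reproduction sketch mirroring test_serialize_no_duplication_bug above.
# Assumption: these import paths and MetricsContext.empty() exist as shown.
import json

from aws_embedded_metrics.logger.metrics_context import MetricsContext
from aws_embedded_metrics.serializers.log_serializer import LogSerializer

context = MetricsContext.empty()
context.put_metric("Metric-single", 1)
for _ in range(1000):
    # force the serializer to split this metric across multiple log events
    context.put_metric("Metric-many", 0)

batches = LogSerializer().serialize(context)

# With the fix, "Metric-single" contributes exactly 1 across all batches;
# before the fix it was re-emitted in every batch after the first.
total = sum(
    value
    for batch in batches
    for key, value in json.loads(batch).items()
    if key == "Metric-single"
)
print(total)  # expected: 1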

0 commit comments
