Skip to content

Commit 189f49e

Browse files
Update Step Functions Parent ID Generation (#559)
Co-authored-by: purple4reina <[email protected]>
1 parent d580d5f commit 189f49e

File tree

2 files changed

+58
-7
lines changed

2 files changed

+58
-7
lines changed

datadog_lambda/tracing.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -385,21 +385,24 @@ def _parse_high_64_bits(trace_tags: str) -> str:
385385

386386
def _generate_sfn_parent_id(context: dict) -> int:
387387
"""
388-
The upstream Step Function can propagate its execution context to downstream Lambdas. The
389-
Lambda can use these details to share the same traceID and infer its parent's spanID.
388+
Generates a stable parent span ID for a downstream Lambda invoked by a Step Function. The
389+
upstream Step Function execution context is used to infer the parent's span ID, ensuring trace
390+
continuity.
390391
391-
Excluding redriveCount when it's 0 to account for cases where customers are using an old
392-
version of the Lambda layer that doesn't use this value for its parentID generation.
392+
`RetryCount` and `RedriveCount` are appended unless both are zero; omitting them in that case maintains
393+
compatibility with older Lambda layers that did not include these fields.
393394
"""
394395
execution_id = context.get("Execution").get("Id")
395396
redrive_count = context.get("Execution").get("RedriveCount", 0)
396397
state_name = context.get("State").get("Name")
397398
state_entered_time = context.get("State").get("EnteredTime")
399+
retry_count = context.get("State").get("RetryCount", 0)
398400

399-
redrive_postfix = "" if redrive_count == 0 else f"#{redrive_count}"
401+
include_counts = not (retry_count == 0 and redrive_count == 0)
402+
counts_suffix = f"#{retry_count}#{redrive_count}" if include_counts else ""
400403

401404
return _deterministic_sha256_hash(
402-
f"{execution_id}#{state_name}#{state_entered_time}{redrive_postfix}",
405+
f"{execution_id}#{state_name}#{state_entered_time}{counts_suffix}",
403406
HIGHER_64_BITS,
404407
)
405408

tests/test_tracing.py

+49-1
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,7 @@ def test_step_function_trace_data(self):
622622
"Name": "72a7ca3e-901c-41bb-b5a3-5f279b92a316",
623623
"RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j",
624624
"StartTime": "2024-12-04T19:38:04.069Z",
625+
"RedriveCount": 0,
625626
},
626627
"State": {
627628
"Name": "Lambda Invoke",
@@ -657,6 +658,51 @@ def test_step_function_trace_data(self):
657658
expected_context,
658659
)
659660

661+
@with_trace_propagation_style("datadog")
662+
def test_step_function_trace_data_retry(self):
663+
lambda_ctx = get_mock_context()
664+
sfn_event = {
665+
"Execution": {
666+
"Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316",
667+
"Name": "72a7ca3e-901c-41bb-b5a3-5f279b92a316",
668+
"RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j",
669+
"StartTime": "2024-12-04T19:38:04.069Z",
670+
"RedriveCount": 0,
671+
},
672+
"State": {
673+
"Name": "Lambda Invoke",
674+
"EnteredTime": "2024-12-04T19:38:04.118Z",
675+
"RetryCount": 1,
676+
},
677+
"StateMachine": {
678+
"Id": "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine",
679+
"Name": "abhinav-activity-state-machine",
680+
},
681+
}
682+
ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx)
683+
self.assertEqual(source, "event")
684+
expected_context = Context(
685+
trace_id=435175499815315247,
686+
span_id=5063839446130725204,
687+
sampling_priority=1,
688+
meta={"_dd.p.tid": "3e7a89d1b7310603"},
689+
)
690+
self.assertEqual(ctx, expected_context)
691+
self.assertEqual(
692+
get_dd_trace_context(),
693+
{
694+
TraceHeader.TRACE_ID: "435175499815315247",
695+
TraceHeader.PARENT_ID: "10713633173203262661",
696+
TraceHeader.SAMPLING_PRIORITY: "1",
697+
TraceHeader.TAGS: "_dd.p.tid=3e7a89d1b7310603",
698+
},
699+
)
700+
create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY)
701+
self.mock_send_segment.assert_called_with(
702+
XraySubsegment.TRACE_KEY,
703+
expected_context,
704+
)
705+
660706
# https://github.com/DataDog/logs-backend/blob/c17618cb552fc369ca40282bae0a65803f82f694/domains/serverless/apps/logs-to-traces-reducer/src/test/resources/test-json-files/stepfunctions/RedriveTest/snapshots/RedriveLambdaSuccessTraceMerging.json#L46
661707
@with_trace_propagation_style("datadog")
662708
def test_step_function_trace_data_redrive(self):
@@ -683,7 +729,7 @@ def test_step_function_trace_data_redrive(self):
683729
self.assertEqual(source, "event")
684730
expected_context = Context(
685731
trace_id=435175499815315247,
686-
span_id=5063839446130725204,
732+
span_id=8782364156266188026,
687733
sampling_priority=1,
688734
meta={"_dd.p.tid": "3e7a89d1b7310603"},
689735
)
@@ -716,6 +762,7 @@ def test_step_function_trace_data_lambda_root(self):
716762
"State": {
717763
"Name": "my-awesome-state",
718764
"EnteredTime": "Mon Nov 13 12:43:33 PST 2023",
765+
"RetryCount": 0,
719766
},
720767
"x-datadog-trace-id": "5821803790426892636",
721768
"x-datadog-tags": "_dd.p.dm=-0,_dd.p.tid=672a7cb100000000",
@@ -759,6 +806,7 @@ def test_step_function_trace_data_sfn_root(self):
759806
"State": {
760807
"Name": "my-awesome-state",
761808
"EnteredTime": "Mon Nov 13 12:43:33 PST 2023",
809+
"RetryCount": 0,
762810
},
763811
"RootExecutionId": "4875aba4-ae31-4a4c-bf8a-63e9eee31dad",
764812
"serverless-version": "v1",

0 commit comments

Comments
 (0)