diff --git a/Dockerfile b/Dockerfile index 554766df..a1b24bf3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,10 +47,10 @@ RUN rm -rf \ # https://docs.python.org/3.11/using/cmdline.html#cmdoption-O # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONNODEBUGRANGES RUN PYTHONNODEBUGRANGES=1 python -OO -m compileall -b ./python/lib/$runtime/site-packages -# remove all .py files except ddtrace/contrib/*/__init__.py which are necessary +# remove all .py files except ddtrace/contrib/*/patch.py which are necessary # for ddtrace.patch to discover instrumationation packages. RUN find ./python/lib/$runtime/site-packages -name \*.py | grep -v ddtrace/contrib | xargs rm -rf -RUN find ./python/lib/$runtime/site-packages/ddtrace/contrib -name \*.py | grep -v __init__ | xargs rm -rf +RUN find ./python/lib/$runtime/site-packages/ddtrace/contrib -name \*.py | grep -v patch.py | xargs rm -rf RUN find ./python/lib/$runtime/site-packages -name __pycache__ -type d -exec rm -r {} \+ # When building ddtrace from branch, remove extra source files. These are diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index 9189eb3b..a73423e1 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -385,21 +385,24 @@ def _parse_high_64_bits(trace_tags: str) -> str: def _generate_sfn_parent_id(context: dict) -> int: """ - The upstream Step Function can propagate its execution context to downstream Lambdas. The - Lambda can use these details to share the same traceID and infer its parent's spanID. + Generates a stable parent span ID for a downstream Lambda invoked by a Step Function. The + upstream Step Function execution context is used to infer the parent's span ID, ensuring trace + continuity. - Excluding redriveCount when its 0 to account for cases where customers are using an old - version of the Lambda layer that doesn't use this value for its parentID generation. 
+ `RetryCount` and `RedriveCount` are appended unless both are zero, to maintain + compatibility with older Lambda layers that did not include these fields. """ execution_id = context.get("Execution").get("Id") redrive_count = context.get("Execution").get("RedriveCount", 0) state_name = context.get("State").get("Name") state_entered_time = context.get("State").get("EnteredTime") + retry_count = context.get("State").get("RetryCount", 0) - redrive_postfix = "" if redrive_count == 0 else f"#{redrive_count}" + include_counts = not (retry_count == 0 and redrive_count == 0) + counts_suffix = f"#{retry_count}#{redrive_count}" if include_counts else "" return _deterministic_sha256_hash( - f"{execution_id}#{state_name}#{state_entered_time}{redrive_postfix}", + f"{execution_id}#{state_name}#{state_entered_time}{counts_suffix}", HIGHER_64_BITS, ) diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 89a7712c..5480a92c 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -622,6 +622,7 @@ def test_step_function_trace_data(self): "Name": "72a7ca3e-901c-41bb-b5a3-5f279b92a316", "RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j", "StartTime": "2024-12-04T19:38:04.069Z", + "RedriveCount": 0, }, "State": { "Name": "Lambda Invoke", @@ -657,6 +658,51 @@ def test_step_function_trace_data(self): expected_context, ) + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_retry(self): + lambda_ctx = get_mock_context() + sfn_event = { + "Execution": { + "Id": "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316", + "Name": "72a7ca3e-901c-41bb-b5a3-5f279b92a316", + "RoleArn": "arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j", + "StartTime": "2024-12-04T19:38:04.069Z", + "RedriveCount": 0, + }, + "State": { + "Name": "Lambda Invoke", + "EnteredTime":
"2024-12-04T19:38:04.118Z", + "RetryCount": 1, + }, + "StateMachine": { + "Id": "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine", + "Name": "abhinav-activity-state-machine", + }, + } + ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) + self.assertEqual(source, "event") + expected_context = Context( + trace_id=435175499815315247, + span_id=5063839446130725204, + sampling_priority=1, + meta={"_dd.p.tid": "3e7a89d1b7310603"}, + ) + self.assertEqual(ctx, expected_context) + self.assertEqual( + get_dd_trace_context(), + { + TraceHeader.TRACE_ID: "435175499815315247", + TraceHeader.PARENT_ID: "10713633173203262661", + TraceHeader.SAMPLING_PRIORITY: "1", + TraceHeader.TAGS: "_dd.p.tid=3e7a89d1b7310603", + }, + ) + create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) + self.mock_send_segment.assert_called_with( + XraySubsegment.TRACE_KEY, + expected_context, + ) + # https://github.com/DataDog/logs-backend/blob/c17618cb552fc369ca40282bae0a65803f82f694/domains/serverless/apps/logs-to-traces-reducer/src/test/resources/test-json-files/stepfunctions/RedriveTest/snapshots/RedriveLambdaSuccessTraceMerging.json#L46 @with_trace_propagation_style("datadog") def test_step_function_trace_data_redrive(self): @@ -683,7 +729,7 @@ def test_step_function_trace_data_redrive(self): self.assertEqual(source, "event") expected_context = Context( trace_id=435175499815315247, - span_id=5063839446130725204, + span_id=8782364156266188026, sampling_priority=1, meta={"_dd.p.tid": "3e7a89d1b7310603"}, ) @@ -716,6 +762,7 @@ def test_step_function_trace_data_lambda_root(self): "State": { "Name": "my-awesome-state", "EnteredTime": "Mon Nov 13 12:43:33 PST 2023", + "RetryCount": 0, }, "x-datadog-trace-id": "5821803790426892636", "x-datadog-tags": "_dd.p.dm=-0,_dd.p.tid=672a7cb100000000", @@ -759,6 +806,7 @@ def test_step_function_trace_data_sfn_root(self): "State": { "Name": "my-awesome-state", "EnteredTime": "Mon Nov 13 12:43:33 
PST 2023", + "RetryCount": 0, }, "RootExecutionId": "4875aba4-ae31-4a4c-bf8a-63e9eee31dad", "serverless-version": "v1",